Example 1
    def __init__(self,
                 feature_hdf5_path,
                 train_csv,
                 validate_csv,
                 holdout_fold,
                 batch_size,
                 seed=1234):
        '''Data generator for training and validation. 
        
        Args:
          feature_hdf5_path: string, path of hdf5 feature file
          train_csv: string, path of train csv file
          validate_csv: string, path of validate csv file
          holdout_fold: set to 1 for development or 'none' to train
              on all data without validation
          batch_size: int
          seed: int, random seed
        '''

        self.batch_size = batch_size
        self.random_state = np.random.RandomState(seed)

        # self.classes_num = classes_num
        self.in_domain_classes_num = len(config.labels)
        self.all_classes_num = len(config.labels)
        self.lb_to_idx = config.lb_to_idx
        self.idx_to_lb = config.idx_to_lb

        # Load training data
        load_time = time.time()

        self.data_dict = self.load_hdf5(feature_hdf5_path)

        train_meta = read_metadata(train_csv)
        validate_meta = read_metadata(validate_csv)

        self.train_audio_indexes = self.get_audio_indexes(
            train_meta, self.data_dict, holdout_fold, 'train')

        self.validate_audio_indexes = self.get_audio_indexes(
            validate_meta, self.data_dict, holdout_fold, 'validate')

        if holdout_fold == 'none':
            self.train_audio_indexes = np.concatenate(
                (self.train_audio_indexes, self.validate_audio_indexes),
                axis=0)

            self.validate_audio_indexes = np.array([])

        logging.info('Load data time: {:.3f} s'.format(time.time() -
                                                       load_time))
        logging.info('Training audio num: {}'.format(
            len(self.train_audio_indexes)))
        logging.info('Validation audio num: {}'.format(
            len(self.validate_audio_indexes)))

        self.random_state.shuffle(self.train_audio_indexes)
        self.pointer = 0
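This snippet begins at __init__, so the enclosing class name is not visible. A hypothetical usage sketch, assuming the class is called DataGenerator and that config.labels is populated:

# Hypothetical usage; the actual class name is not shown in this snippet.
data_generator = DataGenerator(
    feature_hdf5_path='features/logmel.h5',
    train_csv='meta/train.csv',
    validate_csv='meta/validate.csv',
    holdout_fold=1,      # or 'none' to train on all data without validation
    batch_size=32)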
Example 2
File: main.py Project: tqbl/gccaps
def evaluate_sed(dataset):
    """Evaluate the sound event detection predictions and print results.

    Args:
        dataset: Dataset for retrieving ground truth.
    """
    import evaluation
    import inference

    names, ground_truth = utils.read_metadata(dataset.metadata_path,
                                              weakly_labeled=False)

    # Load and binarize predictions
    path = cfg.predictions_path.format('sed', dataset.name)
    _, y_pred = utils.read_predictions(path)
    threshold = _determine_threshold(cfg.sed_threshold)
    y_pred_b = inference.binarize_predictions_3d(y_pred,
                                                 threshold=threshold,
                                                 n_dilation=cfg.sed_dilation,
                                                 n_erosion=cfg.sed_erosion)

    # Convert to event list format and evaluate SED performance
    resolution = cfg.clip_duration / y_pred.shape[2]
    predictions = inference.generate_event_lists(y_pred_b, resolution)
    metrics = evaluation.evaluate_sed(ground_truth, predictions, names)

    # Ensure output directory exists and write results
    os.makedirs(os.path.dirname(cfg.results_path), exist_ok=True)
    output_path = cfg.results_path.format('sed', dataset.name)
    with open(output_path, 'w') as f:
        f.write(metrics.result_report_overall())
        f.write(metrics.result_report_class_wise())
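The _determine_threshold helper is not shown in these snippets. Since Example 3 pickles per-class thresholds to thresholds.p, a plausible (but assumed) sketch is that a sentinel config value triggers loading those cached thresholds:

def _determine_threshold(threshold):
    # Assumed behavior: numeric values pass through unchanged, anything
    # else falls back to the per-class thresholds cached in Example 3.
    if isinstance(threshold, (int, float)):
        return threshold
    path = os.path.join(os.path.dirname(cfg.predictions_path), 'thresholds.p')
    with open(path, 'rb') as f:
        return pickle.load(f)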
Example 3
File: main.py Project: tqbl/gccaps
def evaluate_audio_tagging(dataset, compute_thresholds=False):
    """Evaluate the audio tagging predictions and write results.

    Args:
        dataset: Dataset for retrieving ground truth.
        compute_thresholds (bool): Whether to compute and record
            per-class optimal thresholds.

    See Also:
        :func:`evaluation.compute_thresholds`
    """
    import evaluation

    _, y_true = utils.read_metadata(dataset.metadata_path)
    path = cfg.predictions_path.format('at', dataset.name)
    _, y_pred = utils.read_predictions(path)

    # Compute thresholds if flag is set
    if compute_thresholds:
        thresholds = evaluation.compute_thresholds(y_true, y_pred)
        output_path = os.path.join(os.path.dirname(cfg.predictions_path),
                                   'thresholds.p')
        with open(output_path, 'wb') as f:
            pickle.dump(thresholds, f)

    # Evaluate audio tagging performance
    threshold = _determine_threshold(cfg.at_threshold)
    scores = evaluation.evaluate_audio_tagging(
        y_true, y_pred, threshold=threshold)

    # Ensure output directory exists and write results
    os.makedirs(os.path.dirname(cfg.results_path), exist_ok=True)
    output_path = cfg.results_path.format('at', dataset.name)
    evaluation.write_audio_tagging_results(scores, output_path)
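A short usage sketch; cfg.test_set is an assumed attribute name, following the cfg.training_set convention seen in Example 4:

# Evaluate tagging on the test set and record per-class optimal thresholds
# (cfg.test_set is hypothetical; only cfg.training_set appears in Example 4).
evaluate_audio_tagging(cfg.test_set, compute_thresholds=True)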
Example 4
File: main.py Project: tqbl/gccaps
def _load_data(dataset, is_training=False):
    """Load input data, target values and file names for a dataset.

    The input data is assumed to be a dataset of feature vectors. These
    feature vectors are standardized using a scaler that is either
    loaded from disk (if it exists) or computed on-the-fly. The latter
    is only possible if the input data is training data, which is
    indicated by the `is_training` parameter.

    Target values and file names are read from the metadata file.

    Args:
        dataset: Structure encapsulating dataset information.
        is_training (bool): Whether the input data is training data.

    Returns:
        x (np.ndarray): The input data.
        y (np.ndarray): The target values.
        names (list): The associated file names.
    """
    import data_augmentation as aug
    import features

    features_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')
    x = utils.timeit(lambda: features.load_features(features_path),
                     'Loaded features of %s dataset' % dataset.name)

    # Clip dynamic range to 90 dB
    x = np.maximum(x, x.max() - 90.0)

    # Load scaler from file if cached, or else compute it.
    scaler_path = cfg.scaler_path
    if os.path.exists(scaler_path) or not is_training:
        with open(scaler_path, 'rb') as f:
            scaler = pickle.load(f)
    else:
        scaler = utils.timeit(lambda: utils.compute_scaler(x),
                              'Computed standard scaler')
        with open(scaler_path, 'wb') as f:
            pickle.dump(scaler, f)

    x = utils.timeit(lambda: utils.standardize(x, scaler),
                     'Standardized %s features' % dataset.name)

    names, y = utils.timeit(lambda: utils.read_metadata(dataset.metadata_path),
                            'Loaded %s metadata' % dataset.name)
    if dataset == cfg.training_set and cfg.enable_augmentation:
        names, y = aug.expand_metadata((names, y))

    return x, y, names
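utils.compute_scaler and utils.standardize are not shown. A minimal sketch of what they presumably do, assuming features of shape (clips, frames, bands) and a scikit-learn StandardScaler; this is an assumption, not the project's actual code:

from sklearn.preprocessing import StandardScaler

def compute_scaler(x):
    # Fit per-band mean and standard deviation over all clips and frames.
    scaler = StandardScaler()
    scaler.fit(x.reshape(-1, x.shape[-1]))
    return scaler

def standardize(x, scaler):
    # Apply the fitted statistics and restore the original shape.
    shape = x.shape
    return scaler.transform(x.reshape(-1, shape[-1])).reshape(shape)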
Example 5
File: main.py Project: tqbl/gccaps
def extract(dataset):
    """Extract feature vectors from the given dataset.

    Args:
        dataset: Dataset to extract features from.
    """
    import data_augmentation as aug
    import features

    # Use a logmel representation for feature extraction
    extractor = features.LogmelExtractor(sample_rate=cfg.sample_rate,
                                         n_window=cfg.n_window,
                                         hop_length=cfg.hop_length,
                                         n_mels=cfg.n_mels,
                                         )

    # Prepare for data augmentation if enabled
    file_names, target_values = utils.read_metadata(dataset.metadata_path)
    if dataset == cfg.training_set and cfg.enable_augmentation:
        n_transforms_iter = aug.transform_counts(target_values)
        file_names = aug.expand_metadata((file_names, target_values))[0]
    else:
        n_transforms_iter = None

    # Ensure output directory exists and set file path
    os.makedirs(cfg.extraction_path, exist_ok=True)
    output_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')

    # Save free parameters to disk
    utils.log_parameters(cfg.logmel, os.path.join(cfg.extraction_path,
                                                  'parameters.json'))

    # Generate features for each audio clip in the dataset
    features.extract_dataset(dataset.path,
                             file_names,
                             extractor,
                             cfg.clip_duration,
                             output_path,
                             n_transforms_iter=n_transforms_iter,
                             )
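A usage sketch for running extraction over each dataset split; cfg.training_set appears in Example 4, while the other names are assumptions:

# Extract features for every split (cfg.validation_set and cfg.test_set
# are hypothetical names; only cfg.training_set is confirmed above).
for dataset in (cfg.training_set, cfg.validation_set, cfg.test_set):
    extract(dataset)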
Example 6
def calculate_feature_for_all_audio_files(args):
    '''Calculate features of audio files and write them to an HDF5 file.
    
    Args:
      dataset_dir: string
      workspace: string
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    frames_per_second = config.frames_per_second
    frames_num = config.frames_num
    total_samples = config.total_samples
    lb_to_idx = config.lb_to_idx
    audio_duration_clip = config.audio_duration_clip
    audio_stride_clip = config.audio_stride_clip
    audio_duration = config.audio_duration
    audio_num = config.audio_num
    total_frames = config.total_frames

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    audios_dir = os.path.join(dataset_dir, 'audio')
    metadata_path = os.path.join(dataset_dir, 'meta', 'esc50.csv')
    feature_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second,
                                                mel_bins))
    create_folder(os.path.dirname(feature_path))

    # Feature extractor
    feature_extractor = LogMelExtractor(sample_rate=sample_rate,
                                        window_size=window_size,
                                        hop_size=hop_size,
                                        mel_bins=mel_bins,
                                        fmin=fmin,
                                        fmax=fmax)

    # Read metadata
    meta_dict = read_metadata(metadata_path)

    # Extract features and targets
    if mini_data:
        mini_num = 10
        total_num = len(meta_dict['filename'])
        random_state = np.random.RandomState(1234)
        indexes = random_state.choice(total_num, size=mini_num, replace=False)
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][indexes]

    print('Extracting features of all audio files ...')
    extract_time = time.time()
    # HDF5 file for storing features and targets
    hf = h5py.File(feature_path, 'w')

    hf.create_dataset(
        name='filename',
        data=[filename.encode() for filename in meta_dict['filename']],
        dtype='S80')

    if 'fold' in meta_dict.keys():
        hf.create_dataset(name='fold',
                          data=[fold for fold in meta_dict['fold']],
                          dtype=np.int64)

    if 'target' in meta_dict.keys():
        hf.create_dataset(name='target',
                          data=[target for target in meta_dict['target']],
                          dtype=np.int64)

    if 'category' in meta_dict.keys():
        hf.create_dataset(
            name='category',
            data=[category.encode() for category in meta_dict['category']],
            dtype='S80')
    if 'esc10' in meta_dict.keys():
        hf.create_dataset(name='esc10',
                          data=[esc10 for esc10 in meta_dict['esc10']],
                          dtype=np.bool_)
    if 'src_file' in meta_dict.keys():
        hf.create_dataset(
            name='src_file',
            data=[src_file for src_file in meta_dict['src_file']],
            dtype=np.int64)
    if 'take' in meta_dict.keys():
        hf.create_dataset(name='take',
                          data=[take.encode() for take in meta_dict['take']],
                          dtype='S24')

    hf.create_dataset(name='feature',
                      shape=(0, audio_num, frames_num, mel_bins),
                      maxshape=(None, audio_num, frames_num, mel_bins),
                      dtype=np.float32)

    for (n, filename) in enumerate(meta_dict['filename']):
        audio_path = os.path.join(audios_dir, filename)
        print(n, audio_path)

        # Read audio
        (audio, _) = read_audio(audio_path=audio_path, target_fs=sample_rate)

        # Pad or truncate audio recording to the same length
        audio = pad_truncate_sequence(audio, total_samples)
        # Extract the log mel spectrogram of the whole recording
        feature = feature_extractor.transform(audio)

        # Remove the extra log mel spectrogram frames caused by padding zero
        feature = feature[0:total_frames]

        # Split the spectrogram into fixed-length clips
        fea_list = []
        for i in range(audio_num):
            start = i * frames_per_second * audio_stride_clip
            end = (i + audio_duration_clip) * frames_per_second * audio_stride_clip
            fea_list.append(feature[start:end])

        hf['feature'].resize((n + 1, audio_num, frames_num, mel_bins))
        hf['feature'][n] = fea_list

    hf.close()

    print('Wrote HDF5 file to {} in {:.3f} s'.format(
        feature_path,
        time.time() - extract_time))
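A minimal sketch of reading the written file back, using only the dataset names created above:

import h5py

with h5py.File(feature_path, 'r') as hf:
    filenames = [name.decode() for name in hf['filename'][:]]
    # Shape: (clips, audio_num, frames_num, mel_bins)
    features = hf['feature'][:]
    folds = hf['fold'][:] if 'fold' in hf else None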
Example 7
data = read_csv('https://ocgptweb.azurewebsites.net/CSVDownload')
data = data.drop(columns=['CountryName', 'ConfirmedCases', 'ConfirmedDeaths'])
data = data.drop(
    columns=[col for col in data.columns if col.endswith('_Notes')])
data = data.drop(
    columns=[col for col in data.columns if col.endswith('_IsGeneral')])
data['Date'] = data['Date'].apply(lambda x: datetime_isoformat(x, '%Y%m%d'))

# Join with ISO data
iso = read_csv(ROOT / 'input' /
               'ISO-3166-2.csv')[['3166-2-Alpha-2', '3166-2-Alpha-3']]
data = data.rename(columns={'CountryCode': '3166-2-Alpha-3'}).merge(iso)

# Join with our metadata
metadata = read_metadata()[['Key', 'CountryCode']]
data = data.rename(columns={'3166-2-Alpha-2': 'CountryCode'}).merge(metadata)

# Use consistent naming convention for columns
data = data[[
    col for col in data.columns
    if '_' in col or col in ('Date', 'Key', 'StringencyIndex')
]]
data.columns = [col.split('_')[-1] for col in data.columns]
data.columns = [
    re.sub(r'\s(\w)', lambda m: m.group(1).upper(), col)
    for col in data.columns
]

# Fix column typo
data = data.rename(columns={'ClosePublicTransport': 'PublicTransportClosing'})
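datetime_isoformat is a project helper not shown here; presumably it parses a value with the given format string and returns an ISO 8601 date, along these lines:

from datetime import datetime

def datetime_isoformat(value, fmt):
    # Assumed behavior: '20200315' with '%Y%m%d' becomes '2020-03-15'.
    return datetime.strptime(str(value), fmt).date().isoformat()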
Example 8
        'MaximumTemperature', 'Rainfall', 'Snowfall'
    ]
    return data[[col for col in output_columns if col in data.columns]]


# Get all the weather stations with data up until 2020
stations_url = 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt'
stations = read_csv(stations_url,
                    sep=r'\s+',
                    names=('id', 'lat', 'lon', 'measurement', 'year_start',
                           'year_end'))
stations = stations[stations.year_end == 2020][[
    'id', 'lat', 'lon', 'measurement'
]]

# Filter stations that at least provide max and min temps
measurements = ['TMIN', 'TMAX']
stations = stations.groupby(['id', 'lat', 'lon']).sum()
stations = stations[stations.measurement.apply(
    lambda x: all(m in x for m in measurements))]
stations = stations.reset_index()

# Get all the POI from metadata and go through each key
metadata = read_metadata()[['Key', 'Latitude', 'Longitude']]
# Bottleneck is network so we can use lots of threads in parallel
records = list(
    tqdm(ThreadPool(8).imap_unordered(station_records, metadata.iterrows()),
         total=len(metadata)))

concat(records).sort_values(['Key', 'Date']).to_csv(sys.stdout, index=False)
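station_records is not shown either; it presumably matches each metadata row to its nearest weather station before downloading that station's records. A nearest-station lookup using the haversine distance might look like this (all names hypothetical):

import numpy as np

def nearest_station(stations, lat, lon):
    # Great-circle (haversine) distance from (lat, lon) to every station.
    lat1, lon1 = np.radians(lat), np.radians(lon)
    lat2, lon2 = np.radians(stations.lat), np.radians(stations.lon)
    a = (np.sin((lat2 - lat1) / 2) ** 2
         + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2)
    dist_km = 6371 * 2 * np.arcsin(np.sqrt(a))
    return stations.iloc[dist_km.values.argmin()]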
Example 9
                      root / 'output' / ('%s_latest.json' % name),
                      orient='records')


# Root path of the project
ROOT = Path(os.path.dirname(__file__)) / '..'

# Read the minimal data file and write to JSON output
minimal = read_csv(ROOT / 'output' / 'data_minimal.csv').sort_values(
    ['Date', 'Key'])
dataframe_to_json(minimal,
                  ROOT / 'output' / 'data_minimal.json',
                  orient='records')

# Read the metadata file and write to output
metadata = read_metadata()
metadata = metadata[[
    col for col in metadata.columns if not col.startswith('_')
]]
metadata.to_csv(ROOT / 'output' / 'metadata.csv', index=False)
dataframe_to_json(metadata,
                  ROOT / 'output' / 'metadata.json',
                  orient='records')

# Merge minimal with the metadata file to create full file and write to output
full = minimal.merge(metadata).sort_values(['Date', 'Key'])
full = full[[
    'Date',
    'Key',
    'CountryCode',
    'CountryName',
Example 10
parser = argparse.ArgumentParser()
parser.add_argument('pred_path', help='path to predictions directory')
parser.add_argument('output_path', help='output file path')
args = parser.parse_args()

# Trim the time length of the input files
model_dirs = []
for model in MODELS:
    model_dirs.append(os.path.join(args.pred_path, model))
utils.trim_clips(model_dirs)

# Collect predictions for each model
preds = []
feats = []
for model in MODELS:
    df = utils.read_metadata(os.path.join(args.pred_path, model))
    preds.append((df > 0.5).astype(int).unstack())
    feats.append(df)

# Print correlation matrix
print(pd.concat(preds, axis=1).corr())

# Save meta-features to disk
feats = np.stack(feats, axis=1)
feats = np.reshape(feats, (feats.shape[0], -1))
with h5py.File(args.output_path, 'w') as f:
    f.create_dataset('F', data=feats)
    f.create_dataset('names',
                     data=preds[0].index.levels[1],
                     dtype=h5py.special_dtype(vlen=str))
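A sketch of reading the saved meta-features back, for example to train a stacking model on top of them; only the dataset names 'F' and 'names' come from the snippet above:

import h5py

with h5py.File(args.output_path, 'r') as f:
    meta_features = f['F'][:]   # shape: (examples, models * outputs)
    names = list(f['names'][:])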
Example 11
if config.USING_DLIB_OR_FACE_ALIGNMENT == 'dlib':
    dlib_detector, dlib_predictor = utils.load_dlib_detector_and_predictor()
elif config.USING_DLIB_OR_FACE_ALIGNMENT == 'face_alignment':
    face_alignment_3D_object = utils.load_face_alignment_object(d='3D', enable_cuda=config.ENABLE_CUDA)
    face_alignment_2D_object = utils.load_face_alignment_object(d='2D', enable_cuda=config.ENABLE_CUDA)

# Clip videos by dialogue times from the metadata,
# extract faces and landmarks from the video clips,
# blacken the mouths and draw mouth polygons, and
# save the combined frame + frame_with_blackened_mouth_and_polygon
for language in tqdm.tqdm(sorted(os.listdir(os.path.join(config.MOVIE_TRANSLATION_DATASET_DIR, 'metadata')))):
    # Read all metadata files in the language,
    # containing the columns: | output_file_name.mp4 | youtubeID | start_time | duration |
    for metadata_txt_file in tqdm.tqdm(sorted(glob.glob(os.path.join(config.MOVIE_TRANSLATION_DATASET_DIR, 'metadata', language, "*")))):
        actor = os.path.splitext(os.path.basename(metadata_txt_file))[0]
        print("\n", actor, "\n")
        metadata = utils.read_metadata(metadata_txt_file)
        # Extract video clips
        print("Extracting video clips...")
        extract_video_clips(language, actor, metadata, verbose=False)
        video_clips_dir = os.path.join(config.MOVIE_TRANSLATION_DATASET_DIR, 'videos', language, actor)
        # Extract faces and landmarks from video clips
        print("Extracting faces and landmarks from video clips...")
        for v, video_file in enumerate(tqdm.tqdm(sorted(glob.glob(os.path.join(video_clips_dir, "*.mp4"))))):
            if config.USING_DLIB_OR_FACE_ALIGNMENT == 'dlib':
                extract_face_frames_and_landmarks_from_video(video_file, config.USING_DLIB_OR_FACE_ALIGNMENT, dlib_detector=dlib_detector, dlib_predictor=dlib_predictor,
                                                             save_with_blackened_mouths_and_polygons=True, save_gif=False, save_landmarks_as_txt=True)
            elif config.USING_DLIB_OR_FACE_ALIGNMENT == 'face_alignment':
                extract_face_frames_and_landmarks_from_video(video_file, config.USING_DLIB_OR_FACE_ALIGNMENT, face_alignment_3D_object=face_alignment_3D_object,
                                                             face_alignment_2D_object=face_alignment_2D_object, save_with_blackened_mouths_and_polygons=True,
Example 12
    dfs = [
        DataFrame.from_dict(data_[name]).rename(columns={'value': name})
        for name in data_.keys()
    ]

    # Merge all datasets together
    data = reduce(lambda df1, df2: df1.merge(df2, on='date'), dfs)

    # Use consistent naming convention for columns
    data.columns = map(lambda name: name[0].upper() + name[1:], data.columns)

    # Add key column and return
    data['Key'] = key
    first_columns = ['Date', 'Key']
    return data[first_columns + list(set(data.columns) - set(first_columns))]


def _get_mobility_report(key: str):
    try:
        return get_mobility_report(key)
    except Exception:
        return DataFrame()


# Load all available keys from metadata and output the mobility report for all keys
keys = read_metadata().Key.unique()
data = list(
    tqdm(ThreadPool(4).imap_unordered(_get_mobility_report, keys),
         total=len(keys)))
concat(data).sort_values(['Date', 'Key']).to_csv(sys.stdout, index=False)
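The top of this snippet is cut off, so its imports are not visible; based on the names used above, they are presumably along these lines (project-local helpers such as read_metadata and get_mobility_report excluded):

import sys
from functools import reduce
from multiprocessing.pool import ThreadPool

from pandas import DataFrame, concat
from tqdm import tqdm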