Esempio n. 1
0
    # Debug preview: transform every sampled file with the recipe, plot the
    # resulting features, save all plots into one PDF and stop the script.
    # `np.warnings` was only the stdlib module leaking through the NumPy
    # namespace and is missing from recent NumPy releases, so the stdlib
    # `warnings` module is used directly.
    import warnings
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        for path, name in SAMPLED_WAV_FILE:
            feat = recipe.transform(path)
            # 'bnf' and 'mspec' must have the same length along the first axis
            assert feat['bnf'].shape[0] == feat['mspec'].shape[0]
            V.plot_multiple_features(feat, title=feat['name'])
        V.plot_save(os.path.join(PATH_EXP, 'features_%s.pdf' % args.recipe))
        exit()
# ===========================================================================
# Prepare the processor
# ===========================================================================
# `np.warnings` was never a public NumPy API (it was the stdlib module leaking
# through the namespace) and raises AttributeError on recent NumPy releases,
# so the stdlib module is imported and used directly.
import warnings

# Build the feature processor for the selected recipe, run it over every key
# of WAV_FILES and validate the output, with all warnings silenced.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore')
    jobs = list(WAV_FILES.keys())
    processor = pp.FeatureProcessor(
        jobs=jobs,
        path=os.path.join(PATH_ACOUSTIC_FEAT, args.recipe),
        extractor=recipe,
        n_cache=1200,
        # NOTE(review): this is <= 0 on hosts with one or two cores --
        # confirm FeatureProcessor tolerates a non-positive ncpu.
        ncpu=min(18,
                 cpu_count() - 2),
        override=True,
        identifier='name',
        log_path=os.path.join(PATH_EXP, 'processor_%s.log' % args.recipe),
        stop_on_failure=False)
    processor.run()
    pp.validate_features(processor,
                         nb_samples=12,
                         path=os.path.join(PATH_EXP, args.recipe),
                         override=True)
Esempio n. 2
0
            pp.base.DeleteFeatures(input_name=('stft', 'spec', 'sad_threshold')
                                   ),
            pp.speech.AcousticNorm(mean_var_norm=True,
                                   windowed_mean_var_norm=True,
                                   input_name=('mspec', 'mfcc')),
            # ====== post processing ====== #
            pp.base.AsType(dtype='float16'),
        ],
        debug=False)
    # Run the extraction pipeline over all files with warnings silenced.
    # `np.warnings` was only the stdlib module leaking through the NumPy
    # namespace and is missing from recent NumPy releases, so the stdlib
    # `warnings` module is used directly.
    import warnings
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        processor = pp.FeatureProcessor(jobs=all_files,
                                        path=PATH_ACOUSTIC_FEATURES,
                                        extractor=extractors,
                                        n_cache=120,
                                        ncpu=None,
                                        override=True,
                                        identifier='name',
                                        log_path=os.path.join(
                                            EXP_DIR, 'processor.log'),
                                        stop_on_failure=True)
        processor.run()
        # pp.validate_features(processor,
        #                      nb_samples=12,
        #                      path=os.path.join(EXP_DIR, 'feature_validation'),
        #                      override=True)
# Open the extracted features read-only and report the distribution of
# utterance lengths (end - start of each indexed entry) for the chosen feature.
ds = F.Dataset(PATH_ACOUSTIC_FEATURES, read_only=True)
print(ds)
indices = list(ds['indices_%s' % args.feat].items())
print("Utterances length:")
_utt_lengths = [stop - begin for _, (begin, stop) in indices]
print("   ", describe(_utt_lengths, shorten=True))
# ===========================================================================
# ====== basic path ====== #
# All three locations are derived from the recipe name.
output_dataset_path = os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE)
processor_log_path = os.path.join(EXP_DIR, 'processor_%s.log' % FEATURE_RECIPE)
ds_validation_path = os.path.join(EXP_DIR, 'validate_%s.pdf' % FEATURE_RECIPE)

# Delete any leftover log / validation file from a previous run, then report
# where the new one will be written.
for _label, _stale_path in (("Log path:", processor_log_path),
                            ("Validation path:", ds_validation_path)):
    if os.path.exists(_stale_path):
        os.remove(_stale_path)
    print(_label, ctext(_stale_path, 'cyan'))

# ====== running the processing ====== #
# The processor is built and run inside the project's warning-suppression
# context (presumably silencing every `Warning` subclass -- confirm against
# `catch_warnings_ignore`).
with catch_warnings_ignore(Warning):
    processor = pp.FeatureProcessor(
        jobs=ALL_FILES,
        path=output_dataset_path,
        extractor=recipe,
        n_cache=320,
        ncpu=NCPU,
        override=True,
        identifier='name',
        log_path=processor_log_path,
        stop_on_failure=False,
    )
    processor.run()
# ===========================================================================
# Make some visualization
# ===========================================================================
# Validation reads the freshly written dataset and targets the PDF path.
validate_features_dataset(output_dataset_path, ds_validation_path)
Esempio n. 4
0
# Rebuild the dataset when it is missing on disk or when `args.ds` is truthy.
_needs_extraction = not os.path.exists(outpath) or args.ds
if _needs_extraction:
    _steps = [
        # read the audio; sr_new=8000 is the requested target sample rate
        pp.speech.AudioReader(
            sr=None,
            sr_new=8000,
            best_resample=True,
            remove_dc=True,
        ),
        # derive 'name' from 'path': base file name up to its first dot
        pp.base.Converter(
            converter=lambda x: os.path.basename(x).split('.')[0],
            input_name='path',
            output_name='name',
        ),
        pp.base.AsType(dtype='float16', input_name='raw'),
    ]
    extractors = pp.make_pipeline(steps=_steps, debug=False)
    processor = pp.FeatureProcessor(
        jobs=jobs,
        path=outpath,
        extractor=extractors,
        n_cache=0.08,
        ncpu=None,
        override=True,
    )
    processor.run()
    pp.validate_features(
        processor,
        path='/tmp/tidigits',
        nb_samples=12,
        override=True,
    )
    # ship the README text alongside the extracted data
    _readme_file = os.path.join(outpath, 'README')
    with open(_readme_file, 'w') as f:
        f.write(README)
# ====== check the preprocessed dataset ====== #
# Open the dataset read-only, print it together with its md5 and make sure it
# is closed even if printing or the md5 computation raises.
ds = F.Dataset(outpath, read_only=True)
try:
    print(ds)
    print(ctext(ds.md5, 'yellow'))
finally:
    ds.close()
# ====== compress ====== #
Esempio n. 5
0
        continue
    # extract acoustic feature from scratch
    feat_dir = os.path.join(PATH_ACOUSTIC_FEATURES,
                            '%s_%s' % (dsname, EXTRACTOR_NAME))
    log_path = get_logpath(name='%s_%s.log' % (dsname, EXTRACTOR_NAME),
                           increasing=True,
                           odin_base=False,
                           root=EXP_DIR)
    # check if need running the feature extraction
    if _check_running_feature_extraction(feat_dir, n_files=len(file_list)):
        # `np.warnings` was only the stdlib module leaking through the NumPy
        # namespace and is missing from recent NumPy releases, so the stdlib
        # `warnings` module is used directly.
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            processor = pp.FeatureProcessor(jobs=file_list,
                                            path=feat_dir,
                                            extractor=extractor,
                                            ncpu=NCPU,
                                            override=True,
                                            identifier='name',
                                            log_path=log_path,
                                            stop_on_failure=False)
            processor.run()
    # store the extracted dataset
    ds = F.Dataset(path=feat_dir, read_only=True)
    assert FEATURE_NAME in ds, \
        "Cannot find feature with name: %s, from: %s" % (FEATURE_NAME, ds.path)
    # per-dataset record: [feature data, indices, 'spkid' map, 'path' map]
    acoustic_features[dsname] = [
        ds[FEATURE_NAME],
        dict(ds['indices_%s' % FEATURE_NAME].items()),
        dict(ds['spkid'].items()),
        dict(ds['path'].items()),
    ]
# ====== print log ====== #
Esempio n. 6
0
                exit()
            else:
                V.plot_multiple_features(tmp, title=name)
        V.plot_save(os.path.join(PATH_EXP, 'feature_debug.pdf'))
        exit()
# ===========================================================================
# Processor
# ===========================================================================
# `np.warnings` was never a public NumPy API (it was the stdlib module leaking
# through the namespace) and raises AttributeError on recent NumPy releases,
# so the stdlib module is imported and used directly.
import warnings

# Run the extractors over all files with warnings silenced, time the run and
# record how many errors the processor logged.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore')
    processor = pp.FeatureProcessor(
        jobs=all_files,
        path=PATH_ACOUSTIC,
        extractor=extractors,
        n_cache=0.12,
        # A non-positive --ncpu falls back to min(18, cores - 2).
        # NOTE(review): that fallback is itself <= 0 on hosts with one or two
        # cores -- confirm FeatureProcessor tolerates it.
        ncpu=min(18,
                 cpu_count() - 2) if args.ncpu <= 0 else int(args.ncpu),
        override=True,
        identifier='name',
        log_path=os.path.join(PATH_EXP, 'processor.log'),
        stop_on_failure=True  # small dataset, enable stop on failure
    )
    with UnitTimer():
        processor.run()
    n_error = len(processor.error_log)
    print(processor)
# ====== copy readme and check the preprocessed dataset ====== #
if n_error == 0:
    # Take the first entry of the audio folder whose name contains 'README'.
    # `next` with a default stops at the first match and avoids the IndexError
    # that indexing an empty match list raised when no README is present.
    readme_name = next(
        (entry for entry in os.listdir(audio.path) if 'README' in entry),
        None)
    if readme_name is None:
        print("No README found in:", audio.path)
    else:
        readme_path = os.path.join(audio.path, readme_name)
        shutil.copy(readme_path, os.path.join(PATH_ACOUSTIC, 'README.md'))