def load_vox_forge_files(vox_dir, max_files, vox_file='voxforge.pcl', feature_func=extract_mfcc_features):
    """Return VoxForge features, computing and storing them on a cache miss.

    A ``FeatureStorage`` named ``vox_file`` under ``FEATURE_STORAGE`` is
    consulted first. If it exists, its stored content is returned and
    ``vox_dir``, ``max_files`` and ``feature_func`` are not used at all.
    Otherwise the files reported by ``get_voxforge_total_files`` are mapped
    through ``feature_func`` in a multiprocessing pool, the result is saved
    to the storage and then returned.

    Args:
        vox_dir: Passed through to ``get_voxforge_total_files``.
        max_files: Passed through to ``get_voxforge_total_files``.
        vox_file: Name of the storage entry used as the cache.
        feature_func: Callable applied to every file entry by ``Pool.map``;
            it has to be usable from worker processes.

    Returns:
        The list produced by ``pool.map`` on a cache miss, or whatever
        ``feature_storage.load()`` returns on a cache hit.
    """
    feature_storage = FeatureStorage(name=vox_file, base_dir=FEATURE_STORAGE)

    # Cache hit: nothing to compute, so no worker pool is ever started.
    if feature_storage.exists():
        # Single-argument print(...) emits the same text under Python 2's
        # print statement and under Python 3's print function.
        print("Loading voxforge from pickle")
        return feature_storage.load()

    files = get_voxforge_total_files(vox_dir, max_files)
    print("Started processing voxforge data {}".format(len(files)))

    # The pool is created only once the file list is known and is released
    # in ``finally`` so a failing feature_func cannot leak worker processes.
    pool = Pool()
    try:
        voxforge_features = pool.map(feature_func, files)
    finally:
        pool.close()
        pool.terminate()

    print("Dumping voxforge features")
    feature_storage.save(voxforge_features)
    return voxforge_features
def load_train_features(train_filename, feature_func, limit=None):
    """Return training features, computing and storing them on a cache miss.

    A ``FeatureStorage`` named ``train_filename`` under ``FEATURE_STORAGE``
    is consulted first. If it exists, its stored content is returned as-is;
    ``limit`` and ``feature_func`` are ignored in that case. Otherwise the
    entries from ``return_all_train_files()`` (optionally truncated) are
    mapped through ``feature_func`` in a multiprocessing pool, saved to the
    storage and returned.

    Args:
        train_filename: Name of the storage entry used as the cache.
        feature_func: Callable applied to every training entry by
            ``Pool.map``; it has to be usable from worker processes.
        limit: If not ``None``, only the first ``limit`` training entries
            are processed (``limit=0`` therefore processes nothing).

    Returns:
        The list produced by ``pool.map`` on a cache miss, or whatever
        ``feature_storage.load()`` returns on a cache hit.
    """
    feature_storage = FeatureStorage(name=train_filename, base_dir=FEATURE_STORAGE)

    # Cache hit: return early without touching the training files.
    if feature_storage.exists():
        # Single-argument print(...) emits the same text under Python 2's
        # print statement and under Python 3's print function.
        print("Loading train from cache")
        return feature_storage.load()

    all_train_data = return_all_train_files()
    if limit is not None:
        all_train_data = all_train_data[:limit]

    print("Started processing train")

    # Release the workers in ``finally`` so an exception raised while
    # extracting features does not leave pool processes behind.
    pool = Pool()
    try:
        X_train_transformed = pool.map(feature_func, all_train_data)
    finally:
        pool.close()
        pool.terminate()

    print("Dumping train features")
    feature_storage.save(X_train_transformed)
    return X_train_transformed
def load_test_features(test_filename, feature_func, limit=None):
    """Return test features, computing and storing them on a cache miss.

    A ``FeatureStorage`` named ``test_filename`` under ``FEATURE_STORAGE``
    is consulted first. If it exists, its stored content is returned as-is;
    ``limit`` and ``feature_func`` are ignored in that case. Otherwise the
    entries from ``get_all_test_data()`` (optionally truncated) are turned
    into ``(path, None)`` pairs, mapped through ``feature_func`` in a
    multiprocessing pool, saved to the storage and returned.

    The test data listing and the worker pool are only set up on a cache
    miss, since neither is needed to load already-stored features.

    Args:
        test_filename: Name of the storage entry used as the cache.
        feature_func: Callable applied to every ``(path, None)`` pair by
            ``Pool.map``; it has to be usable from worker processes.
        limit: If truthy, only the first ``limit`` test entries are
            processed. ``None`` and ``0`` both mean "no truncation".

    Returns:
        The list produced by ``pool.map`` on a cache miss, or whatever
        ``feature_storage.load()`` returns on a cache hit.
    """
    feature_storage = FeatureStorage(name=test_filename, base_dir=FEATURE_STORAGE)

    # Cache hit: skip listing the test data and spawning worker processes.
    if feature_storage.exists():
        # Single-argument print(...) emits the same text under Python 2's
        # print statement and under Python 3's print function.
        print("Loading test from cache")
        return feature_storage.load()

    test_data = get_all_test_data()
    if limit:
        test_data = test_data[:limit]

    print("Loading test from scratch")
    # Each test entry is unpacked as a (path, filename) pair; only the path
    # is forwarded, paired with None.
    # NOTE(review): None presumably stands in for the missing label so that
    # feature_func sees the same tuple shape as for training data - confirm.
    for_features = [(path, None) for (path, filename) in test_data]

    # Release the workers in ``finally`` so an exception raised while
    # extracting features does not leave pool processes behind.
    pool = Pool()
    try:
        X_test_transformed = pool.map(feature_func, for_features)
    finally:
        pool.close()
        pool.terminate()

    print("Dumping test features")
    feature_storage.save(X_test_transformed)
    print("Finished dumping")
    return X_test_transformed