def load_vox_forge_files(vox_dir, max_files, vox_file = 'voxforge.pcl', feature_func = extract_mfcc_features):
    """Return VoxForge features, computing and caching them on first use.

    If a stored result named ``vox_file`` already exists under
    ``FEATURE_STORAGE`` it is loaded and returned as-is. Otherwise the file
    list is obtained from ``get_voxforge_total_files(vox_dir, max_files)``,
    ``feature_func`` is applied to every entry in a multiprocessing pool, and
    the resulting list is saved before being returned.

    Args:
        vox_dir: Forwarded to ``get_voxforge_total_files``.
        max_files: Forwarded to ``get_voxforge_total_files``.
        vox_file: Name under which the features are stored.
        feature_func: Callable applied to each file entry via ``pool.map``
            (it therefore has to be usable from worker processes).

    Returns:
        The stored object on a cache hit, otherwise the list produced by
        ``pool.map``.
    """
    feature_storage = FeatureStorage( name = vox_file, base_dir = FEATURE_STORAGE )
    if feature_storage.exists():
        print("Loading voxforge from pickle")
        return feature_storage.load()

    files = get_voxforge_total_files(vox_dir, max_files)
    print("Started processing voxforge data {}".format( len(files) ))

    pool = Pool()
    try:
        voxforge_features = pool.map( feature_func, files )
    finally:
        # Always release the worker processes, even when feature
        # extraction raises part-way through.
        pool.terminate()
        pool.join()

    print("Dumping voxforge features")
    feature_storage.save( voxforge_features )
    return voxforge_features
def load_train_features( train_filename, feature_func, limit = None ):
    """Return training features, computing and caching them on first use.

    If a stored result named ``train_filename`` already exists under
    ``FEATURE_STORAGE`` it is loaded and returned. Otherwise the entries from
    ``return_all_train_files()`` (truncated to the first ``limit`` items when
    ``limit`` is not ``None``) are mapped through ``feature_func`` in a
    multiprocessing pool, and the resulting list is saved and returned.

    Args:
        train_filename: Name under which the features are stored.
        feature_func: Callable applied to each training entry via
            ``pool.map`` (it therefore has to be usable from worker
            processes).
        limit: Optional maximum number of training entries to process.
            Only used when the features are computed; a cached result is
            returned unchanged.

    Returns:
        The stored object on a cache hit, otherwise the list produced by
        ``pool.map``.
    """
    feature_storage = FeatureStorage( name = train_filename, base_dir = FEATURE_STORAGE )
    if feature_storage.exists():
        print("Loading train from cache")
        return feature_storage.load()

    all_train_data = return_all_train_files()
    if limit is not None:
        all_train_data = all_train_data[:limit]

    print("Started processing train")
    pool = Pool()
    try:
        X_train_transformed = pool.map( feature_func, all_train_data )
    finally:
        # Always release the worker processes, even when feature
        # extraction raises part-way through.
        pool.terminate()
        pool.join()

    print("Dumping train features")
    feature_storage.save( X_train_transformed )
    return X_train_transformed
def load_test_features(test_filename, feature_func, limit = None):
    """Return test features, computing and caching them on first use.

    If a stored result named ``test_filename`` already exists under
    ``FEATURE_STORAGE`` it is loaded and returned without listing the test
    data or starting any worker processes. Otherwise each ``(path, filename)``
    pair from ``get_all_test_data()`` is turned into ``(path, None)`` and
    mapped through ``feature_func`` in a multiprocessing pool; the resulting
    list is saved and returned.

    Args:
        test_filename: Name under which the features are stored.
        feature_func: Callable applied to each ``(path, None)`` tuple via
            ``pool.map`` (it therefore has to be usable from worker
            processes).
        limit: Optional maximum number of test entries to process. Any falsy
            value (``None`` or ``0``) means "no limit". Only used when the
            features are computed; a cached result is returned unchanged.

    Returns:
        The stored object on a cache hit, otherwise the list produced by
        ``pool.map``.
    """
    feature_storage = FeatureStorage( name = test_filename, base_dir = FEATURE_STORAGE )
    if feature_storage.exists():
        print("Loading test from cache")
        return feature_storage.load()

    test_data = get_all_test_data()
    if limit:
        test_data = test_data[:limit]

    print("Loading test from scratch")
    # The second tuple slot is passed as None for test entries; only the
    # path is forwarded to feature_func.
    for_features = [ (path, None) for (path, filename) in test_data ]

    # Spawn workers only when there is actual work to do.
    pool = Pool()
    try:
        X_test_transformed = pool.map( feature_func, for_features )
    finally:
        # Always release the worker processes, even when feature
        # extraction raises part-way through.
        pool.terminate()
        pool.join()

    print("Dumping test features")
    feature_storage.save( X_test_transformed )
    print("Finished dumping")
    return X_test_transformed