# for filename, lbl, features in X_test_transformed: # if features is not None: # X_total = np.vstack([ X_total, features ]) # else: # print "Null test", filename X_total = np.vstack([X_total, X_test]) print X_total.shape print "Ready with test" print "Loading voxforge features" if VOX_FEATURES: if LIMIT is None: vox_limit = 4000 else: vox_limit = LIMIT voxforge_features = load_vox_forge_files( "/store/egor/voxforge", vox_limit, "bow_voxfeatures.pcl", extract_mfcc_features ) vox_features = np.vstack([f for (_, _, f) in voxforge_features if f is not None]) X_total = np.vstack([X_total, vox_features]) else: voxforge_features = [] print "Transformed everyting for dictionary learning", X_total.shape dictionary = learn_dictionary(sofia_path, X_total, "dictionary.pcl", DICTIONARY_SIZE) X = np.array( [ features for (_, _, features) in itertools.chain(X_train_transformed, voxforge_features) if features is not None
X_train_transformed = load_train_features('gmm_train_features.pcl', extract_gmm_feature, limit = LIMIT ) print "Loaded train" test_data = get_all_test_data() filename_by_path = {path : filename for (path, filename) in test_data } X_test_transformed = load_test_features('gmm_test_features.pcl', extract_gmm_feature, limit = LIMIT) print "Loaded test" if VOX_FEATURES: print "Loading voxforge features" if LIMIT is None: vox_limit = 4000 else: vox_limit = LIMIT voxforge_features = load_vox_forge_files('/store/egor/voxforge', vox_limit, 'gmm_voxfeatures.pcl', extract_gmm_feature ) else: voxforge_features = [] #X = np.array([features for (_,_, features) in itertools.chain(X_train_transformed, voxforge_features)]) X = np.array([features for (_,_, features) in itertools.chain(X_train_transformed, voxforge_features) if features is not None ]) y = np.array([lbl for (_,lbl, _) in itertools.chain(X_train_transformed, voxforge_features) if lbl is not None ]) print "X = ", X.shape, "y=", y.shape print "Loaded X", X.shape #shuffle X, y = shuffle_in_unison_inplace(X,y) #X = StandardScaler().fit_transform(X) #binarizer.fit( y ) #y = binarizer.transform( y ) print "Preprocessed data" if VALIDATE: