LIMIT = 9000 DICTIONARY_SIZE = 600 sofia_path = None # sys.argv[1] if len(sys.argv) > 1: VOX_FEATURES = sys.argv[1] == "use_voxforge" else: VOX_FEATURES = False print "Using Vox features", VOX_FEATURES print "LIMIT", LIMIT print "Checking class names" _, REVERSE_CLASSES = load_classes_info() print "Loading train" X_train_transformed = load_train_features("bow_train_features.pcl", extract_mfcc_features, limit=LIMIT) X_train = np.vstack([f for (_, _, f) in X_train_transformed if f is not None]) # X_train = np.reshape(X_train, ( len(X_train_transformed), num_features ) ) print X_train.shape # for filename, lbl, features in X_train_transformed: # if features is None: # print "Train",filename, "is none" # continue # if X_total is not None: # X_total = np.vstack([ X_total, features ]) # else: # X_total = features X_total = X_train print "Ready with train" print "Starting train" test_data = get_all_test_data()
# GMM-feature training pipeline setup (Python 2 script): imports,
# configuration, and cached feature loading for train / test / VoxForge data.
from sklearn.grid_search import GridSearchCV
from sknn.mlp import Classifier, Layer
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

__author__ = 'egor'

VALIDATE = True
# Fixed seed for reproducible runs.
np.random.seed(100500)

VOX_FEATURES = True   # include VoxForge-derived features
LIMIT = 5000          # max number of files to load per split

print "Using Vox features", VOX_FEATURES
print "LIMIT", LIMIT

# Load (and cache to the given .pcl file) GMM features for the training set.
X_train_transformed = load_train_features('gmm_train_features.pcl', extract_gmm_feature, limit = LIMIT )
print "Loaded train"

# get_all_test_data returns (path, filename) pairs; build a path -> filename map.
test_data = get_all_test_data()
filename_by_path = {path : filename for (path, filename) in test_data }
X_test_transformed = load_test_features('gmm_test_features.pcl', extract_gmm_feature, limit = LIMIT)
print "Loaded test"

if VOX_FEATURES:
    print "Loading voxforge features"
    # Cap VoxForge loading at 4000 files when no global limit is set.
    if LIMIT is None:
        vox_limit = 4000
    else:
        vox_limit = LIMIT
    voxforge_features = load_vox_forge_files('/store/egor/voxforge', vox_limit, 'gmm_voxfeatures.pcl', extract_gmm_feature )
else:
    # NOTE(review): else-branch body continues beyond this chunk of the file.