LIMIT = 9000
    DICTIONARY_SIZE = 600
    sofia_path = None  # sys.argv[1]
    if len(sys.argv) > 1:
        VOX_FEATURES = sys.argv[1] == "use_voxforge"
    else:
        VOX_FEATURES = False

    print "Using Vox features", VOX_FEATURES
    print "LIMIT", LIMIT

    print "Checking class names"
    _, REVERSE_CLASSES = load_classes_info()

    print "Loading train"
    X_train_transformed = load_train_features("bow_train_features.pcl", extract_mfcc_features, limit=LIMIT)
    X_train = np.vstack([f for (_, _, f) in X_train_transformed if f is not None])
    # X_train = np.reshape(X_train, ( len(X_train_transformed), num_features ) )
    print X_train.shape
    # for filename, lbl, features in X_train_transformed:
    #    if features is None:
    #        print "Train",filename, "is none"
    #        continue
    #    if X_total is not None:
    #        X_total =  np.vstack([ X_total, features ])
    #    else:
    #        X_total = features
    X_total = X_train
    print "Ready with train"
    print "Starting train"
    test_data = get_all_test_data()
from sklearn.grid_search import GridSearchCV
from sknn.mlp import Classifier, Layer
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
__author__ = 'egor'

VALIDATE = True
np.random.seed(100500)
VOX_FEATURES = True
LIMIT = 5000

print "Using Vox features", VOX_FEATURES
print "LIMIT", LIMIT

X_train_transformed = load_train_features('gmm_train_features.pcl', extract_gmm_feature, limit = LIMIT )
print "Loaded train"

test_data = get_all_test_data()
filename_by_path = {path : filename  for (path, filename) in test_data }
X_test_transformed = load_test_features('gmm_test_features.pcl', extract_gmm_feature, limit = LIMIT)
print "Loaded test"

if VOX_FEATURES:
    print "Loading voxforge features"
    if LIMIT is None:
        vox_limit = 4000
    else:
        vox_limit = LIMIT
    voxforge_features = load_vox_forge_files('/store/egor/voxforge', vox_limit, 'gmm_voxfeatures.pcl', extract_gmm_feature )
else: