# for filename, lbl, features in X_test_transformed:
    #    if features is not None:
    #        X_total = np.vstack([ X_total, features ])
    #    else:
    #        print "Null test", filename
    # Append the test feature rows to the matrix accumulated so far, so the
    # dictionary below is learned from them as well.
    X_total = np.vstack([X_total, X_test])
    print X_total.shape
    print "Ready with test"
    print "Loading voxforge features"
    if VOX_FEATURES:
        # Fall back to 4000 when no global LIMIT is configured.
        # NOTE(review): presumably the maximum number of VoxForge files to load;
        # confirm against load_vox_forge_files.
        if LIMIT is None:
            vox_limit = 4000
        else:
            vox_limit = LIMIT
        voxforge_features = load_vox_forge_files(
            "/store/egor/voxforge", vox_limit, "bow_voxfeatures.pcl", extract_mfcc_features
        )
        # Entries are 3-tuples whose last element is the feature array; entries
        # whose features are None are skipped before stacking.
        vox_features = np.vstack([f for (_, _, f) in voxforge_features if f is not None])
        X_total = np.vstack([X_total, vox_features])
    else:
        # Keep the name bound to an empty list so the later
        # itertools.chain(X_train_transformed, voxforge_features) still works.
        voxforge_features = []

    print "Transformed everyting for dictionary learning", X_total.shape

    # Learn the dictionary from all stacked rows.
    # NOTE(review): "dictionary.pcl" looks like a cache/output file name and
    # DICTIONARY_SIZE the number of atoms -- confirm against learn_dictionary.
    dictionary = learn_dictionary(sofia_path, X_total, "dictionary.pcl", DICTIONARY_SIZE)

    X = np.array(
        [
            features
            for (_, _, features) in itertools.chain(X_train_transformed, voxforge_features)
            if features is not None
# Load the GMM features for the training set; LIMIT is forwarded unchanged as
# the loader's `limit` argument.
# NOTE(review): the '.pcl' name looks like a cache file -- confirm in the loader.
X_train_transformed = load_train_features(
    'gmm_train_features.pcl', extract_gmm_feature, limit=LIMIT
)
# print(...) with a single string argument produces the same output under the
# Python 2 print statement and the Python 3 print function.
print("Loaded train")

# Map every test path to its filename, then load the test-set GMM features.
test_data = get_all_test_data()
filename_by_path = {path: filename for (path, filename) in test_data}
X_test_transformed = load_test_features(
    'gmm_test_features.pcl', extract_gmm_feature, limit=LIMIT
)
print("Loaded test")

if VOX_FEATURES:
    print("Loading voxforge features")
    # Fall back to 4000 when no global LIMIT is configured.
    vox_limit = 4000 if LIMIT is None else LIMIT
    voxforge_features = load_vox_forge_files(
        '/store/egor/voxforge', vox_limit, 'gmm_voxfeatures.pcl', extract_gmm_feature
    )
else:
    # Empty list so the chain below only yields the training records.
    voxforge_features = []

# Records are (_, label, features) triples. Filter them ONCE with a single
# predicate and derive both arrays from the same list: filtering X on
# `features` and y on `lbl` separately lets the two arrays end up with
# different lengths (or silently misaligned rows) whenever only one of the two
# fields is None for some record.
usable_samples = [
    (features, lbl)
    for (_, lbl, features) in itertools.chain(X_train_transformed, voxforge_features)
    if features is not None and lbl is not None
]
X = np.array([features for (features, _) in usable_samples])
y = np.array([lbl for (_, lbl) in usable_samples])
# The literal spacing reproduces what the original comma-separated print
# statements emitted (one space between items).
print("X =  %s y= %s" % (X.shape, y.shape))
print("Loaded X %s" % (X.shape,))

# Shuffle features and labels with the same permutation so rows stay paired.
X, y = shuffle_in_unison_inplace(X, y)
print("Preprocessed data")
if VALIDATE: