('feature_selection', SelectFromModel(LinearSVC(penalty="l2"))),
            ('classification', KNeighborsClassifier(n_neighbors=k, metric='manhattan'))
        ])
    # Ten cross-validation rounds: fit `nn` on the i-th training split and
    # record its misclassification rate on the matching held-out split.
    for i in range(10):
        # Single-argument print with parentheses prints the same text on
        # Python 2 and is also valid Python 3.
        print("round %d %d" % (k, i))
        train_data = data[train[i]]
        # Index with the split's index array directly, exactly like cv_labels
        # below. The previous `labels[[train[i]]]` wrapped the array in a
        # list, which legacy NumPy silently treated as `labels[train[i]]`
        # (with a FutureWarning) and newer NumPy interprets as adding a
        # leading axis, breaking the `.T[0]` flattening further down.
        train_labels = labels[train[i]]
        cv_data = data[test[i]]
        cv_labels = labels[test[i]]

        #######TRAIN#####
        # `.T[0]` takes the first row of the transpose, i.e. the first column
        # of the label array (assumes labels is 2-D with one label column --
        # TODO confirm against where `labels` is built).
        nn.fit(train_data, train_labels.T[0].tolist())
        # Scoring
        cv_predicted = nn.predict(cv_data)
        cv_mis_rate = s.misclassification_rate(np.array(cv_predicted, dtype=int), cv_labels.T[0])
        cv_error.append(cv_mis_rate)

    #Test set
    # NOTE(review): `nn` is used as-is here; no refit is visible between the
    # cross-validation loop and this prediction -- confirm that scoring the
    # test set with the most recently fitted model is intended.
    test_predicted = nn.predict(test_data).tolist()
    test_mis_rate = s.misclassification_rate(np.array(test_predicted, dtype=int), test_labels)
    test_error.append(test_mis_rate)
    #confusion matrix
    if k == 1:
        # Label set = every distinct value seen in either the true or the
        # predicted labels. Both inputs are flattened first: the previous
        # `np.concatenate(..., axis=1)` on 1-D inputs relied on a deprecated
        # NumPy fallback to axis 0 and raises AxisError on current releases.
        # np.unique flattens anyway, so the resulting label list is unchanged.
        observed_labels = np.unique(
            np.concatenate((np.ravel(test_labels), np.ravel(test_predicted)))
        ).tolist()
        print(confusion_matrix(y_true=test_labels, y_pred=test_predicted, labels=observed_labels))

    #F1 score test set
    # Each row is [score, k, averaging-mode tag].
    f1score.append([f1_score(test_labels, test_predicted, average='micro'), k, 'Micro'])
    f1score.append([f1_score(test_labels, test_predicted, average='macro'), k, 'Macro'])

    # Mean of the absolute per-round rates collected in cv_error, tagged with k.
    total_cv_error.append([np.mean(np.abs(cv_error)), k, 'Cross Validation Set'])
    # Ten rounds: fit a fresh k-nearest-neighbours classifier on each training
    # split and record misclassification rates on both the training rows and
    # the held-out rows.
    for i in range(10):
        # Single-argument print with parentheses prints the same text on
        # Python 2 and is also valid Python 3.
        print("round %d %d" % (k, i))
        # NOTE(review): `.ix` is deprecated in pandas; whether `.loc` or
        # `.iloc` is the right replacement depends on ndata's index type,
        # which is not visible here -- left unchanged, confirm before porting.
        train_data = ndata.ix[train[i]]
        test_data = ndata.ix[test[i]]

        # Convert the pandas frames to numpy arrays once per round.
        # `.values` replaces the deprecated `.as_matrix()` (same result) and
        # matches the `.values` access already used for scoring below.
        train_matrix = train_data[features].values
        test_matrix = test_data[features].values

        #######TRAIN#####
        nn = NearestNeighborsClassifier(n_neighbors=k)
        nn.fit(train_matrix, train_data[target_feature].values)

        train_predicted = nn.predict(train_matrix)
        test_predicted = nn.predict(test_matrix)

        # Scoring
        # `.values.T[0]` picks the first column of the target frame as a flat
        # array of true labels.
        test_mis_rate = s.misclassification_rate(np.array(test_predicted), test_data[target_feature].values.T[0])
        train_mis_rate = s.misclassification_rate(np.array(train_predicted), train_data[target_feature].values.T[0])
        test_error.append(test_mis_rate)
        train_error.append(train_mis_rate)

    # One summary row per k: [mean absolute error, k, flag], flag 1 = train,
    # 0 = test.
    total_train_error.append([np.mean(np.abs(train_error)), k, 1])
    total_test_error.append([np.mean(np.abs(test_error)), k, 0])

# Stack the training summary rows on top of the test summary rows into a
# single 2-D array; column 0 is the error, column 1 is k, column 2 is the
# train/test flag.
np_stage = np.vstack([total_train_error, total_test_error])

# Build the frame in one step, with the column order given explicitly:
# k on 'x', error on 'y', and the integer flag telling train (1) from test (0).
plot_data = DataFrame(
    {
        'x': np_stage[:, 1].astype(int),
        'y': np_stage[:, 0],
        'Train=1/Test=0': np_stage[:, 2].astype(int),
    },
    columns=['x', 'y', 'Train=1/Test=0'],
)
# plot_data = pd.read_csv('plot.csv')