for index, row in data.iterrows():#iterate over csv file
        if index==limit:
            break
        img=cv2.imread(root_path + '/images/' + str(row['image_id']) + '.jpg')
        histogram=np.zeros((3, 256))
        for i in range(3):#calc hist for each channel
            histogram[i] = cv2.calcHist([img],[i],None,[256],[0,255]).ravel()
        X[index]=histogram.ravel()#to 1d array
    return X
    
# ---------------------------------------------------------------------------
# Training / prediction script.
#
# 1. Build features for the training rows, passing 1500 as the row limit.
# 2. Tune a Normalizer -> KNN pipeline with an evolutionary hyper-parameter
#    search scored by ROC AUC.
# 3. Predict probabilities for the test set and write them to res.csv.
# ---------------------------------------------------------------------------

# Feature matrix for the training data (1500 is passed as the `limit`).
X = extract_features(train_data, 1500)
# Labels truncated to the number of rows in X so that X and y stay aligned.
y = train_data['image_label'].values[:X.shape[0]].ravel()

# Search space. Keys follow the "<pipeline step>__<parameter>" convention,
# so they must match the step names used in `pipeline` below.
grid = {
    'knn__n_neighbors': [1, 10, 20, 30, 40, 60, 75, 100, 120, 160, 200],
    'knn__metric': ['euclidean', 'manhattan', 'chebyshev'],
    'knn__weights': ['uniform', 'distance'],
    'preprocess__norm': ['l1', 'l2', 'max']
}
pipeline = Pipeline(steps=[
    ('preprocess', preprocessing.Normalizer()),
    ('knn', neighbors.KNeighborsClassifier())
])

# NOTE(review): EvolutionaryAlgorithmSearchCV is presumably the sklearn-deap
# search class - confirm against the file's imports.
model = EvolutionaryAlgorithmSearchCV(pipeline, grid, scoring='roc_auc', verbose=True, n_jobs=4, population_size=10)
model.fit(X, y)

# Keep only column 1 of predict_proba, i.e. the probability of the second
# class in the fitted estimator's class ordering.
preds = model.predict_proba(extract_features(test_data))[:, 1]
# `axis` must be passed by keyword: the positional form drop(label, 1) was
# deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
test_data = test_data.drop('image_url', axis=1)
test_data['image_label'] = preds
test_data.to_csv(root_path + '/res.csv', index=False)
            try:
                clf = EvolutionaryAlgorithmSearchCV(estimator=clf(),
                                                    params=params,
                                                    **generic_args).fit(
                                                        X_train, y_train)
            except:
                print 'failed to do: {}'.format(name)
                continue
            score = clf.score(X_test, y_test)

            # Plot the decision boundary. For that, we will assign a color to each
            # point in the mesh [x_min, x_max]x[y_min, y_max].
            if hasattr(clf, "decision_function"):
                Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
            else:
                Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

            # Put the result into a color plot
            Z = Z.reshape(xx.shape)
            plot_data[ds_cnt][name] = {
                'Z': Z,
                'v_score': score,
                't_score': clf.best_score_,
                'all_logbooks': clf.all_logbooks_[0]
            }
            ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

            # Plot also the training points
            ax.scatter(X_train[:, 0],
                       X_train[:, 1],
                       c=y_train,