for index, row in data.iterrows():#iterate over csv file if index==limit: break img=cv2.imread(root_path + '/images/' + str(row['image_id']) + '.jpg') histogram=np.zeros((3, 256)) for i in range(3):#calc hist for each channel histogram[i] = cv2.calcHist([img],[i],None,[256],[0,255]).ravel() X[index]=histogram.ravel()#to 1d array return X X = extract_features(train_data, 1500) y = train_data['image_label'].values[:X.shape[0]].ravel() grid = { 'knn__n_neighbors': [1, 10, 20, 30, 40, 60, 75, 100, 120, 160, 200], 'knn__metric': ['euclidean', 'manhattan', 'chebyshev'], 'knn__weights': ['uniform', 'distance'], 'preprocess__norm': ['l1', 'l2', 'max'] } pipeline = Pipeline(steps=[ ('preprocess', preprocessing.Normalizer()), ('knn', neighbors.KNeighborsClassifier()) ]) model = EvolutionaryAlgorithmSearchCV(pipeline, grid, scoring='roc_auc', verbose=True, n_jobs=4, population_size=10) model.fit(X, y) preds = model.predict_proba(extract_features(test_data))[:, 1] test_data = test_data.drop('image_url', 1) test_data['image_label'] = preds test_data.to_csv(root_path + '/res.csv', index=False)
try: clf = EvolutionaryAlgorithmSearchCV(estimator=clf(), params=params, **generic_args).fit( X_train, y_train) except: print 'failed to do: {}'.format(name) continue score = clf.score(X_test, y_test) # Plot the decision boundary. For that, we will assign a color to each # point in the mesh [x_min, x_max]x[y_min, y_max]. if hasattr(clf, "decision_function"): Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) else: Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] # Put the result into a color plot Z = Z.reshape(xx.shape) plot_data[ds_cnt][name] = { 'Z': Z, 'v_score': score, 't_score': clf.best_score_, 'all_logbooks': clf.all_logbooks_[0] } ax.contourf(xx, yy, Z, cmap=cm, alpha=.8) # Plot also the training points ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train,