def _regress_selected(predict_func, train_x, train_y, test_x,
                      ind_train, ind_test, out_path):
    """Run one regressor on the selected rows and scatter its output.

    Args:
        predict_func: Callable invoked as
            ``predict_func(train_x_subset, train_y_subset, test_x_subset)``;
            its result is assigned to the selected test positions.
        train_x: Training features, indexable by an integer index array.
        train_y: Training targets, indexable by an integer index array.
        test_x: Test features, indexable by an integer index array.
        ind_train: Row indices of the training samples to use.
        ind_test: Row indices of the test samples to predict.
        out_path: File the full-length prediction vector is written to.

    Returns:
        A float array of length ``len(test_x)`` that is zero everywhere
        except at ``ind_test``.
    """
    scattered = np.zeros(len(test_x))
    # With no selected test rows there is nothing to predict; the output
    # simply stays all zeros instead of calling the regressor on empty data.
    if len(ind_test):
        scattered[ind_test] = predict_func(
            train_x[ind_train], train_y[ind_train], test_x[ind_test]
        )
    np.savetxt(out_path, scattered, delimiter=',')
    return scattered


def predict(train_x, train_y, test_x, threshold=0.55, weights=(0.6, 0.4)):
    """Classify test rows, then regress only on rows above the threshold.

    A gradient-boosting classifier is fitted on labels derived from
    ``toLabels(train_y)``. Column 1 of its ``predict_proba`` output is handed
    to ``output_classify`` and used to select test rows. Two regressors
    (``gbm_predict_func`` and ``svm_predict_func``) are then run on the
    selected rows only, and their full-length outputs are blended.

    Side effects: prints progress messages and writes ``gbr.csv`` and
    ``svr.csv`` in the current working directory.

    Args:
        train_x: Training features (indexable by an integer index array).
        train_y: Training targets (indexable by an integer index array).
        test_x: Test features (indexable by an integer index array).
        threshold: Cut-off applied both to the imputed training labels and
            to the predicted probabilities when selecting rows.
        weights: ``(gbm_weight, svm_weight)`` used for the final blend.

    Returns:
        Array of length ``len(test_x)``: the weighted sum of the two
        regressors' outputs, zero for rows at or below the threshold.
    """
    labels = toLabels(train_y)
    print('classifying')

    # Imputer needs a 2-D column; flatten the result again so the classifier
    # receives the 1-D target it expects rather than an (n, 1) column vector.
    labels = Imputer().fit_transform(labels.reshape(-1, 1)).ravel()

    gbc = GradientBoostingClassifier(n_estimators=3000, max_depth=9)
    gbc.fit(train_x, labels)
    # NOTE(review): column 1 is presumably the positive class - confirm
    # against the label encoding produced by toLabels.
    pred_probs = gbc.predict_proba(test_x)[:, 1]
    output_classify(pred_probs)

    ind_train = np.where(labels > threshold)[0]
    ind_test = np.where(pred_probs > threshold)[0]

    print('gbm regression...')
    gbm = _regress_selected(gbm_predict_func, train_x, train_y, test_x,
                            ind_train, ind_test, "gbr.csv")

    print('svm regression...')
    svm = _regress_selected(svm_predict_func, train_x, train_y, test_x,
                            ind_train, ind_test, "svr.csv")

    gbm_weight, svm_weight = weights
    return gbm_weight * gbm + svm_weight * svm
def print_evaluation_metrics(trained_model, trained_model_name, X_test, y_test):
    """Print a classification report and accuracy score for a fitted model.

    Args:
        trained_model: Fitted estimator exposing ``predict(X_test)``.
        trained_model_name: Label shown in the report header.
        X_test: Feature rows to predict on.
        y_test: True labels compared against the predictions.

    Returns:
        None. The report is written to standard output.
    """
    # Every message is assembled into ONE string so that print(...) emits the
    # same text under Python 2 (where the original used print statements with
    # comma-separated items) and also runs under Python 3.
    print('--------- For Model :  ' + str(trained_model_name)
          + '  ---------------\n')
    predicted_values = trained_model.predict(X_test)
    print(metrics.classification_report(y_test, predicted_values))
    print("Accuracy Score :  "
          + str(metrics.accuracy_score(y_test, predicted_values)))
    print("---------------------------------------\n")


# ---------------------------------------------------------------------------
# Script: load the order data, build features/labels, then fit and evaluate
# every classifier returned by get_ensemble_models().
# ---------------------------------------------------------------------------
order_product_filename = 'order_products__train.csv'
orders_filename = 'orders.csv'

order_product_frame = pd.read_csv(order_product_filename)
order_frame = pd.read_csv(orders_filename)
order_master_frame = pd.merge(order_product_frame, order_frame, how='outer',
                              left_on='order_id', right_on='order_id')

# 'reordered' is the target; it and the identifier columns are removed from
# the feature frame after the target values have been extracted.
columns_to_drop = ['user_id', 'eval_set', 'order_id', 'reordered']
target_class_labels = order_master_frame['reordered'].values
order_master_frame.drop(columns_to_drop, axis=1, inplace=True)

# The source frames are no longer needed once merged.
del order_product_frame
del order_frame

order_master_frame_values = Imputer().fit_transform(order_master_frame.values)

# NOTE(review): if the outer merge leaves rows without a 'reordered' value,
# they are filled by Imputer's default strategy and then truncated by int()
# below - confirm that fabricating labels this way is intended.
target_class_labels = Imputer().fit_transform(target_class_labels.reshape(-1, 1))
# Materialise a real list of ints: a bare map() is a lazy iterator on
# Python 3 and cannot be split by train_test_split.
target_class_labels = [int(label) for label in target_class_labels.ravel()]

X_train, X_test, y_train, y_test = train_test_split(
    order_master_frame_values, target_class_labels,
    test_size=0.2, random_state=42)

classifier_list, classifier_name_list = get_ensemble_models()
for classifier, classifier_name in zip(classifier_list, classifier_name_list):
    classifier.fit(X_train, y_train)
    print_evaluation_metrics(classifier, classifier_name, X_test, y_test)