def predict(train_x, train_y, test_x, threshold=0.55, gbm_weight=0.6,
            svm_weight=0.4):
    """Two-stage prediction: classify rows, then regress on the positives.

    A gradient-boosting classifier is fitted on labels derived from
    ``train_y`` and scores every row of ``test_x``.  Two regressors
    (``gbm_predict_func`` and ``svm_predict_func``) are then trained only
    on the training rows whose label exceeds ``threshold`` and applied
    only to the test rows whose predicted probability exceeds
    ``threshold``; every other test row keeps a prediction of 0.

    Side effects: prints progress messages, hands the class
    probabilities to ``output_classify`` and writes the per-model
    predictions to ``gbr.csv`` and ``svr.csv`` in the working directory.

    Args:
        train_x: Training features.  Must support integer-array row
            indexing (e.g. a NumPy array).
        train_y: Training targets, indexable the same way as ``train_x``.
        test_x: Test features; must support ``len()`` and integer-array
            row indexing.
        threshold: Cut-off applied both to the training labels and to the
            predicted probabilities when selecting rows for regression.
        gbm_weight: Blend weight of the gradient-boosting regressor.
        svm_weight: Blend weight of the SVM regressor.

    Returns:
        Array of length ``len(test_x)`` holding
        ``gbm_weight * gbm + svm_weight * svm``.
    """
    # presumably converts the raw targets into class labels -- toLabels is
    # defined elsewhere; verify against its implementation.
    labels = toLabels(train_y)
    print('classifying')

    # Imputer requires 2-D input; flatten the result again so that the
    # classifier receives the 1-D target vector it expects instead of an
    # (n, 1) column.
    labels = Imputer().fit_transform(labels.reshape(-1, 1)).ravel()

    gbc = GradientBoostingClassifier(n_estimators=3000, max_depth=9)
    gbc.fit(train_x, labels)
    # Column 1 is the probability of the second entry of gbc.classes_.
    pred_probs = gbc.predict_proba(test_x)[:, 1]
    output_classify(pred_probs)

    # Only the rows on the "positive" side of the threshold take part in
    # the regression stage.
    ind_train = np.where(labels > threshold)[0]
    ind_test = np.where(pred_probs > threshold)[0]

    print('gbm regression...')
    gbm_predict = gbm_predict_func(train_x[ind_train], train_y[ind_train],
                                   test_x[ind_test])
    gbm = np.zeros(len(test_x))
    gbm[ind_test] = gbm_predict
    np.savetxt("gbr.csv", gbm, delimiter=',')

    print('svm regression...')
    svm_predict = svm_predict_func(train_x[ind_train], train_y[ind_train],
                                   test_x[ind_test])
    svm = np.zeros(len(test_x))
    svm[ind_test] = svm_predict
    np.savetxt("svr.csv", svm, delimiter=',')

    return gbm_weight * gbm + svm_weight * svm
Exemple #2
0

def print_evaluation_metrics(trained_model, trained_model_name, X_test, y_test):
    """Print a classification report and accuracy score for a fitted model.

    Args:
        trained_model: Fitted estimator exposing ``predict(X)``.
        trained_model_name: Label shown in the report header.
        X_test: Features to predict on.
        y_test: True labels matching ``X_test``.

    Returns:
        None.  The report is written to standard output.
    """
    # Each print() receives a single pre-formatted string so the output is
    # identical whether print is the Python 3 function or the Python 2
    # statement; the doubled spaces reproduce the separators the original
    # comma-separated print statements emitted.
    print('--------- For Model :  {}  ---------------\n'.format(trained_model_name))
    predicted_values = trained_model.predict(X_test)
    print(metrics.classification_report(y_test, predicted_values))
    print('Accuracy Score :  {}'.format(
        metrics.accuracy_score(y_test, predicted_values)))
    print('---------------------------------------\n')


# Script: load the order data, build a feature matrix and a 'reordered'
# target, then fit and evaluate every model from get_ensemble_models().
order_product_filename = 'order_products__train.csv'
orders_filename = 'orders.csv'

order_product_frame = pd.read_csv(order_product_filename)
order_frame = pd.read_csv(orders_filename)

# NOTE(review): an outer join keeps rows that exist in only one of the two
# files; their missing values -- including a missing 'reordered' target --
# are filled in by the imputers below.  Confirm this is intended rather
# than how='inner'.
order_master_frame = pd.merge(order_product_frame, order_frame,
                              how='outer', on='order_id')

# Identifier/bookkeeping columns and the target itself are not features.
columns_to_drop = ['user_id', 'eval_set', 'order_id', 'reordered']
target_class_labels = order_master_frame['reordered'].values
order_master_frame.drop(columns_to_drop, axis=1, inplace=True)

# The source frames are no longer needed once merged; release them.
del order_product_frame
del order_frame

order_master_frame_values = Imputer().fit_transform(order_master_frame.values)

# Imputer requires 2-D input.  Flatten the result and truncate to int with
# NumPy: a lazy map() object (Python 3) cannot be indexed or measured by
# train_test_split, and int() on 1-element arrays is deprecated.
target_class_labels = Imputer().fit_transform(target_class_labels.reshape(-1, 1))
target_class_labels = target_class_labels.ravel().astype(int)

X_train, X_test, y_train, y_test = train_test_split(
    order_master_frame_values, target_class_labels,
    test_size=0.2, random_state=42)

classifier_list, classifier_name_list = get_ensemble_models()
for classifier, classifier_name in zip(classifier_list, classifier_name_list):
    classifier.fit(X_train, y_train)
    print_evaluation_metrics(classifier, classifier_name, X_test, y_test)