def knn(train_data,train_label,val_data,val_label,test_data,name = "knn_submission.csv"):
    """Train a k-nearest-neighbours classifier, score it, and save test predictions.

    Fits ``KNeighborsClassifier(n_neighbors=20)`` on the training data, prints
    the validation score returned by ``preprocess.evaluation`` (reported as
    log loss), then hands the class probabilities predicted for ``test_data``
    to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): a second definition of ``knn`` appears later in this file;
    # at import time the later one replaces this one.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Start training KNN Classifier...")
    knnClf = KNeighborsClassifier(n_neighbors=20)
    knnClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    val_pred_label = knnClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: " + str(logloss))

    print("Start classify test set...")
    test_label = knnClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def et(train_data,train_label,val_data,val_label,test_data,name="extratrees_submission.csv"):
    """Train an extra-trees classifier, score it, and save test predictions.

    Fits ``ExtraTreesClassifier(n_estimators=10)`` on the training data, prints
    the validation score returned by ``preprocess.evaluation`` (reported as
    log loss), then hands the class probabilities predicted for ``test_data``
    to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): a second definition of ``et`` appears later in this file;
    # at import time the later one replaces this one.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("start training ExtraTrees...")
    etClf = ExtraTreesClassifier(n_estimators=10)
    etClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    val_pred_label = etClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: " + str(logloss))

    print("Start classify test set...")
    test_label = etClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def rf(train_data,train_label,val_data,val_label,test_data,name="RandomForest_submission.csv"):
    """Train a random-forest classifier, score it, and save test predictions.

    Fits ``RandomForestClassifier(n_estimators=400, n_jobs=-1)`` on the
    training data, prints the validation score returned by
    ``preprocess.evaluation`` (reported as log loss), then hands the class
    probabilities predicted for ``test_data`` to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): a second definition of ``rf`` with different
    # hyperparameters appears later in this file; at import time the later
    # one replaces this one.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Start training Random forest...")
    rfClf = RandomForestClassifier(n_estimators=400, n_jobs=-1)
    rfClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    val_pred_label = rfClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: " + str(logloss))

    print("Start classify test set...")
    test_label = rfClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def ab(train_data,train_label,val_data,val_label,test_data,name="adaboost_submission.csv"):
    """Train an AdaBoost classifier, score it, and save test predictions.

    Fits a default-constructed ``AdaBoostClassifier`` on the training data,
    prints the validation score returned by ``preprocess.evaluation``
    (reported as log loss), then hands the class probabilities predicted for
    ``test_data`` to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): a second definition of ``ab`` appears later in this file;
    # at import time the later one replaces this one.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Start training AdaBoost...")
    abClf = AdaBoostClassifier()
    abClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    val_pred_label = abClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: " + str(logloss))

    print("Start classify test set...")
    test_label = abClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def gb(train_data,train_label,val_data,val_label,test_data,name="GradientBoosting_submission.csv"):
    """Train a gradient-boosting classifier, score it, and save test predictions.

    Fits a default-constructed ``GradientBoostingClassifier`` on the training
    data, prints the validation score returned by ``preprocess.evaluation``
    (reported as log loss), then hands the class probabilities predicted for
    ``test_data`` to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): a second definition of ``gb`` appears later in this file;
    # at import time the later one replaces this one.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("start training GradientBoosting...")
    gbClf = GradientBoostingClassifier()  # params: by default
    gbClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    val_pred_label = gbClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: " + str(logloss))

    print("Start classify test set...")
    test_label = gbClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def knn(train_data, train_label, val_data, val_label, test_data, name="knn_submission.csv"):
    """Train a k-nearest-neighbours classifier, score it, and save test predictions.

    Fits ``KNeighborsClassifier(n_neighbors=20)`` on the training data, prints
    the validation score returned by ``preprocess.evaluation`` (reported as
    log loss), then hands the class probabilities predicted for ``test_data``
    to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): an earlier definition of ``knn`` exists in this file; this
    # later one is the binding that remains after import.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Start training KNN Classifier...")
    knnClf = KNeighborsClassifier(n_neighbors=20)
    knnClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    val_pred_label = knnClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: " + str(logloss))

    print("Start classify test set...")
    test_label = knnClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def et(train_data, train_label, val_data, val_label, test_data, name="extratrees_submission.csv"):
    """Train an extra-trees classifier, score it, and save test predictions.

    Fits ``ExtraTreesClassifier(n_estimators=10)`` on the training data, prints
    the validation score returned by ``preprocess.evaluation`` (reported as
    log loss), then hands the class probabilities predicted for ``test_data``
    to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): an earlier definition of ``et`` exists in this file; this
    # later one is the binding that remains after import.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("start training ExtraTrees...")
    etClf = ExtraTreesClassifier(n_estimators=10)
    etClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    val_pred_label = etClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: " + str(logloss))

    print("Start classify test set...")
    test_label = etClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def ab(train_data, train_label, val_data, val_label, test_data, name="adaboost_submission.csv"):
    """Train an AdaBoost classifier, score it, and save test predictions.

    Fits a default-constructed ``AdaBoostClassifier`` on the training data,
    prints the validation score returned by ``preprocess.evaluation``
    (reported as log loss), then hands the class probabilities predicted for
    ``test_data`` to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): an earlier definition of ``ab`` exists in this file; this
    # later one is the binding that remains after import.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Start training AdaBoost...")
    abClf = AdaBoostClassifier()
    abClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    val_pred_label = abClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: " + str(logloss))

    print("Start classify test set...")
    test_label = abClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def gb(train_data, train_label, val_data, val_label, test_data, name="GradientBoosting_submission.csv"):
    """Train a gradient-boosting classifier, score it, and save test predictions.

    Fits a default-constructed ``GradientBoostingClassifier`` on the training
    data, prints the validation score returned by ``preprocess.evaluation``
    (reported as log loss), then hands the class probabilities predicted for
    ``test_data`` to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): an earlier definition of ``gb`` exists in this file; this
    # later one is the binding that remains after import.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("start training GradientBoosting...")
    gbClf = GradientBoostingClassifier()  # params: by default
    gbClf.fit(train_data, train_label)

    # Evaluate on the validation set.
    val_pred_label = gbClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: " + str(logloss))

    print("Start classify test set...")
    test_label = gbClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def rf(train_data, train_label, val_data, val_label, test_data, name="RandomForest_submission.csv"):
    """Train a random-forest classifier, score it, and save test predictions.

    Fits a ``RandomForestClassifier`` with the fixed hyperparameters below,
    prints the validation score returned by ``preprocess.evaluation``
    (reported as log loss), then hands the class probabilities predicted for
    ``test_data`` to ``preprocess.saveResult``.

    Args:
        train_data: Training features passed to ``fit``.
        train_label: Training labels passed to ``fit``.
        val_data: Validation features passed to ``predict_proba``.
        val_label: Validation labels passed to ``preprocess.evaluation``.
        test_data: Test features passed to ``predict_proba``.
        name: Forwarded to ``preprocess.saveResult`` as ``filename``.

    Returns:
        None.
    """
    # NOTE(review): an earlier ``rf`` with different hyperparameters exists in
    # this file; this later one is the binding that remains after import.
    print("Start training Random forest...")

    # Build the classification forest.
    forest_params = {
        "n_jobs": 4,
        "n_estimators": 1000,
        "max_features": 20,
        "min_samples_split": 3,
        "bootstrap": False,
        "verbose": 3,
        "random_state": 23,
    }
    forest = RandomForestClassifier(**forest_params)

    # Train the model.
    forest.fit(train_data, train_label)

    # Predict class probabilities for the validation set and score them
    # (original comment: computed according to the official evaluation formula).
    val_proba = forest.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_proba)
    print("logloss of validation set:", logloss)

    print("Start classify test set...")
    test_proba = forest.predict_proba(test_data)
    preprocess.saveResult(test_proba, filename=name)