Example #1
0
def rf(train_data,train_label,val_data,val_label,test_data,name="RandomForest_submission.csv"):
    """Train a random forest, score it on the validation set, and save test predictions.

    Args:
        train_data: Training features passed to ``RandomForestClassifier.fit``.
        train_label: Training targets passed to ``RandomForestClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # Single-argument print() calls produce the same output on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("Start training Random forest...")
    rfClf = RandomForestClassifier(n_estimators=400, n_jobs=-1)
    rfClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = rfClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = rfClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example #2
0
def knn(train_data,train_label,val_data,val_label,test_data,name = "knn_submission.csv"):
    """Train a 20-neighbour KNN classifier, score it on validation data, and save test predictions.

    Args:
        train_data: Training features passed to ``KNeighborsClassifier.fit``.
        train_label: Training targets passed to ``KNeighborsClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # print() with one argument behaves identically on Python 2 and Python 3;
    # the original print statements do not parse on Python 3.
    print("Start training KNN Classifier...")
    knnClf = KNeighborsClassifier(n_neighbors=20)
    knnClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = knnClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = knnClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example #3
0
def ab(train_data,train_label,val_data,val_label,test_data,name="adaboost_submission.csv"):
    """Train a default AdaBoost classifier, score it on validation data, and save test predictions.

    Args:
        train_data: Training features passed to ``AdaBoostClassifier.fit``.
        train_label: Training targets passed to ``AdaBoostClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # Function-call print with a single argument runs unchanged on Python 2
    # and Python 3; the print statement form is Python 2 only.
    print("Start training AdaBoost...")
    abClf = AdaBoostClassifier()
    abClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = abClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = abClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example #4
0
def et(train_data,train_label,val_data,val_label,test_data,name="extratrees_submission.csv"):
    """Train a 10-tree ExtraTrees classifier, score it on validation data, and save test predictions.

    Args:
        train_data: Training features passed to ``ExtraTreesClassifier.fit``.
        train_label: Training targets passed to ``ExtraTreesClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # Python 2 print statements are a SyntaxError on Python 3; single-argument
    # print() calls emit the same text on both interpreters.
    print("start training ExtraTrees...")
    etClf = ExtraTreesClassifier(n_estimators=10)
    etClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = etClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = etClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example #5
0
def gb(train_data,train_label,val_data,val_label,test_data,name="GradientBoosting_submission.csv"):
    """Train a default gradient-boosting classifier, score it on validation data, and save test predictions.

    Args:
        train_data: Training features passed to ``GradientBoostingClassifier.fit``.
        train_label: Training targets passed to ``GradientBoostingClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # Single-argument print() is valid and equivalent on Python 2 and 3,
    # unlike the print statement, which only parses on Python 2.
    print("start training GradientBoosting...")
    gbClf = GradientBoostingClassifier()  # all hyper-parameters left at their defaults
    gbClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = gbClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = gbClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example #6
0
def rf(train_data,train_label,val_data,val_label,test_data,name="RandomForest_submission.csv"):
    """Fit a 400-tree random forest, print its validation score, and write test predictions.

    Args:
        train_data: Training features passed to ``RandomForestClassifier.fit``.
        train_label: Training targets passed to ``RandomForestClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # The print statement only parses on Python 2; a one-argument print()
    # call yields the same output on Python 2 and Python 3.
    print("Start training Random forest...")
    rfClf = RandomForestClassifier(n_estimators=400, n_jobs=-1)
    rfClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = rfClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = rfClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example #7
0
def knn(train_data,
        train_label,
        val_data,
        val_label,
        test_data,
        name="knn_submission.csv"):
    """Fit a 20-neighbour KNN classifier, print its validation score, and write test predictions.

    Args:
        train_data: Training features passed to ``KNeighborsClassifier.fit``.
        train_label: Training targets passed to ``KNeighborsClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # One-argument print() calls work the same on Python 2 and Python 3;
    # the statement form would fail to parse on Python 3.
    print("Start training KNN Classifier...")
    knnClf = KNeighborsClassifier(n_neighbors=20)
    knnClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = knnClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = knnClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example #8
0
def et(train_data,
       train_label,
       val_data,
       val_label,
       test_data,
       name="extratrees_submission.csv"):
    """Fit a 10-tree ExtraTrees classifier, print its validation score, and write test predictions.

    Args:
        train_data: Training features passed to ``ExtraTreesClassifier.fit``.
        train_label: Training targets passed to ``ExtraTreesClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # print() with a single pre-formatted argument is portable across
    # Python 2 and Python 3; the print statement is Python 2 only.
    print("start training ExtraTrees...")
    etClf = ExtraTreesClassifier(n_estimators=10)
    etClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = etClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = etClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example #9
0
def ab(train_data,
       train_label,
       val_data,
       val_label,
       test_data,
       name="adaboost_submission.csv"):
    """Fit a default AdaBoost classifier, print its validation score, and write test predictions.

    Args:
        train_data: Training features passed to ``AdaBoostClassifier.fit``.
        train_label: Training targets passed to ``AdaBoostClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # The print statement is invalid syntax on Python 3; these single-argument
    # print() calls give identical output on Python 2 and Python 3.
    print("Start training AdaBoost...")
    abClf = AdaBoostClassifier()
    abClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = abClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = abClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def gb(train_data,
       train_label,
       val_data,
       val_label,
       test_data,
       name="GradientBoosting_submission.csv"):
    """Fit a default gradient-boosting classifier, print its validation score, and write test predictions.

    Args:
        train_data: Training features passed to ``GradientBoostingClassifier.fit``.
        train_label: Training targets passed to ``GradientBoostingClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # A one-argument print() call is equivalent on Python 2 and Python 3,
    # while the original print statements are rejected by Python 3.
    print("start training GradientBoosting...")
    gbClf = GradientBoostingClassifier()  # all hyper-parameters left at their defaults
    gbClf.fit(train_data, train_label)

    # Evaluate on the validation set using class probabilities.
    val_pred_label = gbClf.predict_proba(val_data)
    logloss = preprocess.evaluation(val_label, val_pred_label)
    print("logloss of validation set: %s" % (logloss,))

    print("Start classify test set...")
    test_label = gbClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
Example #11
0
def rf(train_data,train_label,val_data,val_label,test_data,name="kdd-randomforest.csv"):
    """Train a 300-tree random forest, print a cross-validated ROC AUC, and save test predictions.

    Args:
        train_data: Training features passed to ``RandomForestClassifier.fit``.
        train_label: Training targets passed to ``RandomForestClassifier.fit``.
        val_data: Validation features used for 10-fold cross-validation.
        val_label: Validation targets used for 10-fold cross-validation.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    # Single-argument print() calls produce the same output on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("Start training Random forest...")
    rfClf = RandomForestClassifier(n_estimators=300, n_jobs=-1)
    rfClf.fit(train_data, train_label)

    # NOTE(review): cross_val_score fits fresh clones of the estimator on folds
    # of the validation data, so this number describes the model configuration
    # on val_data rather than the forest fitted on train_data above.
    auc_scores = cross_validation.cross_val_score(
        rfClf, val_data, val_label, cv=10, scoring='roc_auc')
    print(np.mean(auc_scores))

    print("Start classify test set...")
    test_label = rfClf.predict_proba(test_data)
    preprocess.saveResult(test_label, filename=name)
def rf(train_data,
       train_label,
       val_data,
       val_label,
       test_data,
       name="RandomForest_submission.csv"):
    """Fit a random forest, print its validation score, and write test predictions.

    Args:
        train_data: Training features passed to ``RandomForestClassifier.fit``.
        train_label: Training targets passed to ``RandomForestClassifier.fit``.
        val_data: Validation features scored with ``predict_proba``.
        val_label: Validation targets handed to ``preprocess.evaluation``.
        test_data: Test features scored with ``predict_proba``.
        name: File name forwarded to ``preprocess.saveResult``.
    """
    print("Start training Random forest...")

    # Hyper-parameters for the classification forest, gathered in one place.
    forest_params = dict(
        n_jobs=4,
        n_estimators=1000,
        max_features=20,
        min_samples_split=3,
        bootstrap=False,
        verbose=3,
        random_state=23,
    )
    forest = RandomForestClassifier(**forest_params)

    # Train the model.
    forest.fit(train_data, train_label)

    # Predict class probabilities for the validation set and score them; the
    # original author's comment says the score follows the official
    # evaluation formula.
    val_probabilities = forest.predict_proba(val_data)
    val_logloss = preprocess.evaluation(val_label, val_probabilities)
    print("logloss of validation set:", val_logloss)

    print("Start classify test set...")
    test_probabilities = forest.predict_proba(test_data)
    preprocess.saveResult(test_probabilities, filename=name)