Example #1
0
def test():
    """Predict on the processed test set with the saved GBDT and write a submission.

    Reads ``test_processed.csv`` indexed by ``PassengerId``, keeps the
    columns named in the externally defined ``feature_names``, predicts with
    the estimator loaded from ``gbdt.pkl`` and passes the row index and the
    predictions to ``common.make_submission`` together with the name
    ``submit_gbdt.csv``.
    """
    frame = pd.read_csv("test_processed.csv", index_col="PassengerId")
    features = frame[feature_names]

    model = common.load_predictor("gbdt.pkl")
    predicted = model.predict(features)

    common.make_submission(features.index, predicted, "submit_gbdt.csv")
Example #2
0
def fit_train_data(allresults,name,needscale):
    """Report a saved estimator's training score and collect its predictions.

    Loads ``"<name>.pkl"`` through ``common.load_predictor``, prints the
    estimator's score on the training data, and appends its predictions for
    that same data to ``allresults``.

    Args:
        allresults: container with ``append``; receives the predictions
            (mutated in place).
        name: estimator name, also used as the stem of the pickle file.
        needscale: when true, the ``Xtrain_scaled`` global is used instead
            of ``Xtrain``.
    """
    traindata = Xtrain_scaled if needscale else Xtrain

    estimator = common.load_predictor("%s.pkl" % name)
    # Single-argument print(...) gives the same output as the Python 2
    # statement and is also valid on Python 3.
    print("[%s] training score: %f" % (name, estimator.score(traindata, ytrain)))

    predictions = estimator.predict(traindata)
    allresults.append(predictions)
Example #3
0
def fit_estimator(name, need_scale, y, X, scaledX):
    """Refit a fresh clone of the saved estimator ``name`` and wrap the result.

    The estimator stored in ``"<name>.pkl"`` is loaded and passed through
    ``skclone``; the clone is fitted on ``scaledX`` when ``need_scale`` is
    true and on ``X`` otherwise, with ``y`` as the target.

    Returns:
        ``Estimator(fitted_clone, name, need_scale)``.
    """
    saved = common.load_predictor("%s.pkl" % name)
    fresh = skclone(saved)

    # Pick the feature matrix once instead of duplicating the fit call.
    features = scaledX if need_scale else X
    fresh.fit(features, y)

    return Estimator(fresh, name, need_scale)
Example #4
0
def train_whole():
    """Retrain the GBDT on the full training set and save it as ``gbdt.pkl``.

    Loads the estimator stored in ``gbdt-cv.pkl`` and prints its score on
    all rows of ``train_processed.csv``. It then fits a new
    ``GradientBoostingClassifier`` that reuses that estimator's ``loss``,
    ``learning_rate``, ``n_estimators`` and ``max_depth``, prints the new
    training score, and stores the model through ``common.dump_predictor``.
    """
    train_df = pd.read_csv("train_processed.csv", index_col="PassengerId")
    Xtrain = train_df[feature_names]
    ytrain = train_df["Survived"]

    # ------------------------------ load
    # this estimator was trained on a partial dataset, without using the validation part
    prev_estimator = common.load_predictor("gbdt-cv.pkl")
    # Single-argument print(...) works the same on Python 2 and Python 3.
    print("cross-validation score: %f" % (prev_estimator.score(Xtrain, ytrain)))

    # ------------------------------ train
    # after we get the parameters, we should train another estimator with all data
    # NOTE(review): only these four hyperparameters are carried over; any other
    # non-default setting of the loaded estimator is not copied -- confirm intended.
    gbdt = GradientBoostingClassifier(verbose=1,
                                      loss=prev_estimator.loss,
                                      learning_rate=prev_estimator.learning_rate,
                                      n_estimators=prev_estimator.n_estimators,
                                      max_depth=prev_estimator.max_depth)
    print(gbdt)
    gbdt.fit(Xtrain, ytrain)
    # Two spaces after the colon reproduce the output of the original
    # comma-separated print statement (trailing space + separator).
    print("training with all data, get score:  %s" % (gbdt.score(Xtrain, ytrain),))

    # ------------------------------ save
    common.dump_predictor("gbdt.pkl", gbdt)