def test():
    """Predict on the processed test set and write the submission file.

    Reads "test_processed.csv" indexed by "PassengerId", keeps the columns
    listed in the module-level ``feature_names``, loads the predictor stored
    in "gbdt.pkl" through ``common.load_predictor``, and passes the row index
    together with the predictions to ``common.make_submission``, targeting
    "submit_gbdt.csv".
    """
    frame = pd.read_csv("test_processed.csv", index_col="PassengerId")
    features = frame[feature_names]

    model = common.load_predictor("gbdt.pkl")
    labels = model.predict(features)

    common.make_submission(features.index, labels, "submit_gbdt.csv")
def fit_train_data(allresults, name, needscale):
    """Score a saved estimator on the training data and collect predictions.

    Despite the name, nothing is fitted here: the estimator is loaded from
    "<name>.pkl" via ``common.load_predictor`` and only evaluated.

    Args:
        allresults: container with an ``append`` method; the estimator's
            predictions on the training data are appended to it in place.
        name: base name of the pickle file (without the ".pkl" suffix);
            also used as the label in the printed score line.
        needscale: when truthy the module-level ``Xtrain_scaled`` is used,
            otherwise the module-level ``Xtrain``.
    """
    traindata = Xtrain_scaled if needscale else Xtrain
    estimator = common.load_predictor("%s.pkl" % name)

    score = estimator.score(traindata, ytrain)
    # A parenthesized single-argument print produces the same output under
    # Python 2 (print statement) and Python 3 (print function).
    print("[%s] training score: %f" % (name, score))

    predictions = estimator.predict(traindata)
    allresults.append(predictions)
def fit_train_data(allresults, name, needscale):
    """Evaluate a pickled estimator on the training set and keep its output.

    The estimator is loaded from "<name>.pkl" with ``common.load_predictor``;
    it is scored and used for prediction only, not re-fitted.

    Args:
        allresults: object exposing ``append``; receives the predictions the
            estimator makes on the training data (mutated in place).
        name: pickle base name, also shown in the printed score line.
        needscale: selects the module-level ``Xtrain_scaled`` when truthy
            and the module-level ``Xtrain`` otherwise.
    """
    if needscale:
        traindata = Xtrain_scaled
    else:
        traindata = Xtrain

    estimator = common.load_predictor("%s.pkl" % name)

    # Single parenthesized argument keeps this line valid, with identical
    # output, on both Python 2 and Python 3.
    print("[%s] training score: %f" % (
        name, estimator.score(traindata, ytrain)))

    allresults.append(estimator.predict(traindata))
def fit_estimator(name, need_scale, y, X, scaledX):
    """Clone a saved estimator, fit the clone, and wrap it in ``Estimator``.

    Args:
        name: base name of the pickle; "<name>.pkl" is loaded through
            ``common.load_predictor``.
        need_scale: when truthy the clone is fitted on ``scaledX``,
            otherwise on ``X``.
        y: target values passed to ``fit``.
        X: unscaled feature data.
        scaledX: scaled feature data.

    Returns:
        ``Estimator(fitted_clone, name, need_scale)``.
    """
    template = common.load_predictor("%s.pkl" % name)
    # skclone is presumably sklearn.base.clone - confirm against the imports.
    fresh = skclone(template)

    features = scaledX if need_scale else X
    fresh.fit(features, y)

    return Estimator(fresh, name, need_scale)
def fit_estimator(name, need_scale, y, X, scaledX):
    """Fit a clone of the estimator stored in "<name>.pkl".

    The stored estimator is loaded with ``common.load_predictor`` and passed
    through ``skclone``; the resulting object is fitted against ``y`` using
    ``scaledX`` when ``need_scale`` is truthy and ``X`` otherwise.

    Returns:
        An ``Estimator`` built from the fitted clone, ``name`` and
        ``need_scale``.
    """
    estimator = skclone(common.load_predictor("%s.pkl" % name))

    # Pick the feature data once, then fit in a single place.
    training_matrix = scaledX if need_scale else X
    estimator.fit(training_matrix, y)

    return Estimator(estimator, name, need_scale)
def train_whole():
    """Retrain the gradient-boosting classifier on all training rows.

    Steps:
      1. Read "train_processed.csv" (indexed by "PassengerId"); features are
         the module-level ``feature_names`` columns, the target is the
         "Survived" column.
      2. Load the earlier estimator from "gbdt-cv.pkl" and print its score
         on the full training data.
      3. Build a new ``GradientBoostingClassifier`` that copies ``loss``,
         ``learning_rate``, ``n_estimators`` and ``max_depth`` from the
         earlier estimator, fit it on all rows, and print its score.
      4. Save the new estimator to "gbdt.pkl" via ``common.dump_predictor``.
    """
    train_df = pd.read_csv("train_processed.csv", index_col="PassengerId")
    Xtrain = train_df[feature_names]
    ytrain = train_df["Survived"]

    # ------------------------------ load
    # This estimator was trained on a partial dataset, without using the
    # validation part.
    prev_estimator = common.load_predictor("gbdt-cv.pkl")
    # NOTE(review): the label says "cross-validation", but the score is
    # computed on the whole training file read above.
    # Parenthesized single-argument print works on Python 2 and Python 3.
    print("cross-validation score: %f" % (prev_estimator.score(Xtrain, ytrain)))

    # ------------------------------ train
    # Now that the parameters are known, train another estimator with all
    # of the data.
    gbdt = GradientBoostingClassifier(
        verbose=1,
        loss=prev_estimator.loss,
        learning_rate=prev_estimator.learning_rate,
        n_estimators=prev_estimator.n_estimators,
        max_depth=prev_estimator.max_depth,
    )
    print(gbdt)

    gbdt.fit(Xtrain, ytrain)
    # The Python 2 statement `print a, b` writes str(a), a space, str(b);
    # "%s %s" reproduces that text exactly and also runs on Python 3.
    print("%s %s" % ("training with all data, get score: ",
                     gbdt.score(Xtrain, ytrain)))

    # ------------------------------ save
    common.dump_predictor("gbdt.pkl", gbdt)
def train_whole(): prev_knn = common.load_predictor("knn-cv.pkl") ()