# train: prepare the training inputs, train a model through GBR_train and
# hand the result to gbr_save_model together with `fmodel`.
#
# Parameters:
#   Xtrain, Ytrain -- passed to pd.read_csv under `if isfile:`.
#   fmodel         -- forwarded unchanged to gbr_save_model(gbr, fmodel).
#   isfile         -- coerced with bool(int(isfile)), so 1/0, True/False and
#                     numeric strings such as "1"/"0" are accepted; a
#                     non-numeric string makes int() raise ValueError.
#
# Visible steps: rain_feature.filter_features on Xtrain, 'Id' becomes the
# index of both frames, NaN and infinite entries of Xtrain are overwritten
# with 9999, missing Ytrain values are filled with 9999 in place, the length
# of Xtrain is printed, then GBR_train is called with learning_rate=0.1,
# ntrees=1000, num_features=5, max_depth=8, alpha=0.9.
# No return statement is visible.
#
# NOTE(review): this definition is collapsed onto one physical line, so the
#   indentation is lost and it cannot be determined here which statements
#   belong to the `if isfile:` body -- restore the layout before relying on it.
# NOTE(review): `v = 1` is assigned but never read in the visible code.
# NOTE(review): `print '...'` is the Python 2 print statement.
# NOTE(review): the trailing ''' after "# Needed for XGBoost" is not closed
#   within this definition -- presumably the start of a disabled XGBoost
#   variant that is cut off here; confirm against the original file.
# NOTE(review): a second `train` with the same statements (semicolon style)
#   appears later in this file; if both are live, the later one wins.
def train(Xtrain, Ytrain, fmodel, isfile=True): isfile = bool(int(isfile)) if isfile: Xtrain = pd.read_csv(Xtrain) Ytrain = pd.read_csv(Ytrain) Xtrain = rain_feature.filter_features(Xtrain) Xtrain = Xtrain.set_index('Id') Xtrain[np.isnan(Xtrain)] = 9999 Xtrain[np.isinf(Xtrain)] = 9999 Ytrain = Ytrain.set_index('Id') Ytrain.fillna(9999, inplace=True) # Needed for SciKit Gradient Boosting Regressor print 'Read the input: %d' % len(Xtrain) learning_rate = 0.1 ntrees = 1000 num_features = 5 max_depth = 8 alpha = 0.9 v = 1 gbr = GBR_train(Xtrain, Ytrain, learning_rate, ntrees, num_features, max_depth, alpha) gbr_save_model(gbr, fmodel) # Needed for XGBoost '''
# cross_validate: prepare the training inputs and pass them, together with
# `op_file`, to GBR_cv.
#
# Parameters:
#   Xtrain, Ytrain -- passed to pd.read_csv under `if isfile:`.
#   op_file        -- forwarded unchanged to GBR_cv (default 'run.cv');
#                     presumably where the results are written -- verify
#                     against GBR_cv.
#   isfile         -- coerced with bool(int(isfile)), so 1/0, True/False and
#                     numeric strings such as "1"/"0" are accepted; a
#                     non-numeric string makes int() raise ValueError.
#
# Visible steps: rain_feature.filter_features on Xtrain, 'Id' becomes the
# index of both frames, NaN and infinite entries of Xtrain are overwritten
# with 9999, missing Ytrain values are filled with 9999 in place, then
# GBR_cv(Xtrain, Ytrain, op_file) is called. No return statement is visible.
#
# NOTE(review): this definition is collapsed onto one physical line, so the
#   indentation is lost and it cannot be determined here which statements
#   belong to the `if isfile:` body -- restore the layout before relying on it.
# NOTE(review): the trailing ''' after "# Needed for XGBoost" is not closed
#   within this definition -- presumably the start of a disabled XGBoost
#   variant that is cut off here; confirm against the original file.
# NOTE(review): a second `cross_validate` with the same statements (semicolon
#   style) follows in this file; if both are live, the later one wins.
def cross_validate(Xtrain, Ytrain, op_file='run.cv', isfile=True): isfile = bool(int(isfile)) if isfile: Xtrain = pd.read_csv(Xtrain) Ytrain = pd.read_csv(Ytrain) Xtrain = rain_feature.filter_features(Xtrain) Xtrain = Xtrain.set_index('Id') Xtrain[np.isnan(Xtrain)] = 9999 Xtrain[np.isinf(Xtrain)] = 9999 Ytrain = Ytrain.set_index('Id') Ytrain.fillna(9999, inplace=True) # Run SciKit Cross Validation for Gradient Boosting Regressor GBR_cv(Xtrain, Ytrain, op_file) # Needed for XGBoost '''
# cross_validate: prepare the training inputs and pass them, together with
# `op_file`, to GBR_cv.
#
# Parameters:
#   Xtrain, Ytrain -- passed to pd.read_csv under `if isfile:`.
#   op_file        -- forwarded unchanged to GBR_cv (default 'run.cv');
#                     presumably where the results are written -- verify
#                     against GBR_cv.
#   isfile         -- coerced with bool(int(isfile)), so 1/0, True/False and
#                     numeric strings such as "1"/"0" are accepted; a
#                     non-numeric string makes int() raise ValueError.
#
# Visible steps: rain_feature.filter_features on Xtrain, 'Id' becomes the
# index of both frames, NaN and infinite entries of Xtrain are overwritten
# with 9999, missing Ytrain values are filled with 9999 in place, then
# GBR_cv(Xtrain,Ytrain,op_file) is called. No return statement is visible.
#
# NOTE(review): this definition is collapsed onto one physical line, so the
#   indentation is lost and it cannot be determined here which statements
#   belong to the `if isfile:` body -- restore the layout before relying on it.
# NOTE(review): the statement-terminating semicolons are redundant in Python.
# NOTE(review): the trailing ''' after "# Needed for XGBoost" is not closed
#   within this definition -- presumably the start of a disabled XGBoost
#   variant that is cut off here; confirm against the original file.
# NOTE(review): another `cross_validate` with the same statements appears
#   earlier in this file; if both are live, this later one wins.
def cross_validate(Xtrain,Ytrain,op_file='run.cv',isfile=True): isfile = bool(int(isfile)) if isfile: Xtrain = pd.read_csv(Xtrain); Ytrain = pd.read_csv(Ytrain); Xtrain = rain_feature.filter_features(Xtrain); Xtrain = Xtrain.set_index('Id'); Xtrain[np.isnan(Xtrain)] = 9999; Xtrain[np.isinf(Xtrain)] = 9999; Ytrain = Ytrain.set_index('Id'); Ytrain.fillna(9999,inplace=True); # Run SciKit Cross Validation for Gradient Boosting Regressor GBR_cv(Xtrain,Ytrain,op_file); # Needed for XGBoost '''
# train: prepare the training inputs, train a model through GBR_train and
# hand the result to gbr_save_model together with `fmodel`.
#
# Parameters:
#   Xtrain, Ytrain -- passed to pd.read_csv under `if isfile:`.
#   fmodel         -- forwarded unchanged to gbr_save_model(gbr,fmodel).
#   isfile         -- coerced with bool(int(isfile)), so 1/0, True/False and
#                     numeric strings such as "1"/"0" are accepted; a
#                     non-numeric string makes int() raise ValueError.
#
# Visible steps: rain_feature.filter_features on Xtrain, 'Id' becomes the
# index of both frames, NaN and infinite entries of Xtrain are overwritten
# with 9999, missing Ytrain values are filled with 9999 in place, the length
# of Xtrain is printed, then GBR_train is called with learning_rate=0.1,
# ntrees=1000, num_features=5, max_depth=8, alpha=0.9.
# No return statement is visible.
#
# NOTE(review): this definition is collapsed onto one physical line, so the
#   indentation is lost and it cannot be determined here which statements
#   belong to the `if isfile:` body -- restore the layout before relying on it.
# NOTE(review): `v = 1` is assigned but never read in the visible code.
# NOTE(review): `print '...'` is the Python 2 print statement, and the
#   statement-terminating semicolons are redundant in Python.
# NOTE(review): the trailing ''' after "# Needed for XGBoost" is not closed
#   within this definition -- presumably the start of a disabled XGBoost
#   variant that is cut off here; confirm against the original file.
# NOTE(review): another `train` with the same statements appears earlier in
#   this file; if both are live, this later one wins.
def train(Xtrain,Ytrain,fmodel,isfile=True): isfile = bool(int(isfile)); if isfile: Xtrain = pd.read_csv(Xtrain); Ytrain = pd.read_csv(Ytrain); Xtrain = rain_feature.filter_features(Xtrain); Xtrain = Xtrain.set_index('Id'); Xtrain[np.isnan(Xtrain)] = 9999; Xtrain[np.isinf(Xtrain)] = 9999; Ytrain = Ytrain.set_index('Id'); Ytrain.fillna(9999,inplace=True); # Needed for SciKit Gradient Boosting Regressor print 'Read the input: %d'%len(Xtrain); learning_rate = 0.1; ntrees = 1000; num_features = 5; max_depth = 8; alpha = 0.9; v = 1; gbr = GBR_train(Xtrain, Ytrain, learning_rate, ntrees, num_features, max_depth, alpha); gbr_save_model(gbr,fmodel); # Needed for XGBoost '''
# predict: load a saved model, run it on the test inputs and write the
# predictions to `op_file` as CSV.
#
# Parameters:
#   fmodel  -- passed to gbr_load_model to obtain the model.
#   testX   -- passed to pd.read_csv under `if isfile:`.
#   op_file -- destination given to DataFrame.to_csv
#              (default 'data/output.csv').
#   isfile  -- used directly as the `if` condition.
#
# Visible steps: rain_feature.filter_features on testX, 'Id' becomes its
# index, NaN and infinite entries are overwritten with 9999, "Predicting" is
# printed, then GBR_predict(gbr, testX) is wrapped in a DataFrame whose index
# is renumbered 1..len(pred) and named 'Id', with the column named
# 'Expected'. No return statement is visible.
#
# The triple-quoted string in the middle is a bare expression with no runtime
# effect; it holds a disabled XGBoost prediction path.
#
# NOTE(review): this definition is collapsed onto one physical line, so the
#   indentation is lost and it cannot be determined here which statements
#   belong to the `if isfile:` body -- restore the layout before relying on it.
# NOTE(review): unlike train/cross_validate in this file, `isfile` is not
#   normalised with bool(int(...)), so a string such as "0" is truthy here.
# NOTE(review): the output index is rebuilt from 1 instead of reusing the
#   'Id' index of testX -- confirm the test Ids really are 1..n in order.
# NOTE(review): `print "..."` is the Python 2 print statement.
# NOTE(review): a second `predict` with the same statements (semicolon style)
#   follows in this file; if both are live, the later one wins.
def predict(fmodel, testX, op_file='data/output.csv', isfile=True): if isfile: testX = pd.read_csv(testX) testX = rain_feature.filter_features(testX) testX = testX.set_index('Id') testX[np.isnan(testX)] = 9999 testX[np.isinf(testX)] = 9999 print "Predicting" ''' bst = xgboost_load_model(fmodel); xg_val = xgb.DMatrix(testX.as_matrix(),missing=np.nan); pred = bst.predict(xg_val); ''' gbr = gbr_load_model(fmodel) pred = GBR_predict(gbr, testX) pred = pd.DataFrame(pred) pred.index = range(1, len(pred) + 1) pred.columns = ['Expected'] pred.index.name = 'Id' pred.to_csv(op_file)
# predict: load a saved model, run it on the test inputs and write the
# predictions to `op_file` as CSV.
#
# Parameters:
#   fmodel  -- passed to gbr_load_model to obtain the model.
#   testX   -- passed to pd.read_csv under `if isfile:`.
#   op_file -- destination given to DataFrame.to_csv
#              (default 'data/output.csv').
#   isfile  -- used directly as the `if` condition.
#
# Visible steps: rain_feature.filter_features on testX, 'Id' becomes its
# index, NaN and infinite entries are overwritten with 9999, "Predicting" is
# printed, then GBR_predict(gbr,testX) is wrapped in a DataFrame whose index
# is renumbered 1..len(pred) and named 'Id', with the column named
# 'Expected'. No return statement is visible.
#
# The triple-quoted string in the middle is a bare expression with no runtime
# effect; it holds a disabled XGBoost prediction path.
#
# NOTE(review): this definition is collapsed onto one physical line, so the
#   indentation is lost and it cannot be determined here which statements
#   belong to the `if isfile:` body -- restore the layout before relying on it.
# NOTE(review): unlike train/cross_validate in this file, `isfile` is not
#   normalised with bool(int(...)), so a string such as "0" is truthy here.
# NOTE(review): the output index is rebuilt from 1 instead of reusing the
#   'Id' index of testX -- confirm the test Ids really are 1..n in order.
# NOTE(review): `print "..."` is the Python 2 print statement, and the
#   statement-terminating semicolons are redundant in Python.
# NOTE(review): another `predict` with the same statements appears earlier in
#   this file; if both are live, this later one wins.
def predict(fmodel,testX,op_file='data/output.csv',isfile=True): if isfile: testX = pd.read_csv(testX); testX = rain_feature.filter_features(testX); testX = testX.set_index('Id'); testX[np.isnan(testX)] = 9999; testX[np.isinf(testX)] = 9999; print "Predicting"; ''' bst = xgboost_load_model(fmodel); xg_val = xgb.DMatrix(testX.as_matrix(),missing=np.nan); pred = bst.predict(xg_val); ''' gbr = gbr_load_model(fmodel); pred = GBR_predict(gbr,testX); pred = pd.DataFrame(pred); pred.index = range(1,len(pred)+1); pred.columns = ['Expected']; pred.index.name = 'Id'; pred.to_csv(op_file);