Example #1
0
def train(Xtrain, Ytrain, fmodel, isfile=True):
    """Fit a gradient boosting regressor and save it via ``gbr_save_model``.

    Args:
        Xtrain: Training features. Passed to ``pd.read_csv`` when ``isfile``
            is true; otherwise handed to ``rain_feature.filter_features``
            unchanged (presumably an already-loaded DataFrame -- confirm
            against callers). The filtered result must have an ``Id`` column.
        Ytrain: Training targets, loaded the same way; must have an ``Id``
            column.
        fmodel: Destination handed to ``gbr_save_model``.
        isfile: Flag coerced with ``bool(int(...))``, so ``True``, ``1`` and
            ``'1'`` enable CSV loading while ``False``, ``0`` and ``'0'``
            disable it.
    """
    isfile = bool(int(isfile))
    if isfile:
        Xtrain = pd.read_csv(Xtrain)
        Ytrain = pd.read_csv(Ytrain)

    Xtrain = rain_feature.filter_features(Xtrain)
    Xtrain = Xtrain.set_index('Id')
    # NaN and +/-inf feature cells are all replaced by the 9999 sentinel.
    Xtrain[np.isnan(Xtrain)] = 9999
    Xtrain[np.isinf(Xtrain)] = 9999
    Ytrain = Ytrain.set_index('Id')
    Ytrain.fillna(9999, inplace=True)

    # Needed for SciKit Gradient Boosting Regressor
    # Single-argument parenthesised print emits the same text on Python 2
    # and Python 3.
    print('Read the input: %d' % len(Xtrain))
    learning_rate = 0.1
    ntrees = 1000
    num_features = 5
    max_depth = 8
    alpha = 0.9
    gbr = GBR_train(Xtrain, Ytrain, learning_rate, ntrees, num_features,
                    max_depth, alpha)
    gbr_save_model(gbr, fmodel)

    # Needed for XGBoost
    '''
Example #2
0
def cross_validate(Xtrain, Ytrain, op_file='run.cv', isfile=True):
    """Prepare the training data and hand it to ``GBR_cv``.

    Args:
        Xtrain: Training features. Read with ``pd.read_csv`` when ``isfile``
            is true; otherwise passed to ``rain_feature.filter_features``
            unchanged (presumably an already-loaded DataFrame -- confirm
            against callers). The filtered result must have an ``Id`` column.
        Ytrain: Training targets, loaded the same way; must have an ``Id``
            column.
        op_file: Forwarded to ``GBR_cv`` as its third argument.
        isfile: Flag interpreted as ``bool(int(isfile))``.
    """
    if bool(int(isfile)):
        Xtrain, Ytrain = pd.read_csv(Xtrain), pd.read_csv(Ytrain)

    # Features: filter, index by Id, then overwrite NaN and +/-inf cells
    # with the 9999 sentinel.
    features = rain_feature.filter_features(Xtrain).set_index('Id')
    features[np.isnan(features)] = 9999
    features[np.isinf(features)] = 9999

    # Targets: index by Id and fill missing values with the same sentinel.
    targets = Ytrain.set_index('Id')
    targets.fillna(9999, inplace=True)

    # Run SciKit Cross Validation for Gradient Boosting Regressor
    GBR_cv(features, targets, op_file)

    # Needed for XGBoost
    '''
Example #3
0
def cross_validate(Xtrain,Ytrain,op_file='run.cv',isfile=True):
	"""Prepare the training data and hand it to ``GBR_cv``.

	Args:
		Xtrain: Training features. Read with ``pd.read_csv`` when ``isfile``
			is true; otherwise passed to ``rain_feature.filter_features``
			unchanged (presumably an already-loaded DataFrame -- confirm
			against callers). The filtered result must have an ``Id`` column.
		Ytrain: Training targets, loaded the same way; must have an ``Id``
			column.
		op_file: Forwarded to ``GBR_cv`` as its third argument.
		isfile: Flag interpreted as ``bool(int(isfile))``.
	"""
	if bool(int(isfile)):
		Xtrain, Ytrain = pd.read_csv(Xtrain), pd.read_csv(Ytrain)

	# Features: filter, index by Id, then overwrite NaN and +/-inf cells
	# with the 9999 sentinel.
	features = rain_feature.filter_features(Xtrain).set_index('Id')
	features[np.isnan(features)] = 9999
	features[np.isinf(features)] = 9999

	# Targets: index by Id and fill missing values with the same sentinel.
	targets = Ytrain.set_index('Id')
	targets.fillna(9999, inplace=True)

	# Run SciKit Cross Validation for Gradient Boosting Regressor
	GBR_cv(features, targets, op_file)

	# Needed for XGBoost 
	'''
Example #4
0
def train(Xtrain,Ytrain,fmodel,isfile=True):
	"""Fit a gradient boosting regressor and save it via ``gbr_save_model``.

	Args:
		Xtrain: Training features. Passed to ``pd.read_csv`` when ``isfile``
			is true; otherwise handed to ``rain_feature.filter_features``
			unchanged (presumably an already-loaded DataFrame -- confirm
			against callers). The filtered result must have an ``Id`` column.
		Ytrain: Training targets, loaded the same way; must have an ``Id``
			column.
		fmodel: Destination handed to ``gbr_save_model``.
		isfile: Flag coerced with ``bool(int(...))``, so ``True``, ``1`` and
			``'1'`` enable CSV loading while ``False``, ``0`` and ``'0'``
			disable it.
	"""
	isfile = bool(int(isfile))
	if isfile:
		Xtrain = pd.read_csv(Xtrain)
		Ytrain = pd.read_csv(Ytrain)

	Xtrain = rain_feature.filter_features(Xtrain)
	Xtrain = Xtrain.set_index('Id')
	# NaN and +/-inf feature cells are all replaced by the 9999 sentinel.
	Xtrain[np.isnan(Xtrain)] = 9999
	Xtrain[np.isinf(Xtrain)] = 9999
	Ytrain = Ytrain.set_index('Id')
	Ytrain.fillna(9999, inplace=True)

	# Needed for SciKit Gradient Boosting Regressor
	# Single-argument parenthesised print emits the same text on Python 2
	# and Python 3.
	print('Read the input: %d' % len(Xtrain))
	learning_rate = 0.1
	ntrees = 1000
	num_features = 5
	max_depth = 8
	alpha = 0.9
	gbr = GBR_train(Xtrain, Ytrain, learning_rate, ntrees, num_features,
					max_depth, alpha)
	gbr_save_model(gbr, fmodel)

	# Needed for XGBoost 
	'''
Example #5
0
def predict(fmodel, testX, op_file='data/output.csv', isfile=True):
    """Predict with a saved gradient boosting model and write a CSV.

    The output has an ``Id`` index column and a single ``Expected`` column.
    Rows are numbered 1..N in prediction order; the ``Id`` values of the
    input are not carried over to the output.

    Args:
        fmodel: Model location handed to ``gbr_load_model``.
        testX: Test features. Read with ``pd.read_csv`` when ``isfile`` is
            true; otherwise passed to ``rain_feature.filter_features``
            unchanged (presumably an already-loaded DataFrame -- confirm
            against callers). The filtered result must have an ``Id`` column.
        op_file: Path the predictions are written to with ``to_csv``.
        isfile: Flag coerced with ``bool(int(...))``, so ``True``, ``1`` and
            ``'1'`` enable CSV loading while ``False``, ``0`` and ``'0'``
            disable it.
    """
    # Coerce the flag the same way train() and cross_validate() do; without
    # this a string such as '0' is truthy and wrongly triggers read_csv.
    isfile = bool(int(isfile))
    if isfile:
        testX = pd.read_csv(testX)

    testX = rain_feature.filter_features(testX)
    testX = testX.set_index('Id')
    # NaN and +/-inf feature cells are all replaced by the 9999 sentinel.
    testX[np.isnan(testX)] = 9999
    testX[np.isinf(testX)] = 9999
    # Single-argument parenthesised print emits the same text on Python 2
    # and Python 3.
    print("Predicting")

    gbr = gbr_load_model(fmodel)
    pred = GBR_predict(gbr, testX)

    pred = pd.DataFrame(pred)
    pred.index = range(1, len(pred) + 1)
    pred.columns = ['Expected']
    pred.index.name = 'Id'
    pred.to_csv(op_file)
Example #6
0
def predict(fmodel, testX, op_file='data/output.csv', isfile=True):
    """Predict with a saved gradient boosting model and write a CSV.

    The output has an ``Id`` index column and a single ``Expected`` column.
    Rows are numbered 1..N in prediction order; the ``Id`` values of the
    input are not carried over to the output.

    Args:
        fmodel: Model location handed to ``gbr_load_model``.
        testX: Test features. Read with ``pd.read_csv`` when ``isfile`` is
            true; otherwise passed to ``rain_feature.filter_features``
            unchanged (presumably an already-loaded DataFrame -- confirm
            against callers). The filtered result must have an ``Id`` column.
        op_file: Path the predictions are written to with ``to_csv``.
        isfile: Flag coerced with ``bool(int(...))``, so ``True``, ``1`` and
            ``'1'`` enable CSV loading while ``False``, ``0`` and ``'0'``
            disable it.
    """
    # Coerce the flag the same way train() and cross_validate() do; without
    # this a string such as '0' is truthy and wrongly triggers read_csv.
    isfile = bool(int(isfile))
    if isfile:
        testX = pd.read_csv(testX)

    testX = rain_feature.filter_features(testX)
    testX = testX.set_index('Id')
    # NaN and +/-inf feature cells are all replaced by the 9999 sentinel.
    testX[np.isnan(testX)] = 9999
    testX[np.isinf(testX)] = 9999
    # Single-argument parenthesised print emits the same text on Python 2
    # and Python 3.
    print("Predicting")

    gbr = gbr_load_model(fmodel)
    pred = GBR_predict(gbr, testX)

    pred = pd.DataFrame(pred)
    pred.index = range(1, len(pred) + 1)
    pred.columns = ['Expected']
    pred.index.name = 'Id'
    pred.to_csv(op_file)