def train(self, ds):
    """Fit one group of models on shopping-center rows and one on the rest.

    ``ds`` is split on its ``'StoreType_Shopping Center'`` column (1 vs 0).
    Each half is turned into a set with ``NumberOfCustomers`` as target and
    ``NumberOfSales`` / ``Month`` excluded. ``self.nsc`` models produced by
    ``self.model_shopc`` are appended to ``self.models_sc``; ``self.noth``
    models produced by ``self.model_others`` are appended to
    ``self.models_o``. Every model is fitted on its own
    ``random_sampling(1.0)`` draw, and the value of ``eval.evaluate`` on the
    set's ``xtr`` / ``ytr`` split is printed after each fit.
    """

    def make_set(frame):
        # Both store groups share the same builder recipe.
        return (
            sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=frame)
            .exclude('NumberOfSales', 'Month')
            .build()
        )

    def fit_members(dataset, count, factory, bucket, step_label, score_label):
        # Fit `count` fresh models, storing each one in `bucket`.
        for idx in range(count):
            print(step_label, idx)
            sample_x, sample_y = dataset.random_sampling(1.0)
            member = factory()
            member.fit(sample_x, sample_y)
            fitted = member.predict(dataset.xtr)
            bucket.append(member)
            print(score_label, idx, " TRAIN R2: ", eval.evaluate(dataset.ytr, fitted))

    flag_column = 'StoreType_Shopping Center'
    center_rows = ds[ds[flag_column] == 1]
    other_rows = ds[ds[flag_column] == 0]

    center_set = make_set(center_rows)
    print("SHOPPING CENTERS MODEL TRAINING: ")
    fit_members(center_set, self.nsc, self.model_shopc, self.models_sc, "SC :", "SC ")

    other_set = make_set(other_rows)
    print("OTHER SHOPS MODEL TRAINING: ")
    fit_members(other_set, self.noth, self.model_others, self.models_o, "OTH :", "OTH ")
def test(self, ds):
    """Print the test-split score for both store groups.

    ``ds`` is split on its ``'StoreType_Shopping Center'`` column (1 vs 0).
    Each half is turned into a set with ``NumberOfCustomers`` as target and
    ``NumberOfSales`` / ``Month`` excluded. Shopping-center rows are scored
    with ``self.predict_sc`` and the remaining rows with
    ``self.predict_oth``; the value of ``eval.evaluate`` on each set's
    ``xts`` / ``yts`` split is printed.
    """

    def make_set(frame):
        # Both store groups share the same builder recipe.
        return (
            sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=frame)
            .exclude('NumberOfSales', 'Month')
            .build()
        )

    flag_column = 'StoreType_Shopping Center'
    center_rows = ds[ds[flag_column] == 1]
    other_rows = ds[ds[flag_column] == 0]

    center_set = make_set(center_rows)
    print("SHOPPING CENTERS MODEL EVALUATION")
    center_preds = self.predict_sc(center_set.xts)
    print("SC TEST R2: ", eval.evaluate(center_set.yts, center_preds))

    other_set = make_set(other_rows)
    print("OTHERS MODEL EVALUATION")
    other_preds = self.predict_oth(other_set.xts)
    print("OTH TEST R2: ", eval.evaluate(other_set.yts, other_preds))
from numpy import loadtxt
from xgboost import XGBRegressor
from sklearn import linear_model
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
import dataset.setbuilder as sb
import models.sklearn.evaluator as eval

# Build the set: NumberOfCustomers is the target, NumberOfSales and Month
# are left out of the features.
builder = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True)
data = builder.exclude('NumberOfSales', 'Month').build()

# Hyperparameters of the gradient-boosted regressor, gathered in one place.
# NOTE(review): `silent` appears to have been superseded by `verbosity` in
# newer xgboost releases - confirm against the installed version.
xgb_params = dict(
    n_estimators=100,
    learning_rate=0.2,
    colsample_bytree=1,
    max_depth=4,
    silent=False,
    n_jobs=8,
)
model = XGBRegressor(**xgb_params)
model.fit(data.xtr, data.ytr)

# Predict on both splits first, then report both scores.
train_pred = model.predict(data.xtr)
test_pred = model.predict(data.xts)
print('R2 TRAIN = %s' % eval.evaluate(data.ytr, train_pred))
print('R2 TEST = %s' % eval.evaluate(data.yts, test_pred))
'Month').build() #data = sb.SetBuilder(target='NumberOfSales').exclude('Day').build() nn = neural_network.MLPRegressor(hidden_layer_sizes=(100, 5), activation='relu', solver='adam', batch_size='auto', learning_rate='adaptive', learning_rate_init=0.001, max_iter=50, shuffle=True, random_state=9, tol=0.000001, verbose=True, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08) n = nn.fit(data.xtr, data.ytr.ravel()) ypred = nn.predict(data.xts) ds.save_dataset(pd.DataFrame(ypred), 'customer_pred_jan_feb_NN.csv') print('R2 = %s' % eval.evaluate(data.yts, ypred))
max_iter=50, shuffle=True, tol=0.000001, verbose=True, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08) dt.fit(bagx, bagy.ravel()) models.append(dt) y = dt.predict(data.xtr) print('it = %s, TRAIN R2 = %s' % (i, eval.evaluate(data.ytr, y))) yy.append(y) yy = np.array(yy) pred = yy.mean(axis=0) print('Bagging R2 = %s' % eval.evaluate(data.ytr, pred)) re, totr, totp = ev_cust.region_error(data.ytr, pred, regions, ids, dates) diff = totr - totp print("REG_ERR: ", re * 100) print("REG_MEAN_ERR: ", re.mean() * 100) print("REAL_SUM: ", totr.sum()) print("PRED_SUM: ", totp.sum()) print("SUM_OF_DIFFS: ", diff.sum())
axis=1)).squeeze() clean_row = clean_row.reshape([1, -1]) if sc: return self.predict_sc(clean_row).squeeze() else: return self.predict_oth(clean_row).squeeze() def model1(): return linear_model.Ridge(alpha=10) def model2(): return tree.DecisionTreeRegressor(max_depth=9) data = data_manager.read_dataset("best_for_customers.csv") model = CustomersPredoctorSeparateShopCenters(model1, model1, 10, 10) model.train(data) model.test(data) data = utils.get_frame_in_range(data, 1, 2018, 2, 2018) preds = [] for i in range(len(data)): irow = data.iloc[[i]] preds.append(model.predict(irow)) preds = np.array(preds) data = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=data) \ .exclude('NumberOfSales', 'Month') \ .build() print("FINAL R2: ", eval.evaluate(data.yts, preds))
# data = sb.SetBuilder(target='NumberOfSales', autoexclude=True, dataset='mean_var_on_cust_from_tain.csv').build()

# Fit a single depth-limited regression tree on the training split.
depth = 8
dtree = tree.DecisionTreeRegressor(max_depth=depth)
dtree.fit(data.xtr, data.ytr)

# Save the tree and read it back, so the scores below come from the reloaded
# model. (The prediction that used to be computed before this round trip was
# overwritten without ever being read, so it has been dropped.)
pr.save_model(dtree, 'decision_tree_cust')
dtree = pr.load_model('decision_tree_cust')
ypred = dtree.predict(data.xts)
print('R2 train = %s' % eval.evaluate(data.ytr, dtree.predict(data.xtr)))
print('R2 test = %s' % eval.evaluate(data.yts, ypred))

print("Plain Decision regression tree without bagging")

# Fit `it` trees, each on its own data.random_sampling(1) draw, save every
# one of them, and collect their test-split predictions row by row in `yy`.
it = 10
yy = []
for i in range(it):
    bagx, bagy = data.random_sampling(1)
    dt = tree.DecisionTreeRegressor(max_depth=depth)
    dt.fit(bagx, bagy)
    pr.save_model(dt, 'dt_cust_bootstraping_%s' % i)
    y = dt.predict(data.xts)
    print('it = %s, R2 = %s' % (i, eval.evaluate(data.yts, y)))
    yy.append(y)
yy = np.array(yy)
import dataset.setbuilder as sb
from sklearn import linear_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import models.sklearn.evaluator as eval
from sklearn.preprocessing import PolynomialFeatures

# Build the set: NumberOfSales is the target and NearestCompetitor is the
# only feature kept; the two `split` entries are passed through to SetBuilder.
data = sb.SetBuilder(
    target="NumberOfSales",
    dataset="final_for_sales_train.csv",
    autoexclude=False,
    split=[[(3, 2016, 1, 2018)], [(3, 2016, 2, 2018)]],
).only('NearestCompetitor').build()

poly_degree = 2

# Polynomial regression: expand the features to `poly_degree`, then fit an
# ordinary least-squares model on the expanded features.
print("Linear regression started, polynomial degree = %s" % poly_degree)
poly = PolynomialFeatures(degree=poly_degree)
xtr_ = poly.fit_transform(data.xtr)
# The expansion is fitted on the training split only; the test split is just
# transformed with it.
xts_ = poly.transform(data.xts)

model = linear_model.LinearRegression()
# Fit and score on the expanded features. Previously the raw features were
# used here, so the polynomial expansion had no effect on the model.
model.fit(xtr_, data.ytr)
print(eval.evaluate(data.ytr, model.predict(xtr_)))
print(eval.evaluate(data.yts, model.predict(xts_)))
print(model.coef_)
print(model.intercept_)