def fit(self):
    """Fit ``self.count`` decision-tree regressors on resampled training data.

    For every round a sample is drawn with
    ``self.training_ds.random_sampling(1.0)``, a new
    ``tree.DecisionTreeRegressor`` is fitted on it, and its predictions on
    the full training features and on the testing features are appended to
    ``self.training_predictions`` and ``self.predictions`` respectively.
    Each fitted model is handed to ``self.save_partial``.  After the last
    round ``self.compute_final_predictions()`` is called and the R2 of
    ``self.final_training_predictions`` against the training targets is
    printed.
    """
    for round_idx in range(self.count):
        # presumably 1.0 is the fraction of the training set to draw -- TODO confirm
        sample_x, sample_y = self.training_ds.random_sampling(1.0)

        regressor = tree.DecisionTreeRegressor()
        regressor.fit(sample_x, sample_y)

        # Score on the whole training set, not only on the drawn sample.
        train_preds = regressor.predict(self.training_ds.xtr)
        self.training_predictions.append(train_preds)
        train_r2 = eval.r2_score(self.training_ds.ytr, train_preds)
        print("Model %s :: Sales R2 on training set: %s" % (round_idx, train_r2))

        # No R2 is reported for these predictions: when the final model is
        # built there is no labelled test set to compare against.
        test_preds = regressor.predict(self.testing_ds.xts)
        self.predictions.append(test_preds)

        self.save_partial(regressor, round_idx)

    self.compute_final_predictions()

    overall_r2 = eval.r2_score(self.training_ds.ytr,
                               self.final_training_predictions)
    print("Overall sales R2 on training set: %s" % overall_r2)
    print("Done with sales bagging, models have been saved in 'saved' dir.")
    print("When ready, execute save_predictions.")
def fit(self):
    """Fit ``self.count`` customer models on resampled training data.

    For every round a sample is drawn with
    ``self.training_ds.random_sampling(1.0)`` and a new
    ``skc.LinearSklearn(1, CustomerModel.model)`` is trained on it.  Its
    squeezed predictions on the full training features are appended to
    ``self.training_predictions``; its predictions on the testing features
    are appended (as returned, without squeezing) to ``self.predictions``.
    Each trained model is handed to ``self.save_partial``.  After the last
    round ``self.compute_final_predictions()`` is called and the R2 of
    ``self.final_training_predictions`` against the training targets is
    printed.
    """
    for round_idx in range(self.count):
        # presumably 1.0 is the fraction of the training set to draw -- TODO confirm
        sample_x, sample_y = self.training_ds.random_sampling(1.0)

        learner = skc.LinearSklearn(1, CustomerModel.model)
        learner.train(sample_x, sample_y)

        # Score on the whole training set, not only on the drawn sample.
        train_preds = learner.predict(self.training_ds.xtr).squeeze()
        self.training_predictions.append(train_preds)
        train_r2 = eval.r2_score(self.training_ds.ytr, train_preds)
        print("Model %s :: Customer R2 on training set: %s" % (round_idx, train_r2))

        # No R2 is reported for these predictions: when the final model is
        # built there is no labelled test set to compare against.
        # NOTE(review): unlike the training predictions these are stored
        # without .squeeze() -- confirm compute_final_predictions expects that.
        test_preds = learner.predict(self.testing_ds.xts)
        self.predictions.append(test_preds)

        self.save_partial(learner, round_idx)

    self.compute_final_predictions()

    overall_r2 = eval.r2_score(self.training_ds.ytr,
                               self.final_training_predictions)
    print("Overall customer R2 on training set: %s" % overall_r2)
    print("Done with customer bagging, models have been saved in 'saved' dir.")
    print("When ready, execute save_predictions.")
]: cols.remove(att) datas = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=datas)\ .exclude('NumberOfSales', 'Month')\ .build() n = 1 mods = [] for i in range(n): print(i + 1) x, y = datas.random_sampling(1.0) mod = skc.LinearSklearn(1, model) mod.train(x, y) mods.append(mod) p = mod.predict(x).squeeze() print("TRAIN R2: ", eval.r2_score(y, p)) print("TEST R2: ", eval.r2_score(datas.yts, mod.predict(datas.xts))) print("##########################") #tree.export_graphviz(mod.models[0]) #print("SAVED") preds = [] for i in range(n): preds.append(mods[i].predict(datas.xts)) custpred = np.array(preds).mean(axis=0) print("TEST R2: ", eval.r2_score(datas.yts, custpred)) print("############################################") for i in range(n): for j in range(len(cols)):
# Stacking: train one base model per entry of `model`, then train a final
# model on the original features augmented with the base models' predictions.
n = len(model)
mods = []
modpreds = []    # per-base-model predictions on the training features
modpreds_t = []  # per-base-model predictions on the test features
for i in range(n):
    print(i + 1)
    # Every base model is trained on the same, full training split.
    x, y = datas.xtr, datas.ytr
    mod = skc.LinearSklearn(1, model[i])
    mod.train(x, y)
    mods.append(mod)
    # NOTE(review): these training predictions are in-sample (the model was
    # just trained on `x`); confirm this is intended for the final model's
    # training input.
    p = mod.predict(x).squeeze()
    print(p)
    p_t = mod.predict(datas.xts).squeeze()
    modpreds.append(p)
    modpreds_t.append(p_t)
    print("TRAIN R2: ", eval.r2_score(y, p))
    print("TEST R2: ", eval.r2_score(datas.yts, p_t))
    print("##########################")
# Transpose so the stacked predictions can be appended column-wise below
# (assumes each squeezed prediction is 1-D, one value per sample -- TODO confirm).
modpreds = np.array(modpreds).transpose()
modpreds_t = np.array(modpreds_t).transpose()
# Augment the original features with the base-model predictions.
x = np.hstack((datas.xtr, modpreds))
x_t = np.hstack((datas.xts, modpreds_t))
# Final model: trained on the augmented training features, evaluated on the
# augmented test features.
fin = skc.LinearSklearn(1, final)
fin.train(x, datas.ytr)
custpred = fin.predict(x_t)
print(custpred)
print("TEST R2: ", eval.r2_score(datas.yts, custpred))
# Empty frame; it is filled by code that follows this section.
new = pandas.DataFrame()
datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017)) datas = preu.mean_cust_per_shop_if_holiday( datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017)) datas = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=datas)\ .exclude('NumberOfSales', 'Month')\ .build() n = 10 mods = [] for i in range(n): print(i + 1) x, y = datas.random_sampling(1.0) mod = skc.LinearSklearn(1, model) mod.train(x, y) mods.append(mod) p = mod.predict(x).squeeze() print("TRAIN R2: ", eval.r2_score(y, p)) print("TEST R2: ", eval.r2_score(datas.yts, mod.predict(datas.xts))) print("##########################") preds = [] for i in range(n): preds.append(mods[i].predict(datas.xtr)) trainpreds = np.array(preds).mean(axis=0).squeeze() preds = [] for i in range(n): preds.append(mods[i].predict(datas.xts)) custpred = np.array(preds).mean(axis=0).squeeze()
print("TYPE " + str(i)) d_reg = utils.get_frames_per_assortmenttype(train, i) d_reg_t = utils.get_frames_per_assortmenttype(test, i) print("N_SAMPLES: ", len(d_reg) + len(d_reg_t)) if len(d_reg) == 0: continue y = prepare_out(d_reg) x = drop_useless(d_reg, 1) y_t = prepare_out(d_reg_t) x_t = drop_useless(d_reg_t, 1) mod = skc.LinearSklearn(1, model) mod.train(x, y) models[i] = mod p = mod.predict(x).squeeze() pt = mod.predict(x_t).squeeze() r2_t = eval.r2_score(y_t, pt) sum += r2_t * (len(d_reg) + len(d_reg_t)) print("TRAIN R2: ", eval.r2_score(y, p)) print("TEST R2: ", r2_t) print("##########################") print("AVG TEST R2: ", sum / len(datas)) custpred = [] for i in test.index.tolist(): row = test.loc[i] val = "" for t in types: if row[t] == 1: val = t row = drop_useless(row).reshape([1, -1])
from sklearn.neighbors import KNeighborsRegressor as knn
from sklearn import preprocessing
import pandas as pd


def model():
    """Return a new, unfitted k-nearest-neighbours regressor.

    The estimator uses 10 neighbours with uniform weights and a ball-tree
    index with a leaf size of 15.
    """
    return knn(n_neighbors=10,
               weights='uniform',
               algorithm='ball_tree',
               leaf_size=15)


if __name__ == '__main__':
    # Load the customers dataset and build the train/test split with
    # 'NumberOfCustomers' as target, excluding 'NumberOfSales' and 'Month'.
    datas = ds.read_dataset("best_for_customers.csv")
    datas = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=datas)\
        .exclude('NumberOfSales', 'Month')\
        .build()

    # The factory function itself is passed (not an estimator instance).
    # NOTE(review): the meaning of the leading `1` is defined by
    # skc.LinearSklearn, which is not visible here.
    mod = skc.LinearSklearn(1, model)
    mod.train(datas.xtr, datas.ytr)

    custpred = mod.predict(datas.xts)
    print("TEST R2: ", eval.r2_score(datas.yts, custpred))
    print("##########################")

    # Use the `pd` alias imported above: it is the only pandas binding
    # visible in this script, so the output step no longer depends on a
    # separate bare `import pandas`.
    # NOTE(review): pd.Series needs 1-D data -- confirm `predict` returns a
    # flat array here.
    new = pd.DataFrame()
    new['NumberOfCustomers'] = pd.Series(custpred)
    ds.save_dataset(new, "knncustpreds1.csv")