Example #1
0
    def train(self, ds):
        """Fit the shopping-center ensemble and the other-stores ensemble.

        ``ds`` is split on the ``StoreType_Shopping Center`` column (value 1
        vs. value 0). For each subset a set is built with
        ``NumberOfCustomers`` as target and ``NumberOfSales`` / ``Month``
        passed to ``exclude``. ``self.nsc`` models from ``self.model_shopc``
        and ``self.noth`` models from ``self.model_others`` are then fitted,
        each on its own ``random_sampling(1.0)`` draw, and appended to
        ``self.models_sc`` / ``self.models_o``. For every fitted model the
        result of ``eval.evaluate`` on the training split is printed.
        """
        store_col = 'StoreType_Shopping Center'
        mall_rows = ds[ds[store_col] == 1]
        other_rows = ds[ds[store_col] == 0]

        def build_set(rows):
            # Same builder configuration for both store groups.
            builder = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=rows)
            return builder.exclude('NumberOfSales', 'Month').build()

        def fit_members(dataset, tag, count, factory, bucket):
            # Train `count` models, each on a new random sample, and report
            # the score of each one on the whole training split.
            for idx in range(count):
                print(tag + " :", idx)
                sample_x, sample_y = dataset.random_sampling(1.0)
                member = factory()
                member.fit(sample_x, sample_y)
                train_pred = member.predict(dataset.xtr)
                bucket.append(member)
                print(tag + " ", idx, " TRAIN R2: ", eval.evaluate(dataset.ytr, train_pred))

        mall_set = build_set(mall_rows)
        print("SHOPPING CENTERS MODEL TRAINING: ")
        fit_members(mall_set, "SC", self.nsc, self.model_shopc, self.models_sc)

        other_set = build_set(other_rows)
        print("OTHER SHOPS MODEL TRAINING: ")
        fit_members(other_set, "OTH", self.noth, self.model_others, self.models_o)
Example #2
0
    def test(self, ds):
        """Print the test-split score of both ensembles.

        ``ds`` is split on the ``StoreType_Shopping Center`` column (1 vs. 0).
        For each subset a set is built with ``NumberOfCustomers`` as target
        (``NumberOfSales`` and ``Month`` passed to ``exclude``), predictions
        for its ``xts`` are obtained from ``self.predict_sc`` /
        ``self.predict_oth`` and the result of ``eval.evaluate`` against
        ``yts`` is printed.
        """
        store_col = 'StoreType_Shopping Center'
        mall_rows = ds[ds[store_col] == 1]
        other_rows = ds[ds[store_col] == 0]

        # Shopping-center stores.
        mall_builder = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=mall_rows)
        mall_set = mall_builder.exclude('NumberOfSales', 'Month').build()
        print("SHOPPING CENTERS MODEL EVALUATION")
        mall_preds = self.predict_sc(mall_set.xts)
        mall_score = eval.evaluate(mall_set.yts, mall_preds)
        print("SC TEST R2: ", mall_score)

        # Every other store type.
        other_builder = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=other_rows)
        other_set = other_builder.exclude('NumberOfSales', 'Month').build()

        print("OTHERS MODEL EVALUATION")
        other_preds = self.predict_oth(other_set.xts)
        other_score = eval.evaluate(other_set.yts, other_preds)
        print("OTH TEST R2: ", other_score)
Example #3
0
from numpy import loadtxt
from xgboost import XGBRegressor
from sklearn import linear_model
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
import dataset.setbuilder as sb
import models.sklearn.evaluator as eval

# Build the set used for customer prediction: NumberOfCustomers is the
# target; NumberOfSales and Month are handed to the builder's exclude().
builder = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True)
data = builder.exclude('NumberOfSales', 'Month').build()

# Hyper-parameters of the boosted-tree regressor, kept together in one place.
xgb_params = {
    'n_estimators': 100,
    'learning_rate': 0.2,
    'colsample_bytree': 1,
    'max_depth': 4,
    'silent': False,
    'n_jobs': 8,
}
model = XGBRegressor(**xgb_params)

model.fit(data.xtr, data.ytr)

# Predict on both splits before scoring either of them.
pred_tr = model.predict(data.xtr)
pred_ts = model.predict(data.xts)

train_score = eval.evaluate(data.ytr, pred_tr)
print('R2 TRAIN = %s' % train_score)
test_score = eval.evaluate(data.yts, pred_ts)
print('R2 TEST = %s' % test_score)
Example #4
0
                                               'Month').build()
#data = sb.SetBuilder(target='NumberOfSales').exclude('Day').build()

# Configuration of the multi-layer perceptron regressor, collected in a dict
# so the constructor call stays short. Values are unchanged.
mlp_params = {
    'hidden_layer_sizes': (100, 5),
    'activation': 'relu',
    'solver': 'adam',
    'batch_size': 'auto',
    'learning_rate': 'adaptive',
    'learning_rate_init': 0.001,
    'max_iter': 50,
    'shuffle': True,
    'random_state': 9,
    'tol': 0.000001,
    'verbose': True,
    'warm_start': False,
    'momentum': 0.9,
    'nesterovs_momentum': True,
    'early_stopping': False,
    'validation_fraction': 0.1,
    'beta_1': 0.9,
    'beta_2': 0.999,
    'epsilon': 1e-08,
}
nn = neural_network.MLPRegressor(**mlp_params)

# The target is flattened with ravel() before fitting.
n = nn.fit(data.xtr, data.ytr.ravel())

ypred = nn.predict(data.xts)

# Persist the test-split predictions, then report the score.
pred_frame = pd.DataFrame(ypred)
ds.save_dataset(pred_frame, 'customer_pred_jan_feb_NN.csv')

test_score = eval.evaluate(data.yts, ypred)
print('R2 = %s' % test_score)
Example #5
0
                         max_iter=50,
                         shuffle=True,
                         tol=0.000001,
                         verbose=True,
                         warm_start=False,
                         momentum=0.9,
                         nesterovs_momentum=True,
                         early_stopping=False,
                         validation_fraction=0.1,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=1e-08)
    dt.fit(bagx, bagy.ravel())
    models.append(dt)
    y = dt.predict(data.xtr)
    print('it = %s, TRAIN R2 = %s' % (i, eval.evaluate(data.ytr, y)))
    yy.append(y)

# Stack the collected prediction vectors and average them along axis 0,
# i.e. across the entries that were appended to yy.
yy = np.array(yy)
pred = np.mean(yy, axis=0)

bagging_score = eval.evaluate(data.ytr, pred)
print('Bagging R2 = %s' % bagging_score)

# region_error returns three values; the second and third are compared
# element-wise below.
re, totr, totp = ev_cust.region_error(data.ytr, pred, regions, ids, dates)
diff = totr - totp

# The error values are scaled by 100 for printing.
print("REG_ERR: ", re * 100)
print("REG_MEAN_ERR: ", re.mean() * 100)
print("REAL_SUM: ", totr.sum())
print("PRED_SUM: ", totp.sum())
print("SUM_OF_DIFFS: ", diff.sum())
Example #6
0
                     axis=1)).squeeze()
        clean_row = clean_row.reshape([1, -1])
        if sc:
            return self.predict_sc(clean_row).squeeze()
        else:
            return self.predict_oth(clean_row).squeeze()


def model1():
    """Return a new, unfitted ``linear_model.Ridge`` with ``alpha=10``."""
    ridge = linear_model.Ridge(alpha=10)
    return ridge


def model2():
    """Return a new, unfitted ``tree.DecisionTreeRegressor`` with ``max_depth=9``."""
    regressor = tree.DecisionTreeRegressor(max_depth=9)
    return regressor


# Load the dataset, then train and evaluate the two-ensemble predictor.
data = data_manager.read_dataset("best_for_customers.csv")
# NOTE(review): model1 is passed for both factories and model2 is not used
# here - confirm this is intentional.
model = CustomersPredoctorSeparateShopCenters(model1, model1, 10, 10)
model.train(data)
model.test(data)

# Restrict the frame with get_frame_in_range(data, 1, 2018, 2, 2018) and
# predict one single-row frame at a time.
data = utils.get_frame_in_range(data, 1, 2018, 2, 2018)
preds = np.array([model.predict(data.iloc[[pos]]) for pos in range(len(data))])

# Rebuild a set from the restricted frame to obtain the reference targets.
# NOTE(review): preds has one entry per row of the restricted frame while the
# score uses data.yts - verify that both cover the same rows in the same order.
final_builder = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=data)
data = final_builder.exclude('NumberOfSales', 'Month').build()
final_score = eval.evaluate(data.yts, preds)
print("FINAL R2: ", final_score)
Example #7
0
# data = sb.SetBuilder(target='NumberOfSales', autoexclude=True, dataset='mean_var_on_cust_from_tain.csv').build()

# Trains decision-tree regressors (not a linear regression)

# Maximum depth shared by every tree trained below.
depth = 8

# Single tree: fit, predict, save under 'decision_tree_cust', load it back
# and predict again with the loaded model.
dtree = tree.DecisionTreeRegressor(max_depth=depth)
dtree.fit(data.xtr, data.ytr)
ypred = dtree.predict(data.xts)

pr.save_model(dtree, 'decision_tree_cust')

dtree = pr.load_model('decision_tree_cust')
ypred = dtree.predict(data.xts)

train_score = eval.evaluate(data.ytr, dtree.predict(data.xtr))
print('R2 train = %s' % train_score)
test_score = eval.evaluate(data.yts, ypred)
print('R2 test = %s' % test_score)
print("Plain Decision regression tree without bagging")

# Train `it` further trees, each on its own random_sampling(1) draw, save
# every one of them and collect their test-split predictions in yy.
it = 10
yy = []
for i in range(it):
    bagx, bagy = data.random_sampling(1)
    dt = tree.DecisionTreeRegressor(max_depth=depth)
    dt.fit(bagx, bagy)
    pr.save_model(dt, 'dt_cust_bootstraping_%s' % i)
    y = dt.predict(data.xts)
    iteration_score = eval.evaluate(data.yts, y)
    print('it = %s, R2 = %s' % (i, iteration_score))
    yy.append(y)

# One row per trained tree.
yy = np.array(yy)
Example #8
0
import dataset.setbuilder as sb
from sklearn import linear_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import models.sklearn.evaluator as eval
from sklearn.preprocessing import PolynomialFeatures

# Build the sales set from 'final_for_sales_train.csv', keeping only the
# 'NearestCompetitor' feature (via only()); the split argument is passed to
# the builder unchanged.
data = sb.SetBuilder(target="NumberOfSales",
                     dataset="final_for_sales_train.csv",
                     autoexclude=False,
                     split=[[(3, 2016, 1, 2018)], [(3, 2016, 2, 2018)]
                            ]).only('NearestCompetitor').build()

poly_degree = 2

# Polynomial regression: expand the features to the requested degree and fit
# an ordinary least-squares model on the expanded features.
print("Linear regression started, polynomial degree = %s" % poly_degree)
poly = PolynomialFeatures(degree=poly_degree)
# Fit the expansion on the training split only and reuse it for the test
# split, so both splits go through the same transformation.
xtr_ = poly.fit_transform(data.xtr)
xts_ = poly.transform(data.xts)

model = linear_model.LinearRegression()

# Previously the model was fitted and scored on the raw features, leaving
# xtr_ / xts_ unused; the polynomial features are what the script announces.
model.fit(xtr_, data.ytr)

print(eval.evaluate(data.ytr, model.predict(xtr_)))
print(eval.evaluate(data.yts, model.predict(xts_)))

# coef_ now has one entry per polynomial feature (including the bias column
# that PolynomialFeatures adds by default).
print(model.coef_)
print(model.intercept_)