Ejemplo n.º 1
0
    def train(self, ds):
        """Fit the per-store-type model groups on ``ds``.

        ``ds`` is split on the ``StoreType_Shopping Center`` column: rows equal
        to 1 feed ``self.nsc`` models created by ``self.model_shopc`` (stored in
        ``self.models_sc``), rows equal to 0 feed ``self.noth`` models created
        by ``self.model_others`` (stored in ``self.models_o``).  Both subsets
        are turned into a set with target ``NumberOfCustomers`` and with
        ``NumberOfSales`` and ``Month`` excluded.  After each fit the result of
        ``eval.evaluate`` on the training split is printed under the
        "TRAIN R2" label.
        """
        store_flag = ds['StoreType_Shopping Center']
        sc_frame = ds[store_flag == 1]
        oth_frame = ds[store_flag == 0]

        def fit_group(frame, banner, step_label, score_label, rounds, factory, fitted):
            # Build the set for this store group, then fit `rounds` models on
            # samples drawn with random_sampling(1.0).
            builder = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=frame)
            group_set = builder.exclude('NumberOfSales', 'Month').build()
            print(banner)
            for idx in range(rounds):
                print(step_label, idx)
                sample_x, sample_y = group_set.random_sampling(1.0)
                regressor = factory()
                regressor.fit(sample_x, sample_y)
                train_pred = regressor.predict(group_set.xtr)
                fitted.append(regressor)
                print(score_label, idx, " TRAIN R2: ", eval.evaluate(group_set.ytr, train_pred))

        fit_group(sc_frame, "SHOPPING CENTERS MODEL TRAINING: ", "SC :", "SC ",
                  self.nsc, self.model_shopc, self.models_sc)

        fit_group(oth_frame, "OTHER SHOPS MODEL TRAINING: ", "OTH :", "OTH ",
                  self.noth, self.model_others, self.models_o)
Ejemplo n.º 2
0
    def test(self, ds):
        """Print the test-split score of both model groups on ``ds``.

        ``ds`` is split on the ``StoreType_Shopping Center`` column (1 vs 0).
        Each subset is built into a set with target ``NumberOfCustomers`` and
        with ``NumberOfSales`` and ``Month`` excluded; the predictions of
        ``self.predict_sc`` / ``self.predict_oth`` on the test split are then
        passed to ``eval.evaluate`` and the result is printed.
        """
        store_flag = ds['StoreType_Shopping Center']
        groups = (
            (ds[store_flag == 1], "SHOPPING CENTERS MODEL EVALUATION",
             self.predict_sc, "SC TEST R2: "),
            (ds[store_flag == 0], "OTHERS MODEL EVALUATION",
             self.predict_oth, "OTH TEST R2: "),
        )

        for frame, banner, predictor, score_label in groups:
            builder = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=frame)
            group_set = builder.exclude('NumberOfSales', 'Month').build()
            print(banner)
            guesses = predictor(group_set.xts)
            print(score_label, eval.evaluate(group_set.yts, guesses))
Ejemplo n.º 3
0
    def __init__(self, count):
        """Set up the "customers" bagging model and its two data sets.

        ``count`` is forwarded unchanged to ``Bagging.__init__``.  The training
        set is read from ``final_for_customer_train.csv`` and the testing set
        from ``final_for_customer_test_r.csv``; both target
        ``NumberOfCustomers`` and exclude ``NumberOfSales`` and ``Month``.
        """
        Bagging.__init__(self, count)

        self.description = "customers"

        # Columns removed from both sets.
        dropped = ('NumberOfSales', 'Month')

        # presumably split is [[training ranges], [testing ranges]] -- TODO confirm
        train_builder = sb.SetBuilder(
            target='NumberOfCustomers',
            autoexclude=True,
            split=[[(3, 2016, 2, 2018)], []],
            dataset="final_for_customer_train.csv")
        self.training_ds = train_builder.exclude(*dropped).build()

        test_builder = sb.SetBuilder(
            target='NumberOfCustomers',
            autoexclude=True,
            split=[[], [(3, 2018, 4, 2018)]],
            dataset="final_for_customer_test_r.csv")
        self.testing_ds = test_builder.exclude(*dropped).build()
Ejemplo n.º 4
0
    def __init__(self, count, target):
        """Set up the "sales" bagging model and its two data sets.

        ``count`` is forwarded unchanged to ``Bagging.__init__``.  ``target`` is
        stored on the instance and also used as the data frame (``df=``) from
        which the testing set is built; the training set is read from
        ``final_for_sales_train.csv``.  Both sets target ``NumberOfSales`` and
        exclude ``Month``.

        A reminder is printed because the columns of ``target`` must be in the
        same order as those of the training CSV.
        """
        Bagging.__init__(self, count)

        print(
            "Remember: IT IS MANDATORY THAT THE COLUMN ORDER OF TARGET SET IS THE SAME AS final_for_sales_train.csv"
        )

        self.description = "sales"

        self.target = target

        # presumably split is [[training ranges], [testing ranges]] -- TODO confirm
        self.training_ds = sb.SetBuilder(
            target='NumberOfSales',
            autoexclude=True,
            split=[[(3, 2016, 2, 2018)], []],
            dataset="final_for_sales_train.csv").exclude('Month').build()

        self.testing_ds = sb.SetBuilder(target='NumberOfSales',
                                        autoexclude=True,
                                        split=[[], [(3, 2018, 4, 2018)]],
                                        df=target).exclude('Month').build()
Ejemplo n.º 5
0
from numpy import loadtxt
from xgboost import XGBRegressor
from sklearn import linear_model
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
import dataset.setbuilder as sb
import models.sklearn.evaluator as eval

# Customers set: NumberOfSales and Month are excluded from the features.
builder = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True)
data = builder.exclude('NumberOfSales', 'Month').build()

# Gradient-boosted regressor configuration.
xgb_params = dict(
    n_estimators=100,
    learning_rate=0.2,
    colsample_bytree=1,
    max_depth=4,
    silent=False,
    n_jobs=8,
)
model = XGBRegressor(**xgb_params)

model.fit(data.xtr, data.ytr)

# Predict on both splits before scoring.
pred_tr = model.predict(data.xtr)
pred_ts = model.predict(data.xts)

score_tr = eval.evaluate(data.ytr, pred_tr)
print('R2 TRAIN = %s' % score_tr)
score_ts = eval.evaluate(data.yts, pred_ts)
print('R2 TEST = %s' % score_ts)
Ejemplo n.º 6
0
        preds += p
    preds[preds < 0] = 0
    for i in range(min(len(preds), number_print)):
        print("PRED: ", preds[i], "   y: ", y[i])

    print("R2: ", eva.r2(ds, preds, 'NumberOfCustomers'))


if __name__ == '__main__':
    # Run switches: TRAIN fits every model; LOAD restores models from .h5
    # files instead of building new ones. SAVE_DS is not read in the visible
    # part of this block.
    TRAIN = True
    LOAD = False
    SAVE_DS = True
    # Infix of the model file names ("mod" + name + index + ".h5").
    name = "lin_"
    dts = d.read_dataset("best_for_customers.csv")
    # Target is NumberOfCustomers; NumberOfSales and Month are excluded.
    ds = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=dts)\
        .exclude('NumberOfSales', 'Month')\
        .build()
    models = []
    x, y = ds.xtr, ds.ytr
    real_y = np.array(y)
    x_test, y_test = ds.xts, ds.yts
    # number_of_model is defined outside this block.
    for i in range(number_of_model):
        if not LOAD:
            # The second argument is True only for the first model; its meaning
            # is defined by m.single_relu -- TODO confirm.
            models.append(m.single_relu(x.shape[1], i == 0))
        else:
            models.append(k.models.load_model("mod" + name + str(i) + ".h5"))
        # A new optimizer is created and the model is compiled on every
        # iteration, including for models restored from disk.
        opt = k.optimizers.adam(lr=2e-4)
        models[i].compile(optimizer=opt, loss='mean_squared_error', metrics=['mae'])
        models[i].summary()
        if TRAIN:
            # NOTE(review): the test split is what gets passed as validation data.
            models[i].fit(x=x, y=y, batch_size=20000, epochs=80, verbose=2, validation_data=(x_test, y_test))
Ejemplo n.º 7
0
    return ensemble.GradientBoostingRegressor(max_depth=8, n_estimators=5)


if __name__ == '__main__':
    datas = ds.read_dataset("mean_var_on_customers_from_tain.csv")
    # Month is the second dash-separated field of Date
    # (assumes Date is a string such as YYYY-MM-DD -- TODO confirm).
    datas['Month'] = datas['Date']
    datas['Month'] = datas['Month'].apply(lambda x: x.split("-")[1])
    # One-hot encode Month and Region with the given column prefixes.
    datas = imp.one_hot_numeric(datas, 'Month', 'Month_')
    datas = imp.one_hot_numeric(datas, 'Region', 'Region_')
    # Both mean-customer features are computed from the frame restricted with
    # get_frame_in_range(datas, 3, 2016, 12, 2017). The second call receives
    # `datas` as already extended by the first.
    datas = preprocessing_utils.mean_cust_per_month_per_region(
        datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017))
    datas = preprocessing_utils.mean_cust_per_month_per_shop(
        datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017))
    # Build the set for NumberOfCustomers, excluding the listed weather columns
    # as well as NumberOfSales and Month.
    datas = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=datas)\
        .exclude('NumberOfSales', 'Month', 'Max_Humidity', 'Max_Sea_Level_PressurehPa', 'Max_TemperatureC',
                 'Max_VisibilityKm', 'Max_Wind_SpeedKm_h', 'Mean_Humidity', 'Mean_Sea_Level_PressurehPa',
                 'Mean_VisibilityKm', 'Mean_Wind_SpeedKm_h', 'Min_Dew_PointC',
                 'Min_Humidity', 'Min_Sea_Level_PressurehPa', 'Min_TemperatureC', 'Min_VisibilitykM')\
        .build()
    # Model factories (callables); each one is wrapped in skc.LinearSklearn below.
    model = [
        ridge, linear_model.LinearRegression, lasso, regtree, gradboostreg
    ]
    # `final`, `modpreds` and `modpreds_t` are not used in the visible part of
    # this block.
    final = ridge
    n = len(model)
    mods = []
    modpreds = []
    modpreds_t = []
    for i in range(n):
        # Progress indicator (1-based).
        print(i + 1)
        # Every model is trained on the full training split.
        x, y = datas.xtr, datas.ytr
        mod = skc.LinearSklearn(1, model[i])
        mod.train(x, y)
Ejemplo n.º 8
0
def excluded_feats():
    """Return a new list of the column names handed to ``exclude_list``.

    The list is ``Month``, eight weather columns and ``NumberOfCustomers``,
    in that order.
    """
    weather_columns = (
        'Max_Humidity',
        'Max_TemperatureC',
        'Max_VisibilityKm',
        'Max_Wind_SpeedKm_h',
        'Min_Dew_PointC',
        'Min_Humidity',
        'Min_TemperatureC',
        'Min_VisibilitykM',
    )
    return ["Month"] + list(weather_columns) + ['NumberOfCustomers']


# Load the sales data and keep the Region, Date and StoreID columns as
# flattened arrays alongside the feature set.
datas = d.read_dataset("final_sales_only_train.csv")
regions = d.to_numpy(datas[['Region']]).squeeze()
dates = d.to_numpy(datas[['Date']]).squeeze()
ids = d.to_numpy(datas[['StoreID']]).squeeze()

# The builder receives a copy of the frame; columns from excluded_feats() are
# removed.
# NOTE(review): split is a flat 8-tuple here -- confirm the format SetBuilder expects.
data = sb.SetBuilder(target='NumberOfSales', autoexclude=True, df=datas.copy(), split=(3, 2016, 2, 2018, 12, 2018, 12, 2018))\
    .exclude_list(excluded_feats())\
    .build()

# `it` is the number of models to fit; each iteration uses the full training
# split.
it = 1
yy = []
models = []
for i in range(it):
    bagx, bagy = data.xtr, data.ytr
    # NOTE(review): activation='identity' leaves the hidden layers without a
    # non-linearity -- confirm this is intended.
    dt = nn.MLPRegressor(hidden_layer_sizes=(400, 3),
                         activation='identity',
                         solver='adam',
                         batch_size=50000,
                         learning_rate='adaptive',
                         learning_rate_init=0.002,
                         max_iter=50,
                         shuffle=True,
Ejemplo n.º 9
0
    das = preu.eliminate_IsOpen_zeros(das)
    das = preu.mean_std_cust_per_shop_per_day(das, dfrom)
    das = preu.add_avg_cust_per_shop(das, dfrom)
    das = preu.add_std_cust_per_shop(das, dfrom)
    das = preu.add_max_cust_per_shop(das, dfrom)
    das = preu.add_min_cust_per_shop(das, dfrom)
    das = preu.mean_cust_per_month_per_shop(das, dfrom)
    das = preu.mean_cust_per_month_per_region(das, dfrom)
    return das


if __name__ == '__main__':
    # presumably [[training ranges], [testing ranges]] -- TODO confirm. The
    # second list holds the same (1, 2017, 2, 2017) range that is passed to
    # build_cust_predictor_train_dataset below.
    split = [[(3, 2016, 12, 2016), (3, 2017, 2, 2018)], [(1, 2017, 2, 2017)]]
    datas = build_cust_predictor_train_dataset(1, 2017, 2, 2017)
    # Target is NumberOfCustomers; Month and NumberOfSales are excluded.
    datas = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=datas, split=split)\
        .exclude("Month", "NumberOfSales")\
        .build()
    # Number of models to train.
    n = 10
    mods = []
    for i in range(n):
        # Progress indicator (1-based).
        print(i + 1)
        # Each model is trained on a new random_sampling(1.0) draw.
        x, y = datas.random_sampling(1.0)
        # `model` is defined outside this block.
        mod = skc.LinearSklearn(1, model)
        mod.train(x, y)
        mods.append(mod)
        # NOTE(review): the training prediction is squeezed but the test
        # prediction below is not -- confirm both shapes suit r2_score.
        p = mod.predict(datas.xtr).squeeze()
        print("TRAIN R2: ", eval.r2_score(datas.ytr, p))
        print("TEST R2: ", eval.r2_score(datas.yts, mod.predict(datas.xts)))
        print("##########################")

    preds = []
Ejemplo n.º 10
0
# Data set file and ensemble sizes.
ds_name = "fully_preprocessed_ds.csv"
NMODELS = 3
SET_OF_MODELS_DIM = 1
models = []

# Rows selected by get_frame_in_range(..., 1, 2018, 2, 2018).
datas = utils.get_frame_in_range(ds.read_dataset(ds_name), 1, 2018, 2, 2018)

# Region, Date and StoreID columns as flattened arrays.
regions, dates, ids = (
    ds.to_numpy(datas[[column]]).squeeze()
    for column in ('Region', 'Date', 'StoreID')
)

def mod():
    """Create a new ``skl.linear_model.Ridge`` with 200 as first argument.

    Assuming ``skl`` is scikit-learn, that argument is the ``alpha``
    regularisation strength.
    """
    estimator_cls = skl.linear_model.Ridge
    return estimator_cls(200)


# Train SET_OF_MODELS_DIM models, each on a newly built set.
for i in range(SET_OF_MODELS_DIM):
    builder = sb.SetBuilder(target='NumberOfSales', dataset=ds_name)
    data = builder.build()
    # NOTE(review): the value returned by random_sampling is not used here --
    # confirm whether the call is meant to resample `data` in place.
    data.random_sampling(1.0)
    model = skc.LinearSklearn(NMODELS, mod)
    model.train(data.xtr, data.ytr)
    models.append(model)

# Rebuild the set, then average the test-split predictions of all models.
data = sb.SetBuilder(target='NumberOfSales', dataset=ds_name).build()
test_rows = data.yts.shape[0]
ypred = np.zeros(shape=test_rows)
for fitted in models[:SET_OF_MODELS_DIM]:
    ypred += fitted.predict(data.xts)

ypred = ypred/SET_OF_MODELS_DIM

score = eval.evaluate(data.yts, ypred)
print('R2 = %s' % score)
Ejemplo n.º 11
0
from sklearn import tree
import dataset.setbuilder as sb
import models.sklearn.evaluator as eval
import numpy as np
import models.sklearn.persistence as pr

print("Plain Decision regression tree without bagging")

# Build the set for the NumberOfCustomers target; NumberOfSales and Month are
# excluded.
builder = sb.SetBuilder(
    target='NumberOfCustomers',
    autoexclude=True,
    dataset='best_for_customers.csv',
)
data = builder.exclude('NumberOfSales', 'Month').build()

# Alternative configuration kept from the original script:
# data = sb.SetBuilder(target='NumberOfSales', autoexclude=True, dataset='mean_var_on_cust_from_tain.csv').build()

# Fit a depth-limited regression tree and predict on the test split.
depth = 8
dtree = tree.DecisionTreeRegressor(max_depth=depth)
dtree.fit(data.xtr, data.ytr)
ypred = dtree.predict(data.xts)

# Save the fitted tree, load it back and predict again with the loaded model.
model_name = 'decision_tree_cust'
pr.save_model(dtree, model_name)
dtree = pr.load_model(model_name)
ypred = dtree.predict(data.xts)
Ejemplo n.º 12
0
import dataset.setbuilder as sb
import models.sklearn.evaluator as eval
from sklearn import svm

# Build the NumberOfSales set and keep the result of
# random_sampling(percentage=0.1) as the working data.
full_set = sb.SetBuilder(target='NumberOfSales').build()
data = full_set.random_sampling(percentage=0.1)

# Support-vector regressor with default settings; the target is flattened
# before fitting.
clf = svm.SVR()
clf.fit(data.xtr, data.ytr.ravel())

ypred = clf.predict(data.xts)

score = eval.evaluate(data.yts, ypred)
print('R2 = %s' % score)
Ejemplo n.º 13
0
import dataset.utility as dsutil
import dataset.dataset as ds
import seaborn as sea
import dataset.setbuilder as sb
from sklearn import linear_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import models.sklearn.evaluator as eval
from sklearn.preprocessing import PolynomialFeatures

# Single-feature set: only NearestCompetitor is kept as input for NumberOfSales.
# NOTE(review): the second split range (3, 2016, 2, 2018) appears to contain the
# first one (3, 2016, 1, 2018) -- confirm the overlap between the two is intended.
data = sb.SetBuilder(target="NumberOfSales",
                     dataset="final_for_sales_train.csv",
                     autoexclude=False,
                     split=[[(3, 2016, 1, 2018)], [(3, 2016, 2, 2018)]
                            ]).only('NearestCompetitor').build()

poly_degree = 2

# Polynomial regression: expand the features, then fit a linear model on them.
print("Linear regression started, polynomial degree = %s" % poly_degree)
poly = PolynomialFeatures(degree=poly_degree)
xtr_ = poly.fit_transform(data.xtr)
# The test split reuses the expansion fitted on the training split.
xts_ = poly.transform(data.xts)

model = linear_model.LinearRegression()

# Fit and evaluate on the expanded features. The previous version computed them
# but trained and scored on the raw features, so the degree had no effect.
model.fit(xtr_, data.ytr)

print(eval.evaluate(data.ytr, model.predict(xtr_)))
print(eval.evaluate(data.yts, model.predict(xts_)))