Ejemplo n.º 1
0
    def fit(self):
        """Train ``self.count`` decision-tree regressors and combine them.

        Every round draws a sample with
        ``self.training_ds.random_sampling(1.0)``, fits a fresh
        ``tree.DecisionTreeRegressor`` on it, records that model's predictions
        for ``self.training_ds.xtr`` and ``self.testing_ds.xts`` and passes the
        fitted model to ``self.save_partial``.

        Once all rounds are done ``self.compute_final_predictions()`` is
        called (presumably it fills ``self.final_training_predictions`` -
        confirm against its definition) and the overall training R2 is
        printed.
        """
        for model_idx in range(self.count):
            sample_x, sample_y = self.training_ds.random_sampling(1.0)

            estimator = tree.DecisionTreeRegressor()
            estimator.fit(sample_x, sample_y)

            # Score on the full training set, not only on the drawn sample.
            train_preds = estimator.predict(self.training_ds.xtr)
            self.training_predictions.append(train_preds)
            train_r2 = eval.r2_score(self.training_ds.ytr, train_preds)
            print("Model %s :: Sales R2 on training set: %s" %
                  (model_idx, train_r2))

            # Test predictions are only collected, never scored: when the
            # final model is built there are no test targets to compare with.
            test_preds = estimator.predict(self.testing_ds.xts)
            self.predictions.append(test_preds)

            self.save_partial(estimator, model_idx)

        self.compute_final_predictions()

        overall_r2 = eval.r2_score(self.training_ds.ytr,
                                   self.final_training_predictions)
        print("Overall sales R2 on training set: %s" % overall_r2)

        print(
            "Done with sales bagging, models have been saved in 'saved' dir.")
        print("When ready, execute save_predictions.")
Ejemplo n.º 2
0
    def fit(self):
        """Train ``self.count`` customer models and combine them.

        Every round draws a sample with
        ``self.training_ds.random_sampling(1.0)``, trains a new
        ``skc.LinearSklearn(1, CustomerModel.model)`` on it, records that
        model's predictions for ``self.training_ds.xtr`` (squeezed) and
        ``self.testing_ds.xts`` (as returned) and passes the trained model to
        ``self.save_partial``.

        Once all rounds are done ``self.compute_final_predictions()`` is
        called (presumably it fills ``self.final_training_predictions`` -
        confirm against its definition) and the overall training R2 is
        printed.
        """
        for model_idx in range(self.count):
            sample_x, sample_y = self.training_ds.random_sampling(1.0)

            learner = skc.LinearSklearn(1, CustomerModel.model)
            learner.train(sample_x, sample_y)

            # Score on the full training set, not only on the drawn sample.
            train_preds = learner.predict(self.training_ds.xtr).squeeze()
            self.training_predictions.append(train_preds)
            train_r2 = eval.r2_score(self.training_ds.ytr, train_preds)
            print("Model %s :: Customer R2 on training set: %s" %
                  (model_idx, train_r2))

            # Test predictions are only collected, never scored: when the
            # final model is built there are no test targets to compare with.
            test_preds = learner.predict(self.testing_ds.xts)
            self.predictions.append(test_preds)

            self.save_partial(learner, model_idx)

        self.compute_final_predictions()

        overall_r2 = eval.r2_score(self.training_ds.ytr,
                                   self.final_training_predictions)
        print("Overall customer R2 on training set: %s" % overall_r2)

        print(
            "Done with customer bagging, models have been saved in 'saved' dir."
        )
        print("When ready, execute save_predictions.")
Ejemplo n.º 3
0
    ]:

        cols.remove(att)
    datas = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=datas)\
        .exclude('NumberOfSales', 'Month')\
        .build()
    n = 1
    mods = []
    for i in range(n):
        print(i + 1)
        x, y = datas.random_sampling(1.0)
        mod = skc.LinearSklearn(1, model)
        mod.train(x, y)
        mods.append(mod)
        p = mod.predict(x).squeeze()
        print("TRAIN R2: ", eval.r2_score(y, p))
        print("TEST R2: ", eval.r2_score(datas.yts, mod.predict(datas.xts)))
        print("##########################")

    #tree.export_graphviz(mod.models[0])
    #print("SAVED")
    preds = []
    for i in range(n):
        preds.append(mods[i].predict(datas.xts))

    custpred = np.array(preds).mean(axis=0)

    print("TEST R2: ", eval.r2_score(datas.yts, custpred))
    print("############################################")
    for i in range(n):
        for j in range(len(cols)):
Ejemplo n.º 4
0
    n = len(model)
    mods = []
    modpreds = []
    modpreds_t = []
    for i in range(n):
        print(i + 1)
        x, y = datas.xtr, datas.ytr
        mod = skc.LinearSklearn(1, model[i])
        mod.train(x, y)
        mods.append(mod)
        p = mod.predict(x).squeeze()
        print(p)
        p_t = mod.predict(datas.xts).squeeze()
        modpreds.append(p)
        modpreds_t.append(p_t)
        print("TRAIN R2: ", eval.r2_score(y, p))
        print("TEST R2: ", eval.r2_score(datas.yts, p_t))
        print("##########################")

    modpreds = np.array(modpreds).transpose()
    modpreds_t = np.array(modpreds_t).transpose()
    x = np.hstack((datas.xtr, modpreds))
    x_t = np.hstack((datas.xts, modpreds_t))

    fin = skc.LinearSklearn(1, final)
    fin.train(x, datas.ytr)
    custpred = fin.predict(x_t)
    print(custpred)
    print("TEST R2: ", eval.r2_score(datas.yts, custpred))

    new = pandas.DataFrame()
Ejemplo n.º 5
0
        datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017))
    datas = preu.mean_cust_per_shop_if_holiday(
        datas, utils.get_frame_in_range(datas, 3, 2016, 12, 2017))
    datas = sb.SetBuilder(target='NumberOfCustomers', autoexclude=True, df=datas)\
        .exclude('NumberOfSales', 'Month')\
        .build()
    n = 10
    mods = []
    for i in range(n):
        print(i + 1)
        x, y = datas.random_sampling(1.0)
        mod = skc.LinearSklearn(1, model)
        mod.train(x, y)
        mods.append(mod)
        p = mod.predict(x).squeeze()
        print("TRAIN R2: ", eval.r2_score(y, p))
        print("TEST R2: ", eval.r2_score(datas.yts, mod.predict(datas.xts)))
        print("##########################")

    preds = []
    for i in range(n):
        preds.append(mods[i].predict(datas.xtr))

    trainpreds = np.array(preds).mean(axis=0).squeeze()

    preds = []
    for i in range(n):
        preds.append(mods[i].predict(datas.xts))

    custpred = np.array(preds).mean(axis=0).squeeze()
Ejemplo n.º 6
0
    print("TYPE " + str(i))
    d_reg = utils.get_frames_per_assortmenttype(train, i)
    d_reg_t = utils.get_frames_per_assortmenttype(test, i)
    print("N_SAMPLES: ", len(d_reg) + len(d_reg_t))
    if len(d_reg) == 0:
        continue
    y = prepare_out(d_reg)
    x = drop_useless(d_reg, 1)
    y_t = prepare_out(d_reg_t)
    x_t = drop_useless(d_reg_t, 1)
    mod = skc.LinearSklearn(1, model)
    mod.train(x, y)
    models[i] = mod
    p = mod.predict(x).squeeze()
    pt = mod.predict(x_t).squeeze()
    r2_t = eval.r2_score(y_t, pt)
    sum += r2_t * (len(d_reg) + len(d_reg_t))
    print("TRAIN R2: ", eval.r2_score(y, p))
    print("TEST R2: ", r2_t)
    print("##########################")

print("AVG TEST R2: ", sum / len(datas))

custpred = []
for i in test.index.tolist():
    row = test.loc[i]
    val = ""
    for t in types:
        if row[t] == 1:
            val = t
    row = drop_useless(row).reshape([1, -1])
Ejemplo n.º 7
0
from sklearn.neighbors import KNeighborsRegressor as knn
from sklearn import preprocessing
import pandas as pd


def model():
    """Return a new, unfitted ``KNeighborsRegressor`` with fixed settings.

    The regressor uses 10 uniformly weighted neighbours and a ball-tree
    index with a leaf size of 15.
    """
    settings = {
        'n_neighbors': 10,
        'weights': 'uniform',
        'algorithm': 'ball_tree',
        'leaf_size': 15,
    }
    return knn(**settings)


if __name__ == '__main__':
    # Script entry point: train the KNN customer model on the training split
    # of "best_for_customers.csv", report its R2 on the test split and save
    # the test predictions to "knncustpreds1.csv".

    datas = ds.read_dataset("best_for_customers.csv")

    # The target is NumberOfCustomers; NumberOfSales and Month are explicitly
    # excluded when the set is built.
    datas = sb.SetBuilder(target='NumberOfCustomers',
                          autoexclude=True,
                          df=datas).exclude('NumberOfSales', 'Month').build()

    mod = skc.LinearSklearn(1, model)
    x = datas.xtr
    y = datas.ytr
    mod.train(x, y)
    custpred = mod.predict(datas.xts)
    print("TEST R2: ", eval.r2_score(datas.yts, custpred))
    print("##########################")

    # Use the `pd` alias: it is the name bound by this file's visible
    # `import pandas as pd`, whereas a bare `pandas` may not be defined.
    new = pd.DataFrame()
    new['NumberOfCustomers'] = pd.Series(custpred)
    ds.save_dataset(new, "knncustpreds1.csv")