Ejemplo n.º 1
0
def full_prep_test_ds_to_cust_pred():
    """Build and save the test dataset for customer prediction.

    Loads one frame with ``d.read_imputed_onehot_test_dataset()`` and another
    with ``d.read_imputed_onehot_dataset()``. On the second frame the ``Date``
    column is parsed as ``dd/mm/YYYY`` and a ``Day`` column holding the
    weekday name is added. Both frames are then passed to
    ``prepare_ds_to_customer_prediction`` and the result is saved as
    ``test_dataset_for_customers_prediction.csv``.
    """
    test_datas = d.read_imputed_onehot_test_dataset()
    data_from = d.read_imputed_onehot_dataset()
    data_from['Date'] = p.to_datetime(data_from['Date'], format='%d/%m/%Y')
    # ``Series.dt.weekday_name`` was deprecated in pandas 0.23 and removed in
    # 1.0; ``day_name()`` produces the same English day names.
    data_from['Day'] = data_from['Date'].dt.day_name()
    datas = prepare_ds_to_customer_prediction(test_datas, data_from)
    d.save_dataset(datas, "test_dataset_for_customers_prediction.csv")
Ejemplo n.º 2
0
def build_sales_predictor_train_dataset(name):
    """Prepare the sales-predictor training dataset and save it as ``name``.

    The frame returned by ``d.read_imputed_onehot_dataset()`` is run through
    ``__prepare_sales_train_ds`` and the outcome is handed to
    ``d.save_dataset`` together with ``name``. Nothing is returned.
    """
    raw_frame = d.read_imputed_onehot_dataset()
    train_frame = __prepare_sales_train_ds(raw_frame)
    d.save_dataset(train_frame, name)
Ejemplo n.º 3
0
    for i in range(number_of_model):
        p = models[i].predict(x, 500).squeeze()
        preds += p
    preds[preds < 0] = 0
    for i in range(min(len(preds), number_print)):
        print("PRED: ", preds[i], "   y: ", y[i])

    print("R2: ", eva.r2(ds, preds, 'NumberOfSales'))


# Script entry point: preprocess the dataset, persist it, and derive the
# train/test inputs and targets used further down.
if __name__ == '__main__':
    # Run-time switches. Only SAVE_DF is read in the statements visible here;
    # TRAIN, LOAD and name are presumably consumed later in the script.
    TRAIN = True
    LOAD = False
    SAVE_DF = False
    name = "test"
    ds = d.read_imputed_onehot_dataset()
    ds = prepare_ds(ds)
    # The preprocessed frame is always written out, regardless of SAVE_DF.
    d.save_dataset(ds, "fully_preprocessed_ds.csv")
    # NOTE(review): the numeric arguments look like (month, year) bounds,
    # i.e. 3/2016-12/2017 for training and 1/2018-2/2018 for testing --
    # confirm against utils.get_frame_in_range.
    ds_train = utils.get_frame_in_range(ds, 3, 2016, 12, 2017)
    ds_test = utils.get_frame_in_range(ds, 1, 2018, 2, 2018)
    y = prepare_out(ds_train)
    real_y = np.array(y)  # array built from the training targets
    dy = np.zeros(y.shape)  # zero-filled array with the same shape as y
    x = drop_useless(ds_train)
    y_test = prepare_out(ds_test)
    if SAVE_DF:
        # Optionally persist the test frame before drop_useless is applied.
        d.save_dataset(ds_test, "dataset_to_predict_sales.csv")
    x_test = drop_useless(ds_test)

    # Starts empty; the loop that follows is expected to fill it.
    models = []
    for i in range(number_of_model):
Ejemplo n.º 4
0
def build_cust_predictor_train_dataset(m1, a1, m2, a2):
    """Return the customers-predictor training dataset.

    The frame obtained from ``ds.read_imputed_onehot_dataset()`` is passed,
    together with ``m1``, ``a1``, ``m2`` and ``a2`` (forwarded unchanged), to
    ``__prepare_customers_train_ds``; whatever that helper returns is
    returned to the caller.
    """
    raw_frame = ds.read_imputed_onehot_dataset()
    return __prepare_customers_train_ds(raw_frame, m1, a1, m2, a2)
Ejemplo n.º 5
0
    correlation_analysis = False
    PCA_analysis = False
    PCA_correlation_attributes = False
    PCA_analysis_attribute = 'Region_AreaKM2'

    sb.set_style("whitegrid")
    sb.set(style="white", color_codes=True)
    sb.set_context(
        rc={
            "font.family": 'sans',
            "font.size": 5,
            "axes.titlesize": 8,
            "axes.labelsize": 8
        })

    data = datasetfun.read_imputed_onehot_dataset()

    data_nominal = datasetfun.nominal_only(data)

    data_numeric = datasetfun.numeric_only(data)

    if correlation_analysis == True:

        numeric_corr = data_numeric.corr(method="pearson")

        numeric_heatmap = sb.heatmap(numeric_corr,
                                     square=True,
                                     annot=True,
                                     cmap="Blues")
        pl.show()