Esempio n. 1
0
def reorder_attributes(ds, list_order):
    """Build a frame whose columns follow ``list_order``.

    The result shares ``ds``'s index. Each name in ``list_order`` becomes a
    column, in that order, filled with the values of the matching column of
    ``ds``.

    Args:
        ds: Source frame; columns are looked up with ``ds[[attr]]``.
        list_order: Iterable of column names giving the output order.

    Returns:
        A new ``p.DataFrame`` indexed like ``ds``.
    """
    reordered = p.DataFrame(index=ds.index)
    for attr in list_order:
        try:
            values = d.to_numpy(ds[[attr]]).squeeze()
            reordered[attr] = p.Series(values, index=reordered.index)
        except Exception:
            # Best-effort fallback: whenever the column cannot be copied
            # (whatever the reason), it is filled with zeros instead.
            zeros = np.zeros(len(reordered))
            reordered[attr] = p.Series(zeros, index=reordered.index)
    return reordered
Esempio n. 2
0
def drop_useless(ds):
    """Drop a fixed set of columns from ``ds`` and convert the rest.

    Args:
        ds: Frame containing every column listed below (presumably a pandas
            DataFrame -- confirm against callers).

    Returns:
        Whatever ``d.to_numpy`` produces for the remaining columns.
    """
    discarded_columns = [
        'NumberOfSales', 'StoreID', 'Date', 'IsOpen', 'Region',
        'CloudCover', 'Max_Sea_Level_PressurehPa', 'WindDirDegrees',
        'Max_Dew_PointC', 'Mean_Sea_Level_PressurehPa',
        'Min_Sea_Level_PressurehPa', 'Day',
    ]
    remaining = ds.drop(discarded_columns, axis=1)
    return d.to_numpy(remaining)
Esempio n. 3
0
 def predict(self, row):
     """Predict for ``row`` using the predictor matching its store type.

     When every value of ``row['StoreType_Shopping Center']`` equals 1 the
     call is routed to ``self.predict_sc``; otherwise ``self.predict_oth``
     is used.

     Args:
         row: Frame-like object holding the columns dropped below plus the
             features passed to the predictor.

     Returns:
         The squeezed output of the selected predictor.
     """
     is_shopping_center = (row['StoreType_Shopping Center'] == 1).all()
     dropped_columns = [
         'NumberOfSales', 'Month', 'NumberOfCustomers', 'StoreID',
         'Date', 'IsOpen', 'Region', 'CloudCover',
         'Max_Sea_Level_PressurehPa', 'WindDirDegrees',
         'Max_Dew_PointC', 'Mean_Sea_Level_PressurehPa',
         'Min_Sea_Level_PressurehPa', 'Day',
     ]
     features = data_manager.to_numpy(row.drop(dropped_columns, axis=1))
     # Flatten first, then lay the values out as one row: shape (1, k).
     features = features.squeeze().reshape([1, -1])
     predictor = self.predict_sc if is_shopping_center else self.predict_oth
     return predictor(features).squeeze()
Esempio n. 4
0
def opendaybeforegeneralplot(df, storeID, show=True, save=False):
    """Box-plot ``NumberOfSales`` against the previous row's ``IsOpen`` flag.

    An ``OpenDayBefore`` column is derived by shifting ``IsOpen`` down by one
    row (the first row, which has no predecessor, gets 1). Only rows with
    ``IsOpen == 1`` are plotted.

    Args:
        df: Frame with ``IsOpen``, ``StoreID`` and ``NumberOfSales`` columns.
            It is not modified.
        storeID: Store identifier. NOTE(review): currently has no effect on
            the plot, see the comment in the body.
        show: When true, display the figure with ``plt.show()``.
        save: When true, write the figure to ``opendaybeforegeneralplot.png``.
    """
    # np.ravel always yields a 1-D array, so a single-row frame works too.
    is_open = list(np.ravel(to_numpy(df["IsOpen"])))
    # Shift by one row; works for any number of rows instead of relying on
    # a hard-coded dataset length.
    open_day_before = [1] + is_open[:-1] if is_open else []

    # Carry df's own index so the assignment is positional; a fresh
    # RangeIndex would be re-aligned by label and could misplace values.
    dftoplot = df.assign(
        OpenDayBefore=pd.Series(open_day_before, index=df.index))

    # NOTE(review): the original code first filtered on
    # ``StoreID == storeID`` and then immediately overwrote that result with
    # the filter below, so rows of every store are plotted. That behaviour
    # is preserved here; confirm whether a per-store plot was intended.
    open_rows = dftoplot[dftoplot["IsOpen"] == 1]

    sb.boxplot(x="OpenDayBefore", y="NumberOfSales",
               data=open_rows).set_title("Sales / Shop Availability")
    fig = plt.gcf()
    fig.set_size_inches(18, 9)

    if show:
        plt.show()
    if save:
        fig.savefig("opendaybeforegeneralplot.png")
Esempio n. 5
0
def opendaybeforeonweekplot(df, storeID, show=True, save=False):
    """Box-plot sales against the previous row's ``IsOpen``, Sundays removed.

    Rows whose date falls on a Sunday are dropped first. ``OpenDayBefore`` is
    then derived by shifting ``IsOpen`` down by one row (the first row gets
    1), and only rows with ``IsOpen == 1`` are plotted.

    Args:
        df: Frame with ``Date`` (parsed as ``%d/%m/%Y``), ``IsOpen``,
            ``StoreID`` and ``NumberOfSales`` columns. NOTE: ``Date`` is
            converted to datetimes and a ``Day`` column is added on the
            caller's frame, as in the original implementation.
        storeID: Store identifier. NOTE(review): currently has no effect on
            the plot, see the comment in the body.
        show: When true, display the figure with ``plt.show()``.
        save: When true, write the figure to ``opendaybeforeonweekplot.png``.
    """
    df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')
    date_accessor = df['Date'].dt
    # ``weekday_name`` was removed in pandas 1.0 in favour of ``day_name()``;
    # fall back to the old attribute only on versions lacking the method.
    if hasattr(date_accessor, 'day_name'):
        df['Day'] = date_accessor.day_name()
    else:
        df['Day'] = date_accessor.weekday_name
    df = df.drop(df[df["Day"] == "Sunday"].index)

    # np.ravel always yields a 1-D array, so a single-row frame works too.
    is_open = list(np.ravel(to_numpy(df["IsOpen"])))
    # Shift by one row; works for any number of rows instead of relying on
    # a hard-coded dataset length.
    open_day_before = [1] + is_open[:-1] if is_open else []

    # Dropping Sundays leaves gaps in df's index. Reuse that index so the
    # shifted values land on the intended rows; a fresh RangeIndex would be
    # re-aligned by label, misplacing values and producing NaN.
    dftoplot = df.assign(
        OpenDayBefore=pd.Series(open_day_before, index=df.index))

    # NOTE(review): the original code first filtered on
    # ``StoreID == storeID`` and then immediately overwrote that result with
    # the filter below, so rows of every store are plotted. That behaviour
    # is preserved here; confirm whether a per-store plot was intended.
    open_rows = dftoplot[dftoplot["IsOpen"] == 1]

    sb.boxplot(
        x="OpenDayBefore", y="NumberOfSales",
        data=open_rows).set_title("Sales / Shop Availability (-Sun)")
    fig = plt.gcf()
    fig.set_size_inches(18, 9)

    if show:
        plt.show()
    if save:
        fig.savefig("opendaybeforeonweekplot.png")
Esempio n. 6
0
def gen_pandas_cols():
    """Flatten the module-level ``shops``/``months``/``totp`` into columns.

    One entry is produced for every (shop, month) pair, shops varying
    slowest; the sales value for the pair is read from ``totp[i][j]``.

    Returns:
        A tuple of three numpy arrays of equal length: shops, months, sales.
    """
    shop_values, month_values, sales_values = [], [], []
    for shop_idx, shop in enumerate(shops):
        for month_idx, month in enumerate(months):
            shop_values.append(shop)
            month_values.append(month)
            sales_values.append(totp[shop_idx][month_idx])
    return (
        np.array(shop_values),
        np.array(month_values),
        np.array(sales_values),
    )


# Input files: the predictions to evaluate and the dataset they refer to.
name_precitions_csv = "final_sales_predictions_new1.csv"
name_original_csv = "final_for_sales_test_r.csv"

# Predictions read via the project's dataset helpers, squeezed of unit axes.
preds = d.to_numpy(d.read_dataset(name_precitions_csv)).squeeze()


# Per-row date, region and store id taken from the original dataset.
orig = d.read_dataset(name_original_csv)
dates = d.to_numpy(orig[['Date']]).squeeze()
regions = d.to_numpy(orig[['Region']]).squeeze()
ids = d.to_numpy(orig[['StoreID']]).squeeze()


# NOTE(review): ``preds`` is passed as both the first and the second argument
# of ``eva.region_error`` -- confirm this is intended. ``totp``, ``shops`` and
# ``months`` are the module-level names read by ``gen_pandas_cols``.
error, totp, totr, shops, months = eva.region_error(preds, preds, regions, ids, dates, True)

print(error)
print(totp)
print(shops)
print(months)
Esempio n. 7
0
def tss(df, attr):
    """Return the sum of squared deviations of column ``attr`` from its mean.

    Args:
        df: Frame containing the column ``attr``.
        attr: Name of the column to evaluate.
    """
    values = d.to_numpy(df[[attr]]).squeeze()
    deviations = values - values.mean()
    return np.sum(np.square(deviations))
Esempio n. 8
0
def rss(df, preds, attr):
    """Return the sum of squared differences between ``attr`` and ``preds``.

    Args:
        df: Frame containing the column ``attr``.
        preds: Values subtracted element-wise from the column.
        attr: Name of the column to compare against ``preds``.
    """
    observed = d.to_numpy(df[[attr]]).squeeze()
    residuals = observed - preds
    return np.sum(np.square(residuals))
Esempio n. 9
0
def prepare_out(ds):
    """Return the ``NumberOfSales`` column of ``ds`` converted by ``d.to_numpy``.

    The column is selected as a one-column frame and the result is not
    squeezed.
    """
    sales_column = ds[['NumberOfSales']]
    return d.to_numpy(sales_column)
Esempio n. 10
0
import evaluation.evaluation as ev_cust
import dataset.utility as utils
import pandas
import sklearn.neural_network as nn


def excluded_feats():
    """Return a fresh list of the feature names to exclude."""
    weather_extremes = [
        'Max_Humidity', 'Max_TemperatureC', 'Max_VisibilityKm',
        'Max_Wind_SpeedKm_h', 'Min_Dew_PointC', 'Min_Humidity',
        'Min_TemperatureC', 'Min_VisibilitykM',
    ]
    return ["Month"] + weather_extremes + ['NumberOfCustomers']


# Load the training dataset and extract per-row region, date and store id.
datas = d.read_dataset("final_sales_only_train.csv")
regions = d.to_numpy(datas[['Region']]).squeeze()
dates = d.to_numpy(datas[['Date']]).squeeze()
ids = d.to_numpy(datas[['StoreID']]).squeeze()

# Build the dataset object on a copy of the frame, with 'NumberOfSales' as
# target and the features from excluded_feats() excluded.
# NOTE(review): the meaning of the ``split`` tuple is defined by SetBuilder
# and is not visible here -- confirm before changing it.
data = sb.SetBuilder(target='NumberOfSales', autoexclude=True, df=datas.copy(), split=(3, 2016, 2, 2018, 12, 2018, 12, 2018))\
    .exclude_list(excluded_feats())\
    .build()

it = 1
yy = []
models = []
for i in range(it):
    bagx, bagy = data.xtr, data.ytr
    dt = nn.MLPRegressor(hidden_layer_sizes=(400, 3),
                         activation='identity',
                         solver='adam',
Esempio n. 11
0
def prepare_out(df):
    """Return the ``NumberOfCustomers`` column of ``df``, squeezed.

    The column is converted with ``ds.to_numpy`` and unit-length axes are
    removed from the result.
    """
    customers_column = df[['NumberOfCustomers']]
    return ds.to_numpy(customers_column).squeeze()
Esempio n. 12
0
def prepare_out(ds):
    """Return the ``NumberOfCustomers`` column of ``ds`` via ``d.to_numpy``.

    The column is selected as a one-column frame and the result is not
    squeezed.
    """
    customers_column = ds[['NumberOfCustomers']]
    return d.to_numpy(customers_column)
Esempio n. 13
0
    df = ds.read_dataset("best_for_customers.csv")
    sb.lmplot(x="meancustshop",
              y="meancust_std_shop",
              data=df,
              hue="StoreType_Shopping Center")
    fig = plt.gcf()
    fig.set_size_inches(18, 9)

    if show:
        plt.show()
    if save:
        fig.savefig("meanstdscatterpershop.png")


if __name__ == '__main__':
    import dataset.dataset as d
    import dataset.utility as utils
    import pandas as pd
    ds = d.read_imputed_onehot_dataset()
    monthlyplot(ds)
    # Walk one month at a time from 3/2016 up to, but not including, 3/2018
    # and print the total NumberOfSales of each month.
    y, m = 2016, 3
    while (y, m) != (2018, 3):
        sub_ds = utils.get_frame_in_range(ds, m, y, m, y)
        expected_out = d.to_numpy(sub_ds[['NumberOfSales']]).squeeze()
        print("{}/{}: ".format(m, y), expected_out.sum())
        if m == 12:
            # December rolls over to January of the following year.
            m = 1
            y += 1
        else:
            m += 1
Esempio n. 14
0
def min_per_shop(df, id):
    """Return the smallest ``NumberOfSales`` among rows whose ``StoreID`` is ``id``.

    Args:
        df: Frame with ``StoreID`` and ``NumberOfSales`` columns.
        id: Store identifier to select.
    """
    belongs_to_store = (df[['StoreID']] == id).all(axis=1)
    store_sales = d.to_numpy(df[belongs_to_store][['NumberOfSales']])
    return store_sales.min()
Esempio n. 15
0
def max_cust_per_shop(df, id):
    """Return the largest ``NumberOfCustomers`` among rows whose ``StoreID`` is ``id``.

    Args:
        df: Frame with ``StoreID`` and ``NumberOfCustomers`` columns.
        id: Store identifier to select.
    """
    belongs_to_store = (df[['StoreID']] == id).all(axis=1)
    store_customers = d.to_numpy(df[belongs_to_store][['NumberOfCustomers']])
    return store_customers.max()