def reorder_attributes(ds, list_order):
    """Build a new frame whose columns follow ``list_order``.

    The result shares the index of ``ds`` and gets one column per name in
    ``list_order``, in that order.  Each column is filled with the squeezed
    output of ``d.to_numpy`` for the matching single-column slice of ``ds``.

    If anything goes wrong while extracting a column (presumably the
    intended case is a name that is absent from ``ds`` -- TODO confirm),
    the column is filled with zeros instead.
    """
    reordered = p.DataFrame(index=ds.index)
    for column in list_order:
        try:
            values = d.to_numpy(ds[[column]]).squeeze()
            reordered[column] = p.Series(values, index=reordered.index)
        except Exception:
            # Best-effort fallback: any failure yields an all-zero column.
            zeros = np.zeros(len(reordered))
            reordered[column] = p.Series(zeros, index=reordered.index)
    return reordered
def drop_useless(ds):
    """Return ``ds`` converted by ``d.to_numpy`` after dropping unused columns.

    The listed columns are removed along ``axis=1``; every other column of
    ``ds`` is passed through to ``d.to_numpy`` unchanged.
    """
    unused_columns = [
        'NumberOfSales',
        'StoreID',
        'Date',
        'IsOpen',
        'Region',
        'CloudCover',
        'Max_Sea_Level_PressurehPa',
        'WindDirDegrees',
        'Max_Dew_PointC',
        'Mean_Sea_Level_PressurehPa',
        'Min_Sea_Level_PressurehPa',
        'Day',
    ]
    remaining = ds.drop(unused_columns, axis=1)
    return d.to_numpy(remaining)
def predict(self, row):
    """Predict for ``row`` with the model matching its store type.

    ``self.predict_sc`` is used when every value of
    ``row['StoreType_Shopping Center']`` equals 1, ``self.predict_oth``
    otherwise.  The chosen predictor receives the remaining columns of
    ``row`` as an array reshaped to ``[1, -1]``; its result is squeezed
    before being returned.
    """
    dropped_columns = [
        'NumberOfSales',
        'Month',
        'NumberOfCustomers',
        'StoreID',
        'Date',
        'IsOpen',
        'Region',
        'CloudCover',
        'Max_Sea_Level_PressurehPa',
        'WindDirDegrees',
        'Max_Dew_PointC',
        'Mean_Sea_Level_PressurehPa',
        'Min_Sea_Level_PressurehPa',
        'Day',
    ]
    is_shopping_center = (row['StoreType_Shopping Center'] == 1).all()

    features = data_manager.to_numpy(row.drop(dropped_columns, axis=1))
    # Flatten first, then force a single-sample 2-D layout.
    features = features.squeeze().reshape([1, -1])

    predictor = self.predict_sc if is_shopping_center else self.predict_oth
    return predictor(features).squeeze()
def opendaybeforegeneralplot(df, storeID, show=True, save=False):
    """Box-plot sales of one store against whether the previous row was open.

    An ``OpenDayBefore`` column is derived by shifting ``IsOpen`` down by one
    row (the first row is treated as preceded by an open day).  Only rows of
    store ``storeID`` that are themselves open are plotted.

    Args:
        df: frame with ``IsOpen``, ``StoreID`` and ``NumberOfSales`` columns.
            It is not modified.
        storeID: value of ``StoreID`` selecting the store to plot.
        show: when true, display the figure with ``plt.show()``.
        save: when true, write the figure to ``opendaybeforegeneralplot.png``.
    """
    # atleast_1d keeps a single-row frame from collapsing to a 0-d array.
    is_open = np.atleast_1d(to_numpy(df["IsOpen"]).squeeze())
    # Prepend the assumed "open" day and trim back to the original length,
    # so the shift works for any number of rows (no hard-coded row count).
    open_day_before = np.concatenate(([1], is_open))[:len(is_open)]
    # Assigning a plain array is positional, so the values cannot be
    # misaligned by the index labels of ``df``.
    dftoplot = df.assign(OpenDayBefore=open_day_before)

    # Apply both conditions together; filtering the unfiltered frame a second
    # time would silently discard the store selection.
    selected = (dftoplot["StoreID"] == storeID) & (dftoplot["IsOpen"] == 1)
    dftoplotpershop = dftoplot[selected]

    sb.boxplot(x="OpenDayBefore", y="NumberOfSales",
               data=dftoplotpershop).set_title("Sales / Shop Availability")
    fig = plt.gcf()
    fig.set_size_inches(18, 9)
    if show:
        plt.show()
    if save:
        fig.savefig("opendaybeforegeneralplot.png")
def opendaybeforeonweekplot(df, storeID, show=True, save=False):
    """Box-plot sales of one store against previous-row opening, Sundays excluded.

    ``Date`` is parsed as ``%d/%m/%Y`` and a ``Day`` column with the weekday
    name is added; both are written onto the caller's ``df``.  Sunday rows
    are then dropped from a local copy, ``OpenDayBefore`` is derived by
    shifting ``IsOpen`` down by one row (the first row is treated as preceded
    by an open day), and only open rows of store ``storeID`` are plotted.

    Args:
        df: frame with ``Date``, ``IsOpen``, ``StoreID`` and
            ``NumberOfSales`` columns.
        storeID: value of ``StoreID`` selecting the store to plot.
        show: when true, display the figure with ``plt.show()``.
        save: when true, write the figure to ``opendaybeforeonweekplot.png``.
    """
    df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')
    date_accessor = df['Date'].dt
    # ``weekday_name`` was removed in pandas 1.0; ``day_name()`` replaces it.
    # Fall back to the old attribute on pandas versions that predate it.
    if hasattr(date_accessor, 'day_name'):
        df['Day'] = date_accessor.day_name()
    else:
        df['Day'] = date_accessor.weekday_name
    df = df.drop(df[df["Day"] == "Sunday"].index)

    # atleast_1d keeps a single-row frame from collapsing to a 0-d array.
    is_open = np.atleast_1d(to_numpy(df["IsOpen"]).squeeze())
    # Prepend the assumed "open" day and trim back to the original length,
    # so the shift works for any number of rows (no hard-coded row count).
    open_day_before = np.concatenate(([1], is_open))[:len(is_open)]
    # Dropping Sundays leaves gaps in the index; a plain array is assigned
    # positionally, so each row gets the value of the row directly above it
    # instead of being matched by index label.
    dftoplot = df.assign(OpenDayBefore=open_day_before)

    # Apply both conditions together; filtering the unfiltered frame a second
    # time would silently discard the store selection.
    selected = (dftoplot["StoreID"] == storeID) & (dftoplot["IsOpen"] == 1)
    dftoplotpershop = dftoplot[selected]

    sb.boxplot(
        x="OpenDayBefore", y="NumberOfSales",
        data=dftoplotpershop).set_title("Sales / Shop Availability (-Sun)")
    fig = plt.gcf()
    fig.set_size_inches(18, 9)
    if show:
        plt.show()
    if save:
        fig.savefig("opendaybeforeonweekplot.png")
def gen_pandas_cols():
    """Flatten the module-level ``shops``/``months``/``totp`` into columns.

    Returns three parallel arrays with one entry per (shop, month) pair,
    shops varying slowest: the shop, the month, and ``totp[i][j]`` for that
    pair.
    """
    index_pairs = [(i, j)
                   for i in range(len(shops))
                   for j in range(len(months))]
    shops_col = np.array([shops[i] for i, _ in index_pairs])
    months_col = np.array([months[j] for _, j in index_pairs])
    sales_col = np.array([totp[i][j] for i, j in index_pairs])
    return shops_col, months_col, sales_col


name_precitions_csv = "final_sales_predictions_new1.csv"
name_original_csv = "final_for_sales_test_r.csv"

# Predictions as a flat array, plus the frame they were computed from.
preds = d.to_numpy(d.read_dataset(name_precitions_csv)).squeeze()
orig = d.read_dataset(name_original_csv)

# Per-row metadata columns, each squeezed to a flat array.
dates, regions, ids = (
    d.to_numpy(orig[[column]]).squeeze()
    for column in ('Date', 'Region', 'StoreID')
)

# NOTE: the predictions are passed as both of the first two arguments.
error, totp, totr, shops, months = eva.region_error(
    preds, preds, regions, ids, dates, True)

print(error, totp, shops, months, sep="\n")
def tss(df, attr):
    """Return the sum of squared deviations of column ``attr`` from its mean."""
    values = d.to_numpy(df[[attr]]).squeeze()
    deviations = values - values.mean()
    return np.sum(np.square(deviations))
def rss(df, preds, attr):
    """Return the sum of squared differences between column ``attr`` and ``preds``."""
    actual = d.to_numpy(df[[attr]]).squeeze()
    residuals = actual - preds
    return np.sum(np.square(residuals))
def prepare_out(ds):
    """Return the ``NumberOfSales`` column of ``ds`` converted by ``d.to_numpy``.

    The column is selected as a single-column frame and is not squeezed.
    """
    sales = ds[['NumberOfSales']]
    return d.to_numpy(sales)
import evaluation.evaluation as ev_cust import dataset.utility as utils import pandas import sklearn.neural_network as nn def excluded_feats(): return [ "Month", 'Max_Humidity', 'Max_TemperatureC', 'Max_VisibilityKm', 'Max_Wind_SpeedKm_h', 'Min_Dew_PointC', 'Min_Humidity', 'Min_TemperatureC', 'Min_VisibilitykM', 'NumberOfCustomers' ] datas = d.read_dataset("final_sales_only_train.csv") regions = d.to_numpy(datas[['Region']]).squeeze() dates = d.to_numpy(datas[['Date']]).squeeze() ids = d.to_numpy(datas[['StoreID']]).squeeze() data = sb.SetBuilder(target='NumberOfSales', autoexclude=True, df=datas.copy(), split=(3, 2016, 2, 2018, 12, 2018, 12, 2018))\ .exclude_list(excluded_feats())\ .build() it = 1 yy = [] models = [] for i in range(it): bagx, bagy = data.xtr, data.ytr dt = nn.MLPRegressor(hidden_layer_sizes=(400, 3), activation='identity', solver='adam',
def prepare_out(df):
    """Return the ``NumberOfCustomers`` column of ``df`` as a squeezed array.

    The column is selected as a single-column frame, converted with
    ``ds.to_numpy`` and then squeezed.
    """
    customers = df[['NumberOfCustomers']]
    return ds.to_numpy(customers).squeeze()
def prepare_out(ds):
    """Return the ``NumberOfCustomers`` column of ``ds`` converted by ``d.to_numpy``.

    The column is selected as a single-column frame and is not squeezed.
    """
    customers = ds[['NumberOfCustomers']]
    return d.to_numpy(customers)
df = ds.read_dataset("best_for_customers.csv") sb.lmplot(x="meancustshop", y="meancust_std_shop", data=df, hue="StoreType_Shopping Center") fig = plt.gcf() fig.set_size_inches(18, 9) if show: plt.show() if save: fig.savefig("meanstdscatterpershop.png") if __name__ == '__main__': import dataset.dataset as d import dataset.utility as utils import pandas as pd ds = d.read_imputed_onehot_dataset() monthlyplot(ds) y = 2016 m = 3 while y != 2018 or m != 3: sub_ds = utils.get_frame_in_range(ds, m, y, m, y) expected_out = d.to_numpy(sub_ds[['NumberOfSales']]).squeeze() print(str(m) + "/" + str(y) + ": ", expected_out.sum()) m += 1 if m == 13: m = 1 y += 1
def min_per_shop(df, id):
    """Return the smallest ``NumberOfSales`` among rows whose ``StoreID`` equals ``id``."""
    shop_mask = (df[['StoreID']] == id).all(axis=1)
    shop_sales = d.to_numpy(df[shop_mask][['NumberOfSales']])
    return shop_sales.min()
def max_cust_per_shop(df, id):
    """Return the largest ``NumberOfCustomers`` among rows whose ``StoreID`` equals ``id``."""
    shop_mask = (df[['StoreID']] == id).all(axis=1)
    shop_customers = d.to_numpy(df[shop_mask][['NumberOfCustomers']])
    return shop_customers.max()