def mean_cust_per_shop_if_holiday(df, data_from=None):
    """Add a 'MeanCustPerShopIfHoliday' column to ``df``.

    For every store, the mean of 'NumberOfCustomers' is computed over the rows
    of ``data_from`` whose 'IsHoliday' value is non-zero. Rows of ``df`` with a
    non-zero 'IsHoliday' receive the mean of their store; every other row keeps
    the initial value 0.

    Cell values are read through ``d.content_of(frame, column, index_label)``.

    Args:
        df: DataFrame to annotate; it is modified in place.
        data_from: DataFrame the means are computed from. Defaults to ``df``.

    Returns:
        The same ``df`` object, with the new column added.

    Raises:
        KeyError: if a holiday row of ``df`` belongs to a store that has no
            holiday row in ``data_from``.
    """
    if data_from is None:
        data_from = df
    df['MeanCustPerShopIfHoliday'] = p.Series(np.zeros(len(df)), df.index)

    # Running sum and row count per store, restricted to holiday rows.
    totals = dict()
    counts = dict()
    for i in data_from.index.tolist():
        if d.content_of(data_from, 'IsHoliday', i) != 0:
            key = str(d.content_of(data_from, 'StoreID', i))
            customers = d.content_of(data_from, 'NumberOfCustomers', i)
            if key in totals:
                totals[key] += customers
                counts[key] += 1
            else:
                totals[key] = customers
                counts[key] = 1

    for i in df.index.tolist():
        if d.content_of(df, 'IsHoliday', i) != 0:
            key = str(d.content_of(df, 'StoreID', i))
            # DataFrame.set_value was removed in pandas 1.0; .at is the
            # supported scalar setter.
            df.at[i, 'MeanCustPerShopIfHoliday'] = totals[key] / counts[key]
    return df
def mean_std_sales_per_shop_per_day(df, data_from=None):
    """Add 'MeanSalesPerShopPerDay' and 'StdSalesPerShopPerDay' to ``df``.

    Rows of ``data_from`` are grouped by store and day, and the mean and the
    population standard deviation (sum of squared deviations divided by the
    group size) of 'NumberOfSales' are computed per group. Each row of ``df``
    then receives the statistics of its own (store, day) group.

    The group key is ``str(StoreID)`` concatenated with the 'Day' value, so
    'Day' must be a string. Cell values are read through
    ``d.content_of(frame, column, index_label)``.

    Args:
        df: DataFrame to annotate; it is modified in place.
        data_from: DataFrame the statistics are computed from. Defaults to
            ``df``.

    Returns:
        The same ``df`` object, with the two new columns added.

    Raises:
        KeyError: if a row of ``df`` has a (store, day) combination that does
            not occur in ``data_from``.
    """
    if data_from is None:
        data_from = df
    df['MeanSalesPerShopPerDay'] = p.Series(np.zeros(len(df)), df.index)
    df['StdSalesPerShopPerDay'] = p.Series(np.zeros(len(df)), df.index)

    def group_key(frame, i):
        # Same key format for both frames so lookups line up.
        store = d.content_of(frame, 'StoreID', i)
        day = d.content_of(frame, 'Day', i)
        return str(store) + day

    source_rows = data_from.index.tolist()

    # First pass: sum and count of sales per group.
    totals = dict()
    counts = dict()
    for i in source_rows:
        key = group_key(data_from, i)
        sales = d.content_of(data_from, 'NumberOfSales', i)
        if key in totals:
            totals[key] += sales
            counts[key] += 1
        else:
            totals[key] = sales
            counts[key] = 1

    # Compute each group mean once instead of once per row.
    means = dict()
    for key in totals:
        means[key] = totals[key] / counts[key]

    # Second pass: sum of squared deviations from the group mean.
    squared_dev = dict()
    for i in source_rows:
        key = group_key(data_from, i)
        deviation = d.content_of(data_from, 'NumberOfSales', i) - means[key]
        if key in squared_dev:
            squared_dev[key] += deviation * deviation
        else:
            squared_dev[key] = deviation * deviation

    for i in df.index.tolist():
        key = group_key(df, i)
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # supported scalar setter.
        df.at[i, 'MeanSalesPerShopPerDay'] = means[key]
        df.at[i, 'StdSalesPerShopPerDay'] = np.sqrt(squared_dev[key] / counts[key])
    return df
def one_hot_numeric(ds, attr, header):
    """One-hot encode column ``attr`` of ``ds`` using exact value equality.

    For every value returned by ``d.values_of(ds, attr)`` a column named
    ``header + str(value)`` is added to ``ds``. It holds 1 in the rows whose
    ``attr`` content (read through ``d.content_of``) equals that value and 0
    in all other rows. The original ``attr`` column is left in place.

    Args:
        ds: DataFrame to encode; it is modified in place.
        attr: name of the column to encode.
        header: prefix of the generated column names.

    Returns:
        The same ``ds`` object, with the indicator columns added.
    """
    # The index does not change while columns are added, so list it once.
    rows = ds.index.tolist()
    for value in d.values_of(ds, attr):
        column = header + str(value)
        ds[column] = p.Series(np.zeros(len(ds)), ds.index)
        for i in rows:
            if d.content_of(ds, attr, i) == value:
                # DataFrame.set_value was removed in pandas 1.0; .at is the
                # supported scalar setter.
                ds.at[i, column] = 1
    return ds
def mean_sales_per_month_per_region(df, data_from=None):
    """Add a 'MeanSalesPerRegionPerMonth' column to ``df``.

    Rows of ``data_from`` are grouped by ('Month' converted to int, 'Region')
    and the mean of 'NumberOfSales' is computed per group. Each row of ``df``
    receives the mean of its own (month, region) group.

    Cell values are read through ``d.content_of(frame, column, index_label)``.

    Args:
        df: DataFrame to annotate; it is modified in place.
        data_from: DataFrame the means are computed from. Defaults to ``df``.

    Returns:
        The same ``df`` object, with the new column added.

    Raises:
        KeyError: if a row of ``df`` has a (month, region) combination that
            does not occur in ``data_from``.
    """
    if data_from is None:
        data_from = df
    df['MeanSalesPerRegionPerMonth'] = p.Series(np.zeros(len(df)), df.index)

    def group_key(frame, i):
        # Same key format for both frames so lookups line up.
        month = int(d.content_of(frame, 'Month', i))
        region = d.content_of(frame, 'Region', i)
        return str(month) + "_" + str(region)

    # Running sum and row count per (month, region) group.
    totals = dict()
    counts = dict()
    for i in data_from.index.tolist():
        key = group_key(data_from, i)
        sales = d.content_of(data_from, 'NumberOfSales', i)
        if key in totals:
            totals[key] += sales
            counts[key] += 1
        else:
            totals[key] = sales
            counts[key] = 1

    for i in df.index.tolist():
        key = group_key(df, i)
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # supported scalar setter.
        df.at[i, 'MeanSalesPerRegionPerMonth'] = totals[key] / counts[key]
    return df
def mean_cust_per_month_per_shop(df, data_from=None):
    """Add a 'MeanCustPerShopPerMonth' column to ``df``.

    Rows of ``data_from`` are grouped by ('Month' converted to int, 'StoreID')
    and the mean of 'NumberOfCustomers' is computed per group. Each row of
    ``df`` receives the mean of its own (month, store) group.

    Cell values are read through ``d.content_of(frame, column, index_label)``.

    Args:
        df: DataFrame to annotate; it is modified in place.
        data_from: DataFrame the means are computed from. Defaults to ``df``.

    Returns:
        The same ``df`` object, with the new column added.

    Raises:
        KeyError: if a row of ``df`` has a (month, store) combination that
            does not occur in ``data_from``.
    """
    if data_from is None:
        data_from = df
    df['MeanCustPerShopPerMonth'] = p.Series(np.zeros(len(df)), df.index)

    def group_key(frame, i):
        # Same key format for both frames so lookups line up.
        month = int(d.content_of(frame, 'Month', i))
        store = d.content_of(frame, 'StoreID', i)
        return str(month) + "_" + str(store)

    # Running sum and row count per (month, store) group.
    totals = dict()
    counts = dict()
    for i in data_from.index.tolist():
        key = group_key(data_from, i)
        customers = d.content_of(data_from, 'NumberOfCustomers', i)
        if key in totals:
            totals[key] += customers
            counts[key] += 1
        else:
            totals[key] = customers
            counts[key] = 1

    for i in df.index.tolist():
        key = group_key(df, i)
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # supported scalar setter.
        df.at[i, 'MeanCustPerShopPerMonth'] = totals[key] / counts[key]
    return df
def add_min_per_shop(df, data_from=None):
    """Add a 'min_shop' column to ``df`` holding a per-store value.

    For each store id returned by ``d.values_of(data_from, 'StoreID')`` the
    value ``min_per_shop(data_from, store_id)`` is computed once and cached.
    Each row of ``df`` then receives the cached value of its own store.

    NOTE(review): ``min_per_shop`` is defined elsewhere; which quantity it
    minimises is not visible from this function.

    Args:
        df: DataFrame to annotate; it is modified in place.
        data_from: DataFrame passed to ``min_per_shop``. Defaults to ``df``.

    Returns:
        The same ``df`` object, with the new column added.

    Raises:
        KeyError: if a row of ``df`` belongs to a store that is not among the
            store ids of ``data_from``.
    """
    if data_from is None:
        data_from = df

    # Cache keyed by str(store id); each store is evaluated only once even if
    # d.values_of yields duplicates.
    minima = dict()
    for store in d.values_of(data_from, 'StoreID'):
        key = str(store)
        if key not in minima:
            minima[key] = min_per_shop(data_from, store)

    df['min_shop'] = p.Series(np.zeros(len(df)), df.index)
    for i in df.index.tolist():
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # supported scalar setter.
        df.at[i, 'min_shop'] = minima[str(d.content_of(df, 'StoreID', i))]
    return df
def add_avg_cust_per_shop(df, data_from=None):
    """Add a 'meancustshop' column to ``df`` holding a per-store value.

    For each store id returned by ``d.values_of(data_from, 'StoreID')`` the
    value ``average_cust_per_shop(data_from, store_id)`` is computed once and
    cached. Each row of ``df`` then receives the cached value of its own
    store.

    NOTE(review): ``average_cust_per_shop`` is defined elsewhere; presumably
    it returns the store's mean number of customers - confirm there.

    Args:
        df: DataFrame to annotate; it is modified in place.
        data_from: DataFrame passed to ``average_cust_per_shop``. Defaults to
            ``df``.

    Returns:
        The same ``df`` object, with the new column added.

    Raises:
        KeyError: if a row of ``df`` belongs to a store that is not among the
            store ids of ``data_from``.
    """
    if data_from is None:
        data_from = df

    # Cache keyed by str(store id); each store is evaluated only once even if
    # d.values_of yields duplicates.
    averages = dict()
    for store in d.values_of(data_from, 'StoreID'):
        key = str(store)
        if key not in averages:
            averages[key] = average_cust_per_shop(data_from, store)

    df['meancustshop'] = p.Series(np.zeros(len(df)), df.index)
    for i in df.index.tolist():
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # supported scalar setter.
        df.at[i, 'meancustshop'] = averages[str(d.content_of(df, 'StoreID', i))]
    return df
def one_hot(ds, attr, header, split=False):
    """One-hot encode the string column ``attr`` of ``ds``.

    A column named ``header + label`` is added for every label. It holds 1 in
    the rows whose ``attr`` content (read through ``d.content_of``) contains
    the label as a substring and 0 in all other rows. The original ``attr``
    column is left in place.

    NOTE(review): matching is by substring, not equality, so a label that is
    a substring of another value (e.g. "Rain" inside "Rainstorm") also marks
    that row - confirm this is intended.

    Args:
        ds: DataFrame to encode; it is modified in place.
        attr: name of the column to encode; its values must be strings.
        header: prefix of the generated column names.
        split: when true, every value from ``d.values_of(ds, attr)`` is split
            on "-" and the distinct parts, in first-seen order, become the
            labels. When false, the values themselves are the labels.

    Returns:
        The same ``ds`` object, with the indicator columns added.
    """
    values = d.values_of(ds, attr)
    if split:
        labels = []
        for value in values:
            # A distinct local name keeps the ``split`` parameter intact.
            for part in value.split("-"):
                if part not in labels:
                    labels.append(part)
    else:
        labels = values

    # The index does not change while columns are added, so list it once.
    rows = ds.index.tolist()
    for label in labels:
        column = header + label
        ds[column] = p.Series(np.zeros(len(ds)), ds.index)
        for i in rows:
            if label in d.content_of(ds, attr, i):
                # DataFrame.set_value was removed in pandas 1.0; .at is the
                # supported scalar setter.
                ds.at[i, column] = 1
    return ds