def get_users_table():
    """Build the users table enriched with pharmacy, province, region and role data.

    The users, customers, provinces, regions and user-roles tables are loaded
    through ``CsvL`` and merged into a single DataFrame.

    Returns:
        The merged DataFrame with ``Id_Regione``, ``Code`` and ``PdcCode``
        removed, ``Nome`` renamed to ``Regione`` (presumably the region name
        coming from the regions table -- verify against the CSV schema) and
        the rows of ``UserId`` 12 filtered out.
    """
    users = CsvL.get_users_anag()
    customers = CsvL.get_customers_anag()
    provinces = CsvL.get_province()
    regions = CsvL.get_regions()
    roles = CsvL.get_user_roles()

    # Customers: keep only the join keys and rename them to the names used
    # downstream.
    customers = customers[['Id', 'Code', 'PdcCode', 'Province']].rename(
        columns={'Id': 'FarmaId', 'Province': 'ProvId'})

    # Provinces: the plate code ('Sigla_automobilistica') becomes 'ProvId' so
    # that the natural merge below can match it with the customers' province.
    provinces = provinces.drop(['Id', 'Nome', 'Longitudine'], axis=1)
    provinces = provinces[['Id_Regione', 'Sigla_automobilistica', 'Latitudine']]
    provinces = provinces.rename(columns={'Sigla_automobilistica': 'ProvId'})

    # Disabled in the original: r['Nome'] = r.Nome.apply(lambda n: n[0:4])

    roles = roles.drop(['Id', 'CreatorUserId', 'TenantId'], axis=1)
    # Some users (e.g. 208) have more than one role: keep the last one listed.
    roles = roles.groupby('UserId')['RoleId'].last().reset_index()

    table = pd.merge(users, customers,
                     left_on=['ClientCode', 'PdcCode'],
                     right_on=['Code', 'PdcCode'],
                     how='left')
    # The remaining merges use pandas' defaults (inner join on the columns the
    # two frames have in common).
    for other in (provinces, regions, roles):
        table = pd.merge(table, other)

    table = (
        table.drop(columns=['Id_Regione'])
        .rename(columns={'Nome': 'Regione'})
        .drop(columns=['Code', 'PdcCode'])
    )

    # User 12 (Andrea Dini) is excluded from the table.
    return table[table.UserId != 12]
def add_avatar_data(df, cut_pce=None):
    """Attach avatar data to ``df`` and keep only the rows that found a match.

    The avatar PCE table and the avatar registry (both loaded through
    ``CsvL``) are joined on ``AvatarId`` and then left-merged onto ``df``.

    Args:
        df: DataFrame to enrich. The merge key(s) are chosen by
            ``__av_merge_col(df)``.
        cut_pce: Optional collection of ``AvatarPce`` values whose avatar rows
            must be excluded before merging. ``None`` or an empty collection
            (the default) excludes nothing.

    Returns:
        ``df`` merged with the avatar data, without the rows whose
        ``AvSessId`` is null after the merge.
    """
    avatar_pce = CsvL.get_avatar_pce()
    av_anag = CsvL.get_avatar_anag()
    avatar_pce = pd.merge(avatar_pce, av_anag, on='AvatarId')

    # ``None`` replaces the former mutable ``[]`` default. ``len`` is used
    # instead of truthiness so array-like arguments keep working.
    if cut_pce is not None and len(cut_pce) > 0:
        avatar_pce = avatar_pce[~avatar_pce.AvatarPce.isin(cut_pce)]

    on_col = __av_merge_col(df)
    df = pd.merge(df, avatar_pce, on=on_col, how='left')

    # Left merge: rows without avatar data end up with a null AvSessId.
    return df[~df.AvSessId.isnull()]
def get_df_group_prod(include_rare=False):
    """Aggregate the frame returned by ``get_df()`` into one feature row per product.

    Args:
        include_rare: When false (default) products whose ``nTot`` is 2 or
            less are dropped and the index is reset.

    Returns:
        The product registry from ``CsvL.get_prod_anag()`` merged with the
        per-product features computed below.
    """

    def _product_features(group):
        # Features of a single ProductId group.
        n_tot = group.Id.count()
        n_users = group.UserId.nunique()
        n_right = (group.ActionType == "RightProduct").sum()
        return pd.Series({
            'nUsers': n_users,
            'nFarma': group.FarmaId.nunique(),
            'nProv': group.ProvId.nunique(),
            'nReg': group.Regione.nunique(),
            'nAvSess': group.AvSessId.nunique(),
            'nSess': group.SessionId.nunique(),
            'nTot': n_tot,
            'MedianPce': group.AvatarPce.median(),
            'MeanPce': group.AvatarPce.mean(),
            'nRight': n_right,
            # Mean latitude expressed as an offset from 42.
            'NordSud': group.Latitudine.mean() - 42,
            # 'LatVar': group.Latitudine.var(),  (disabled in the original)
            'UserRatio': n_tot / n_users,
            'Ratio': n_right / n_tot,
            # Days between dm.MAXDATE and the group's latest YMD, plus one.
            'Recency': (dm.MAXDATE - group.YMD.max()).days + 1,
            'Frequency': group.YMD.nunique(),
        })

    history = get_df()
    registry = CsvL.get_prod_anag()
    features = history.groupby('ProductId').apply(_product_features).reset_index()

    products = pd.merge(registry, features)
    if include_rare:
        return products
    return products[products.nTot > 2].reset_index(drop=True)
def get_df(max_date=-1):
    """Build the avatar-info frame with avatar, session-date and user data.

    Args:
        max_date: Forwarded to ``dm.filter_date`` together with the ``'YMD'``
            column name. The meaning of the default ``-1`` is defined by that
            helper (presumably "no limit" -- TODO confirm).

    Returns:
        The enriched frame without the ``AvatarId``, ``Age``, ``YearMonth``,
        ``ClientCode``, ``FarmaId`` and ``Latitudine`` columns.
    """
    frame = dh.add_avatar_data(CsvL.get_avatar_info(), cut_pce=[5, 6, 7])
    frame = dh.add_session_date(frame)
    # Dropped in place, as in the original, on the frame the helper returned.
    frame.drop(columns=['AvatarId'], inplace=True)

    frame = dm.filter_date(frame, 'YMD', max_date)
    frame = Users.merge_users_clean(frame)

    # Column filter for the first sensitivity test.
    excluded = ['Age', 'YearMonth', 'ClientCode', 'FarmaId', 'Latitudine']
    return frame.drop(columns=excluded)
def get_df(max_date=-1):
    """Build the product-history frame with avatar, session, user and product-name data.

    Args:
        max_date: Forwarded to ``dm.filter_date`` together with the ``'YMD'``
            column name. The meaning of the default ``-1`` is defined by that
            helper (presumably "no limit" -- TODO confirm).

    Returns:
        The enriched frame restricted to a fixed list of columns, with the
        product name added by ``dh.add_prod_name``.
    """
    kept_columns = [
        'Id', 'UserId', 'SessionId', 'ActionType', 'ProductId',
        'AvSessId', 'AvatarPce', 'YMD', 'NameSurname', 'Regione',
        'RoleId', 'FarmaId', 'Latitudine', 'ProvId', 'Sex',
    ]

    history = dh.add_avatar_data(CsvL.get_prod_history(), cut_pce=[5, 6, 7])
    # Called for its side effect on the frame; the return value is ignored.
    __hist_hardfix(history)

    history = dh.add_session_date(history)
    # Dropped in place, as in the original, on the frame the helper returned.
    history.drop(columns=['AvatarId'], inplace=True)

    history = dm.filter_date(history, 'YMD', max_date)
    history = Users.merge_users_clean(history)

    return dh.add_prod_name(history[kept_columns])
def plot_uhist(uid, i_uh, arr_start):
    """Plot the performance history of one user on a new 9x8 figure.

    Drawn elements:
      * left axis, line: 10-row rolling sum of ``RightCount`` divided by the
        10-row rolling sum of ``nTot``;
      * left axis, bars: ``Ratio``;
      * right (twin) axis, bars: ``nTot``;
      * left axis, diamonds at y=0.5: positions where
        ``__count_occurrences(arr_start.values, weblog)`` is positive, drawn
        only when ``CsvL.get_web_log(uid)`` does not return ``None``.

    Args:
        uid: User id, passed to ``CsvL.get_web_log`` and
            ``Users.get_user_name``.
        i_uh: DataFrame with ``RightCount``, ``nTot`` and ``Ratio`` columns,
            one row per x position (presumably one row per session -- verify
            against the caller).
        arr_start: Datetime Series used for the x tick labels
            (``%d/%m/%Y``) and for the web-log matching.

    Returns:
        None. The figure is created with ``plt.figure`` and is not returned.
    """
    window = i_uh.rolling(10)
    rolling_avg = window['RightCount'].sum() / window['nTot'].sum()

    n_obs = len(rolling_avg)
    positions = range(n_obs)
    # Ratio bars start 0.7 to the left of each position (width 0.7, edge
    # aligned), so they end where the nTot bars begin.
    ratio_positions = np.arange(-0.7, n_obs - 0.7, 1)

    fig = plt.figure(figsize=(9, 8))
    ax = fig.add_subplot(111)
    ax2 = ax.twinx()

    avg_line, = ax.plot(positions, rolling_avg, lw=3, zorder=10,
                        color=co.ab_colors['rosso'])
    ratio_bars = ax.bar(ratio_positions, i_uh.Ratio,
                        color=co.ab_colors['azzurro'], alpha=0.5,
                        width=0.7, align='edge', zorder=1)
    ntot_bars = ax2.bar(positions, i_uh.nTot,
                        color=co.ab_colors['verde'], alpha=0.5,
                        width=0.25, align='edge')

    weblog = CsvL.get_web_log(uid)

    # Put the primary axis above the twin one and hide its background patch
    # so the twin axis stays visible underneath.
    ax.set_zorder(ax2.get_zorder() + 1)
    ax.patch.set_visible(False)

    legend_entries = [
        (avg_line, 'Rolling average'),
        (ratio_bars, 'Correctness'),
        (ntot_bars, 'Number of products'),
    ]

    if weblog is not None:
        web_checks = __count_occurrences(arr_start.values, weblog)
        # np.where yields a 1-tuple of indices; after the 0.25 shift the
        # leading axis is squeezed away to get a flat array of x positions.
        web_x = np.add(np.where(web_checks > 0), 0.25).squeeze(axis=0)
        web_y = 0.5 * np.ones(len(web_x))
        web_markers = ax.scatter(web_x, web_y, marker='d', edgecolors='k',
                                 s=150, lw=1,
                                 facecolor=co.ab_colors['giallo'], zorder=2)
        legend_entries.append((web_markers, 'Website check'))

    handles, labels = zip(*legend_entries)
    ax.legend(handles, labels, fontsize=16)

    ax.set_ylabel('% of correct recommendations', size=18)
    ax2.set_ylabel('Number of recommended products', size=18)
    ax.set_xlabel('Sessions', size=18)

    ax.set_xticks(positions)
    ax.tick_params(labelsize=16)
    pct_ticks = [0, 0.25, 0.5, 0.75, 1]
    ax.set_yticks(pct_ticks)
    ax.yaxis.set_major_formatter(ticker.PercentFormatter())
    ax.set_yticklabels(['{:,.0%}'.format(tick) for tick in pct_ticks])

    ax2.tick_params(labelsize=16)
    max_tot = i_uh.nTot.max()
    # Ticks every 5 units, plus one at the maximum itself.
    ax2.set_yticks(np.append(np.arange(0, max_tot, 5), max_tot))
    ax2.yaxis.set_major_formatter(ticker.ScalarFormatter())

    date_labels = arr_start.dt.strftime('%d/%m/%Y')
    ax.set_xticklabels(date_labels, rotation=45, ha='right', size=15)

    title = "Performance history for user \n{0}".format(
        Users.get_user_name(uid))
    plt.title(title, size=25, y=1.02)
def add_session_date(df):
    """Merge the avatar history onto ``df``.

    Args:
        df: DataFrame with ``AvatarId``, ``SessionId`` and ``UserId`` columns.

    Returns:
        The merge (pandas default join type) of ``df`` with
        ``CsvL.get_avatar_history()`` on those three columns.
    """
    join_keys = ['AvatarId', 'SessionId', 'UserId']
    avatar_history = CsvL.get_avatar_history()
    return pd.merge(df, avatar_history, on=join_keys)
def add_prod_name(df):
    """Add the ``ProdName`` column to ``df``.

    Args:
        df: DataFrame with a ``ProductId`` column.

    Returns:
        The merge (pandas default join type) of ``df`` with the
        ``ProductId``/``ProdName`` columns of ``CsvL.get_prod_anag()``.
    """
    names = CsvL.get_prod_anag()[['ProductId', 'ProdName']]
    return pd.merge(df, names, on='ProductId')
def get_user_name(user_id):
    """Return the ``NameSurname`` of the first user row matching ``user_id``.

    Args:
        user_id: Value compared with the ``UserId`` column of
            ``CsvL.get_users_anag()``.

    Raises:
        IndexError: If no row matches.
    """
    users = CsvL.get_users_anag()
    matching_names = users.loc[users.UserId == user_id, 'NameSurname']
    return matching_names.values[0]
def get_product_name(product_id):
    """Return the ``ProdName`` of the first product row matching ``product_id``.

    Args:
        product_id: Value compared with the ``ProductId`` column of
            ``CsvL.get_prod_anag()``.

    Raises:
        IndexError: If no row matches.
    """
    products = CsvL.get_prod_anag()
    matching_names = products.loc[products.ProductId == product_id, 'ProdName']
    return matching_names.values[0]