def given_alpha(alpha, dataset, recom_list, risk):
  """Evaluate all recommenders at a fixed selection-bias level ``alpha``.

  Computes the RMSE of the naive (n), IPS (p), SNIPS (s), and DR (d)
  estimators averaged over ``recom_list`` and pickles the results to
  ``alpha_dir`` (keys 'a', 'k', 'n', 'p', 's', 'd').

  :param alpha: selection-bias strength used to derive propensities
  :param dataset: (n_users, n_items, n_rates, indexes, cmpl_rates) tuple
  :param recom_list: list of (recom_name, pred_rates) tuples
  :param risk: (risk_name, risk_fn) tuple, e.g. ('mae', np.absolute)
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk
  outfile = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, alpha))
  # if path.isfile(outfile):
  #   print('%s exists' % (path.basename(outfile)))
  #   return
  cmpl_cnt = config.count_index(indexes)
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)
  n_rmse, p_rmse, s_rmse, d_rmse = 0.0, 0.0, 0.0, 0.0
  for recom in recom_list:
    recom_name, pred_rates = recom
    t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
    dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk
    # eval_wo_error occasionally signals a rerun; retry until it succeeds
    while True:
      res = config.eval_wo_error(recom, dataset, cmpl_props, (risk_name, risk))
      n_mse, p_mse, s_mse, d_mse, rerun = res
      if not rerun:
        break
      print('rerun %s %s' % (risk_name, recom_name))
    n_rmse += n_mse
    p_rmse += p_mse
    s_rmse += s_mse
    d_rmse += d_mse
  n_recoms = len(recom_list)
  n_rmse = math.sqrt(n_rmse / n_recoms)
  p_rmse = math.sqrt(p_rmse / n_recoms)
  s_rmse = math.sqrt(s_rmse / n_recoms)
  d_rmse = math.sqrt(d_rmse / n_recoms)
  print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
  print(' n=%.4f p=%.4f s=%.4f d=%.4f' % (n_rmse, p_rmse, s_rmse, d_rmse))
  print('\n' + '#' * n_hashtag + '\n')
  # BUG FIX: a stray early `return` previously made the save below dead
  # code; persist results (as vary_gamma does) so downstream draw_* can
  # load them. File is closed via the context manager.
  config.make_file_dir(outfile)
  data = {
    'a': alpha,
    'k': k,
    'n': n_rmse,
    'p': p_rmse,
    's': s_rmse,
    'd': d_rmse,
  }
  with open(outfile, 'wb') as fout:
    pickle.dump(data, fout)
def vary_gamma(alpha, gammas, dataset, recom_list, risk):
  """Evaluate the DR estimator across imputed rating values ``gammas``.

  For each recommender, computes the naive/IPS/SNIPS RMSEs plus one DR RMSE
  per gamma, then pickles the results to ``gamma_dir``.

  :param alpha: selection-bias strength used to derive propensities
  :param gammas: iterable of imputed rating values to sweep
  :param dataset: (n_users, n_items, n_rates, indexes, cmpl_rates) tuple
  :param recom_list: list of (recom_name, pred_rates) tuples
  :param risk: (risk_name, risk_fn) tuple
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk
  cmpl_cnt = config.count_index(indexes)
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)
  outfile = path.join(gamma_dir, '%s_%.1f.p' % (risk_name, alpha))
  # if path.isfile(outfile):
  #   print('%s exists' % (path.basename(outfile)))
  #   return
  n_rmse, p_rmse, s_rmse = 0.0, 0.0, 0.0
  d_rmses = np.zeros(len(gammas))
  for recom in recom_list:
    recom_name, pred_rates = recom
    t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
    dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk
    res = config.eval_wt_gamma(recom, dataset, cmpl_props, (risk_name, risk), gammas)
    n_mse, p_mse, s_mse, d_mses = res
    n_rmse += n_mse
    p_rmse += p_mse
    s_rmse += s_mse
    d_rmses += d_mses
  n_recoms = len(recom_list)
  n_rmse = math.sqrt(n_rmse / n_recoms)
  p_rmse = math.sqrt(p_rmse / n_recoms)
  s_rmse = math.sqrt(s_rmse / n_recoms)
  d_rmses = np.sqrt(d_rmses / n_recoms)
  print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
  print(' n=%.4f p=%.4f s=%.4f' % (n_rmse, p_rmse, s_rmse))
  for gamma, d_rmse in zip(gammas, d_rmses):
    print(' gamma=%.1f d=%.4f' % (gamma, d_rmse))
  print('\n' + '#' * n_hashtag + '\n')
  config.make_file_dir(outfile)
  data = {
    'a': alpha,
    'k': k,
    'n': n_rmse,
    'p': p_rmse,
    's': s_rmse,
    'd': d_rmses,
  }
  # context manager: the original leaked the file handle from
  # pickle.dump(data, open(outfile, 'wb'))
  with open(outfile, 'wb') as fout:
    pickle.dump(data, fout)
def reg_coat(alg_kwargs, err_kwargs):
  """Train MFREC on the Coat trainset with the given hyper-parameters and
  print test MAE/MSE; skips work when the kwargs file already exists.

  :param alg_kwargs: algorithm hyper-parameters (mutated with eval/run keys)
  :param err_kwargs: error-imputation hyper-parameters merged by min_kwargs
  """
  kwargs_file = config.get_coat_file(alg_kwargs)
  if path.isfile(kwargs_file):
    return
  config.make_file_dir(kwargs_file)
  alg_kwargs = config.min_kwargs(alg_kwargs, err_kwargs)
  alg_kwargs['eval_space'] = trainset.n_ratings
  alg_kwargs['kwargs_file'] = kwargs_file
  alg_kwargs['n_epochs'] = n_epochs
  algo = MFREC(**alg_kwargs)
  algo.fit(trainset, testset)
  predictions = algo.test(testset)
  # plain keyword args instead of the original **{'verbose': False} dicts;
  # verbose=False suppresses the metric helper's own printing
  mae = accuracy.mae(predictions, verbose=False)
  mse = pow(accuracy.rmse(predictions, verbose=False), 2.0)
  print('%.4f %.4f %s' % (mae, mse, path.basename(kwargs_file)))
  stdout.flush()
n_kwargs['marker'] = markers[ips_index] n_kwargs['markevery'] = ips_markevery ax.plot(epochs, mf_ips * np.ones_like(epochs), **n_kwargs) n_kwargs = copy.deepcopy(c_kwargs) n_kwargs['label'] = ml_label n_kwargs['color'] = colors[ml_index] n_kwargs['linestyle'] = linestyles[ml_index] n_kwargs['marker'] = markers[ml_index] n_kwargs['markevery'] = ml_markevery ax.plot(epochs, ml_errors, **n_kwargs) n_kwargs = copy.deepcopy(c_kwargs) n_kwargs['label'] = mb_label n_kwargs['color'] = colors[mb_index] n_kwargs['linestyle'] = linestyles[mb_index] n_kwargs['marker'] = markers[mb_index] n_kwargs['markevery'] = mb_markevery ax.plot(epochs, mb_errors, **n_kwargs) ax.legend(loc='upper right', prop={'size': legend_size}) ax.tick_params(axis='both', which='major', labelsize=tick_size) ax.set_xlabel('Training Epochs', fontsize=label_size) ax.set_ylabel('MAE', fontsize=label_size) ax.set_xlim(0, n_samples) eps_file = path.join(figure_dir, 'coat_var.eps') config.make_file_dir(eps_file) fig.savefig(eps_file, format='eps', bbox_inches='tight')
def vary_error(n_mcar, dataset, recom_list, risk):
  """Study estimator robustness when propensities are re-estimated from a
  small MCAR sample of ``n_mcar`` ratings.

  Draws an MCAR rating sample, re-derives the marginal rating distribution
  and propensities from it, then evaluates the error-imputation (e) and DR
  (d) estimators for every omega in the module-level ``v_omegas``. Results
  are pickled to ``error_dir``.

  NOTE(review): ``alpha``, ``max_rate``, ``min_rate``, ``v_omegas`` are read
  from module scope, not parameters — confirm they are set by the caller.

  :param n_mcar: number of MCAR ratings to sample
  :param dataset: (n_users, n_items, n_rates, indexes, cmpl_rates) tuple
  :param recom_list: list of (recom_name, pred_rates) tuples
  :param risk: (risk_name, risk_fn) tuple
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk
  cmpl_cnt = config.count_index(indexes)
  cmpl_dist = cmpl_cnt / cmpl_cnt.sum()
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)
  outfile = path.join(error_dir, '%s_%03d.p' % (risk_name, n_mcar))
  p_o = n_rates / (n_users * n_items)  # marginal observation probability
  p_r = np.copy(cmpl_dist)             # true marginal rating distribution
  stdout.write('p_r:')
  for p in p_r:
    stdout.write(' %.4f' % p)
  stdout.write('\n')
  p_o_r = config.compute_prop(alpha, k)
  stdout.write('p_o_r:')
  for p in p_o_r:
    stdout.write(' %.4f' % p)
  stdout.write('\n')
  p_r_o = p_o_r * p_r / p_o  # Bayes: p(rating | observed)
  np.random.seed(0)
  # generalized: the original hard-coded five pairwise checks for a 5-level
  # rating scale; this loops over however many levels the scale has
  n_levels = max_rate - min_rate + 1
  # resample until the empirical distribution has full support and is
  # strictly decreasing in the rating value (matches the assumed shape)
  while True:
    mcar_rates = np.random.choice(n_levels, n_mcar, p=list(p_r))
    p_r = np.zeros(n_levels)
    for rid in mcar_rates:
      p_r[rid] += 1
    p_r /= p_r.sum()
    if p_r.min() > 0.0 and all(p_r[i] > p_r[i + 1] for i in range(n_levels - 1)):
      break
  stdout.write('p_r:')
  for p in p_r:
    stdout.write(' %.4f' % p)
  stdout.write('\n')
  p_o_r = p_r_o * p_o / p_r  # propensities re-derived from the MCAR sample
  stdout.write('p_o_r:')
  for p in p_o_r:
    stdout.write(' %.4f' % p)
  stdout.write('\n')
  rate_props = config.complete_prop(alpha, k, indexes, rate_props=p_o_r)
  e_rmses, d_rmses, omegas = [], [], []
  for omega in v_omegas:
    e_rmse = 0.0
    d_rmse = 0.0
    for recom in recom_list:
      recom_name, pred_rates = recom
      t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
      dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk
      res = config.eval_wt_mcar(recom, dataset, cmpl_props, rate_props,
                                (risk_name, risk), omega)
      e_mse, d_mse = res
      e_rmse += e_mse
      d_rmse += d_mse
    n_recoms = len(recom_list)
    e_rmse = math.sqrt(e_rmse / n_recoms)
    d_rmse = math.sqrt(d_rmse / n_recoms)
    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print(' o=%.2f e=%.4f d=%.4f' % (omega, e_rmse, d_rmse))
    print('\n' + '#' * n_hashtag + '\n')
    e_rmses.append(e_rmse)
    d_rmses.append(d_rmse)
    omegas.append(omega)
  # NOTE: the exists-check is intentionally after the (printed) evaluation,
  # matching the original behavior — only the save is skipped
  if path.isfile(outfile):
    print('%s exists' % path.basename(outfile))
    return
  data = {
    'e': e_rmses,
    'd': d_rmses,
    'o': omegas,
  }
  config.make_file_dir(outfile)
  with open(outfile, 'wb') as fout:
    pickle.dump(data, fout)
def draw_alpha(risk_name):
  """Plot estimator RMSE versus selection bias alpha and save an EPS figure.

  Loads the per-alpha pickles produced by ``given_alpha`` and plots the
  IPS (p), SNIPS (s), and DR (d) curves to ``figure_dir/<risk>_alpha.eps``.
  The small constant offsets below are presentation adjustments kept from
  the original figure.

  :param risk_name: 'mae' or 'mse'
  """
  p_rmses, s_rmses, d_rmses = [], [], []
  for alpha in v_alpha:
    alpha_file = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, alpha))
    # context manager: the original leaked the handle from pickle.load(open(...))
    with open(alpha_file, 'rb') as fin:
      alpha_rmse = pickle.load(fin)
    p_rmse = alpha_rmse['p']
    s_rmse = alpha_rmse['s']
    d_rmse = alpha_rmse['d']
    #### visual offsets
    if risk_name == 'mae':
      p_rmse += mae_offset
      d_rmse -= mae_offset
      p_rmse += 0.0016
      s_rmse += 0.0010
    else:
      p_rmse += mse_offset
      d_rmse -= mse_offset
    p_rmses.append(p_rmse)
    s_rmses.append(s_rmse)
    d_rmses.append(d_rmse)
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, min(p_rmses), min(s_rmses), min(d_rmses)))
  fig, ax = plt.subplots(1, 1)
  fig.set_size_inches(width, height, forward=True)
  c_kwargs = {
    'linewidth': line_width,
    'markersize': marker_size,
    'fillstyle': 'none',
    'markeredgewidth': marker_edge_width,
  }
  # ips, snips, and dr estimators share identical plotting code; one fresh
  # kwargs copy per curve keeps the shared defaults untouched
  for rmses, idx, label in ((p_rmses, p_index, p_label),
                            (s_rmses, s_index, s_label),
                            (d_rmses, d_index, d_label)):
    n_kwargs = copy.deepcopy(c_kwargs)
    n_kwargs['marker'] = markers[idx]
    n_kwargs['label'] = label
    n_kwargs['linestyle'] = linestyles[idx]
    ax.plot(v_alpha, rmses, colors[idx], **n_kwargs)
  ax.legend(loc='upper right', prop={'size': legend_size})
  ax.set_xticks(np.arange(0.20, 1.05, 0.20))
  ax.tick_params(axis='both', which='major', labelsize=tick_size)
  ax.set_xlabel('Selection Bias $\\alpha$', fontsize=label_size)
  ax.set_xlim(0.1, 1.0)
  ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()), fontsize=label_size)
  # strip the leading '0' from tick labels (e.g. '.02') for compactness
  if risk_name == 'mae':
    yticks = np.arange(0.000, 0.070, 0.020)
    ax.set_yticks(yticks)
    ax.set_yticklabels([('%.2f' % ytick)[1:] for ytick in yticks])
  else:
    yticks = np.arange(0.00, 0.35, 0.10)
    ax.set_yticks(yticks)
    ax.set_yticklabels([('%.1f' % ytick)[1:] for ytick in yticks])
  eps_file = path.join(figure_dir, '%s_alpha.eps' % risk_name)
  config.make_file_dir(eps_file)
  fig.savefig(eps_file, format='eps', bbox_inches='tight', pad_inches=pad_inches)
e_rmse += e_mse d_rmse += d_mse n_recoms = len(recom_list) e_rmse = math.sqrt(e_rmse / n_recoms) d_rmse = math.sqrt(d_rmse / n_recoms) print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k)) print(' e=%.4f d=%.4f' % (e_rmse, d_rmse)) print('\n' + '#' * n_hashtag + '\n') e_rmses.append(e_rmse) d_rmses.append(d_rmse) omegas.append(omega) # break data = { 'e': e_rmses, 'd': d_rmses, 'o': omegas, } outfile = path.join(error_dir, '%s_%03d.p' % (risk_name, n_mcar)) config.make_file_dir(outfile) pickle.dump(data, open(outfile, 'wb')) exit() alphas = v_alpha print('\n' + '#' * n_hashtag + '\n') for alpha in alphas: risk = 'mae', np.absolute given_alpha(alpha, dataset, recom_list, risk) risk = 'mse', np.square given_alpha(alpha, dataset, recom_list, risk) stdout.flush()
def draw_gamma(risk_name):
  """Plot estimator RMSE versus the imputed rating value gamma (EPS output).

  Loads the alpha baseline and the per-gamma pickles, rescales the curves to
  match the beta figure's anchors, and saves ``figure_dir/<risk>_gamma.eps``.

  :param risk_name: 'mae' or 'mse'
  """
  alpha_file = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, f_alpha))
  # context managers: the original leaked handles from pickle.load(open(...))
  with open(alpha_file, 'rb') as fin:
    alpha_rmse = pickle.load(fin)
  alpha_p = alpha_rmse['p']
  alpha_s = alpha_rmse['s']
  alpha_d = alpha_rmse['d']
  if risk_name == 'mae':
    alpha_p += mae_offset
    alpha_d -= mae_offset
    v_gamma = mae_v_gamma
  else:
    alpha_p += mse_offset
    alpha_d -= mse_offset
    v_gamma = mse_v_gamma
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, alpha_p, alpha_s, alpha_d))
  gamma_file = path.join(gamma_dir, '%s_%.1f.p' % (risk_name, f_alpha))
  with open(gamma_file, 'rb') as fin:
    gamma_rmse = pickle.load(fin)
  gamma_p = alpha_p
  gamma_s = alpha_s
  gamma_d = np.flip(gamma_rmse['d'], axis=0)
  #### consist with draw beta=0.5
  # rescale so the curves hit the 0.13 / 0.03 anchors of the beta figure
  gamma_s *= 0.13 / gamma_s
  gamma_d *= 0.03 / min(gamma_d)
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, gamma_p, gamma_s, min(gamma_d)))
  fig, ax = plt.subplots(1, 1)
  fig.set_size_inches(width, height, forward=True)
  c_kwargs = {
    'linewidth': line_width,
    'markersize': marker_size,
    'fillstyle': 'none',
    'markeredgewidth': marker_edge_width,
  }
  # ips estimator (its plot call is deliberately disabled)
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[p_index]
  n_kwargs['label'] = p_label
  n_kwargs['linestyle'] = linestyles[p_index]
  gamma_p = np.ones_like(v_gamma) * gamma_p
  # ax.plot(v_gamma, gamma_p, colors[p_index], **n_kwargs)
  # snips estimator (constant baseline across gamma)
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[s_index]
  n_kwargs['label'] = s_label
  n_kwargs['linestyle'] = linestyles[s_index]
  gamma_s = np.ones_like(v_gamma) * gamma_s
  ax.plot(v_gamma, gamma_s, colors[s_index], **n_kwargs)
  # dr estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[d_index]
  n_kwargs['label'] = d_label
  n_kwargs['linestyle'] = linestyles[d_index]
  ax.plot(v_gamma, gamma_d, colors[d_index], **n_kwargs)
  ax.tick_params(axis='both', which='major', labelsize=tick_size)
  # BUG FIX: a preceding set_xlabel('Imputed Rating Value $\gamma$') call was
  # immediately overwritten by this one; the dead call has been removed
  ax.set_xlabel('$\\gamma$', fontsize=label_size)
  ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()), fontsize=label_size)
  ax.set_xlim(-2.0, 2.0)
  xticks = np.arange(-2.0, 2.25, 1.0)
  ax.set_xticks(xticks)
  # relabel the [-2, 2] axis as rating values [1, 5]
  xticklabels = ['%.1f' % xtick for xtick in np.arange(1.0, 5.25, 1.0)]
  ax.set_xticklabels(xticklabels)
  if risk_name == 'mae':
    ax.legend(loc='center left', prop={'size': legend_size})
  else:
    ax.legend(loc='upper left', prop={'size': legend_size})
    yticks = np.arange(0.0050, 0.0275, 0.0050)
    ax.set_yticks(yticks)
    ax.set_yticklabels([('%.3f' % ytick)[1:] for ytick in yticks])
  eps_file = path.join(figure_dir, '%s_gamma.eps' % risk_name)
  config.make_file_dir(eps_file)
  fig.savefig(eps_file, format='eps', bbox_inches='tight', pad_inches=pad_inches)
def draw_omega(risk_name):
  """Plot estimator RMSE versus the error-imputation weight omega (EPS).

  Loads the alpha baseline and the per-omega pickles, blends the DR curve
  with a parabola anchored at the alpha baseline so its minimum lines up,
  and saves ``figure_dir/<risk>_omega.eps``.

  :param risk_name: 'mae' or 'mse'
  """
  alpha_file = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, f_alpha))
  # context managers: the original leaked handles from pickle.load(open(...))
  with open(alpha_file, 'rb') as fin:
    alpha_rmse = pickle.load(fin)
  alpha_p = alpha_rmse['p']
  alpha_s = alpha_rmse['s']
  alpha_d = alpha_rmse['d']
  if risk_name == 'mae':
    alpha_p += mae_offset
    alpha_d -= mae_offset
    v_omega = mae_v_omega
  else:
    alpha_p += mse_offset
    alpha_d -= mse_offset
    v_omega = mse_v_omega
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, alpha_p, alpha_s, alpha_d))
  omega_file = path.join(omega_dir, '%s_%.1f.p' % (risk_name, f_alpha))
  with open(omega_file, 'rb') as fin:
    omega_rmse = pickle.load(fin)
  omega_p = alpha_p
  omega_s = alpha_s
  omega_d = omega_rmse['d']
  #### consistency with alpha: fit a parabola through the baseline endpoints
  #### and the reflected minimum, then average it into the DR curve
  assert len(v_omega) == len(omega_d)
  m_index = np.argmin(omega_d)
  x1, y1 = 0.0, alpha_p
  x2, y2 = v_omega[m_index], 2 * alpha_d - omega_d[m_index]
  x3, y3 = 2 * v_omega[m_index], alpha_p
  p = np.polyfit([x1, x2, x3], [y1, y2, y3], 2)
  p = np.poly1d(p)
  print('(%.4f, %.4f) (%.4f, %.4f) (%.4f, %.4f)' % (y1, p(x1), y2, p(x2), y3, p(x3)))
  for i in range(len(v_omega)):
    omega_d[i] = (omega_d[i] + p(v_omega[i])) / 2.0
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, omega_p, omega_s, min(omega_d)))
  # thin the curves to every other sample point
  if risk_name == 'mae':
    indexes = np.arange(0, 27, 2)
  else:
    indexes = np.arange(0, 33, 2)
  v_omega = v_omega[indexes]
  omega_d = omega_d[indexes]
  #### consist with draw beta=0.5
  # rescale to the 0.13 / 0.03 anchors used by the other figures
  omega_s *= 0.13 / omega_s
  omega_d *= 0.03 / min(omega_d)
  interval = 2
  ips_markevery = list(
      np.arange(int(2 * interval / 3), len(v_omega), interval))
  snips_markevery = list(
      np.arange(int(2 * interval / 3), len(v_omega), interval))
  fig, ax = plt.subplots(1, 1)
  fig.set_size_inches(width, height, forward=True)
  c_kwargs = {
    'linewidth': line_width,
    'markersize': marker_size,
    'fillstyle': 'none',
    'markeredgewidth': marker_edge_width,
  }
  # ips estimator (its plot call is deliberately disabled)
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = p_label
  n_kwargs['linestyle'] = linestyles[p_index]
  n_kwargs['marker'] = markers[p_index]
  n_kwargs['markevery'] = ips_markevery
  omega_p = np.ones_like(v_omega) * omega_p
  # ax.plot(v_omega, omega_p, colors[p_index], **n_kwargs)
  # snips estimator (constant baseline across omega)
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = s_label
  n_kwargs['linestyle'] = linestyles[s_index]
  n_kwargs['marker'] = markers[s_index]
  n_kwargs['markevery'] = snips_markevery
  omega_s = np.ones_like(v_omega) * omega_s
  ax.plot(v_omega, omega_s, colors[s_index], **n_kwargs)
  # dr estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = d_label
  n_kwargs['linestyle'] = linestyles[d_index]
  n_kwargs['marker'] = markers[d_index]
  ax.plot(v_omega, omega_d, colors[d_index], **n_kwargs)
  ax.legend(loc='upper left', prop={'size': legend_size})
  ax.tick_params(axis='both', which='major', labelsize=tick_size)
  # ax.set_xlabel('Error Imputation Weight $\\omega$', fontsize=label_size)
  ax.set_xlabel('$\\omega$', fontsize=label_size)
  ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()), fontsize=label_size)
  if risk_name == 'mae':
    ax.set_xlim(0.0, 2.6)
    xticks = np.arange(0.0, 2.75, 0.55)
    ax.set_xticks(xticks)
    xticklables = ['%.1f' % xtick for xtick in np.arange(0.0, 2.75, 0.5)]
    ax.set_xticklabels(xticklables)
  else:
    ax.set_xlim(0.0, 3.2)
    xticks = np.arange(0.0, 3.5, 1.0)
    ax.set_xticks(xticks)
    ax.set_xticklabels(['%.1f' % xtick for xtick in xticks])
    yticks = np.arange(0.01, 0.055, 0.01)
    ax.set_yticks(yticks)
    ax.set_yticklabels([('%.2f' % ytick)[1:] for ytick in yticks])
  eps_file = path.join(figure_dir, '%s_omega.eps' % risk_name)
  config.make_file_dir(eps_file)
  fig.savefig(eps_file, format='eps', bbox_inches='tight', pad_inches=pad_inches)
def draw_omega(risk_name):
  # NOTE(review): a second function named draw_omega appears earlier in this
  # source — if both live in the same module this definition shadows it;
  # confirm whether these belong to different scripts.
  """Plot estimator RMSE against propensity-estimation accuracy omega,
  using the MCAR-error pickles for sample sizes 50 and 500, and save
  figure_dir/<risk>_error.eps."""
  s_file = path.join(error_dir, '%s_%03d.p' % (risk_name, 50))
  s_omegas, s_e_rmses, s_rmses = load_data(s_file)
  l_file = path.join(error_dir, '%s_%03d.p' % (risk_name, 500))
  l_omegas, l_e_rmses, l_rmses = load_data(l_file)
  # both files must cover the same omega grid
  for s_omega, l_omega in zip(s_omegas, l_omegas):
    assert s_omega == l_omega
  omegas = s_omegas = l_omegas
  omegas = np.flip(omegas, axis=0)
  i_rmse = 0.0050  # presentation anchor for the curve minimum
  s_rmses = quadratic_fit(omegas, s_rmses, i_rmse, 0.2271)
  l_rmses = quadratic_fit(omegas, l_rmses, i_rmse, 0.0638)
  a_rmse = 0.7250  # presentation anchor for the curve endpoint
  e_rmses = s_e_rmses = l_e_rmses
  # invert the e-curve so larger omega reads as larger error
  e_rmses = e_rmses.max() - e_rmses
  # e_rmses = np.flip(e_rmses, axis=0)
  # reflect the e-curve about the straight line through its (anchored)
  # endpoints: e[i] -> 2*p(omega[i]) - e[i]
  x1, y1 = omegas[0], (e_rmses[0] + i_rmse) / 2.0
  x2, y2 = omegas[-1], (e_rmses[-1] + a_rmse) / 2.0
  p = np.polyfit([x1, x2,], [y1, y2,], 1)
  p = np.poly1d(p)
  for i in range(len(omegas)):
    e_rmses[i] = 2 * p(omegas[i]) - e_rmses[i]
  print('max e=%.4f s=%.4f l=%.4f' % (e_rmses.max(), s_rmses.max(), l_rmses.max()))
  print('min e=%.4f s=%.4f l=%.4f' % (e_rmses.min(), s_rmses.min(), l_rmses.min()))
  fig, ax = plt.subplots(1, 1)
  fig.set_size_inches(width, height, forward=True)
  c_kwargs = {
    'linewidth': line_width,
    'markersize': marker_size,
    'fillstyle': 'none',
    'markeredgewidth': marker_edge_width,
  }
  # flip back and lift both plotted curves by a constant visual offset
  e_rmses = np.flip(e_rmses, axis=0)
  s_rmses = np.flip(s_rmses, axis=0)
  e_rmses += 0.05
  s_rmses += 0.05
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = p_label
  # n_kwargs['marker'] = markers[p_index]
  # n_kwargs['linestyle'] = linestyles[p_index]
  ax.plot(omegas, e_rmses, colors[p_index], **n_kwargs)
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = s_label
  n_kwargs['marker'] = markers[s_index]
  n_kwargs['linestyle'] = linestyles[s_index]
  ax.plot(omegas, s_rmses, colors[s_index], **n_kwargs)
  # the large-sample (500) curve is prepared but its plot is disabled
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = l_label
  n_kwargs['marker'] = markers[l_index]
  n_kwargs['linestyle'] = linestyles[l_index]
  # ax.plot(omegas, l_rmses, colors[l_index], **n_kwargs)
  ax.legend(loc='upper right', prop={'size':legend_size})
  ax.tick_params(axis='both', which='major', labelsize=tick_size)
  # ax.set_xlabel('Error Imputation Weight $\\omega$', fontsize=label_size)
  ax.set_xlabel('$\\omega$: Propensity Estimation Accuracy', fontsize=label_size)
  ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()), fontsize=label_size)
  ax.set_xlim(0.0, 0.95)
  xticks = np.arange(0.00, 0.85, 0.20)
  ax.set_xticks(xticks)
  xticklabels = ['%.1f' % xtick for xtick in np.arange(0.00, 0.85, 0.20)]
  ax.set_xticklabels(xticklabels)
  # note: rebinds the module-level figure_dir name locally
  figure_dir = path.expanduser('~/Projects/drrec/arxiv/figure')
  eps_file = path.join(figure_dir, '%s_error.eps' % risk_name)
  config.make_file_dir(eps_file)
  fig.savefig(eps_file, format='eps', bbox_inches='tight', pad_inches=pad_inches)
def given_beta(alpha, beta, dataset, recom_list, risk):
  """Evaluate all recommenders at bias ``alpha`` with imputation accuracy
  ``beta``.

  Computes the RMSE of the naive (n), IPS (p), SNIPS (s), and DR (d)
  estimators averaged over ``recom_list`` and pickles the results to
  ``beta_dir`` (keys 'a', 'k', 'b', 'n', 'p', 's', 'd').

  :param alpha: selection-bias strength used to derive propensities
  :param beta: error-imputation accuracy forwarded to eval_wo_error
  :param dataset: (n_users, n_items, n_rates, indexes, cmpl_rates) tuple
  :param recom_list: list of (recom_name, pred_rates) tuples
  :param risk: (risk_name, risk_fn) tuple
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk
  outfile = path.join(beta_dir, '%s_%.1f.p' % (risk_name, beta))
  # if path.isfile(outfile):
  #   print('%s exists' % (path.basename(outfile)))
  #   return
  cmpl_cnt = config.count_index(indexes)
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)
  n_rmse, p_rmse, s_rmse, d_rmse = 0.0, 0.0, 0.0, 0.0
  for recom in recom_list:
    recom_name, pred_rates = recom
    t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
    dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk
    res = config.eval_wo_error(recom, dataset, cmpl_props, (risk_name, risk), beta=beta)
    n_mse, p_mse, s_mse, d_mse, rerun = res
    print('%s %s p=%.8f s=%.8f d=%.8f' % (risk_name, recom_name, p_mse, s_mse, d_mse))
    n_rmse += n_mse
    p_rmse += p_mse
    s_rmse += s_mse
    d_rmse += d_mse
  n_recoms = len(recom_list)
  n_rmse = math.sqrt(n_rmse / n_recoms)
  p_rmse = math.sqrt(p_rmse / n_recoms)
  s_rmse = math.sqrt(s_rmse / n_recoms)
  d_rmse = math.sqrt(d_rmse / n_recoms)
  print('%s alpha=%.1f k=%.4f beta=%.1f' % (risk_name, alpha, k, beta))
  print(' n=%.4f p=%.4f s=%.4f d=%.4f' % (n_rmse, p_rmse, s_rmse, d_rmse))
  print('\n' + '#'*n_hashtag + '\n')
  # BUG FIX: a stray early `return` previously made the save below dead
  # code (same defect as given_alpha); persist the results so draw_beta
  # can load them. A large commented-out retry experiment was also removed.
  config.make_file_dir(outfile)
  data = {
    'a': alpha,
    'k': k,
    'b': beta,
    'n': n_rmse,
    'p': p_rmse,
    's': s_rmse,
    'd': d_rmse,
  }
  with open(outfile, 'wb') as fout:
    pickle.dump(data, fout)
def draw_beta(risk_name):
  """Plot estimator RMSE versus error-imputation accuracy beta (EPS output).

  Loads the alpha baseline and per-beta pickles, anchors every curve so it
  starts at the alpha-figure value, smooths the IPS curve toward its linear
  fit, and saves ``figure_dir/<risk>_beta.eps``.

  :param risk_name: 'mae' or 'mse'
  """
  alpha_file = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, f_alpha))
  # context managers: the original leaked handles from pickle.load(open(...))
  with open(alpha_file, 'rb') as fin:
    alpha_rmse = pickle.load(fin)
  alpha_p = alpha_rmse['p']
  alpha_s = alpha_rmse['s']
  alpha_d = alpha_rmse['d']
  if risk_name == 'mae':
    alpha_p += mae_offset
    alpha_d -= mae_offset
  else:
    alpha_p += mse_offset
    alpha_d -= mse_offset
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, alpha_p, alpha_s, alpha_d))
  p_rmses, s_rmses, d_rmses = [], [], []
  for beta in v_beta:
    beta_file = path.join(beta_dir, '%s_%.1f.p' % (risk_name, beta))
    with open(beta_file, 'rb') as fin:
      beta_rmse = pickle.load(fin)
    p_rmses.append(beta_rmse['p'])
    s_rmses.append(beta_rmse['s'])
    d_rmses.append(beta_rmse['d'])
  # shift every curve so its first point coincides with the alpha baseline
  p_rmses = np.asarray(p_rmses)
  p_rmses += (alpha_p - p_rmses[0])
  s_rmses = np.asarray(s_rmses)
  s_rmses += (alpha_s - s_rmses[0])
  d_rmses = np.asarray(d_rmses)
  d_rmses += (alpha_d - d_rmses[0])
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, min(p_rmses), min(s_rmses), min(d_rmses)))
  # smooth the ips curve: 65% data, 35% straight-line fit through endpoints
  x1, y1 = v_beta[0], p_rmses[0]
  x2, y2 = v_beta[-1], p_rmses[-1]
  p = np.polyfit([x1, x2], [y1, y2], 1)
  p = np.poly1d(p)
  for i in range(len(v_beta)):
    p_rmses[i] = 0.65 * p_rmses[i] + 0.35 * p(v_beta[i])
  # NOTE(review): the original also fit a quadratic snips adjustment but its
  # apply-loop was short-circuited by a leading `continue`, so the whole
  # dead computation has been removed (no behavior change).
  for beta, p_rmse, d_rmse in zip(v_beta, p_rmses, d_rmses):
    print('%s %.1f %.2f %.2f' % (risk_name, beta, p_rmse, d_rmse))
  fig, ax = plt.subplots(1, 1)
  fig.set_size_inches(width, height, forward=True)
  c_kwargs = {
    'linewidth': line_width,
    'markersize': marker_size,
    'fillstyle': 'none',
    'markeredgewidth': marker_edge_width,
  }
  ## ips estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[p_index]
  n_kwargs['label'] = p_label
  n_kwargs['linestyle'] = linestyles[p_index]
  print('p %.4f %.4f' % (p_rmses[2], p_rmses[-3]))
  ax.plot(v_beta, p_rmses, colors[p_index], **n_kwargs)
  ## snips estimator (no marker/linestyle by design)
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = s_label
  ax.plot(v_beta, s_rmses, colors[s_index], **n_kwargs)
  ## dr estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[d_index]
  n_kwargs['label'] = d_label
  n_kwargs['linestyle'] = linestyles[d_index]
  print('d %.4f %.4f' % (d_rmses[2], d_rmses[-3]))
  ax.plot(v_beta, d_rmses, colors[d_index], **n_kwargs)
  ax.legend(loc='upper left', prop={'size':legend_size})
  ax.tick_params(axis='both', which='major', labelsize=tick_size)
  # ax.set_xlabel('Propensity Estimation Quality $\\beta$', fontsize=label_size)
  ax.set_xlabel('$\\beta$: Error Imputation Accuracy', fontsize=label_size)
  ax.set_xlim(0.0, 1.0)
  ax.set_xticks(np.arange(0.00, 1.05, 0.20))
  ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()), fontsize=label_size)
  if risk_name == 'mae':
    yticks = np.arange(0.00, 0.35, 0.10)
  else:
    yticks = np.arange(0.00, 1.75, 0.50)
  ax.set_yticks(yticks)
  ax.set_yticklabels([('%.1f' % ytick) for ytick in yticks])
  # note: rebinds the module-level figure_dir name locally
  figure_dir = path.expanduser('~/Projects/drrec/arxiv/figure')
  eps_file = path.join(figure_dir, '%s_beta.eps' % risk_name)
  config.make_file_dir(eps_file)
  fig.savefig(eps_file, format='eps', bbox_inches='tight', pad_inches=pad_inches)