def given_alpha(alpha, dataset, recom_list, risk):
  """Evaluate every recommender at a fixed alpha and print averaged RMSEs.

  Behaviorally identical to the original, including the bare ``return``
  before the persistence code, which therefore never runs.
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk
  outfile = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, alpha))
  # if path.isfile(outfile):
  #   print('%s exists' % (path.basename(outfile)))
  #   return
  cmpl_cnt = config.count_index(indexes)
  cmpl_dist = cmpl_cnt / cmpl_cnt.sum()  # computed but unused below, as before
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)
  sum_n = sum_p = sum_s = sum_d = 0.0
  for recom in recom_list:
    recom_name, pred_rates = recom
    t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
    # Shadow the parameter with the tuple shape the evaluator expects.
    dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk
    # Keep re-evaluating until the evaluator reports a clean run.
    while True:
      n_mse, p_mse, s_mse, d_mse, rerun = config.eval_wo_error(
          recom, dataset, cmpl_props, (risk_name, risk))
      if not rerun:
        break
      print('rerun %s %s' % (risk_name, recom_name))
    sum_n += n_mse
    sum_p += p_mse
    sum_s += s_mse
    sum_d += d_mse
  n_recoms = len(recom_list)
  n_rmse, p_rmse, s_rmse, d_rmse = (
      math.sqrt(total / n_recoms) for total in (sum_n, sum_p, sum_s, sum_d))
  print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
  print(' n=%.4f p=%.4f s=%.4f d=%.4f' % (n_rmse, p_rmse, s_rmse, d_rmse))
  print('\n' + '#' * n_hashtag + '\n')
  return
  # NOTE(review): everything below is unreachable — saving appears to have
  # been deliberately disabled; confirm before re-enabling.
  config.make_file_dir(outfile)
  data = {
    'a': alpha,
    'k': k,
    'n': n_rmse,
    'p': p_rmse,
    's': s_rmse,
    'd': d_rmse,
  }
  pickle.dump(data, open(outfile, 'wb'))
def vary_gamma(alpha, gammas, dataset, recom_list, risk):
  """Evaluate every recommender across a sweep of gamma values.

  Averages the per-recommender MSEs, converts them to RMSEs, prints a
  summary, and pickles the results to ``gamma_dir``.
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk
  cmpl_cnt = config.count_index(indexes)
  cmpl_dist = cmpl_cnt / cmpl_cnt.sum()  # computed but unused below, as before
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)
  outfile = path.join(gamma_dir, '%s_%.1f.p' % (risk_name, alpha))
  # if path.isfile(outfile):
  #   print('%s exists' % (path.basename(outfile)))
  #   return
  sum_n = sum_p = sum_s = 0.0
  sum_d = np.zeros(len(gammas))  # one accumulator per gamma
  for recom in recom_list:
    recom_name, pred_rates = recom
    t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
    # Shadow the parameter with the tuple shape the evaluator expects.
    dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk
    n_mse, p_mse, s_mse, d_mses = config.eval_wt_gamma(
        recom, dataset, cmpl_props, (risk_name, risk), gammas)
    sum_n += n_mse
    sum_p += p_mse
    sum_s += s_mse
    sum_d += d_mses
  n_recoms = len(recom_list)
  n_rmse = math.sqrt(sum_n / n_recoms)
  p_rmse = math.sqrt(sum_p / n_recoms)
  s_rmse = math.sqrt(sum_s / n_recoms)
  d_rmses = np.sqrt(sum_d / n_recoms)
  print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
  print(' n=%.4f p=%.4f s=%.4f' % (n_rmse, p_rmse, s_rmse))
  for gamma, d_rmse in zip(gammas, d_rmses):
    print(' gamma=%.1f d=%.4f' % (gamma, d_rmse))
  print('\n' + '#' * n_hashtag + '\n')
  config.make_file_dir(outfile)
  data = {
    'a': alpha,
    'k': k,
    'n': n_rmse,
    'p': p_rmse,
    's': s_rmse,
    'd': d_rmses,
  }
  pickle.dump(data, open(outfile, 'wb'))
def vary_error(n_mcar, dataset, recom_list, risk):
  """Evaluate the estimators under an MCAR-sampled rating distribution.

  Draws ``n_mcar`` MCAR ratings to build an empirical rating distribution,
  derives propensities from it, then evaluates each recommender for every
  omega in the module-level ``v_omegas``, pickling the e/d RMSE curves.

  NOTE(review): depends on module-level globals `alpha`, `max_rate`,
  `min_rate`, `v_omegas`, `error_dir`, `n_hashtag` — confirm they are
  defined before this is called.
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk  # unpack the (name, callable) pair in place
  cmpl_cnt = config.count_index(indexes)
  # Empirical distribution over rating values.
  cmpl_dist = cmpl_cnt / cmpl_cnt.sum()
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)
  # [stdout.write('%.4f ' % p) for p in set(cmpl_props)]
  # stdout.write('\n')
  outfile = path.join(error_dir, '%s_%03d.p' % (risk_name, n_mcar))
  # p_o: marginal probability that any (user, item) pair is observed.
  p_o = n_rates / (n_users * n_items)
  # print('p_o: %.4f' % p_o)
  p_r = np.copy(cmpl_dist)
  stdout.write('p_r:')
  [stdout.write(' %.4f' % p) for p in p_r]
  stdout.write('\n')
  # p_o_r: P(observed | rating value) from the propensity model.
  p_o_r = config.compute_prop(alpha, k)
  stdout.write('p_o_r:')
  [stdout.write(' %.4f' % p) for p in p_o_r]
  stdout.write('\n')
  # Bayes rule: P(rating value | observed).
  p_r_o = p_o_r * p_r / p_o
  # stdout.write('p_r_o:')
  # [stdout.write(' %.4f' % p) for p in p_r_o]
  # stdout.write('\n')
  np.random.seed(0)  # deterministic MCAR draws across runs
  # Rejection sampling: redraw until the empirical p_r has no empty bin and
  # is strictly decreasing from the lowest to the highest rating value.
  # NOTE(review): each retry samples from the *previous* empirical p_r,
  # not the original cmpl_dist — confirm this drift is intended.
  while True:
    mcar_rates = np.random.choice(max_rate-min_rate+1, n_mcar, p=list(p_r))
    p_r = np.zeros(max_rate-min_rate+1)
    for rid in mcar_rates:
      p_r[rid] += 1
    p_r /= p_r.sum()
    success = True
    if p_r.min() == 0.0:
      success = False
    if p_r[0] <= p_r[1]:
    # if p_r[0] < p_r[1]:
      success = False
    if p_r[1] <= p_r[2]:
    # if p_r[1] < p_r[2]:
      success = False
    if p_r[2] <= p_r[3]:
    # if p_r[2] < p_r[3]:
      success = False
    if p_r[3] <= p_r[4]:
    # if p_r[3] < p_r[4]:
      success = False
    if success:
      break
  stdout.write('p_r:')
  [stdout.write(' %.4f' % p) for p in p_r]
  stdout.write('\n')
  # Propensities implied by the accepted empirical distribution.
  p_o_r = p_r_o * p_o / p_r
  # p_o_r = np.asarray([0.0163, 0.0697, 0.1140, 0.0268, 0.0113])
  stdout.write('p_o_r:')
  [stdout.write(' %.4f' % p) for p in p_o_r]
  stdout.write('\n')
  rate_props = config.complete_prop(alpha, k, indexes, rate_props=p_o_r)
  # [stdout.write('%.4f ' % p) for p in set(rate_props)]
  # stdout.write('\n')
  e_rmses, d_rmses, omegas = [], [], []
  for omega in v_omegas:
    e_rmse = 0.0
    d_rmse = 0.0
    for recom in recom_list:
      recom_name, pred_rates = recom
      t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
      # Shadow the `dataset` argument with the tuple the evaluator expects.
      dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk
      res = config.eval_wt_mcar(recom, dataset, cmpl_props, rate_props, (risk_name, risk), omega)
      e_mse, d_mse = res
      e_rmse += e_mse
      d_rmse += d_mse
    n_recoms = len(recom_list)
    e_rmse = math.sqrt(e_rmse / n_recoms)
    d_rmse = math.sqrt(d_rmse / n_recoms)
    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print(' o=%.2f e=%.4f d=%.4f' % (omega, e_rmse, d_rmse))
    print('\n' + '#'*n_hashtag + '\n')
    e_rmses.append(e_rmse)
    d_rmses.append(d_rmse)
    omegas.append(omega)
    # break
  # NOTE(review): the existence check only happens after all the work above;
  # consider hoisting it to the top if recomputation is expensive.
  if path.isfile(outfile):
    print('%s exists' % path.basename(outfile))
    return
  data = {
    'e': e_rmses,
    'd': d_rmses,
    'o': omegas,
  }
  config.make_file_dir(outfile)
  pickle.dump(data, open(outfile, 'wb'))
# NOTE(review): orphaned fragment — this duplicates the tail of
# vary_error() (propensity recomputation plus the omega sweep, here over a
# hard-coded np.arange(0.0, 1.05, 0.1)), but its enclosing `def` is not
# visible in this view; names such as p_r_o, p_o, p_r, recom_list, alpha,
# k, indexes come from that missing scope. Confirm whether this belongs to
# another function or is dead duplicate code to delete.
p_o_r = p_r_o * p_o / p_r
stdout.write('p_o_r:')
[stdout.write(' %.4f' % p) for p in p_o_r]
stdout.write('\n')
rate_props = config.complete_prop(alpha, k, indexes, rate_props=p_o_r)
[stdout.write('%.4f ' % p) for p in set(rate_props)]
stdout.write('\n')
e_rmses, d_rmses, omegas = [], [], []
for omega in np.arange(0.0, 1.05, 0.1):
  e_rmse = 0.0
  d_rmse = 0.0
  for recom in recom_list:
    recom_name, pred_rates = recom
    t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
    # Shadow `dataset` with the tuple the evaluator expects.
    dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk
    res = config.eval_wt_mcar(recom, dataset, cmpl_props, rate_props, (risk_name, risk), omega)
    e_mse, d_mse = res
    e_rmse += e_mse
    d_rmse += d_mse
  n_recoms = len(recom_list)
  e_rmse = math.sqrt(e_rmse / n_recoms)
  d_rmse = math.sqrt(d_rmse / n_recoms)
  print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
  print(' e=%.4f d=%.4f' % (e_rmse, d_rmse))
  print('\n' + '#' * n_hashtag + '\n')
  e_rmses.append(e_rmse)
def given_beta(alpha, beta, dataset, recom_list, risk):
  """Evaluate every recommender for one (alpha, beta) setting and print RMSEs.

  Behaviorally identical to the original, including the bare ``return``
  before the persistence code, which therefore never runs.
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk
  outfile = path.join(beta_dir, '%s_%.1f.p' % (risk_name, beta))
  # if path.isfile(outfile):
  #   print('%s exists' % (path.basename(outfile)))
  #   return
  cmpl_cnt = config.count_index(indexes)
  cmpl_dist = cmpl_cnt / cmpl_cnt.sum()  # computed but unused below, as before
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)
  sum_n = sum_p = sum_s = sum_d = 0.0
  for recom in recom_list:
    recom_name, pred_rates = recom
    t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
    # Shadow the parameter with the tuple shape the evaluator expects.
    dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk
    n_mse, p_mse, s_mse, d_mse, rerun = config.eval_wo_error(
        recom, dataset, cmpl_props, (risk_name, risk), beta=beta)
    print('%s %s p=%.8f s=%.8f d=%.8f' % (risk_name, recom_name, p_mse, s_mse, d_mse))
    # Disabled best-of-max_try selection experiment, kept verbatim:
    '''
    max_try = 1
    n_mses, p_mses, s_mses, d_mses = [], [], [], []
    for i in range(max_try):
      res = config.eval_wo_error(recom, dataset, cmpl_props, (risk_name, risk), beta=beta)
      n_mse, p_mse, s_mse, d_mse, rerun = res
      n_mses.append(n_mse)
      p_mses.append(p_mse)
      s_mses.append(s_mse)
      d_mses.append(d_mse)
    d_minus_s, min_idx = d_mses[0] - s_mses[0], 0
    for i in range(1, max_try):
      if d_mses[i] - s_mses[i] < d_minus_s:
        d_minus_s, min_idx = d_mses[i] - s_mses[i], i
    i = min_idx
    n_mse, p_mse, s_mse, d_mse = n_mses[i], p_mses[i], s_mses[i], d_mses[i]
    print('select %s %s p=%.8f s=%.8f d=%.8f' % (risk_name, recom_name, p_mse, s_mse, d_mse))
    '''
    sum_n += n_mse
    sum_p += p_mse
    sum_s += s_mse
    sum_d += d_mse
  n_recoms = len(recom_list)
  n_rmse, p_rmse, s_rmse, d_rmse = (
      math.sqrt(total / n_recoms) for total in (sum_n, sum_p, sum_s, sum_d))
  print('%s alpha=%.1f k=%.4f beta=%.1f' % (risk_name, alpha, k, beta))
  print(' n=%.4f p=%.4f s=%.4f d=%.4f' % (n_rmse, p_rmse, s_rmse, d_rmse))
  print('\n' + '#'*n_hashtag + '\n')
  return
  # NOTE(review): everything below is unreachable — saving appears to have
  # been deliberately disabled; confirm before re-enabling.
  config.make_file_dir(outfile)
  data = {
    'a': alpha,
    'k': k,
    'b': beta,
    'n': n_rmse,
    'p': p_rmse,
    's': s_rmse,
    'd': d_rmse,
  }
  pickle.dump(data, open(outfile, 'wb'))