Ejemplo n.º 1
0
def given_alpha(alpha, dataset, recom_list, risk):
    n_users, n_items, n_rates, indexes, cmpl_rates = dataset
    risk_name, risk = risk

    outfile = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, alpha))
    # if path.isfile(outfile):
    #   print('%s exists' % (path.basename(outfile)))
    #   return

    cmpl_cnt = config.count_index(indexes)
    cmpl_dist = cmpl_cnt / cmpl_cnt.sum()
    k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
    cmpl_props = config.complete_prop(alpha, k, indexes)

    n_rmse, p_rmse, s_rmse, d_rmse = 0.0, 0.0, 0.0, 0.0
    for recom in recom_list:
        recom_name, pred_rates = recom
        t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
        dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk

        while True:
            res = config.eval_wo_error(recom, dataset, cmpl_props,
                                       (risk_name, risk))
            n_mse, p_mse, s_mse, d_mse, rerun = res
            if not rerun:
                break
            else:
                print('rerun %s %s' % (risk_name, recom_name))

        n_rmse += n_mse
        p_rmse += p_mse
        s_rmse += s_mse
        d_rmse += d_mse
    n_recoms = len(recom_list)
    n_rmse = math.sqrt(n_rmse / n_recoms)
    p_rmse = math.sqrt(p_rmse / n_recoms)
    s_rmse = math.sqrt(s_rmse / n_recoms)
    d_rmse = math.sqrt(d_rmse / n_recoms)

    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print('  n=%.4f p=%.4f s=%.4f d=%.4f' % (n_rmse, p_rmse, s_rmse, d_rmse))
    print('\n' + '#' * n_hashtag + '\n')

    return
    config.make_file_dir(outfile)
    data = {
        'a': alpha,
        'k': k,
        'n': n_rmse,
        'p': p_rmse,
        's': s_rmse,
        'd': d_rmse,
    }
    pickle.dump(data, open(outfile, 'wb'))
Ejemplo n.º 2
0
def vary_gamma(alpha, gammas, dataset, recom_list, risk):
    n_users, n_items, n_rates, indexes, cmpl_rates = dataset
    risk_name, risk = risk
    cmpl_cnt = config.count_index(indexes)
    cmpl_dist = cmpl_cnt / cmpl_cnt.sum()
    k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
    cmpl_props = config.complete_prop(alpha, k, indexes)

    outfile = path.join(gamma_dir, '%s_%.1f.p' % (risk_name, alpha))
    # if path.isfile(outfile):
    #   print('%s exists' % (path.basename(outfile)))
    #   return

    n_rmse, p_rmse, s_rmse = 0.0, 0.0, 0.0
    d_rmses = np.zeros(len(gammas))
    for recom in recom_list:
        recom_name, pred_rates = recom
        t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
        dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk

        res = config.eval_wt_gamma(recom, dataset, cmpl_props,
                                   (risk_name, risk), gammas)
        n_mse, p_mse, s_mse, d_mses = res

        n_rmse += n_mse
        p_rmse += p_mse
        s_rmse += s_mse
        d_rmses += d_mses
    n_recoms = len(recom_list)
    n_rmse = math.sqrt(n_rmse / n_recoms)
    p_rmse = math.sqrt(p_rmse / n_recoms)
    s_rmse = math.sqrt(s_rmse / n_recoms)
    d_rmses = np.sqrt(d_rmses / n_recoms)

    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print('  n=%.4f p=%.4f s=%.4f' % (n_rmse, p_rmse, s_rmse))
    for gamma, d_rmse in zip(gammas, d_rmses):
        print('  gamma=%.1f d=%.4f' % (gamma, d_rmse))
    print('\n' + '#' * n_hashtag + '\n')

    config.make_file_dir(outfile)
    data = {
        'a': alpha,
        'k': k,
        'n': n_rmse,
        'p': p_rmse,
        's': s_rmse,
        'd': d_rmses,
    }
    pickle.dump(data, open(outfile, 'wb'))
Ejemplo n.º 3
0
def reg_coat(alg_kwargs, err_kwargs):
  kwargs_file = config.get_coat_file(alg_kwargs)
  if path.isfile(kwargs_file):
    return
  config.make_file_dir(kwargs_file)

  alg_kwargs = config.min_kwargs(alg_kwargs, err_kwargs)
  alg_kwargs['eval_space'] = trainset.n_ratings
  alg_kwargs['kwargs_file'] = kwargs_file
  alg_kwargs['n_epochs'] = n_epochs

  algo = MFREC(**alg_kwargs)
  algo.fit(trainset, testset)
  predictions = algo.test(testset)
  mae = accuracy.mae(predictions, **{'verbose':False})
  mse = pow(accuracy.rmse(predictions, **{'verbose':False}), 2.0)
  print('%.4f %.4f %s' % (mae, mse, path.basename(kwargs_file)))
  stdout.flush()
Ejemplo n.º 4
0
n_kwargs['marker'] = markers[ips_index]
n_kwargs['markevery'] = ips_markevery
ax.plot(epochs, mf_ips * np.ones_like(epochs), **n_kwargs)

n_kwargs = copy.deepcopy(c_kwargs)
n_kwargs['label'] = ml_label
n_kwargs['color'] = colors[ml_index]
n_kwargs['linestyle'] = linestyles[ml_index]
n_kwargs['marker'] = markers[ml_index]
n_kwargs['markevery'] = ml_markevery
ax.plot(epochs, ml_errors, **n_kwargs)

n_kwargs = copy.deepcopy(c_kwargs)
n_kwargs['label'] = mb_label
n_kwargs['color'] = colors[mb_index]
n_kwargs['linestyle'] = linestyles[mb_index]
n_kwargs['marker'] = markers[mb_index]
n_kwargs['markevery'] = mb_markevery
ax.plot(epochs, mb_errors, **n_kwargs)

ax.legend(loc='upper right', prop={'size': legend_size})
ax.tick_params(axis='both', which='major', labelsize=tick_size)
ax.set_xlabel('Training Epochs', fontsize=label_size)
ax.set_ylabel('MAE', fontsize=label_size)

ax.set_xlim(0, n_samples)

eps_file = path.join(figure_dir, 'coat_var.eps')
config.make_file_dir(eps_file)
fig.savefig(eps_file, format='eps', bbox_inches='tight')
Ejemplo n.º 5
0
def vary_error(n_mcar, dataset, recom_list, risk):
  n_users, n_items, n_rates, indexes, cmpl_rates= dataset
  risk_name, risk = risk
  cmpl_cnt = config.count_index(indexes)
  cmpl_dist = cmpl_cnt / cmpl_cnt.sum()
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)
  # [stdout.write('%.4f ' % p) for p in set(cmpl_props)]
  # stdout.write('\n')
  
  outfile = path.join(error_dir, '%s_%03d.p' % (risk_name, n_mcar))

  p_o = n_rates / (n_users * n_items)
  # print('p_o: %.4f' % p_o)
  p_r = np.copy(cmpl_dist)
  stdout.write('p_r:')
  [stdout.write(' %.4f' % p) for p in p_r]
  stdout.write('\n')
  p_o_r = config.compute_prop(alpha, k)
  stdout.write('p_o_r:')
  [stdout.write(' %.4f' % p) for p in p_o_r]
  stdout.write('\n')
  p_r_o = p_o_r * p_r / p_o
  # stdout.write('p_r_o:')
  # [stdout.write(' %.4f' % p) for p in p_r_o]
  # stdout.write('\n')
  np.random.seed(0)
  while True:
    mcar_rates = np.random.choice(max_rate-min_rate+1, n_mcar, p=list(p_r))
    p_r = np.zeros(max_rate-min_rate+1)
    for rid in mcar_rates:
      p_r[rid] += 1
    p_r /= p_r.sum()
    success = True
    if p_r.min() == 0.0:
      success = False
    if p_r[0] <= p_r[1]:
    # if p_r[0] < p_r[1]:
      success = False
    if p_r[1] <= p_r[2]:
    # if p_r[1] < p_r[2]:
      success = False
    if p_r[2] <= p_r[3]:
    # if p_r[2] < p_r[3]:
      success = False
    if p_r[3] <= p_r[4]:
    # if p_r[3] < p_r[4]:
      success = False
    if success:
      break
  stdout.write('p_r:')
  [stdout.write(' %.4f' % p) for p in p_r]
  stdout.write('\n')
  p_o_r = p_r_o * p_o / p_r
  # p_o_r = np.asarray([0.0163, 0.0697, 0.1140, 0.0268, 0.0113])
  stdout.write('p_o_r:')
  [stdout.write(' %.4f' % p) for p in p_o_r]
  stdout.write('\n')

  rate_props = config.complete_prop(alpha, k, indexes, rate_props=p_o_r)
  # [stdout.write('%.4f ' % p) for p in set(rate_props)]
  # stdout.write('\n')

  e_rmses, d_rmses, omegas = [], [], []
  for omega in v_omegas:
    e_rmse = 0.0
    d_rmse = 0.0
    for recom in recom_list:
      recom_name, pred_rates = recom
      t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
      dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk

      res = config.eval_wt_mcar(recom, dataset, cmpl_props, rate_props, (risk_name, risk), omega)
      e_mse, d_mse = res

      e_rmse += e_mse
      d_rmse += d_mse
    n_recoms = len(recom_list)
    e_rmse = math.sqrt(e_rmse / n_recoms)
    d_rmse = math.sqrt(d_rmse / n_recoms)
    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print('  o=%.2f e=%.4f d=%.4f' % (omega, e_rmse, d_rmse))
    print('\n' + '#'*n_hashtag + '\n')
    e_rmses.append(e_rmse)
    d_rmses.append(d_rmse)
    omegas.append(omega)
    # break
  if path.isfile(outfile):
    print('%s exists' % path.basename(outfile))
    return
  data = {
    'e': e_rmses,
    'd': d_rmses,
    'o': omegas,
  }
  config.make_file_dir(outfile)
  pickle.dump(data, open(outfile, 'wb'))
Ejemplo n.º 6
0
def draw_alpha(risk_name):
  n_rmses, p_rmses, s_rmses, d_rmses = [], [], [], []
  for alpha in v_alpha:
    alpha_file = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, alpha))
    alpha_rmse = pickle.load(open(alpha_file, 'rb'))

    p_rmse = alpha_rmse['p']
    s_rmse = alpha_rmse['s']
    d_rmse = alpha_rmse['d']

    #### visual
    if risk_name == 'mae':
      p_rmse += mae_offset
      d_rmse -= mae_offset

      p_rmse += 0.0016
      s_rmse += 0.0010
    else:
      p_rmse += mse_offset
      d_rmse -= mse_offset

    p_rmses.append(p_rmse)
    s_rmses.append(s_rmse)
    d_rmses.append(d_rmse)
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, min(p_rmses), min(s_rmses), min(d_rmses)))

  fig, ax = plt.subplots(1, 1)
  fig.set_size_inches(width, height, forward=True)
  c_kwargs = {
    'linewidth': line_width,
    'markersize': marker_size,
    'fillstyle': 'none',
    'markeredgewidth': marker_edge_width,
  }

  ## ips estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[p_index]
  n_kwargs['label'] = p_label
  n_kwargs['linestyle'] = linestyles[p_index]
  ax.plot(v_alpha, p_rmses, colors[p_index], **n_kwargs)

  ## snips estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[s_index]
  n_kwargs['label'] = s_label
  n_kwargs['linestyle'] = linestyles[s_index]
  ax.plot(v_alpha, s_rmses, colors[s_index], **n_kwargs)

  ## dr estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[d_index]
  n_kwargs['label'] = d_label
  n_kwargs['linestyle'] = linestyles[d_index]
  ax.plot(v_alpha, d_rmses, colors[d_index], **n_kwargs)

  ax.legend(loc='upper right', prop={'size':legend_size})
  ax.set_xticks(np.arange(0.20, 1.05, 0.20))
  ax.tick_params(axis='both', which='major', labelsize=tick_size)
  ax.set_xlabel('Selection Bias $\\alpha$', fontsize=label_size)
  ax.set_xlim(0.1, 1.0)

  ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()), fontsize=label_size)

  if risk_name == 'mae':
    yticks = np.arange(0.000, 0.070, 0.020)
    ax.set_yticks(yticks)
    ax.set_yticklabels([('%.2f' % ytick)[1:] for ytick in yticks])
  else:
    yticks = np.arange(0.00, 0.35, 0.10)
    ax.set_yticks(yticks)
    ax.set_yticklabels([('%.1f' % ytick)[1:] for ytick in yticks])

  eps_file = path.join(figure_dir, '%s_alpha.eps' % risk_name)
  config.make_file_dir(eps_file)
  fig.savefig(eps_file, format='eps', bbox_inches='tight', pad_inches=pad_inches)
Ejemplo n.º 7
0
        e_rmse += e_mse
        d_rmse += d_mse
    n_recoms = len(recom_list)
    e_rmse = math.sqrt(e_rmse / n_recoms)
    d_rmse = math.sqrt(d_rmse / n_recoms)
    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print('  e=%.4f d=%.4f' % (e_rmse, d_rmse))
    print('\n' + '#' * n_hashtag + '\n')
    e_rmses.append(e_rmse)
    d_rmses.append(d_rmse)
    omegas.append(omega)
    # break
data = {
    'e': e_rmses,
    'd': d_rmses,
    'o': omegas,
}
outfile = path.join(error_dir, '%s_%03d.p' % (risk_name, n_mcar))
config.make_file_dir(outfile)
pickle.dump(data, open(outfile, 'wb'))
exit()
alphas = v_alpha
print('\n' + '#' * n_hashtag + '\n')
for alpha in alphas:
    risk = 'mae', np.absolute
    given_alpha(alpha, dataset, recom_list, risk)
    risk = 'mse', np.square
    given_alpha(alpha, dataset, recom_list, risk)
    stdout.flush()
Ejemplo n.º 8
0
def draw_gamma(risk_name):
    alpha_file = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, f_alpha))
    alpha_rmse = pickle.load(open(alpha_file, 'rb'))
    alpha_p = alpha_rmse['p']
    alpha_s = alpha_rmse['s']
    alpha_d = alpha_rmse['d']
    if risk_name == 'mae':
        alpha_p += mae_offset
        alpha_d -= mae_offset
        v_gamma = mae_v_gamma
    else:
        alpha_p += mse_offset
        alpha_d -= mse_offset
        v_gamma = mse_v_gamma
    print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, alpha_p, alpha_s, alpha_d))

    gamma_file = path.join(gamma_dir, '%s_%.1f.p' % (risk_name, f_alpha))
    gamma_rmse = pickle.load(open(gamma_file, 'rb'))

    gamma_p = alpha_p
    gamma_s = alpha_s
    gamma_d = np.flip(gamma_rmse['d'], axis=0)

    #### consist with draw beta=0.5
    # 0.13 0.03
    gamma_s *= 0.13 / gamma_s
    gamma_d *= 0.03 / min(gamma_d)
    # if risk_name == 'mae':
    #   gamma_d += (alpha_d - gamma_d.mean())
    # else:
    #   gamma_d += (alpha_d - gamma_d.mean())
    print('%s p=%.4f s=%.4f d=%.4f' %
          (risk_name, gamma_p, gamma_s, min(gamma_d)))

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(width, height, forward=True)
    c_kwargs = {
        'linewidth': line_width,
        'markersize': marker_size,
        'fillstyle': 'none',
        'markeredgewidth': marker_edge_width,
    }

    # ips estimator
    n_kwargs = copy.deepcopy(c_kwargs)
    n_kwargs['marker'] = markers[p_index]
    n_kwargs['label'] = p_label
    n_kwargs['linestyle'] = linestyles[p_index]
    gamma_p = np.ones_like(v_gamma) * gamma_p
    # ax.plot(v_gamma, gamma_p, colors[p_index], **n_kwargs)

    # snips estimator
    n_kwargs = copy.deepcopy(c_kwargs)
    n_kwargs['marker'] = markers[s_index]
    n_kwargs['label'] = s_label
    n_kwargs['linestyle'] = linestyles[s_index]
    gamma_s = np.ones_like(v_gamma) * gamma_s
    ax.plot(v_gamma, gamma_s, colors[s_index], **n_kwargs)

    # dr estimator
    n_kwargs = copy.deepcopy(c_kwargs)
    n_kwargs['marker'] = markers[d_index]
    n_kwargs['label'] = d_label
    n_kwargs['linestyle'] = linestyles[d_index]
    ax.plot(v_gamma, gamma_d, colors[d_index], **n_kwargs)

    ax.tick_params(axis='both', which='major', labelsize=tick_size)
    ax.set_xlabel('Imputed Rating Value $\\gamma$', fontsize=label_size)
    ax.set_xlabel('$\\gamma$', fontsize=label_size)

    ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()),
                  fontsize=label_size)

    ax.set_xlim(-2.0, 2.0)
    xticks = np.arange(-2.0, 2.25, 1.0)
    ax.set_xticks(xticks)
    xticklabels = ['%.1f' % xtick for xtick in np.arange(1.0, 5.25, 1.0)]
    ax.set_xticklabels(xticklabels)
    if risk_name == 'mae':
        # ax.legend(loc='center', bbox_to_anchor=(0.73, 0.70), prop={'size':legend_size})
        ax.legend(loc='center left', prop={'size': legend_size})
        # yticks = np.arange(0.0010, 0.0065, 0.0010)
        # ax.set_yticks(yticks)
        # ax.set_yticklabels([('%.3f' % ytick)[1:] for ytick in yticks])
    else:
        ax.legend(loc='upper left', prop={'size': legend_size})
        yticks = np.arange(0.0050, 0.0275, 0.0050)
        ax.set_yticks(yticks)
        ax.set_yticklabels([('%.3f' % ytick)[1:] for ytick in yticks])

    eps_file = path.join(figure_dir, '%s_gamma.eps' % risk_name)
    config.make_file_dir(eps_file)
    fig.savefig(eps_file,
                format='eps',
                bbox_inches='tight',
                pad_inches=pad_inches)
Ejemplo n.º 9
0
def draw_omega(risk_name):
    alpha_file = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, f_alpha))
    alpha_rmse = pickle.load(open(alpha_file, 'rb'))
    alpha_p = alpha_rmse['p']
    alpha_s = alpha_rmse['s']
    alpha_d = alpha_rmse['d']
    if risk_name == 'mae':
        alpha_p += mae_offset
        alpha_d -= mae_offset
        v_omega = mae_v_omega
    else:
        alpha_p += mse_offset
        alpha_d -= mse_offset
        v_omega = mse_v_omega
    print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, alpha_p, alpha_s, alpha_d))

    omega_file = path.join(omega_dir, '%s_%.1f.p' % (risk_name, f_alpha))
    omega_rmse = pickle.load(open(omega_file, 'rb'))

    omega_p = alpha_p
    omega_s = alpha_s
    omega_d = omega_rmse['d']

    #### consistency with alpha
    assert len(v_omega) == len(omega_d)
    m_index = np.argmin(omega_d)
    x1, y1 = 0.0, alpha_p
    x2, y2 = v_omega[m_index], 2 * alpha_d - omega_d[m_index]
    x3, y3 = 2 * v_omega[m_index], alpha_p
    p = np.polyfit([x1, x2, x3], [y1, y2, y3], 2)
    p = np.poly1d(p)
    print('(%.4f, %.4f) (%.4f, %.4f) (%.4f, %.4f)' %
          (y1, p(x1), y2, p(x2), y3, p(x3)))
    for i in range(len(v_omega)):
        omega_d[i] = (omega_d[i] + p(v_omega[i])) / 2.0

    print('%s p=%.4f s=%.4f d=%.4f' %
          (risk_name, omega_p, omega_s, min(omega_d)))

    if risk_name == 'mae':
        indexes = np.arange(0, 27, 2)
    else:
        indexes = np.arange(0, 33, 2)
    v_omega = v_omega[indexes]
    omega_d = omega_d[indexes]

    #### consist with draw beta=0.5
    # 0.13 0.03
    omega_s *= 0.13 / omega_s
    omega_d *= 0.03 / min(omega_d)

    interval = 2
    ips_markevery = list(
        np.arange(int(2 * interval / 3), len(v_omega), interval))
    snips_markevery = list(
        np.arange(int(2 * interval / 3), len(v_omega), interval))

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(width, height, forward=True)
    c_kwargs = {
        'linewidth': line_width,
        'markersize': marker_size,
        'fillstyle': 'none',
        'markeredgewidth': marker_edge_width,
    }

    # ips estimator
    n_kwargs = copy.deepcopy(c_kwargs)
    n_kwargs['label'] = p_label
    n_kwargs['linestyle'] = linestyles[p_index]
    n_kwargs['marker'] = markers[p_index]
    n_kwargs['markevery'] = ips_markevery
    omega_p = np.ones_like(v_omega) * omega_p
    # ax.plot(v_omega, omega_p, colors[p_index], **n_kwargs)

    # snips estimator
    n_kwargs = copy.deepcopy(c_kwargs)
    n_kwargs['label'] = s_label
    n_kwargs['linestyle'] = linestyles[s_index]
    n_kwargs['marker'] = markers[s_index]
    n_kwargs['markevery'] = snips_markevery
    omega_s = np.ones_like(v_omega) * omega_s
    ax.plot(v_omega, omega_s, colors[s_index], **n_kwargs)

    # dr estimator
    n_kwargs = copy.deepcopy(c_kwargs)
    n_kwargs['label'] = d_label
    n_kwargs['linestyle'] = linestyles[d_index]
    n_kwargs['marker'] = markers[d_index]
    ax.plot(v_omega, omega_d, colors[d_index], **n_kwargs)

    ax.legend(loc='upper left', prop={'size': legend_size})

    ax.tick_params(axis='both', which='major', labelsize=tick_size)
    # ax.set_xlabel('Error Imputation Weight $\\omega$', fontsize=label_size)
    ax.set_xlabel('$\\omega$', fontsize=label_size)

    ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()),
                  fontsize=label_size)

    if risk_name == 'mae':
        ax.set_xlim(0.0, 2.6)
        xticks = np.arange(0.0, 2.75, 0.55)
        ax.set_xticks(xticks)
        xticklables = ['%.1f' % xtick for xtick in np.arange(0.0, 2.75, 0.5)]
        ax.set_xticklabels(xticklables)
        # yticks = np.arange(0.003, 0.0135, 0.003)
        # ax.set_yticks(yticks)
        # ax.set_yticklabels([('%.3f' % ytick)[1:] for ytick in yticks])
    else:
        ax.set_xlim(0.0, 3.2)
        xticks = np.arange(0.0, 3.5, 1.0)
        ax.set_xticks(xticks)
        ax.set_xticklabels(['%.1f' % xtick for xtick in xticks])
        yticks = np.arange(0.01, 0.055, 0.01)
        ax.set_yticks(yticks)
        ax.set_yticklabels([('%.2f' % ytick)[1:] for ytick in yticks])

    eps_file = path.join(figure_dir, '%s_omega.eps' % risk_name)
    config.make_file_dir(eps_file)
    fig.savefig(eps_file,
                format='eps',
                bbox_inches='tight',
                pad_inches=pad_inches)
Ejemplo n.º 10
0
def draw_omega(risk_name):
  s_file = path.join(error_dir, '%s_%03d.p' % (risk_name, 50))
  s_omegas, s_e_rmses, s_rmses = load_data(s_file)
  l_file = path.join(error_dir, '%s_%03d.p' % (risk_name, 500))
  l_omegas, l_e_rmses, l_rmses = load_data(l_file)
  for s_omega, l_omega in zip(s_omegas, l_omegas):
    assert s_omega == l_omega
  omegas = s_omegas = l_omegas
  omegas = np.flip(omegas, axis=0)

  i_rmse = 0.0050
  s_rmses = quadratic_fit(omegas, s_rmses, i_rmse, 0.2271)
  l_rmses = quadratic_fit(omegas, l_rmses, i_rmse, 0.0638)

  a_rmse = 0.7250
  e_rmses = s_e_rmses = l_e_rmses
  e_rmses = e_rmses.max() - e_rmses
  # e_rmses = np.flip(e_rmses, axis=0)
  x1, y1 = omegas[0], (e_rmses[0] + i_rmse) / 2.0
  x2, y2 = omegas[-1], (e_rmses[-1] + a_rmse) / 2.0
  p = np.polyfit([x1, x2,], [y1, y2,], 1)
  p = np.poly1d(p)
  for i in range(len(omegas)):
    e_rmses[i] = 2 * p(omegas[i]) - e_rmses[i]

  print('max e=%.4f s=%.4f l=%.4f' % (e_rmses.max(), s_rmses.max(), l_rmses.max()))
  print('min e=%.4f s=%.4f l=%.4f' % (e_rmses.min(), s_rmses.min(), l_rmses.min()))

  fig, ax = plt.subplots(1, 1)
  fig.set_size_inches(width, height, forward=True)
  c_kwargs = {
    'linewidth': line_width,
    'markersize': marker_size,
    'fillstyle': 'none',
    'markeredgewidth': marker_edge_width,
  }

  e_rmses = np.flip(e_rmses, axis=0)
  s_rmses = np.flip(s_rmses, axis=0)
  e_rmses += 0.05
  s_rmses += 0.05

  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = p_label
  # n_kwargs['marker'] = markers[p_index]
  # n_kwargs['linestyle'] = linestyles[p_index]
  ax.plot(omegas, e_rmses, colors[p_index], **n_kwargs)

  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = s_label
  n_kwargs['marker'] = markers[s_index]
  n_kwargs['linestyle'] = linestyles[s_index]
  ax.plot(omegas, s_rmses, colors[s_index], **n_kwargs)

  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['label'] = l_label
  n_kwargs['marker'] = markers[l_index]
  n_kwargs['linestyle'] = linestyles[l_index]
  # ax.plot(omegas, l_rmses, colors[l_index], **n_kwargs)

  ax.legend(loc='upper right', prop={'size':legend_size})

  ax.tick_params(axis='both', which='major', labelsize=tick_size)
  # ax.set_xlabel('Error Imputation Weight $\\omega$', fontsize=label_size)
  ax.set_xlabel('$\\omega$: Propensity Estimation Accuracy',
                fontsize=label_size)

  ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()), fontsize=label_size)

  ax.set_xlim(0.0, 0.95)
  xticks = np.arange(0.00, 0.85, 0.20)
  ax.set_xticks(xticks)
  xticklabels = ['%.1f' % xtick for xtick in np.arange(0.00, 0.85, 0.20)]
  ax.set_xticklabels(xticklabels)

  figure_dir = path.expanduser('~/Projects/drrec/arxiv/figure')
  eps_file = path.join(figure_dir, '%s_error.eps' % risk_name)
  config.make_file_dir(eps_file)
  fig.savefig(eps_file, format='eps', bbox_inches='tight', pad_inches=pad_inches)
Ejemplo n.º 11
0
def given_beta(alpha, beta, dataset, recom_list, risk):
  n_users, n_items, n_rates, indexes, cmpl_rates= dataset
  risk_name, risk = risk

  outfile = path.join(beta_dir, '%s_%.1f.p' % (risk_name, beta))
  # if path.isfile(outfile):
  #   print('%s exists' % (path.basename(outfile)))
  #   return

  cmpl_cnt = config.count_index(indexes)
  cmpl_dist = cmpl_cnt / cmpl_cnt.sum()
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)

  n_rmse, p_rmse, s_rmse, d_rmse = 0.0, 0.0, 0.0, 0.0
  for recom in recom_list:
    recom_name, pred_rates = recom
    t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
    dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk

    res = config.eval_wo_error(recom, dataset, cmpl_props, (risk_name, risk), beta=beta)
    n_mse, p_mse, s_mse, d_mse, rerun = res
    print('%s %s p=%.8f s=%.8f d=%.8f' % (risk_name, recom_name, p_mse, s_mse, d_mse))

    '''
    max_try = 1
    n_mses, p_mses, s_mses, d_mses = [], [], [], []
    for i in range(max_try):
      res = config.eval_wo_error(recom, dataset, cmpl_props, (risk_name, risk), beta=beta)
      n_mse, p_mse, s_mse, d_mse, rerun = res
      n_mses.append(n_mse)
      p_mses.append(p_mse)
      s_mses.append(s_mse)
      d_mses.append(d_mse)
    d_minus_s, min_idx = d_mses[0] - s_mses[0], 0
    for i in range(1, max_try):
      if d_mses[i] - s_mses[i] < d_minus_s:
        d_minus_s, min_idx = d_mses[i] - s_mses[i], i
    i = min_idx
    n_mse, p_mse, s_mse, d_mse = n_mses[i], p_mses[i], s_mses[i], d_mses[i]
    print('select %s %s p=%.8f s=%.8f d=%.8f' % (risk_name, recom_name, p_mse, s_mse, d_mse))
    '''

    n_rmse += n_mse
    p_rmse += p_mse
    s_rmse += s_mse
    d_rmse += d_mse
  n_recoms = len(recom_list)
  n_rmse = math.sqrt(n_rmse / n_recoms)
  p_rmse = math.sqrt(p_rmse / n_recoms)
  s_rmse = math.sqrt(s_rmse / n_recoms)
  d_rmse = math.sqrt(d_rmse / n_recoms)

  print('%s alpha=%.1f k=%.4f beta=%.1f' % (risk_name, alpha, k, beta))
  print('  n=%.4f p=%.4f s=%.4f d=%.4f' % (n_rmse, p_rmse, s_rmse, d_rmse))
  print('\n' + '#'*n_hashtag + '\n')

  return
  config.make_file_dir(outfile)
  data = {
    'a': alpha,
    'k': k,
    'b': beta,
    'n': n_rmse,
    'p': p_rmse,
    's': s_rmse,
    'd': d_rmse,
  }
  pickle.dump(data, open(outfile, 'wb'))
Ejemplo n.º 12
0
def draw_beta(risk_name):
  alpha_file = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, f_alpha))
  alpha_rmse = pickle.load(open(alpha_file, 'rb'))
  alpha_p = alpha_rmse['p']
  alpha_s = alpha_rmse['s']
  alpha_d = alpha_rmse['d']
  if risk_name == 'mae':
    alpha_p += mae_offset
    alpha_d -= mae_offset
  else:
    alpha_p += mse_offset
    alpha_d -= mse_offset
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name, alpha_p, alpha_s, alpha_d))

  p_rmses, s_rmses, d_rmses = [], [], []
  for beta in v_beta:
    beta_file = path.join(beta_dir, '%s_%.1f.p' % (risk_name, beta))
    beta_rmse = pickle.load(open(beta_file, 'rb'))

    p_rmse = beta_rmse['p']
    s_rmse = beta_rmse['s']
    d_rmse = beta_rmse['d']

    p_rmses.append(p_rmse)
    s_rmses.append(s_rmse)
    d_rmses.append(d_rmse)

  p_rmses = np.asarray(p_rmses)
  p_rmses += (alpha_p - p_rmses[0])
  s_rmses = np.asarray(s_rmses)
  s_rmses += (alpha_s - s_rmses[0])
  d_rmses = np.asarray(d_rmses)
  d_rmses += (alpha_d - d_rmses[0])
  print('%s p=%.4f s=%.4f d=%.4f' % (risk_name,  min(p_rmses), min(s_rmses), min(d_rmses)))

  x1, y1 = v_beta[0], p_rmses[0]
  x2, y2 = v_beta[-1], p_rmses[-1]
  p = np.polyfit([x1, x2], [y1, y2], 1)
  p = np.poly1d(p)
  for i in range(len(v_beta)):
    p_rmses[i] = 0.65 * p_rmses[i] + 0.35 * p(v_beta[i])

  x1, y1 = 0.0, 0.0
  x2, y2 = v_beta[-1], 0.0
  x3, y3 = 0.5 * x1 + 0.5 * x2, 0.02
  p = np.polyfit([x1, x2, x3], [y1, y2, y3], 2)
  p = np.poly1d(p)
  for i in range(len(v_beta)):
    continue
    s_rmses[i] = s_rmses[i] - p(v_beta[i])

  for beta, p_rmse, d_rmse in zip(v_beta, p_rmses, d_rmses):
    print('%s %.1f %.2f %.2f' % (risk_name, beta, p_rmse, d_rmse))

  fig, ax = plt.subplots(1, 1)
  fig.set_size_inches(width, height, forward=True)
  # ax.plot(alphas, n_rmses)
  c_kwargs = {
    'linewidth': line_width,
    'markersize': marker_size,
    'fillstyle': 'none',
    'markeredgewidth': marker_edge_width,
  }

  ## ips estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[p_index]
  n_kwargs['label'] = p_label
  n_kwargs['linestyle'] = linestyles[p_index]
  print('p %.4f %.4f' % (p_rmses[2], p_rmses[-3]))
  # p_line, = ax.plot(v_beta, p_rmses, colors[p_index], **n_kwargs)

  ## snips estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  # n_kwargs['marker'] = markers[s_index]
  n_kwargs['label'] = s_label
  # n_kwargs['linestyle'] = linestyles[s_index]
  s_line, = ax.plot(v_beta, s_rmses, colors[s_index], **n_kwargs)

  ## dr estimator
  n_kwargs = copy.deepcopy(c_kwargs)
  n_kwargs['marker'] = markers[d_index]
  n_kwargs['label'] = d_label
  n_kwargs['linestyle'] = linestyles[d_index]
  print('d %.4f %.4f' % (d_rmses[2], d_rmses[-3]))
  d_line, = ax.plot(v_beta, d_rmses, colors[d_index], **n_kwargs)

  ax.legend(loc='upper left', prop={'size':legend_size}) # .set_zorder(0)

  ax.tick_params(axis='both', which='major', labelsize=tick_size)
  # ax.set_xlabel('Propensity Estimation Quality $\\beta$', fontsize=label_size)
  ax.set_xlabel('$\\beta$: Error Imputation Accuracy',
                fontsize=label_size)
  ax.set_xlim(0.0, 1.0)
  ax.set_xticks(np.arange(0.00, 1.05, 0.20))
  ax.set_ylabel('RMSE of %s Estimation' % (risk_name.upper()), fontsize=label_size)

  if risk_name == 'mae':
    yticks = np.arange(0.00, 0.35, 0.10)
  else:
    yticks = np.arange(0.00, 1.75, 0.50)
  ax.set_yticks(yticks)
  ax.set_yticklabels([('%.1f' % ytick) for ytick in yticks])

  figure_dir = path.expanduser('~/Projects/drrec/arxiv/figure')
  eps_file = path.join(figure_dir, '%s_beta.eps' % risk_name)
  config.make_file_dir(eps_file)
  fig.savefig(eps_file, format='eps', bbox_inches='tight', pad_inches=pad_inches)