Beispiel #1
0
def given_alpha(alpha, dataset, recom_list, risk):
    """Evaluate every recommender at a fixed propensity parameter `alpha`.

    Accumulates per-recommender MSEs from ``config.eval_wo_error``, converts
    them to RMSEs averaged over ``recom_list``, and prints a summary line.

    Args:
        alpha: propensity-model parameter fed to ``config.solve_k`` and
            ``config.complete_prop``.
        dataset: tuple ``(n_users, n_items, n_rates, indexes, cmpl_rates)``.
        recom_list: list of ``(recom_name, pred_rates)`` pairs.
        risk: ``(risk_name, risk_fn)`` pair.

    Returns:
        None. Persistence to ``outfile`` is currently disabled (see the
        early ``return`` below).
    """
    n_users, n_items, n_rates, indexes, cmpl_rates = dataset
    risk_name, risk = risk

    outfile = path.join(alpha_dir, '%s_%.1f.p' % (risk_name, alpha))
    # if path.isfile(outfile):
    #   print('%s exists' % (path.basename(outfile)))
    #   return

    cmpl_cnt = config.count_index(indexes)
    k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
    cmpl_props = config.complete_prop(alpha, k, indexes)

    n_rmse, p_rmse, s_rmse, d_rmse = 0.0, 0.0, 0.0, 0.0
    for recom in recom_list:
        recom_name, pred_rates = recom
        t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
        # Rebind `dataset` to the richer tuple expected by eval_wo_error.
        dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk

        # eval_wo_error can flag a rerun (presumably a degenerate sample —
        # TODO confirm); retry until it reports a usable result.
        while True:
            res = config.eval_wo_error(recom, dataset, cmpl_props,
                                       (risk_name, risk))
            n_mse, p_mse, s_mse, d_mse, rerun = res
            if not rerun:
                break
            print('rerun %s %s' % (risk_name, recom_name))

        n_rmse += n_mse
        p_rmse += p_mse
        s_rmse += s_mse
        d_rmse += d_mse
    n_recoms = len(recom_list)
    n_rmse = math.sqrt(n_rmse / n_recoms)
    p_rmse = math.sqrt(p_rmse / n_recoms)
    s_rmse = math.sqrt(s_rmse / n_recoms)
    d_rmse = math.sqrt(d_rmse / n_recoms)

    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print('  n=%.4f p=%.4f s=%.4f d=%.4f' % (n_rmse, p_rmse, s_rmse, d_rmse))
    print('\n' + '#' * n_hashtag + '\n')

    # NOTE: saving results is intentionally disabled by this early return;
    # delete it to re-enable writing the pickle to `outfile`.
    return
    config.make_file_dir(outfile)
    data = {
        'a': alpha,
        'k': k,
        'n': n_rmse,
        'p': p_rmse,
        's': s_rmse,
        'd': d_rmse,
    }
    with open(outfile, 'wb') as fout:
        pickle.dump(data, fout)
Beispiel #2
0
def vary_gamma(alpha, gammas, dataset, recom_list, risk):
    """Evaluate every recommender across a sweep of `gammas` at fixed `alpha`.

    Like ``given_alpha`` but calls ``config.eval_wt_gamma`` once per
    recommender, collecting one d-RMSE per gamma value, then pickles the
    summary to ``outfile``.

    Args:
        alpha: propensity-model parameter.
        gammas: iterable of gamma values; ``d_rmses`` is aligned with it.
        dataset: tuple ``(n_users, n_items, n_rates, indexes, cmpl_rates)``.
        recom_list: list of ``(recom_name, pred_rates)`` pairs.
        risk: ``(risk_name, risk_fn)`` pair.
    """
    n_users, n_items, n_rates, indexes, cmpl_rates = dataset
    risk_name, risk = risk
    cmpl_cnt = config.count_index(indexes)
    k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
    cmpl_props = config.complete_prop(alpha, k, indexes)

    outfile = path.join(gamma_dir, '%s_%.1f.p' % (risk_name, alpha))
    # if path.isfile(outfile):
    #   print('%s exists' % (path.basename(outfile)))
    #   return

    n_rmse, p_rmse, s_rmse = 0.0, 0.0, 0.0
    d_rmses = np.zeros(len(gammas))
    for recom in recom_list:
        recom_name, pred_rates = recom
        t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
        # Rebind `dataset` to the richer tuple expected by eval_wt_gamma.
        dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk

        res = config.eval_wt_gamma(recom, dataset, cmpl_props,
                                   (risk_name, risk), gammas)
        n_mse, p_mse, s_mse, d_mses = res

        n_rmse += n_mse
        p_rmse += p_mse
        s_rmse += s_mse
        d_rmses += d_mses  # elementwise, one entry per gamma
    n_recoms = len(recom_list)
    n_rmse = math.sqrt(n_rmse / n_recoms)
    p_rmse = math.sqrt(p_rmse / n_recoms)
    s_rmse = math.sqrt(s_rmse / n_recoms)
    d_rmses = np.sqrt(d_rmses / n_recoms)

    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print('  n=%.4f p=%.4f s=%.4f' % (n_rmse, p_rmse, s_rmse))
    for gamma, d_rmse in zip(gammas, d_rmses):
        print('  gamma=%.1f d=%.4f' % (gamma, d_rmse))
    print('\n' + '#' * n_hashtag + '\n')

    config.make_file_dir(outfile)
    data = {
        'a': alpha,
        'k': k,
        'n': n_rmse,
        'p': p_rmse,
        's': s_rmse,
        'd': d_rmses,
    }
    # Context manager guarantees the file handle is closed after the dump.
    with open(outfile, 'wb') as fout:
        pickle.dump(data, fout)
Beispiel #3
0
def vary_error(n_mcar, dataset, recom_list, risk):
  """Evaluate estimators against an MCAR sample of size `n_mcar`.

  Draws an MCAR rating sample, re-estimates the rating distribution from it
  (rejecting draws that are not strictly decreasing across rating levels),
  recomputes propensities, then sweeps ``omega`` over ``v_omegas`` and
  pickles the e/d RMSE curves to ``outfile``.

  NOTE(review): this function reads module-level globals `alpha`,
  `min_rate`, `max_rate`, `v_omegas`, `error_dir` and `n_hashtag` — in
  particular `alpha` is NOT a parameter here; confirm it is defined at
  module scope before this runs.
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk
  cmpl_cnt = config.count_index(indexes)
  cmpl_dist = cmpl_cnt / cmpl_cnt.sum()
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)

  outfile = path.join(error_dir, '%s_%03d.p' % (risk_name, n_mcar))

  p_o = n_rates / (n_users * n_items)  # marginal observation probability
  p_r = np.copy(cmpl_dist)             # rating distribution p(r)
  stdout.write('p_r:')
  for p in p_r:
    stdout.write(' %.4f' % p)
  stdout.write('\n')
  p_o_r = config.compute_prop(alpha, k)  # p(o|r) from the propensity model
  stdout.write('p_o_r:')
  for p in p_o_r:
    stdout.write(' %.4f' % p)
  stdout.write('\n')
  p_r_o = p_o_r * p_r / p_o  # Bayes: p(r|o) = p(o|r) p(r) / p(o)

  np.random.seed(0)  # reproducible MCAR draws
  n_levels = max_rate - min_rate + 1
  while True:
    mcar_rates = np.random.choice(n_levels, n_mcar, p=list(p_r))
    # Empirical distribution of the drawn sample.
    p_r = np.bincount(mcar_rates, minlength=n_levels).astype(float)
    p_r /= p_r.sum()
    # Accept only samples where every level is represented and the
    # frequencies are strictly decreasing (p_r[0] > p_r[1] > ...), matching
    # the expected shape of the rating distribution.
    if p_r.min() > 0.0 and bool(np.all(p_r[:-1] > p_r[1:])):
      break
  stdout.write('p_r:')
  for p in p_r:
    stdout.write(' %.4f' % p)
  stdout.write('\n')
  p_o_r = p_r_o * p_o / p_r  # invert Bayes with the empirical p(r)
  stdout.write('p_o_r:')
  for p in p_o_r:
    stdout.write(' %.4f' % p)
  stdout.write('\n')

  rate_props = config.complete_prop(alpha, k, indexes, rate_props=p_o_r)

  e_rmses, d_rmses, omegas = [], [], []
  for omega in v_omegas:
    e_rmse = 0.0
    d_rmse = 0.0
    for recom in recom_list:
      recom_name, pred_rates = recom
      t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
      # Rebind `dataset` to the richer tuple expected by eval_wt_mcar.
      dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk

      res = config.eval_wt_mcar(recom, dataset, cmpl_props, rate_props,
                                (risk_name, risk), omega)
      e_mse, d_mse = res

      e_rmse += e_mse
      d_rmse += d_mse
    n_recoms = len(recom_list)
    e_rmse = math.sqrt(e_rmse / n_recoms)
    d_rmse = math.sqrt(d_rmse / n_recoms)
    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print('  o=%.2f e=%.4f d=%.4f' % (omega, e_rmse, d_rmse))
    print('\n' + '#' * n_hashtag + '\n')
    e_rmses.append(e_rmse)
    d_rmses.append(d_rmse)
    omegas.append(omega)
  # NOTE(review): this existence check runs AFTER all the work above, so an
  # existing file only skips the save, not the computation; kept here to
  # preserve current behavior.
  if path.isfile(outfile):
    print('%s exists' % path.basename(outfile))
    return
  data = {
    'e': e_rmses,
    'd': d_rmses,
    'o': omegas,
  }
  config.make_file_dir(outfile)
  # Context manager guarantees the file handle is closed after the dump.
  with open(outfile, 'wb') as fout:
    pickle.dump(data, fout)
Beispiel #4
0
# Script fragment: re-derive observation propensities from an empirical
# rating distribution, then sweep omega and collect RMSE curves.
# NOTE(review): this fragment depends on names defined elsewhere
# (p_r_o, p_o, p_r, alpha, k, indexes, recom_list, cmpl_rates, cmpl_props,
# risk_name, risk, n_hashtag, config, math, np, stdout) — it is not
# runnable standalone.
p_o_r = p_r_o * p_o / p_r
stdout.write('p_o_r:')
[stdout.write(' %.4f' % p) for p in p_o_r]
stdout.write('\n')

# Propensities completed with the recomputed per-rating probabilities.
rate_props = config.complete_prop(alpha, k, indexes, rate_props=p_o_r)
[stdout.write('%.4f ' % p) for p in set(rate_props)]
stdout.write('\n')

e_rmses, d_rmses, omegas = [], [], []
for omega in np.arange(0.0, 1.05, 0.1):
    e_rmse = 0.0
    d_rmse = 0.0
    for recom in recom_list:
        recom_name, pred_rates = recom
        t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
        # Rebinds `dataset` to the richer tuple eval_wt_mcar expects.
        dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk

        res = config.eval_wt_mcar(recom, dataset, cmpl_props, rate_props,
                                  (risk_name, risk), omega)
        e_mse, d_mse = res

        e_rmse += e_mse
        d_rmse += d_mse
    n_recoms = len(recom_list)
    # Average the accumulated MSEs over recommenders, then take the root.
    e_rmse = math.sqrt(e_rmse / n_recoms)
    d_rmse = math.sqrt(d_rmse / n_recoms)
    print('%s alpha=%.1f k=%.4f' % (risk_name, alpha, k))
    print('  e=%.4f d=%.4f' % (e_rmse, d_rmse))
    print('\n' + '#' * n_hashtag + '\n')
    e_rmses.append(e_rmse)
    # NOTE(review): fragment appears truncated here — the matching
    # d_rmses.append / omegas.append seen in the sibling vary_error
    # function are missing; confirm against the full source.
Beispiel #5
0
def given_beta(alpha, beta, dataset, recom_list, risk):
  """Evaluate every recommender at fixed `alpha` and `beta`.

  Mirrors ``given_alpha`` but forwards ``beta`` to ``config.eval_wo_error``
  and keys the output filename on ``beta`` instead of ``alpha``.

  Args:
      alpha: propensity-model parameter.
      beta: extra parameter forwarded to ``config.eval_wo_error``.
      dataset: tuple ``(n_users, n_items, n_rates, indexes, cmpl_rates)``.
      recom_list: list of ``(recom_name, pred_rates)`` pairs.
      risk: ``(risk_name, risk_fn)`` pair.

  Returns:
      None. Persistence is currently disabled (see the early ``return``).
  """
  n_users, n_items, n_rates, indexes, cmpl_rates = dataset
  risk_name, risk = risk

  outfile = path.join(beta_dir, '%s_%.1f.p' % (risk_name, beta))
  # if path.isfile(outfile):
  #   print('%s exists' % (path.basename(outfile)))
  #   return

  cmpl_cnt = config.count_index(indexes)
  k = config.solve_k(alpha, n_users, n_items, n_rates, cmpl_cnt)
  cmpl_props = config.complete_prop(alpha, k, indexes)

  n_rmse, p_rmse, s_rmse, d_rmse = 0.0, 0.0, 0.0, 0.0
  for recom in recom_list:
    recom_name, pred_rates = recom
    t_risk = config.compute_t(pred_rates, cmpl_rates, risk)
    # Rebind `dataset` to the richer tuple expected by eval_wo_error.
    dataset = n_users, n_items, n_rates, cmpl_rates, cmpl_cnt, t_risk

    res = config.eval_wo_error(recom, dataset, cmpl_props, (risk_name, risk),
                               beta=beta)
    # Unlike given_alpha, the rerun flag is ignored here.
    n_mse, p_mse, s_mse, d_mse, rerun = res
    print('%s %s p=%.8f s=%.8f d=%.8f'
          % (risk_name, recom_name, p_mse, s_mse, d_mse))

    n_rmse += n_mse
    p_rmse += p_mse
    s_rmse += s_mse
    d_rmse += d_mse
  n_recoms = len(recom_list)
  n_rmse = math.sqrt(n_rmse / n_recoms)
  p_rmse = math.sqrt(p_rmse / n_recoms)
  s_rmse = math.sqrt(s_rmse / n_recoms)
  d_rmse = math.sqrt(d_rmse / n_recoms)

  print('%s alpha=%.1f k=%.4f beta=%.1f' % (risk_name, alpha, k, beta))
  print('  n=%.4f p=%.4f s=%.4f d=%.4f' % (n_rmse, p_rmse, s_rmse, d_rmse))
  print('\n' + '#' * n_hashtag + '\n')

  # NOTE: saving results is intentionally disabled by this early return;
  # delete it to re-enable writing the pickle to `outfile`.
  return
  config.make_file_dir(outfile)
  data = {
    'a': alpha,
    'k': k,
    'b': beta,
    'n': n_rmse,
    'p': p_rmse,
    's': s_rmse,
    'd': d_rmse,
  }
  with open(outfile, 'wb') as fout:
    pickle.dump(data, fout)