def sim_pcrm(times, notion):
    """Solve PCRM ``times`` times on randomly generated data.

    Each iteration draws a random instance size (``p`` properties from
    [2, 20), ``n`` positions from [3, 100) and ``m = ceil(u * n)`` items
    with ``u`` uniform on [1, 1.5)), builds the parity constraints with a
    fixed violation coefficient of 1.5 and times one call to
    ``ra.ip_parity``.

    Args:
        times: Number of random instances to generate and solve.
        notion: Fairness notion, passed through to ``dg.parity_pcrm``.

    Returns:
        A ``pandas.DataFrame`` with one row per instance and the columns
        ``time`` (seconds spent inside ``ra.ip_parity``), ``value``
        (``rank_pcrm[2]``), ``bound`` (the violation coefficient),
        ``no_items``, ``no_pos`` and ``no_prop``.
    """
    # ``elapsed_times`` instead of ``time`` so the stdlib module name is not
    # shadowed inside this function.
    elapsed_times, values, bounds = [], [], []
    no_items, no_pos, no_prop = [], [], []

    for _ in range(times):
        p = np.random.randint(low=2, high=20, size=1)[0]
        n = np.random.randint(low=3, high=100, size=1)[0]
        # Index the size-1 draw before calling math.ceil: implicitly
        # converting a 1-element array to a scalar is deprecated in NumPy.
        # The draw itself (and therefore the RNG stream) is unchanged.
        m = math.ceil(np.random.uniform(low=1, high=1.5, size=1)[0] * n)
        # Fixed violation coefficient for the timing experiment.
        viol = 1.5

        pos_imp, item_qual, properties = dg.sim_data(m, n, p, True)
        prop_list = dg.get_prop_list_sim(properties)
        parity_pcrm = dg.parity_pcrm(prop_list, item_qual, notion, viol)

        # Only the solver call is timed; data generation is excluded.
        start = timeit.default_timer()
        rank_pcrm = ra.ip_parity(item_qual, pos_imp, prop_list, parity_pcrm)
        stop = timeit.default_timer()

        elapsed_times.append(stop - start)
        values.append(rank_pcrm[2])
        bounds.append(viol)
        no_items.append(m)
        no_pos.append(n)
        no_prop.append(p)

    # use Bonferroni for multiple comparisons
    # Going through a single ndarray keeps every column float-typed, exactly
    # as before.
    dat = np.array(
        [elapsed_times, values, bounds, no_items, no_pos, no_prop]).T
    return pd.DataFrame(
        dat,
        columns=['time', 'value', 'bound', 'no_items', 'no_pos', 'no_prop'])
def greedy_approx(times, notion, disjoint):
    """Compare the greedy approximation for PCRM against the exact IP.

    Each iteration draws a random instance (``p`` properties from [2, 5),
    ``n`` positions from [10, 20), ``m = ceil(u * n)`` items with ``u``
    uniform on [1, 1.5), violation coefficient uniform on [1.1, 1.2)),
    solves it with ``ra.ip_parity`` and ``ra.greedy_parity`` and times
    both calls.

    Args:
        times: Number of random instances to generate and solve.
        notion: Fairness notion, passed through to ``dg.parity_pcrm``.
        disjoint: Passed through as the fourth argument of ``dg.sim_data``.

    Returns:
        A ``pandas.DataFrame`` with one row per instance and the columns
        ``value_factor`` (IP value divided by greedy value, NaN unless both
        values are positive), ``feas_ip`` / ``feas_greed`` (1 when the
        respective value is positive, else 0), ``time_ip`` /
        ``time_greedy`` (seconds) and ``viol_coeff``.
    """
    value_factor, feas_ip, feas_greedy = [], [], []
    time_ip, time_greedy, viol_coeff = [], [], []

    for i in range(times):
        p = np.random.randint(low=2, high=5, size=1)[0]
        n = np.random.randint(low=10, high=20, size=1)[0]
        # Index the size-1 draw before calling math.ceil: implicitly
        # converting a 1-element array to a scalar is deprecated in NumPy.
        m = math.ceil(np.random.uniform(low=1, high=1.5, size=1)[0] * n)
        viol = np.random.uniform(low=1.1, high=1.2, size=1)[0]

        pos_imp, item_qual, properties = dg.sim_data(m, n, p, disjoint)
        prop_list = dg.get_prop_list_sim(properties)
        parity_pcrm = dg.parity_pcrm(prop_list, item_qual, notion, viol)

        start = timeit.default_timer()
        rank_ip = ra.ip_parity(item_qual, pos_imp, prop_list, parity_pcrm)
        elapsed_ip = timeit.default_timer() - start

        # Note: the greedy solver takes ``properties``, not ``prop_list``.
        start = timeit.default_timer()
        rank_greedy = ra.greedy_parity(item_qual, pos_imp, properties,
                                       parity_pcrm)
        elapsed_greedy = timeit.default_timer() - start

        # A strictly positive objective value is used as the feasibility
        # indicator for both solvers.
        f_ip = 1 if rank_ip[2] > 0 else 0
        f_greedy = 1 if rank_greedy[1] > 0 else 0

        # The ratio is only meaningful when both solvers found a solution.
        if f_ip == 1 and f_greedy == 1:
            val_fact = rank_ip[2] / rank_greedy[1]
        else:
            val_fact = np.nan

        value_factor.append(val_fact)
        feas_ip.append(f_ip)
        feas_greedy.append(f_greedy)
        time_ip.append(elapsed_ip)
        time_greedy.append(elapsed_greedy)
        viol_coeff.append(viol)
        print(str(i) + ' greedy_approx completed')

    dat = np.array([
        value_factor, feas_ip, feas_greedy, time_ip, time_greedy, viol_coeff
    ]).T
    return pd.DataFrame(dat, columns=[
        'value_factor', 'feas_ip', 'feas_greed', 'time_ip', 'time_greedy',
        'viol_coeff'
    ])
def price_of_fairness(times, notion):
    """Measure the price of fairness on randomly generated PCRM instances.

    Each iteration draws a random instance (``p`` properties from [2, 5),
    ``n`` positions from [10, 20), ``m = ceil(u * n)`` items with ``u``
    uniform on [1, 1.5), violation coefficient uniform on [1.15, 1.2)),
    solves the fair problem with ``ra.ip_parity`` and the unconstrained
    problem with ``ra.unconstrained_ranking_matching`` and compares the two
    values.

    Args:
        times: Number of random instances to generate and solve.
        notion: Fairness notion, passed through to ``dg.parity_pcrm``.

    Returns:
        A ``pandas.DataFrame`` with one row per instance and the columns
        ``price of fairness`` (``(opt - fair) / opt``, or 0 when the
        unconstrained value ``opt`` is 0), ``viol_coeff``,
        ``max_unfairness`` (``ra.max_unfair`` of the unconstrained
        ranking), ``no_items``, ``no_pos`` and ``no_prop``.
    """
    price_fair, viol_coeff, unfairness = [], [], []
    no_items, no_pos, no_prop = [], [], []

    for i in range(times):
        p = np.random.randint(low=2, high=5, size=1)[0]
        n = np.random.randint(low=10, high=20, size=1)[0]
        # Index the size-1 draw before calling math.ceil: implicitly
        # converting a 1-element array to a scalar is deprecated in NumPy.
        m = math.ceil(np.random.uniform(low=1, high=1.5, size=1)[0] * n)
        viol = np.random.uniform(low=1.15, high=1.2, size=1)[0]

        pos_imp, item_qual, properties = dg.sim_data(m, n, p, True)
        prop_list = dg.get_prop_list_sim(properties)
        parity_pcrm = dg.parity_pcrm(prop_list, item_qual, notion, viol)

        # Fair (constrained) solution.
        rank_pcrm = ra.ip_parity(item_qual, pos_imp, prop_list, parity_pcrm)
        fair = rank_pcrm[2]

        # Unconstrained solution and its ranking as a list.
        val_mat = dg.get_val_mat(pos_imp, item_qual)
        rank_unc = ra.unconstrained_ranking_matching(val_mat)
        opt = rank_unc[1]
        unc_rank_mat = ra.rank_mat_match(rank_unc[0], m, n)
        unc_rank_list = ra.rank_list_from_mat(unc_rank_mat)

        # The parity bounds are divided by ``viol`` before being handed to
        # max_unfair, which also receives ``viol`` separately.
        unfair = ra.max_unfair(unc_rank_list, pos_imp, parity_pcrm / viol,
                               prop_list, viol)

        # Guard the division the same way twogroup_sim does: a zero
        # unconstrained value means there is nothing to lose to fairness.
        price = 0 if opt == 0 else (opt - fair) / opt

        price_fair.append(price)
        viol_coeff.append(viol)
        unfairness.append(unfair)
        no_items.append(m)
        no_pos.append(n)
        no_prop.append(p)
        print(str(i) + ' price of fairness completed')

    # use Bonferroni for multiple comparisons
    dat = np.array(
        [price_fair, viol_coeff, unfairness, no_items, no_pos, no_prop]).T
    return pd.DataFrame(dat, columns=[
        'price of fairness', 'viol_coeff', 'max_unfairness', 'no_items',
        'no_pos', 'no_prop'
    ])
def pcrm_vs_topk(times, notion):
    """Compare PCRM and top-k solutions on randomly generated data.

    Each iteration draws a random instance (``p`` properties from [2, 5),
    ``n`` positions from [10, 20), ``m = ceil(u * n)`` items with ``u``
    uniform on [1, 1.5), violation coefficient uniform on [1.1, 1.2)),
    solves the top-k problem with ``ra.greedy_topk`` and PCRM with
    ``ra.ip_parity`` and correlates the two rankings.

    Args:
        times: Number of random instances to generate and solve.
        notion: Fairness notion, passed through to ``dg.parity_topk`` and
            ``dg.parity_pcrm``.

    Returns:
        A ``pandas.DataFrame`` with one row per instance and the columns
        ``value_factor`` (PCRM value divided by top-k value; 1 when both
        are 0, 0 when only the top-k value is 0), ``rho`` and ``p_val``
        (Spearman correlation of the two rank lists and its p-value),
        ``no_items``, ``no_pos`` and ``no_prop``.
    """
    value_factor, corr, sign = [], [], []
    no_items, no_pos, no_prop = [], [], []

    for i in range(times):
        p = np.random.randint(low=2, high=5, size=1)[0]
        n = np.random.randint(low=10, high=20, size=1)[0]
        # Index the size-1 draw before calling math.ceil: implicitly
        # converting a 1-element array to a scalar is deprecated in NumPy.
        m = math.ceil(np.random.uniform(low=1, high=1.5, size=1)[0] * n)
        viol = np.random.uniform(low=1.1, high=1.2, size=1)[0]

        pos_imp, item_qual, properties = dg.sim_data(m, n, p, True)
        prop_list = dg.get_prop_list_sim(properties)
        parity_topk = dg.parity_topk(prop_list, item_qual, n, notion)
        parity_pcrm = dg.parity_pcrm(prop_list, item_qual, notion, viol)

        rank_topk = ra.greedy_topk(properties, parity_topk, pos_imp,
                                   item_qual)
        rank_pcrm = ra.ip_parity(item_qual, pos_imp, prop_list, parity_pcrm)

        rank_list_topk, val_topk = rank_topk[0], rank_topk[1]
        val_pcrm = rank_pcrm[2]
        # Convert the IP output (element 3) to a rank matrix and then to a
        # rank list so it can be compared with the top-k rank list.
        rank_mat_pcrm = ra.rank_mat_lp(rank_pcrm[3], m, n)
        rank_list_pcrm = ra.rank_list_from_mat(rank_mat_pcrm)

        # return difference in value + spearman rho and maybe size
        rho, p_val = stats.spearmanr(rank_list_topk, rank_list_pcrm)

        # Avoid dividing by a zero top-k value: two zero values count as
        # equal (factor 1); otherwise the factor is recorded as 0.
        if val_topk != 0:
            value_factor.append(val_pcrm / val_topk)
        elif val_pcrm == 0:
            value_factor.append(1)
        else:
            value_factor.append(0)

        corr.append(rho)
        sign.append(p_val)
        no_items.append(m)
        no_pos.append(n)
        no_prop.append(p)
        print(str(i) + ' pcrm_vs_topk completed')

    dat = np.array([value_factor, corr, sign, no_items, no_pos, no_prop]).T
    return pd.DataFrame(dat, columns=[
        'value_factor', 'rho', 'p_val', 'no_items', 'no_pos', 'no_prop'
    ])
def twogroup_sim(one, two, n, notion, viol):
    """Run one two-group simulation and compare fair vs. unconstrained rankings.

    Generates data with ``sim_twogroup_data(one, two)``, uses logarithmic
    position exposure ``1 / log(j + 2)`` for the ``n`` positions, and
    solves the unconstrained problem, the exact fair problem
    (``ra.ip_parity``) and the greedy fair approximation
    (``ra.greedy_parity_two``).

    Args:
        one: First argument of ``sim_twogroup_data``; echoed in the result.
        two: Second argument of ``sim_twogroup_data``; echoed in the result.
        n: Number of ranking positions.
        notion: Fairness notion, either ``'demographic'`` or
            ``'utilitarian'``.
        viol: Violation coefficient passed to ``dg.parity_pcrm`` and
            ``ra.max_unfair``.

    Returns:
        The tuple ``(pof, r_unfair, r_fair, mu_unfair, mu_fair, one, two,
        val_fact)``: the price of fairness (0 when the unconstrained value
        is 0), the fairness ratio and maximum unfairness of the
        unconstrained and fair rankings, the two inputs, and the ratio of
        the exact fair value to the greedy value (NaN when the greedy value
        is 0).

    Raises:
        ValueError: If ``notion`` is not one of the supported notions.
    """
    # Fail fast: previously an unsupported notion ran every solver and then
    # crashed with UnboundLocalError at the return statement.
    if notion not in ('demographic', 'utilitarian'):
        raise ValueError(
            "notion must be 'demographic' or 'utilitarian', got "
            + repr(notion))

    item_qual, prop_list, properties = sim_twogroup_data(one, two)
    m = len(item_qual)

    # Logarithmic position discount; math.log keeps values identical to the
    # original element-wise loop.
    exposure = np.array([1 / math.log(j + 2) for j in range(n)], dtype=float)

    # NOTE(review): price_of_fairness calls dg.get_val_mat(pos_imp,
    # item_qual), i.e. with the position weights first; the order here is
    # the opposite. Confirm against dg.get_val_mat which one is intended.
    val_mat = dg.get_val_mat(item_qual, exposure)
    unconstrained = ra.unconstrained_ranking_matching(val_mat)
    unc_val = unconstrained[1]
    # NOTE(review): the unconstrained ranking is taken to be items
    # 0..n-1 in order rather than read from the matching; presumably
    # sim_twogroup_data returns items sorted by quality. Confirm.
    unconstrained_rank_list = np.arange(0, n)

    # Exact fair solution.
    parity = dg.parity_pcrm(prop_list, item_qual, notion, viol)
    fair = ra.ip_parity(item_qual, exposure, prop_list, parity)
    fair_rank_mat = ra.rank_mat_lp(fair[3], m, n)
    fair_rank_list = ra.rank_list_from_mat(fair_rank_mat)
    fair_val = fair[2]

    # Greedy fair approximation and its value relative to the exact one.
    fair_approx = ra.greedy_parity_two(item_qual, exposure, properties,
                                       parity)
    if fair_approx[1] != 0:
        val_fact = fair_val / fair_approx[1]
    else:
        val_fact = np.nan

    if notion == 'demographic':
        r_unfair = ra.dp_ratio(unconstrained_rank_list, exposure, prop_list)
        r_fair = ra.dp_ratio(fair_rank_list, exposure, prop_list)
    else:  # 'utilitarian' (validated above)
        r_unfair = ra.dt_ratio(unconstrained_rank_list, exposure, item_qual,
                               prop_list)
        r_fair = ra.dt_ratio(fair_rank_list, exposure, item_qual, prop_list)

    mu_unfair = ra.max_unfair(unconstrained_rank_list, exposure, parity,
                              prop_list, viol)
    mu_fair = ra.max_unfair(fair_rank_list, exposure, parity, prop_list,
                            viol)

    # Price of fairness: relative value lost by imposing the constraints.
    pof = 0 if unc_val == 0 else (unc_val - fair_val) / unc_val

    return pof, r_unfair, r_fair, mu_unfair, mu_fair, one, two, val_fact
prop_prev_user = properties_topic.sum() prop_util_user = np.matmul(item_qual_topic, properties_topic) prop_avg_util_user = np.zeros(len(prop_util_user)) for i in range(len(prop_util_user)): prop_avg_util_user[i] = prop_util_user[i] / prop_prev_user.iloc[i] # obtain the unconstrained and fair solutions # unconstrained rank_unc = ra.unconstrained_ranking_matching(val_mat) rank_mat_unc = ra.rank_mat_match(rank_unc[0], m, n) rank_list_unc = ra.rank_list_from_mat(rank_mat_unc) unc_val = rank_unc[1] # pcrm ip_rank_pcrm = ra.ip_parity(item_qual_topic, pos_imp, prop_list_topic, parity_pcrm_topic) fair_rank_mat = ra.rank_mat_lp(ip_rank_pcrm[3], m, n) rank_list_fair_ip = ra.rank_list_from_mat(fair_rank_mat) fair_ip_val = ip_rank_pcrm[2] lp_rank_pcrm = ra.lp_parity(item_qual_topic, pos_imp, prop_list_topic, parity_pcrm_topic) fair_lp_rank_mat = ra.rank_mat_lp(lp_rank_pcrm[3], m, n) fair_lp_val = lp_rank_pcrm[2] fair_greedy_pcrm = ra.greedy_parity_two(item_qual_topic, pos_imp, properties_topic, parity_pcrm_topic) fair_greedy_pcrm_rank_mat = ra.rank_mat_greedy(fair_greedy_pcrm[0], m, n) fair_greedy_pcrm_val = fair_greedy_pcrm[1] # topk