def edge_weight_convergence(agent_type_prior, edge_weight_strategy,
                            samples=[1, 2, 4, 8, 16, 32, 64, 128],
                            num_draws=1000, agent_types=[0.25, 0.75]):
    """ Plot how sampled edge weights converge toward the expected and true
    weights as the number of samples per edge grows. """
    if agent_type_prior not in TrustGraph.AGENT_TYPE_PRIORS:
        raise ValueError("Invalid agent type prior")
    if edge_weight_strategy not in TrustGraph.EDGE_WEIGHT_STRATEGIES:
        raise ValueError("Invalid edge weight strategy")

    sample_draws = [[TrustGraph._sampled_edge_weight(
                         agent_types, edge_weight_strategy, agent_type_prior,
                         s, 0, 1)
                     for _ in xrange(num_draws)]
                    for s in samples]
    draw_means = [np.mean(draws) for draws in sample_draws]
    draw_error = [1.96 * np.std(draws) for draws in sample_draws]  # 95% CI half-width
    expected_weight = TrustGraph._expected_edge_weight(
        agent_types, edge_weight_strategy, agent_type_prior, 0, 1)
    true_weight = agent_types[1]
    log_samples = [math.log(s, 2) for s in samples]

    # Draw the graph
    plt.errorbar(log_samples, draw_means, draw_error, fmt='rs',
                 label='Sampled weights')
    plt.axhline(expected_weight, color='m', linestyle=':',
                label='Expected weight')
    plt.axhline(true_weight, color='b', linestyle='--',
                label='Correct weight')
    plt.suptitle('Convergence of sampled edge weights (%d draws)\n'
                 "'%s' agent prior type, '%s' edge weight strategy"
                 % (num_draws, agent_type_prior, edge_weight_strategy))
    plt.xlabel('log(Number of samples)')
    plt.ylabel('Edge weight (95% Confidence Intervals)')
    plt.xticks(log_samples, samples)
    plt.margins(0.07)
    plt.legend(loc='best')
    plt.show()
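# Usage sketch (not part of the experiment suite): one hedged example of how
# edge_weight_convergence might be invoked. The 'uniform' prior and 'sample'
# weight strategy are taken from TrustGraph calls elsewhere in this module;
# adjust them if your TrustGraph constants differ.
def _demo_edge_weight_convergence():
    # Fewer draws than the default keep the demo quick; the resulting
    # confidence intervals will be correspondingly wider.
    edge_weight_convergence('uniform', 'sample', num_draws=200)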
def max_flow_variance(num_iters=5, edge_counts=None):
    """ Plot the mean informativeness (Spearman correlation) of personalized
    max flow scores against graph density, and return the raw score arrays. """
    if not edge_counts:
        edge_counts = EdgeCountExperimentSet.DEFAULT_EDGE_COUNTS

    raw_scores = np.zeros((len(edge_counts), num_iters, NUM_NODES, NUM_NODES))
    means = np.zeros((len(edge_counts), num_iters))
    # variances = np.zeros((len(edge_counts), num_iters))

    for i, count in enumerate(edge_counts):
        for j in xrange(num_iters):
            g = TrustGraph(NUM_NODES, 'uniform', 'uniform', count, 'noisy', 30)
            at = g.agent_types
            scores = tm.max_flow(g)
            raw_scores[i, j] = scores
            # corrs = [stats.spearmanr(at, score)[0] for score in scores]
            # means[i, j] = np.mean(corrs)
            means[i, j] = e.compute_informativeness(at, scores, False)
            # variances[i, j] = np.var(corrs)
            # variances[i, j] = np.mean(
            #     [np.var([x for x in score if x is not None])
            #      for score in scores])
            # for s in scores:
            #     plt.plot(count, np.var([x for x in score if x is not None]),
            #              'o', alpha=0.2)

    # plt.suptitle('Variances of personalized max flow scores '
    #              'against graph density (n = %d)' % num_iters)
    # plt.ylabel('Variance of max flow scores')
    # plt.xlabel('Number of edges per node')
    # plt.xticks(edge_counts, edge_counts)
    # plt.margins(0.07)
    # plt.show()

    means = means.mean(axis=1)
    # variances = variances.mean(axis=1)

    # Plot Means
    plt.plot(edge_counts, means, 'o--')
    plt.suptitle('Mean Spearman correlation of personalized max flow '
                 'against graph density (n = %d)' % num_iters)
    plt.ylabel('Average Spearman correlation')
    plt.xlabel('Number of edges per node')
    plt.xticks(edge_counts, edge_counts)
    plt.margins(0.07)
    plt.show()

    # Plot Variances
    # plt.plot(edge_counts, variances, 'o--')
    # plt.suptitle('Variance of personalized max flow scores '
    #              'against graph density (n = %d)' % num_iters)
    # plt.ylabel('Average variance of personalized max flow scores')
    # plt.xlabel('Number of edges per node')
    # plt.xticks(edge_counts, edge_counts)
    # plt.margins(0.07)
    # plt.show()

    return raw_scores
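# Post-processing sketch for the array returned by max_flow_variance(). This
# recovers, offline, a per-density variance summary similar to what the
# commented-out code above was exploring; it is illustrative only and assumes
# the raw scores are plain floats (no None/NaN handling needed).
def _max_flow_score_variance(raw_scores):
    # raw_scores has shape (num_edge_counts, num_iters, NUM_NODES, NUM_NODES).
    # Flatten the two score dimensions, take the variance of each run, then
    # average across iterations to get one value per edge count.
    per_run_var = raw_scores.reshape(raw_scores.shape[0],
                                     raw_scores.shape[1], -1).var(axis=2)
    return per_run_var.mean(axis=1)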
def time_experiment(num_iters, smc_walks=5):
    """ Compare runtime and row-wise Spearman agreement of matrix-algebra
    hitting times against the Monte Carlo approximation, across the graph
    sizes in NODE_COUNTS. """
    graphs = [[TrustGraph(num_nodes, 'uniform', 'uniform', num_nodes / 2,
                          'sample', 32)
               for _ in xrange(num_iters)]
              for num_nodes in NODE_COUNTS]
    mat_times = np.zeros((len(NODE_COUNTS), num_iters))
    smc_times = np.zeros((len(NODE_COUNTS), num_iters))
    corrs = np.zeros((len(NODE_COUNTS), num_iters))

    for i, graph_set in enumerate(graphs):
        for j, g in enumerate(graph_set):
            start_time = time.clock()  # CPU time on Unix, wall-clock on Windows
            ht_mat = m.personalized_LS_step_length_ht(g)
            mat_times[i, j] = time.clock() - start_time
            print '[%d]: mat took %.2f secs' % (len(g), mat_times[i, j])

            start_time = time.clock()
            ht_smc = smc.complete_path_smc_hitting_time(
                g, num_walks=(len(g) * smc_walks))
            smc_times[i, j] = time.clock() - start_time
            print '[%d]: SMC took %.2f secs' % (len(g), smc_times[i, j])

            # Average of all the row correlations
            corrs[i, j] = np.mean(
                [stats.spearmanr(ht_mat[k, :], ht_smc[k, :])[0]
                 for k in xrange(len(ht_smc))])

    avg_mat_times = np.mean(mat_times, axis=1)
    avg_smc_times = np.mean(smc_times, axis=1)
    avg_corrs = np.mean(corrs, axis=1)

    # Plot Timings
    plt.plot(NODE_COUNTS, avg_mat_times, '--^', label='Matrix Algebra')
    plt.plot(NODE_COUNTS, avg_smc_times, '--^', label='Monte Carlo')
    plt.xticks(NODE_COUNTS, NODE_COUNTS)
    plt.suptitle('Timings of Matrix methods vs. Monte Carlo Methods '
                 '(%d MC walks, %d iters)' % (smc_walks, num_iters))
    plt.xlabel('Number of nodes')
    plt.ylabel('Time (sec)')
    plt.legend(loc='best')
    plt.margins(0.07)
    plt.show()

    # Plot Correlations
    plt.plot(NODE_COUNTS, avg_corrs, '--^')
    plt.xticks(NODE_COUNTS, NODE_COUNTS)
    plt.suptitle('Spearman Correlation between Matrix and Monte Carlo Methods '
                 '(%d MC walks, %d iters)' % (smc_walks, num_iters))
    plt.xlabel('Number of nodes')
    plt.ylabel('Spearman Correlation')
    plt.margins(0.07)
    plt.show()
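# Hedged usage sketch: a small run of the timing comparison. Runtime is
# dominated by the largest sizes in the module-level NODE_COUNTS, so a low
# num_iters keeps the demo quick; smc_walks=5 matches the default above.
def _demo_time_experiment():
    time_experiment(num_iters=2, smc_walks=5)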
def __init__(self, num_nodes, agent_type_prior, edge_strategy,
             edges_per_node, edge_weight_strategy, num_weight_samples):
    """
    Args:
        (These are exactly the same as TrustGraph. Please refer there
        for details.)
    """
    self.graph = TrustGraph(num_nodes, agent_type_prior, edge_strategy,
                            edges_per_node, edge_weight_strategy,
                            num_weight_samples)
    self.global_ttms = defaultdict(dict)
    self.personalized_ttms = defaultdict(dict)
    self.info_scores = defaultdict(dict)
    self.runtimes = dict()
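# Hedged construction sketch: whichever experiment class this __init__ belongs
# to (its name is not shown in this excerpt), instantiation mirrors a
# TrustGraph call. 'ExperimentSet' below is a stand-in name, not the real one.
#
#     exp = ExperimentSet(NUM_NODES, 'uniform', 'uniform', NUM_EDGES,
#                         'noisy', NUM_SAMPLES)
#     exp.graph        # the underlying TrustGraph
#     exp.info_scores  # empty defaultdicts until experiments are run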
def efficiency_by_sybil_pct(num_iters, num_strategic=None, sybil_pcts=None,
                            cutlinks=True, gensybils=True):
    """ Measure informativeness and efficiency of each mechanism as the
    number of sybils created per strategic agent varies. """
    if not sybil_pcts:
        sybil_pcts = DEFAULT_SYBIL_PCTS
    if num_strategic is None:
        num_strategic = NUM_NODES / 2

    graphs = [[TrustGraph(NUM_NODES, 'uniform', 'uniform', NUM_EDGES,
                          'noisy', NUM_SAMPLES)
               for _ in xrange(num_iters)]
              for _ in xrange(len(sybil_pcts))]
    informativeness = {n: np.zeros(len(sybil_pcts)) for n in NAMES}
    efficiency = {n: np.zeros(len(sybil_pcts)) for n in NAMES}

    for i, sybil_pct in enumerate(sybil_pcts):
        for is_global, name, func in MECHANISMS:
            info, eff = np.zeros(num_iters), np.zeros(num_iters)
            for j in xrange(num_iters):
                g = graphs[i][j]
                start_time = time.clock()
                scores = func(g, num_strategic, sybil_pct, cutlinks, gensybils)
                info[j] = compute_informativeness(g.agent_types, scores,
                                                  is_global)
                eff[j] = compute_efficiency(g.agent_types, scores, is_global)
                total_time = time.clock() - start_time
                print '%s took %.2f secs' % (name, total_time)
            informativeness[name][i] = info.mean()
            efficiency[name][i] = eff.mean()

    return {'info': informativeness,
            'eff': efficiency,
            'xticks': sybil_pcts,
            'xlabel': '% Sybils created per strategic agent',
            'subtitle': '%d nodes, %d edges/node, %d strategic (%d iters)'
                        % (NUM_NODES, NUM_EDGES, num_strategic, num_iters)}
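# Hedged usage sketch for the efficiency_by_* family: run the sybil-percentage
# sweep with a small iteration count and dump the per-mechanism curves.
# Mechanism names come from the module-level NAMES/MECHANISMS definitions.
def _demo_efficiency_by_sybil_pct():
    results = efficiency_by_sybil_pct(num_iters=2)
    print results['xlabel'], results['xticks']
    for name in sorted(results['info']):
        print name, results['info'][name], results['eff'][name]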
def pagerank_outedge_robustness():
    """ Examine how much PageRank varies as we remove outedges for a node. """
    g = TrustGraph(NUM_NODES, 'beta', 'cluster', NUM_EDGES, 'sample', 50)
    node = 0  # Arbitrarily picked, WLOG
    edges = g.out_edges(node, data=True)
    raw_prs = [[] for _ in xrange(NUM_EDGES + 1)]

    for e in xrange(NUM_EDGES + 1):
        sys.stdout.write('.')
        for _ in xrange(NUM_ITERS):
            g.remove_edges_from(g.out_edges(node))   # Remove all outedges
            g.add_edges_from(random.sample(edges, e))  # Add in random subset
            raw_prs[e].append(nx.pagerank_numpy(g)[node])
    prs = np.mean(np.array(raw_prs, dtype=float), axis=1)

    plt.plot(prs)
    plt.suptitle('PageRank robustness: PageRank score as the number of '
                 'outedges increases')
    plt.xlabel('Number of outedges for node 0')
    plt.ylabel('PageRank score for node 0')
    plt.show()
def efficiency_by_strategic_counts(num_iters, strategic_counts=None,
                                   cutlinks=True, gensybils=True):
    """
    1. Compute scores under manipulations.
    2. Compute informativeness WRT the number of strategic agents.
    3. Compute efficiency WRT the number of strategic agents.
    """
    if not strategic_counts:
        strategic_counts = DEFAULT_STRATEGIC_COUNTS

    graphs = [[TrustGraph(NUM_NODES, 'uniform', 'uniform', NUM_EDGES,
                          'noisy', NUM_SAMPLES)
               for _ in xrange(num_iters)]
              for _ in xrange(len(strategic_counts))]
    informativeness = {n: np.zeros(len(strategic_counts)) for n in NAMES}
    efficiency = {n: np.zeros(len(strategic_counts)) for n in NAMES}

    for i, num_strategic in enumerate(strategic_counts):
        for is_global, name, func in MECHANISMS:
            info, eff = np.zeros(num_iters), np.zeros(num_iters)
            for j in xrange(num_iters):
                g = graphs[i][j]
                scores = func(g, num_strategic, SYBIL_PCT, cutlinks, gensybils)
                info[j] = compute_informativeness(g.agent_types, scores,
                                                  is_global)
                eff[j] = compute_efficiency(g.agent_types, scores, is_global)
            informativeness[name][i] = info.mean()
            efficiency[name][i] = eff.mean()

    return {'info': informativeness,
            'eff': efficiency,
            'xticks': strategic_counts,
            'xlabel': 'Number of strategic agents',
            'subtitle': '%d nodes, %d edges/node, %d%% sybils (%d iters)'
                        % (NUM_NODES, NUM_EDGES, int(100 * SYBIL_PCT),
                           num_iters)}
def efficiency_by_edge_count(num_iters, edge_counts, num_strategic, sybil_pct,
                             cutlinks=True, gensybils=True):
    """ Measure informativeness and efficiency of each mechanism as the
    number of edges per node varies. """
    graphs = [[TrustGraph(NUM_NODES, 'uniform', 'uniform', e, 'noisy',
                          NUM_SAMPLES)
               for _ in xrange(num_iters)]
              for e in edge_counts]
    informativeness = {n: np.zeros(len(edge_counts)) for n in NAMES}
    efficiency = {n: np.zeros(len(edge_counts)) for n in NAMES}

    for i, _ in enumerate(edge_counts):
        for is_global, name, func in MECHANISMS:
            info, eff = np.zeros(num_iters), np.zeros(num_iters)
            for j in xrange(num_iters):
                g = graphs[i][j]
                scores = func(g, num_strategic, sybil_pct, cutlinks, gensybils)
                info[j] = compute_informativeness(g.agent_types, scores,
                                                  is_global)
                eff[j] = compute_efficiency(g.agent_types, scores, is_global)
            informativeness[name][i] = info.mean()
            efficiency[name][i] = eff.mean()

    return {'info': informativeness,
            'eff': efficiency,
            'xticks': edge_counts,
            'xlabel': 'Number of edges per node',
            'subtitle': '%d nodes, %d strategic, %d%% sybils (%d iters)'
                        % (NUM_NODES, num_strategic, int(100 * sybil_pct),
                           num_iters)}
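# Plotting sketch for the dicts returned by efficiency_by_sybil_pct,
# efficiency_by_strategic_counts, and efficiency_by_edge_count. This helper is
# illustrative and not part of the original experiment code; it assumes plt is
# the module-level matplotlib import used above and that every mechanism name
# appears in both 'info' and 'eff'.
def _plot_efficiency_results(results, measure='info'):
    ylabel = {'info': 'Informativeness', 'eff': 'Efficiency'}[measure]
    for name, values in sorted(results[measure].items()):
        plt.plot(results['xticks'], values, 'o--', label=name)
    plt.xticks(results['xticks'], results['xticks'])
    plt.xlabel(results['xlabel'])
    plt.ylabel(ylabel)
    plt.suptitle('%s by %s\n%s' % (ylabel, results['xlabel'],
                                   results['subtitle']))
    plt.legend(loc='best')
    plt.margins(0.07)
    plt.show()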