def edge_weight_convergence(agent_type_prior,
                            edge_weight_strategy,
                            samples=[1, 2, 4, 8, 16, 32, 64, 128],
                            num_draws=1000,
                            agent_types=[0.25, 0.75]):
    """ Plot sampled edge weights against the number of samples used.

    For each entry of `samples`, `num_draws` weights are drawn with
    TrustGraph._sampled_edge_weight. Their mean is plotted with an error bar
    of 1.96 standard deviations, next to horizontal lines for the value of
    TrustGraph._expected_edge_weight and for agent_types[1].

    Args:
        agent_type_prior: A member of TrustGraph.AGENT_TYPE_PRIORS.
        edge_weight_strategy: A member of TrustGraph.EDGE_WEIGHT_STRATEGIES.
        samples: Sample counts to evaluate. The x axis shows their base-2
            logarithm, so every entry has to be positive.
        num_draws: Number of weights drawn per sample count.
        agent_types: Agent types handed to TrustGraph; entry 1 is plotted as
            the 'Correct weight'.

    Raises:
        ValueError: If the prior or the strategy is not recognized.

    Note: the list defaults are shared between calls; this function itself
    only reads them.
    """
    if agent_type_prior not in TrustGraph.AGENT_TYPE_PRIORS:
        raise ValueError("Invalid agent type prior")
    if edge_weight_strategy not in TrustGraph.EDGE_WEIGHT_STRATEGIES:
        raise ValueError("Invalid edge weight strategy")

    # Draw one batch of weights per sample count and summarize it right away.
    # The trailing (0, 1) arguments are presumably the two agent indices of
    # the edge being weighted -- TODO confirm against TrustGraph.
    draw_means = []
    draw_error = []
    for sample_count in samples:
        batch = []
        for _ in xrange(num_draws):
            batch.append(
                TrustGraph._sampled_edge_weight(agent_types,
                                                edge_weight_strategy,
                                                agent_type_prior,
                                                sample_count, 0, 1))
        draw_means.append(np.mean(batch))
        draw_error.append(1.96 * np.std(batch))

    expected_weight = TrustGraph._expected_edge_weight(
        agent_types, edge_weight_strategy, agent_type_prior, 0, 1)
    true_weight = agent_types[1]
    log_samples = [math.log(count, 2) for count in samples]

    # Draw the graph
    plt.errorbar(log_samples, draw_means, yerr=draw_error, fmt='rs',
                 label='Sampled weights')
    plt.axhline(expected_weight, color='m', linestyle=':',
                label='Expected weight')
    plt.axhline(true_weight, color='b', linestyle='--',
                label='Correct weight')

    title = ('Convergence of sampled edge weights (%d draws)\n'
             "'%s' agent prior type, '%s' edge weight strategy" %
             (num_draws, agent_type_prior, edge_weight_strategy))
    plt.suptitle(title)
    plt.xlabel('log(Number of samples)')
    plt.ylabel('Edge weight (95% Confidence Intervals)')

    # Tick positions are on the log scale but labelled with the raw counts.
    plt.xticks(log_samples, samples)
    plt.margins(0.07)
    plt.legend(loc='best')

    plt.show()
# Example no. 2
# 0
def max_flow_variance(num_iters=5, edge_counts=None):
    """ Score max flow on random graphs of varying density and plot the mean.

    For each edge count, `num_iters` TrustGraphs are generated, scored with
    tm.max_flow and evaluated with e.compute_informativeness. The per-count
    average of that value is plotted against the edge counts.

    Args:
        num_iters: Number of graphs generated per edge count.
        edge_counts: Edge counts to try; a falsy value selects
            EdgeCountExperimentSet.DEFAULT_EDGE_COUNTS.

    Returns:
        Array of shape (len(edge_counts), num_iters, NUM_NODES, NUM_NODES)
        holding the raw max flow scores of every generated graph.
    """
    if not edge_counts:
        edge_counts = EdgeCountExperimentSet.DEFAULT_EDGE_COUNTS

    num_counts = len(edge_counts)
    raw_scores = np.zeros((num_counts, num_iters, NUM_NODES, NUM_NODES))
    means = np.zeros((num_counts, num_iters))

    # NOTE: an earlier variant also tracked per-row Spearman correlations and
    # score variances (with their own plots); that analysis is disabled and
    # only the informativeness value is recorded.
    for row, count in enumerate(edge_counts):
        for col in xrange(num_iters):
            graph = TrustGraph(NUM_NODES, 'uniform', 'uniform', count,
                               'noisy', 30)
            # Read the agent types before scoring, as the original did.
            types = graph.agent_types
            scores = tm.max_flow(graph)
            raw_scores[row, col] = scores
            means[row, col] = e.compute_informativeness(types, scores, False)

    # Collapse the iteration axis: one averaged value per edge count.
    means = np.mean(means, axis=1)

    # Plot Means
    # The labels speak of Spearman correlation while the plotted value comes
    # from e.compute_informativeness -- presumably the same quantity; verify.
    plt.plot(edge_counts, means, 'o--')
    title = ('Mean Spearman correlation of personalized max flow '
             'against graph density (n = %d)' % num_iters)
    plt.suptitle(title)
    plt.ylabel('Average Spearman correlation')
    plt.xlabel('Number of edges per node')
    plt.xticks(edge_counts, edge_counts)
    plt.margins(0.07)
    plt.show()

    return raw_scores
def time_experiment(num_iters, smc_walks=5):
    """ Time the matrix and Monte Carlo hitting-time methods and compare them.

    For every node count in NODE_COUNTS, `num_iters` TrustGraphs are built.
    On each graph both m.personalized_LS_step_length_ht and
    smc.complete_path_smc_hitting_time are timed with time.clock(), and the
    row-wise Spearman correlation between their results is averaged. Two
    figures are shown: average timings and average correlations per node
    count.

    Args:
        num_iters: Number of graphs generated per node count.
        smc_walks: Multiplier for the Monte Carlo walk count; each graph uses
            len(g) * smc_walks walks.
    """
    # All graphs are generated up front, before any timing starts.
    graphs = []
    for num_nodes in NODE_COUNTS:
        graph_set = [
            TrustGraph(num_nodes, 'uniform', 'uniform', num_nodes / 2,
                       'sample', 32) for _ in xrange(num_iters)
        ]
        graphs.append(graph_set)

    result_shape = (len(NODE_COUNTS), num_iters)
    mat_times = np.zeros(result_shape)
    smc_times = np.zeros(result_shape)
    corrs = np.zeros(result_shape)

    for i, graph_set in enumerate(graphs):
        for j, g in enumerate(graph_set):
            size = len(g)

            # NOTE: time.clock() no longer exists on Python >= 3.8; it is
            # kept because this file targets Python 2.
            tick = time.clock()
            ht_mat = m.personalized_LS_step_length_ht(g)
            mat_times[i, j] = time.clock() - tick
            print('[%d]: mat took %.2f secs' % (size, mat_times[i, j]))

            tick = time.clock()
            ht_smc = smc.complete_path_smc_hitting_time(
                g, num_walks=(size * smc_walks))
            smc_times[i, j] = time.clock() - tick
            print('[%d]: SMC took %.2f secs' % (size, smc_times[i, j]))

            # Average of all the row correlations
            row_corrs = []
            for k in xrange(len(ht_smc)):
                rho = stats.spearmanr(ht_mat[k, :], ht_smc[k, :])[0]
                row_corrs.append(rho)
            corrs[i, j] = np.mean(row_corrs)

    # Average over the iteration axis: one value per node count.
    avg_mat_times = mat_times.mean(axis=1)
    avg_smc_times = smc_times.mean(axis=1)
    avg_corrs = corrs.mean(axis=1)

    run_info = (smc_walks, num_iters)

    # Plot Timings
    plt.plot(NODE_COUNTS, avg_mat_times, '--^', label='Matrix Algebra')
    plt.plot(NODE_COUNTS, avg_smc_times, '--^', label='Monte Carlo')
    plt.xticks(NODE_COUNTS, NODE_COUNTS)

    plt.suptitle('Timings of Matrix methods vs. Monte Carlo Methods '
                 '(%d MC walks, %d iters)' % run_info)
    plt.xlabel('Number of nodes')
    plt.ylabel('Time (sec)')
    plt.legend(loc='best')
    plt.margins(0.07)
    plt.show()

    # Plot Correlations
    plt.plot(NODE_COUNTS, avg_corrs, '--^')
    plt.xticks(NODE_COUNTS, NODE_COUNTS)

    plt.suptitle('Spearman Correlation between Matrix and Monte Carlo Methods '
                 '(%d MC walks, %d iters)' % run_info)
    plt.xlabel('Number of nodes')
    plt.ylabel('Spearman Correlation')
    plt.margins(0.07)
    plt.show()
def edge_weight_convergence(agent_type_prior, edge_weight_strategy,
                            samples=[1, 2, 4, 8, 16, 32, 64, 128],
                            num_draws=1000, agent_types=[0.25, 0.75]):
    """ Show how sampled edge weights settle as more samples are taken.

    Draws `num_draws` weights from TrustGraph._sampled_edge_weight for every
    sample count in `samples`, then plots each batch mean with an error bar
    of 1.96 standard deviations. The result of
    TrustGraph._expected_edge_weight and agent_types[1] are drawn as
    horizontal reference lines.

    Args:
        agent_type_prior: Must be in TrustGraph.AGENT_TYPE_PRIORS.
        edge_weight_strategy: Must be in TrustGraph.EDGE_WEIGHT_STRATEGIES.
        samples: Sample counts; plotted at log base 2, so they must be > 0.
        num_draws: Size of each batch of draws.
        agent_types: Agent types passed to TrustGraph; index 1 is used as
            the 'Correct weight' line.

    Raises:
        ValueError: On an unknown prior or an unknown strategy.
    """
    if agent_type_prior not in TrustGraph.AGENT_TYPE_PRIORS:
        raise ValueError("Invalid agent type prior")
    if edge_weight_strategy not in TrustGraph.EDGE_WEIGHT_STRATEGIES:
        raise ValueError("Invalid edge weight strategy")

    def draw_batch(num_samples):
        # One batch: num_draws sampled weights for this sample count.
        return [
            TrustGraph._sampled_edge_weight(
                agent_types, edge_weight_strategy, agent_type_prior,
                num_samples, 0, 1)
            for _ in xrange(num_draws)
        ]

    batches = [draw_batch(num_samples) for num_samples in samples]

    draw_means = [np.mean(batch) for batch in batches]
    draw_error = [1.96 * np.std(batch) for batch in batches]
    expected_weight = TrustGraph._expected_edge_weight(
        agent_types, edge_weight_strategy, agent_type_prior, 0, 1)
    true_weight = agent_types[1]
    log_samples = [math.log(num_samples, 2) for num_samples in samples]

    # Draw the graph
    plt.errorbar(log_samples, draw_means, yerr=draw_error, fmt='rs',
                 label='Sampled weights')
    for level, color, style, label in (
            (expected_weight, 'm', ':', 'Expected weight'),
            (true_weight, 'b', '--', 'Correct weight')):
        plt.axhline(level, color=color, linestyle=style, label=label)

    plt.suptitle('Convergence of sampled edge weights (%d draws)\n'
                 "'%s' agent prior type, '%s' edge weight strategy" %
                 (num_draws, agent_type_prior, edge_weight_strategy))
    plt.xlabel('log(Number of samples)')
    plt.ylabel('Edge weight (95% Confidence Intervals)')

    # Ticks sit at the log positions but carry the raw sample counts.
    plt.xticks(log_samples, samples)
    plt.margins(0.07)
    plt.legend(loc='best')

    plt.show()
# Example no. 5
# 0
    def __init__(self, num_nodes, agent_type_prior, edge_strategy,
                 edges_per_node, edge_weight_strategy, num_weight_samples):
        """ Build the underlying TrustGraph and empty result containers.

        Args:
            All six arguments are forwarded, in order, to the TrustGraph
            constructor; refer to TrustGraph for their meaning.
        """
        graph_args = (num_nodes, agent_type_prior, edge_strategy,
                      edges_per_node, edge_weight_strategy,
                      num_weight_samples)
        self.graph = TrustGraph(*graph_args)

        # Two-level mappings: a missing first-level key yields a fresh dict.
        self.global_ttms = defaultdict(dict)
        self.personalized_ttms = defaultdict(dict)
        self.info_scores = defaultdict(dict)

        # Plain mapping, initially empty.
        self.runtimes = {}
# Example no. 6
# 0
def efficiency_by_sybil_pct(num_iters,
                            num_strategic=None,
                            sybil_pcts=None,
                            cutlinks=True,
                            gensybils=True):
    """ Measure informativeness and efficiency as the sybil percentage varies.

    A separate batch of `num_iters` TrustGraphs is generated per sybil
    percentage. Every mechanism in MECHANISMS is run on each graph of the
    batch, and compute_informativeness / compute_efficiency of its scores
    are averaged over the batch. The time taken per run is printed.

    Args:
        num_iters: Number of graphs per sybil percentage.
        num_strategic: Number of strategic agents passed to each mechanism;
            None selects NUM_NODES / 2.
        sybil_pcts: Sybil percentages to try; a falsy value selects
            DEFAULT_SYBIL_PCTS.
        cutlinks: Forwarded unchanged to each mechanism.
        gensybils: Forwarded unchanged to each mechanism.

    Returns:
        A dict with keys 'info' and 'eff' (mechanism name -> array with one
        mean per sybil percentage), 'xticks', 'xlabel' and 'subtitle'.
    """
    if not sybil_pcts:
        sybil_pcts = DEFAULT_SYBIL_PCTS
    if num_strategic is None:
        num_strategic = NUM_NODES / 2

    num_points = len(sybil_pcts)

    # All graphs are created before any mechanism runs.
    graphs = []
    for _row in xrange(num_points):
        graphs.append([
            TrustGraph(NUM_NODES, 'uniform', 'uniform', NUM_EDGES, 'noisy',
                       NUM_SAMPLES) for _ in xrange(num_iters)
        ])
    informativeness = dict((n, np.zeros(num_points)) for n in NAMES)
    efficiency = dict((n, np.zeros(num_points)) for n in NAMES)

    for i, sybil_pct in enumerate(sybil_pcts):
        graph_row = graphs[i]
        # The same graphs of this row are handed to every mechanism in turn.
        for is_global, name, func in MECHANISMS:
            info = np.zeros(num_iters)
            eff = np.zeros(num_iters)
            for j, g in enumerate(graph_row):
                start_time = time.clock()

                scores = func(g, num_strategic, sybil_pct, cutlinks, gensybils)
                info[j] = compute_informativeness(g.agent_types, scores,
                                                  is_global)
                eff[j] = compute_efficiency(g.agent_types, scores, is_global)

                # The timing covers the mechanism and both evaluations.
                total_time = time.clock() - start_time
                print('%s took %.2f secs' % (name, total_time))
            informativeness[name][i] = info.mean()
            efficiency[name][i] = eff.mean()

    subtitle = ('%d nodes, %d edges/node, %d strategic, (%d iters)' %
                (NUM_NODES, NUM_EDGES, num_strategic, num_iters))
    return {
        'info': informativeness,
        'eff': efficiency,
        'xticks': sybil_pcts,
        'xlabel': '% Sybils created per strategic agent',
        'subtitle': subtitle
    }
# Example no. 7
# 0
def pagerank_outedge_robustness():
    """ Examine how much PageRank varies as we remove outedges for a node.

    Builds one TrustGraph, then for every k in 0..NUM_EDGES repeats
    NUM_ITERS times: strip all outedges of node 0, re-attach a random
    k-subset of its original outedges, and record the PageRank of node 0.
    The mean score per k is plotted on the current matplotlib figure;
    plt.show() is not called here.

    Raises:
        ValueError: From random.sample if node 0 starts with fewer than
            NUM_EDGES outedges (assumes TrustGraph gives every node exactly
            NUM_EDGES outedges -- TODO confirm).
    """
    g = TrustGraph(NUM_NODES, 'beta', 'cluster', NUM_EDGES, 'sample', 50)
    node = 0  # Arbitrarily picked, WLOG

    # Snapshot the original outedges as a list. Where out_edges() returns a
    # live view of the graph (NetworkX 2.x), the collection would otherwise
    # become empty as soon as the edges are removed below, and a view is not
    # a sequence random.sample can draw from.
    edges = list(g.out_edges(node, data=True))

    raw_prs = [[] for _ in xrange(NUM_EDGES + 1)]
    for num_kept in xrange(NUM_EDGES + 1):
        # Progress indicator; flush so each dot shows up immediately even
        # though no newline is written.
        sys.stdout.write('.')
        sys.stdout.flush()
        for _ in xrange(NUM_ITERS):
            # Remove all outedges. Copy first so the graph is not mutated
            # while its own edge view is being iterated.
            g.remove_edges_from(list(g.out_edges(node)))
            g.add_edges_from(random.sample(edges, num_kept))  # Random subset
            raw_prs[num_kept].append(nx.pagerank_numpy(g)[node])

    # Row k holds the NUM_ITERS scores measured with k outedges kept.
    prs = np.mean(np.array(raw_prs, dtype=float), axis=1)

    plt.plot(prs)
    plt.suptitle('PageRank robustness: PageRank score as outedges increases')
    plt.xlabel('Number of outedges for node 0')
    plt.ylabel('PageRank score for node 0')
# Example no. 8
# 0
def efficiency_by_strategic_counts(num_iters,
                                   strategic_counts=None,
                                   cutlinks=True,
                                   gensybils=True):
    """
    Measure informativeness and efficiency per number of strategic agents.

    1. Compute scores under manipulations.
    2. Compute informativeness WRT the number of strategic agents.
    3. Compute efficiency WRT the number of strategic agents.

    Args:
        num_iters: Number of graphs generated per strategic count.
        strategic_counts: Numbers of strategic agents to try; a falsy value
            selects DEFAULT_STRATEGIC_COUNTS.
        cutlinks: Forwarded unchanged to each mechanism.
        gensybils: Forwarded unchanged to each mechanism.

    Returns:
        A dict with keys 'info' and 'eff' (mechanism name -> array with one
        mean per strategic count), 'xticks', 'xlabel' and 'subtitle'.
    """
    if not strategic_counts:
        strategic_counts = DEFAULT_STRATEGIC_COUNTS

    num_points = len(strategic_counts)

    # One independent batch of graphs per strategic count, built up front.
    graphs = []
    for _row in xrange(num_points):
        graphs.append([
            TrustGraph(NUM_NODES, 'uniform', 'uniform', NUM_EDGES, 'noisy',
                       NUM_SAMPLES) for _ in xrange(num_iters)
        ])
    informativeness = dict((n, np.zeros(num_points)) for n in NAMES)
    efficiency = dict((n, np.zeros(num_points)) for n in NAMES)

    for i, num_strategic in enumerate(strategic_counts):
        graph_row = graphs[i]
        # The same graphs of this row are handed to every mechanism in turn.
        for is_global, name, func in MECHANISMS:
            info = np.zeros(num_iters)
            eff = np.zeros(num_iters)
            for j, g in enumerate(graph_row):
                scores = func(g, num_strategic, SYBIL_PCT, cutlinks, gensybils)
                info[j] = compute_informativeness(g.agent_types, scores,
                                                  is_global)
                eff[j] = compute_efficiency(g.agent_types, scores, is_global)
            informativeness[name][i] = info.mean()
            efficiency[name][i] = eff.mean()

    subtitle = ('%d nodes, %d edges/node, %d%% sybils (%d iters)' %
                (NUM_NODES, NUM_EDGES, int(100 * SYBIL_PCT), num_iters))
    return {
        'info': informativeness,
        'eff': efficiency,
        'xticks': strategic_counts,
        'xlabel': 'Number of Strategic Agents',
        'subtitle': subtitle
    }
def pagerank_outedge_robustness():
    """ Examine how much PageRank varies as we remove outedges for a node.

    One TrustGraph is generated. For each outedge count k from 0 to
    NUM_EDGES, NUM_ITERS trials are run: all outedges of node 0 are removed,
    k of its original outedges are put back at random, and the PageRank of
    node 0 is recorded. The per-k mean is plotted on the current matplotlib
    figure, which is left for the caller to show.

    Raises:
        ValueError: From random.sample when node 0 originally has fewer
            than NUM_EDGES outedges (presumably it always has exactly
            NUM_EDGES -- verify against TrustGraph).
    """
    g = TrustGraph(NUM_NODES, 'beta', 'cluster', NUM_EDGES, 'sample', 50)
    node = 0  # Arbitrarily picked, WLOG

    # Keep an independent list of the original outedges (with data). If
    # out_edges() hands back a live view, as in NetworkX 2.x, it would track
    # the removals below and leave nothing to sample from.
    original_edges = list(g.out_edges(node, data=True))

    raw_prs = [[] for _ in xrange(NUM_EDGES + 1)]
    for kept in xrange(NUM_EDGES + 1):
        # One progress dot per outedge count, flushed so it appears at once.
        sys.stdout.write('.')
        sys.stdout.flush()
        trial_scores = raw_prs[kept]
        for _ in xrange(NUM_ITERS):
            # Materialize the current outedges before removing them, so the
            # graph is never modified while its edge view is iterated.
            current_edges = list(g.out_edges(node))
            g.remove_edges_from(current_edges)  # Remove all outedges
            g.add_edges_from(random.sample(original_edges, kept))
            trial_scores.append(nx.pagerank_numpy(g)[node])

    # Average over trials: entry k is the mean score with k outedges.
    prs = np.mean(np.array(raw_prs, dtype=float), axis=1)

    plt.plot(prs)
    plt.suptitle('PageRank robustness: PageRank score as outedges increases')
    plt.xlabel('Number of outedges for node 0')
    plt.ylabel('PageRank score for node 0')
# Example no. 10
# 0
def efficiency_by_edge_count(num_iters,
                             edge_counts,
                             num_strategic,
                             sybil_pct,
                             cutlinks=True,
                             gensybils=True):
    """ Measure informativeness and efficiency as graph density varies.

    For every entry of `edge_counts`, `num_iters` TrustGraphs with that many
    edges per node are generated. Every mechanism in MECHANISMS is run on
    each of them, and compute_informativeness / compute_efficiency of its
    scores are averaged per edge count.

    Args:
        num_iters: Number of graphs generated per edge count.
        edge_counts: Edge counts (edges per node) to try.
        num_strategic: Number of strategic agents passed to each mechanism.
        sybil_pct: Sybil percentage passed to each mechanism; shown in the
            subtitle as int(100 * sybil_pct).
        cutlinks: Forwarded unchanged to each mechanism.
        gensybils: Forwarded unchanged to each mechanism.

    Returns:
        A dict with keys 'info' and 'eff' (mechanism name -> array with one
        mean per edge count), 'xticks', 'xlabel' and 'subtitle'.
    """
    # All graphs are created before any mechanism runs.
    graphs = []
    for count in edge_counts:
        graphs.append([
            TrustGraph(NUM_NODES, 'uniform', 'uniform', count, 'noisy',
                       NUM_SAMPLES) for _ in xrange(num_iters)
        ])

    num_points = len(edge_counts)
    informativeness = dict((n, np.zeros(num_points)) for n in NAMES)
    efficiency = dict((n, np.zeros(num_points)) for n in NAMES)

    for i in xrange(num_points):
        graph_row = graphs[i]
        # The same graphs of this row are handed to every mechanism in turn.
        for is_global, name, func in MECHANISMS:
            info = np.zeros(num_iters)
            eff = np.zeros(num_iters)
            for j, g in enumerate(graph_row):
                scores = func(g, num_strategic, sybil_pct, cutlinks, gensybils)
                info[j] = compute_informativeness(g.agent_types, scores,
                                                  is_global)
                eff[j] = compute_efficiency(g.agent_types, scores, is_global)
            informativeness[name][i] = info.mean()
            efficiency[name][i] = eff.mean()

    subtitle = ('%d nodes, %d strategic, %d%% sybils (%d iters)' %
                (NUM_NODES, num_strategic, int(100 * sybil_pct), num_iters))
    return {
        'info': informativeness,
        'eff': efficiency,
        'xticks': edge_counts,
        'xlabel': 'Number of edges per node',
        'subtitle': subtitle
    }