def rwrw_size_estimate(graph, n_samples=-1, walk_length=200, thinning=40):
    """Estimate the size of ``graph`` from nodes sampled by ``RWRW``.

    Parameters:
        graph: object providing ``size()`` and ``degree(node)``.
        n_samples: number of samples to request; ``-1`` (the default)
            means "use ``graph.size() * 2``".
        walk_length: forwarded to ``RWRW`` as ``length``.
        thinning: forwarded to ``RWRW`` as ``thinning``.

    Returns:
        Whatever ``graphsize.estimate_size`` returns for the sum of sampled
        degrees, the sum of ``graphsize.inverse_seq`` over those degrees,
        and ``graphsize.collision_count`` of the samples.

    NOTE(review): if ``graph`` is a networkx graph, ``size()`` counts edges
    rather than nodes -- confirm that is the intended basis for the default.
    """
    # -1 is the sentinel for "derive the sample count from the graph".
    if n_samples == -1:
        n_samples = 2 * graph.size()

    drawn = RWRW(graph, n_samples, length=walk_length, thinning=thinning)
    drawn_degrees = [graph.degree(vertex) for vertex in drawn]

    return graphsize.estimate_size(
        sum(drawn_degrees),
        sum(graphsize.inverse_seq(drawn_degrees)),
        graphsize.collision_count(drawn),
    )
def wis_wr_size_estimate(graph, n_samples=-1):
    """Estimate the size of ``graph`` from nodes sampled by ``graphsize.WIS_WR``.

    The sampler is given the output of
    ``graphsize.degree_weighted_nodes_for(graph)`` together with the
    requested number of samples.

    Parameters:
        graph: object providing ``size()`` and ``degree(node)``.
        n_samples: number of samples to request; ``-1`` (the default)
            means "use ``graph.size() * 2``".

    Returns:
        Whatever ``graphsize.estimate_size`` returns for the sum of sampled
        degrees, the sum of ``graphsize.inverse_seq`` over those degrees,
        and ``graphsize.collision_count`` of the samples.

    NOTE(review): if ``graph`` is a networkx graph, ``size()`` counts edges
    rather than nodes -- confirm that is the intended basis for the default.
    """
    # -1 is the sentinel for "derive the sample count from the graph".
    if n_samples == -1:
        n_samples = 2 * graph.size()

    weighted_nodes = graphsize.degree_weighted_nodes_for(graph)
    drawn = graphsize.WIS_WR(weighted_nodes, n_samples)
    drawn_degrees = [graph.degree(vertex) for vertex in drawn]

    return graphsize.estimate_size(
        sum(drawn_degrees),
        sum(graphsize.inverse_seq(drawn_degrees)),
        graphsize.collision_count(drawn),
    )
def estimate_size_with_mhrw(graph, n_samples=-1, thinning=1, random_walk_length=20):
    """Estimate the size of ``graph`` from nodes sampled by ``MHRW``.

    The three intermediate statistics are printed to stdout before the
    estimate is returned.

    Parameters:
        graph: object providing ``size()``, ``nodes()`` and ``degree(node)``.
        n_samples: number of samples to request; ``-1`` (the default)
            means "use ``graph.size() * 4``".
        thinning: forwarded to ``MHRW`` as ``thinning``.
        random_walk_length: forwarded to ``MHRW`` as ``length``.

    Returns:
        Whatever ``graphsize.estimate_size`` returns for the sum of sampled
        degrees, the sum of ``graphsize.inverse_seq`` over those degrees,
        and ``graphsize.collision_count`` of the samples.

    NOTE(review): if ``graph`` is a networkx graph, ``size()`` counts edges
    rather than nodes -- confirm that is the intended basis for the default.
    """
    # -1 is the sentinel for "derive the sample count from the graph".
    if n_samples == -1:
        n_samples = graph.size() * 4

    # Sample the graph and process the results.
    node_samples = MHRW(graph, graph.nodes(), n_samples,
                        length=random_walk_length, thinning=thinning)
    degrees = [graph.degree(node) for node in node_samples]
    sum_of_degrees = sum(degrees)
    sum_of_inverse_degrees = sum(graphsize.inverse_seq(degrees))
    collisions = graphsize.collision_count(node_samples)

    # Each print takes ONE parenthesised, pre-formatted string so that it
    # parses and prints identically whether ``print`` is the Python 2
    # statement or the Python 3 function. The doubled space reproduces the
    # separator the old ``print 'label: ', value`` statement inserted.
    print('Sum of degrees:  %s' % (sum_of_degrees,))
    print('Sum of inverse degrees:  %s' % (sum_of_inverse_degrees,))
    print('Repeated samples:  %s' % (collisions,))

    return graphsize.estimate_size(sum_of_degrees, sum_of_inverse_degrees, collisions)
def reweighted_sample(samples, graph):
    """Draw a new sample of the same length from ``samples`` using per-node weights.

    ``samples`` is binned with ``graphsize.bin_samples``; each
    ``(node, occurrences)`` bin is assigned a weight by
    ``graphsize.hh_node_weight`` and the weighted nodes are resampled with
    ``graphsize.WIS_WR``.

    Parameters:
        samples: sized collection of sampled nodes (``len()`` is taken).
        graph: object providing ``degree(node)``.

    Returns:
        The result of ``graphsize.WIS_WR`` asked for ``len(samples)`` draws.

    NOTE(review): "hh" presumably stands for Hansen-Hurwitz -- confirm
    against ``graphsize.hh_node_weight``.
    """
    sample_degrees = [graph.degree(vertex) for vertex in samples]
    inverse_degree_total = sum(graphsize.inverse_seq(sample_degrees))
    sample_count = len(samples)

    # Reuse the WIS_WR weighted-sampling routine: build one (node, weight)
    # pair per bin, e.g. [(1, 0.2), (3, 0.4), (2, 0.5), ...].
    weighted_nodes = []
    for vertex, occurrences in graphsize.bin_samples(samples):
        weight = graphsize.hh_node_weight(
            graph.degree(vertex), occurrences, inverse_degree_total)
        weighted_nodes.append((vertex, weight))

    return graphsize.WIS_WR(weighted_nodes, sample_count)
def test_inverse_seq(self):
    # graphsize.inverse_seq applied to these values must give the list of
    # their reciprocals, in the same order.
    values = [1, 0.5, 0.25, 1]
    expected = [1, 2, 4, 1]
    actual = graphsize.inverse_seq(values)
    self.assertEqual(expected, actual)