def dpa(n, m):
    """
    Grow a directed graph with 'n' nodes using the DPA procedure.

    The graph starts as the result of 'pro_1.make_complete_graph(m)' and
    is then extended one node at a time: every node number in
    range(m, n) is mapped to whatever a single 'run_trial(m)' call on a
    shared 'DPA.DPATrial(m)' object returns.

    Because the same trial object is reused for every new node, the nodes
    that exist early are the ones picked most often (the 'rich gets
    richer' effect described by the original author).

    Requires 1 <= m <= n; this is checked with an assert.
    Returns the dictionary produced by 'make_complete_graph', extended
    with the new nodes.
    """
    assert 1 <= m and m <= n

    # seed graph on the first 'm' nodes
    result = pro_1.make_complete_graph(m)

    # one trial object is shared by all the nodes that are added below
    trial = DPA.DPATrial(m)

    for new_node in range(m, n):
        picked = trial.run_trial(m)
        result[new_node] = picked

    return result
def random_DPA_directed_graph(num_nodes, m_nodes):
    """
    Build a random directed graph with the Directed Preferential
    Attachment (DPA) algorithm.

    Args:
        num_nodes: integer, total number of nodes the graph should have.
        m_nodes: integer, size of the initial complete graph and the
            argument handed to every run_trial call.

    Returns:
        The dictionary returned by make_complete_graph(m_nodes), with one
        extra entry per node in range(m_nodes, num_nodes); each extra
        entry holds the result of one DPATrial.run_trial(m_nodes) call.
    """
    # The trial helper is created first, then the seed graph.
    trial = alg_dpa_trial.DPATrial(m_nodes)
    digraph = make_complete_graph(m_nodes)

    # Attach the remaining nodes in increasing order.
    for new_node in range(m_nodes, num_nodes):
        digraph[new_node] = trial.run_trial(m_nodes)

    return digraph
def dpa_graph(num_nodes, m):
    """
    Generate a random directed graph with the DPA algorithm.

    m (m <= num_nodes) is fixed for the whole procedure.  The graph
    starts as make_complete_directed_graph(m) and is grown by adding
    num_nodes - m nodes; each new node receives the neighbours returned
    by one run_trial(m) call.  As the original notes explain, a node may
    be chosen more than once in an iteration and duplicates are
    eliminated, so a new node may end up with fewer than m neighbours.

    Returns the resulting dictionary keyed by node number.
    """
    result = make_complete_directed_graph(m)
    trial = alg_dpa_trial.DPATrial(m)

    for new_node in range(m, num_nodes):
        neighbours = trial.run_trial(m)
        result[new_node] = neighbours

    return result

# Scratch code kept from the original file (all disabled):
# citation_graph = load_graph(CITATION_URL)
# m =define_m(citation_graph)
# m = 13
# print(in_degree_distribution(EX_GRAPH1))
# print(make_complete_graph(3))
# print(compute_in_degrees(EX_GRAPH1))
# num_nodes = 27770
# p = .3
# num_nodes = 10
# digraph = er_graph(num_nodes, p)
# digraph = dpa_graph(num_nodes, m)
# print(digraph)
# normalized_distribution(citation_graph)
def dpa(n_nodes, m_nodes):
    '''
    DPA helper for Question 4.

    Starts from project1.make_complete_graph(m_nodes) and maps every node
    number in range(m_nodes, n_nodes) to the result of a run_trial(m_nodes)
    call on one shared DPATrial object. Returns the extended graph.
    '''
    result = project1.make_complete_graph(m_nodes)
    trial = alg_dpa_trial.DPATrial(m_nodes)

    for new_node in range(m_nodes, n_nodes):
        chosen = trial.run_trial(m_nodes)
        result[new_node] = chosen

    return result
def make_complete_graph(num_nodes):
    """
    Takes the number of nodes num_nodes and returns a dictionary
    corresponding to a complete directed graph with the specified number
    of nodes.

    Nodes are numbered 0 .. num_nodes - 1.  Each key maps to the set of
    all other nodes, so there are no self-loops.  A non-positive
    num_nodes yields an empty dictionary.

    The previous implementation filled each entry from a DPATrial
    run_trial call, which selects neighbours at random; the result was
    therefore not a complete graph.  The graph is now built
    deterministically.
    """
    all_nodes = set(range(num_nodes))
    # 'all_nodes - {node}' creates a fresh set per key, so the values
    # never share state with each other.
    return {node: all_nodes - {node} for node in range(num_nodes)}
def dpa_graph(n, m):
    """
    Build a DPA graph with n nodes, starting from m nodes.

    Step 1: the graph begins as make_complete_graph(m).
    Step 2: for every node number from m up to n - 1 (one node per
    iteration) the node is mapped to the result of run_trial(m) on a
    single shared DPATrial object.

    Returns the resulting dictionary.
    """
    result = make_complete_graph(m)
    trial = alg.DPATrial(m)

    for new_node in range(m, n):
        out_neighbours = trial.run_trial(m)
        result[new_node] = out_neighbours

    return result
def DPA(amount_vertex, start_vertex):
    """
    Build a DPA graph with amount_vertex vertices.

    The graph starts as make_complete_graph(start_vertex).  Every vertex
    number in range(start_vertex, amount_vertex) is then mapped to the
    result of run_trial(start_vertex) on one shared DPATrial object.

    Returns the resulting graph.
    """
    result = make_complete_graph(start_vertex)
    trial = alg_dpa_trial.DPATrial(start_vertex)

    for vertex in range(start_vertex, amount_vertex):
        chosen = trial.run_trial(start_vertex)
        result[vertex] = chosen

    return result
def algorithm_dpa(n, m):
    """
    Build a DPA graph with n nodes, seeded with m nodes.

    Starts from project1.make_complete_graph(m) and maps each node number
    in range(m, n) to the result of run_trial(m) on one shared DPATrial
    object.  Returns the extended graph.
    """
    result = project1.make_complete_graph(m)
    trial = alg_dpa_trial.DPATrial(m)

    for new_node in range(m, n):
        attached_to = trial.run_trial(m)
        result[new_node] = attached_to

    return result
# Load the citation graph and compute its in-degree distribution.
citation_graph = alg_load_graph.load_graph(CITATION_URL)
cite_dist = in_degree_distribution(citation_graph)

# ---- Question 3: average out-degree of the citation graph ----

# Not used below; retained in case code outside this section reads it.
all_vertex = citation_graph.keys()

# Total number of edges = sum of the sizes of all adjacency collections.
out_degree = sum(len(citation_graph[vertex]) for vertex in citation_graph)

# Divide by the number of nodes actually loaded rather than a hard-coded
# 27770.0, so the average stays correct if the data set changes.
# float() keeps true division under Python 2 as well.
if citation_graph:
    ave_out = out_degree / float(len(citation_graph))
else:
    ave_out = 0.0

# print(...) with a single argument behaves the same under Python 2 and 3.
print(ave_out)

# ---- Question 4: DPA graph comparable to the citation graph ----

m = 13
n = 27770

# NOTE(review): this rebinds the module-level name 'dpa_graph', which is
# also used as a function name elsewhere in this file - confirm that the
# shadowing is intended.
dpa_graph = make_complete_graph(m)
trial = alg_dpa_trial.DPATrial(m)
for index in range(m, n):
    nbd = trial.run_trial(m)
    dpa_graph[index] = nbd

dpa_dist = in_degree_distribution(dpa_graph)
def run_suite():
    """
    Informal test driver.

    Creates a poc_simpletest.TestSuite, feeds it a fixed series of checks
    against the app_1 helpers and the dpa.DPATrial class, and finally asks
    the suite to report how many tests ran and failed.
    """
    print("\nSTARTING TESTS:")
    suite = poc_simpletest.TestSuite()
    check = suite.run_test

    one_third = 0.3333333333333333
    two_thirds = 0.6666666666666666

    # 1. normalized_distribution
    outcome = app_1.normalized_distribution({0: {1, 2}, 1: set(), 2: set()})
    check(outcome,
          {0: one_third, 1: two_thirds},
          "Test #1a: 'normalized_distribution' method")

    outcome = app_1.normalized_distribution({0: {1}, 1: {2}, 2: {1}})
    check(outcome,
          {0: one_third, 1: one_third, 2: one_third},
          "Test #1b: 'normalized_distribution' method")

    outcome = app_1.normalized_distribution({0: {1}, 1: {0, 2}, 2: {0, 1}})
    check(outcome,
          {1: one_third, 2: two_thirds},
          "Test #1c: 'normalized_distribution' method")

    # 2. random_digraph
    # probability 1 is expected to give a COMPLETE GRAPH
    outcome = app_1.random_digraph(3, 1)
    check(outcome,
          {0: {1, 2}, 1: {0, 2}, 2: {0, 1}},
          "Test #2a: 'random_digraph' - full probability")

    # probability 0 is expected to give a graph with no edges
    outcome = app_1.random_digraph(4, 0)
    check(outcome,
          {0: set(), 1: set(), 2: set(), 3: set()},
          "Test #2b: 'random_digraph' - zero probability")

    # 3. compute_edges
    outcome = app_1.compute_edges({
        0: {1, 2, 3},
        1: {0, 2, 3},
        2: {0, 1, 3},
        3: {0, 1},
    })
    check(outcome, 11, "Test #3a: 'compute_edges' method")

    outcome = app_1.compute_edges({0: {1}, 1: {2, 3}, 2: {1}, 3: {1}})
    check(outcome, 5, "Test #3b: 'compute_edges' method")

    outcome = app_1.compute_edges({0: set(), 1: set(), 2: set(), 3: set()})
    check(outcome, 0, "Test #3c: 'compute_edges' method")

    # 4. basic functionality of the DPA CLASS
    outcome = dpa.DPATrial(3)._node_numbers
    check(outcome,
          [0, 0, 0, 1, 1, 1, 2, 2, 2],
          "Test #4a: 'self._node_numbers' property")

    outcome = dpa.DPATrial(4)._node_numbers
    check(outcome,
          [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3],
          "Test #4b: 'self._node_numbers' property")

    # 5. the DPA methods
    outcome = dpa.DPATrial(3).run_trial(2)
    suite.run_test_in_range_multi(outcome,
                                  [0, 1, 2],
                                  "Test #5a: 'run_trial' method")

    seed_graph = {0: {3}, 1: {0, 2}, 2: {0, 1, 3}, 3: {0, 1, 2}}
    outcome = len(app_1.dpa_stand_alone(seed_graph, 6, 4))
    check(outcome, 6, "Test #5b: 'dpa_stand_alone' method")

    # 6. merge_data
    # all keys in both dictionaries
    outcome = app_1.merge_data({0: 2, 1: 2, 2: 2}, {0: 4, 1: 2, 2: 1})
    check(outcome,
          {0: 0.5, 1: 1.0, 2: 2.0},
          "Test #6a: 'merge_data' property")

    # not all keys in both dictionaries
    outcome = app_1.merge_data({0: 2, 1: 2, 2: 2, 3: 2}, {0: 4, 2: 1})
    check(outcome,
          {0: 0.5, 2: 2.0},
          "Test #6b: 'merge_data' property")

    # 7. report number of tests and failures
    suite.report_results()