def effect_of_group_sizes(self):
    """Generate the evaluation graphs for (ii): varying the group ratio p_g_a.

    For each ratio in ``self.group_ratios``, load (or build) the matching
    synthetic graph, run the greedy seed selection, and collect the
    per-group influence curves and seed lists.

    Returns:
        tuple: (influenced_a_list, influenced_b_list, seeds_a_list, seeds_b_list),
        one entry per group ratio.
    """
    influenced_a_list = []
    influenced_b_list = []
    seeds_a_list = []
    seeds_b_list = []
    # Candidate RNG seeds for a future multi-run average (currently unused).
    seed_list = [11223344, 11224433, 33112244, 22113344]
    for group_ratio in self.group_ratios:
        # A loop here could run multiple times over the seeds above.
        # BUG FIX: the original string lacked the f-prefix, so the literal
        # text '{self.filename}_...' was used as the file name.
        filename = f'{self.filename}_{self.num_nodes}_{self.p_with}_{self.p_across}_{group_ratio}'
        # read in graph
        G = ut.load_graph(filename, self.p_with, self.p_across, group_ratio,
                          self.num_nodes)
        influenced, influenced_a, influenced_b, seeds_a, seeds_b = self.calculate_greedy(
            filename, G)
        stats = ut.graph_stats(G, print_stats=True)
        influenced_a_list.append(influenced_a)
        influenced_b_list.append(influenced_b)
        seeds_a_list.append(seeds_a)
        seeds_b_list.append(seeds_b)
    print(" ******* Finished group size analysis *******")
    return (influenced_a_list, influenced_b_list, seeds_a_list, seeds_b_list)
def compare_with_greedy(self):
    """Compare plain greedy against the log- and root-fairness objectives.

    Runs ``generalGreedy_node_parallel`` for each algorithm type in
    ``self.types`` (1 = greedy, 2 = log with ``self.gammas_log``,
    3 = root with ``self.gammas_root``), plots each run, and finally plots
    the influence curves of all runs against each other.
    """
    influenced_a_list = []
    influenced_b_list = []
    labels = []
    # BUG FIX: missing f-prefix — the literal placeholder text was used
    # as the file name.
    filename = f'{self.filename}_{self.num_nodes}_{self.p_with}_{self.p_across}_{self.group_ratio}'
    # self.G = ut.load_graph(filename, self.p_with, self.p_across, group_ratio, self.num_nodes)
    stats = ut.graph_stats(self.G, print_stats=False)
    for t in self.types:
        if t == 1:
            gammas = [1.0]  # plain greedy ignores gamma
        elif t == 2:
            gammas = self.gammas_log
        elif t == 3:
            gammas = self.gammas_root
        for gamma in gammas:
            influenced, influenced_a, influenced_b, seeds_a, seeds_b = generalGreedy_node_parallel(
                filename, self.G, self.seed_size, gamma, type_algo=t)
            ut.plot_influence(influenced_a, influenced_b, self.seed_size, filename,
                              stats['group_a'], stats['group_b'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b])
            influenced_a_list.append(influenced_a)
            influenced_b_list.append(influenced_b)
            # BUG FIX: the gamma labels lacked the f-prefix, so every run
            # was labelled with the literal text 'Log_gamma{gamma}'.
            if t == 1:
                label = "Greedy"
            elif t == 2:
                label = f'Log_gamma{gamma}'
            elif t == 3:
                label = f'Root_gamma{gamma}'
            labels.append(label)
    filename = "results/greedy_log_root_"
    ut.plot_influence_diff(influenced_a_list, influenced_b_list, self.seed_size,
                           labels, filename, stats['group_a'], stats['group_b'])
def effect_of_across_group_connectivity(self):
    """Generate the evaluation graphs for (i): varying p_across at group ratio 0.5.

    For each cross-group edge probability in ``self.p_acrosses``, load the
    matching graph, run greedy seed selection, plot the per-group influence,
    and collect the results.

    Returns:
        tuple: (influenced_a_list, influenced_b_list, seeds_a_list, seeds_b_list),
        one entry per p_across value.
    """
    # Have to do this for multiple runs, and/or multiple graphs.
    influenced_a_list = []
    influenced_b_list = []
    seeds_a_list = []
    seeds_b_list = []
    group_ratio = 0.5  # fixed, to isolate the effect of p_across
    for p_across in self.p_acrosses:
        # BUG FIX: missing f-prefix — the literal placeholder text was used
        # as the file name.
        filename = f'{self.filename}_{self.num_nodes}_{self.p_with}_{p_across}_{group_ratio}'
        # read in graph
        G = ut.load_graph(filename, self.p_with, p_across, group_ratio,
                          self.num_nodes)
        # BUG FIX: pass G as well, matching effect_of_group_sizes; the
        # original omitted it.
        influenced, influenced_a, influenced_b, seeds_a, seeds_b = self.calculate_greedy(
            filename, G)
        # BUG FIX: stats was commented out but used by plot_influence below,
        # which raised NameError — restored.
        stats = ut.graph_stats(G, print_stats=True)
        ut.plot_influence(influenced_a, influenced_b, self.seed_size, filename,
                          stats['group_a'], stats['group_b'],
                          [len(S_a) for S_a in seeds_a],
                          [len(S_b) for S_b in seeds_b])
        influenced_a_list.append(influenced_a)
        influenced_b_list.append(influenced_b)
        seeds_a_list.append(seeds_a)
        seeds_b_list.append(seeds_b)
    print(" ******* Finished connectivity analysis *******")
    return (influenced_a_list, influenced_b_list, seeds_a_list, seeds_b_list)
def generalGreedy_node_set_cover(filename, G, budget, h_l=0, color='all', seed_size_budget=14, gamma_a=1e-2, gamma_b=0, type_algo=1):
    '''Greedy set-cover seed selection over a 3-group (red/blue/neutral) graph.

    Repeatedly adds the node with the largest budget-capped summed reach
    fraction across the three groups until the capped reach totals
    ``3 * budget``.

    Input:
        filename -- base name for the result files (a type-specific suffix
                    is appended)
        G -- networkx Graph object; each node carries a 'color' attribute
             ('red', 'blue', anything else is treated as neutral/purple)
        budget -- fraction of each group's population to be influenced
        h_l, color -- forwarded to the per-node evaluator (type_algo == 1)
        seed_size_budget -- alternative stopping criterion; currently unused,
                            kept for interface compatibility
        gamma_a, gamma_b -- timing discounts used by type_algo 2 / 3
        type_algo -- 1: plain set cover; 2: timed with (gamma_a, gamma_b);
                     3: timed with gamma_a for both groups
    Output:
        (influenced, influenced_r, influenced_b, influenced_n,
         seeds_r, seeds_b, seeds_n) -- per-iteration totals and the per-group
        seed id lists so the influence run can be recreated.
    '''
    stats = ut.graph_stats(G, print_stats=False)
    # BUG FIX: the type 2/3 suffixes lacked the f-prefix, so the literal
    # text '{budget}' etc. ended up in the file name.
    if type_algo == 1:
        filename = filename + '_set_cover_reach_' + str(budget)
    elif type_algo == 2:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
    elif type_algo == 3:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'

    reach = 0.0
    S = []  # set of selected seed nodes
    influenced = []
    influenced_r = []
    influenced_b = []
    influenced_n = []
    seeds_r = []
    seeds_b = []
    seeds_n = []

    i = 0
    # Add a node to S if it achieves maximum propagation for current chosen + this node.
    while reach < 3 * budget:
        # Evaluate the marginal gain of every candidate node in parallel
        # (the outer selection loop itself cannot be parallelised).
        pool = multiprocessing.Pool(multiprocessing.cpu_count() - 1)
        if type_algo == 1:
            results = pool.map(map_select_next_seed_set_cover,
                               ((G, S, v, h_l, color) for v in G.nodes()))
        elif type_algo == 2:
            results = pool.map(map_IC_timing,
                               ((G, S, v, gamma_a, gamma_b) for v in G.nodes()))
        elif type_algo == 3:
            results = pool.map(map_IC_timing,
                               ((G, S, v, gamma_a, gamma_a) for v in G.nodes()))
        pool.close()
        pool.join()

        s = PQ()  # priority queue; negated priorities make it a max-heap
        for v, p, p_a, p_b, p_c in results:
            # BUG FIX: the neutral-group term used p_b; it must use p_c.
            s.add_task(v, -(min(p_a / stats['group_r'], budget)
                            + min(p_b / stats['group_b'], budget)
                            + min(p_c / stats['group_n'], budget)))
        node, priority = s.pop_item()
        S.append(node)

        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_r.append(I_a)
        influenced_b.append(I_b)
        influenced_n.append(I_c)

        # Partition the current seed set by group; the id lists let the
        # influence run be recreated later.
        S_red = []
        S_blue = []
        S_purple = []
        group = G.nodes[node]['color']
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)
        seeds_r.append(S_red)
        seeds_b.append(S_blue)
        seeds_n.append(S_purple)

        # Each group's reach is capped at `budget`, so `reach` saturates
        # at 3 * budget and the loop terminates once every cap is met.
        reach_a = I_a / stats['group_r']
        reach_b = I_b / stats['group_b']
        reach_c = I_c / stats['group_n']
        reach = (min(reach_a, budget) + min(reach_b, budget)
                 + min(reach_c, budget))
        # BUG FIX: ' each: ' typo corrected to ' reach: ' in the log line.
        print(str(i + 1) + ' Node ID ' + str(node) + ' group ' + str(group)
              + ' Ia = ' + str(I_a) + ' Ib ' + str(I_b) + ' Ic ' + str(I_c)
              + ' reach: ' + str(reach) + ' reach_a ' + str(reach_a)
              + ' reach_b ' + str(reach_b) + ' reach_c ' + str(reach_c))
        i += 1

    ut.write_files(filename, influenced, influenced_r, influenced_b,
                   influenced_n, seeds_r, seeds_b, seeds_n)
    return (influenced, influenced_r, influenced_b, influenced_n, seeds_r,
            seeds_b, seeds_n)
def generalGreedy_node_parallel(filename, G, budget, h_l, gamma1, gamma2, beta1=1.0, beta2=1.0, type_algo=1):
    '''Greedy seed selection (parallel marginal-gain evaluation), 3-group variant.

    Selects ``budget`` seed nodes one at a time, each round evaluating every
    candidate node's gain in a multiprocessing pool. Resumes from cached
    result files when they exist.

    Input:
        filename -- base name for the result files (a type-specific suffix
                    is appended)
        G -- networkx Graph object; each node carries a 'color' attribute
             ('red', 'blue', anything else is treated as neutral/purple)
        budget -- number of initial seed nodes needed
        h_l -- forwarded to the per-node evaluator and to map_fair_IC
        gamma1, gamma2 -- fairness exponents (log / root objectives)
        beta1, beta2 -- additional root-objective weights
        type_algo -- 1: plain greedy; 2: log objective; 3: root objective;
                     4: root-majority objective
    Output:
        (influenced, influenced_a, influenced_b, influenced_c,
         seeds_a, seeds_b, seeds_c) -- per-iteration totals and the per-group
        seed id lists so the influence run can be recreated.
    '''
    S = []  # set of selected seed nodes
    influenced = []
    influenced_a = []
    influenced_b = []
    influenced_c = []
    seeds_a = []
    seeds_b = []
    seeds_c = []
    seed_range = []
    # BUG FIX: the type 2/3/4 suffixes lacked the f-prefix, so the literal
    # text '{gamma1,gamma2}' etc. ended up in the file name.
    if type_algo == 1:
        filename = filename + '_greedy_'
    elif type_algo == 2:
        filename = filename + f'_log_gamma_{gamma1,gamma2}_'
    elif type_algo == 3:
        filename = filename + f'_root_gamma_{gamma1}_beta_{beta1,beta2}_'
    elif type_algo == 4:
        filename = filename + f'_root_majority_gamma_{gamma1}_beta_{beta1,beta2}_'

    stats = ut.graph_stats(G, print_stats=False)
    try:
        # Resume from previously written result files if they exist.
        influenced, influenced_a, influenced_b, influenced_c, seeds_a, seeds_b, seeds_c = ut.read_files(
            filename)
        S = seeds_a[-1] + seeds_b[-1] + seeds_c[-1]
        if len(S) >= budget:
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print("\n\n")
            print(influenced_c)
            print(" Seed length ", len(S))
            ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                              filename, stats['group_a'], stats['group_b'],
                              stats['group_c'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b],
                              [len(S_c) for S_c in seeds_c])
            return (influenced, influenced_a, influenced_b, influenced_c,
                    seeds_a, seeds_b, seeds_c)
        else:
            # Only the remaining seeds need to be selected.
            seed_range = range(budget - len(S))
    except FileNotFoundError:
        print('(unknown) not Found ')
        seed_range = range(budget)

    # Add a node to S if it achieves maximum propagation for current chosen + this node.
    for i in seed_range:
        # The outer selection loop cannot be parallelised; each round's
        # per-candidate evaluation is.
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        if type_algo == 1:
            results = pool.starmap(map_select_next_seed_set_cover,
                                   zip(repeat(G), repeat(S), list(G.nodes()),
                                       repeat(h_l)))
        elif type_algo == 2:
            results = pool.map(map_select_next_seed_log_greedy,
                               ((G, S, v, gamma1, gamma2) for v in G.nodes()))
        elif type_algo == 3:
            results = pool.map(map_select_next_seed_root_greedy,
                               ((G, S, v, gamma1, beta1, beta2) for v in G.nodes()))
        elif type_algo == 4:
            results = pool.map(map_select_next_seed_root_majority_greedy,
                               ((G, S, v, gamma1) for v in G.nodes()))
        pool.close()
        pool.join()

        s = PQ()  # priority queue; negated priorities make it a max-heap
        for v, priority, p_a, p_b, p_c in results:
            s.add_task(v, -priority)
        node, priority = s.pop_item()
        S.append(node)

        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        influenced_c.append(I_c)

        S_red = []
        S_blue = []
        S_purple = []
        group = G.nodes[node]['color']
        print(str(i + 1) + ' Selected Node is ' + str(node) + ' group '
              + str(group) + ' Ia = ' + str(I_a) + ' Ib = ' + str(I_b)
              + ' Ic = ' + str(I_c))
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            # BUG FIX: this was a second `if`, so red nodes fell through to
            # the else-branch and were double-counted as purple.
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)
        # Seed id lists per group so the influence run can be recreated.
        seeds_a.append(S_red)
        seeds_b.append(S_blue)
        seeds_c.append(S_purple)

    # NOTE(review): the cached-result branch above plots with
    # stats['group_a'/'group_b'/'group_c'] while this final plot uses
    # 'group_r'/'group_b'/'group_n' — confirm which keys graph_stats
    # actually provides; one of the two is likely wrong.
    ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                      filename, stats['group_r'], stats['group_b'],
                      stats['group_n'],
                      [len(S_a) for S_a in seeds_a],
                      [len(S_b) for S_b in seeds_b],
                      [len(S_c) for S_c in seeds_c])
    ut.write_files(filename, influenced, influenced_a, influenced_b,
                   influenced_c, seeds_a, seeds_b, seeds_c)
    return (influenced, influenced_a, influenced_b, influenced_c, seeds_a,
            seeds_b, seeds_c)
# Experiment driver: sweep p_across (and group ratio), comparing plain greedy
# (type 1) against the log-fairness objective (type 2) over a range of gammas.
# Depends on module-level names not visible here: ut, p_acrosses, num_nodes,
# p_with — presumably defined earlier in the file; verify.
influenced_a_list = []
influenced_b_list = []
labels = []
seed_size = 30  # number of seed nodes selected per run
for p_across in p_acrosses:
    group_ratios = [0.7]  #,0.5,0.55, 0.6, 0.65]
    for group_ratio in group_ratios:
        #group_ratio = 0.5 #0.7
        filename = f'results/synthetic_data_{num_nodes}_{p_with}_{p_across}_{group_ratio}'
        # read in graph
        G = ut.load_graph(filename, p_with, p_across, group_ratio, num_nodes)
        ut.graph_stats(G)
        gammas = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 2.0, 2.5]
        #gamma = 2.5
        types = [1, 2]  # 1 = plain greedy, 2 = log objective
        for t in types:
            if t == 1:
                gammas = [1.0]  # plain greedy ignores gamma
            elif t == 2:
                gammas = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 2.0, 2.5]
            for gamma in gammas:
                # NOTE(review): this call unpacks 3 return values and passes
                # (filename, G, seed_size, gamma, type_algo), which does not
                # match the generalGreedy_node_parallel definition visible in
                # this file (7 return values; h_l/gamma1/gamma2 required) —
                # looks written against an older signature; confirm before use.
                influenced, influenced_a, influenced_b = generalGreedy_node_parallel(
                    filename, G, seed_size, gamma, type_algo=t)
                influenced_a_list.append(influenced_a)
                influenced_b_list.append(influenced_b)
def generalGreedy_node_set_cover(filename, G, budget, gamma_a=1e-2, gamma_b=0, type_algo=1):
    ''' Finds initial seed set S using general greedy heuristic
    (two-group red/blue set-cover variant).

    NOTE(review): this redefines generalGreedy_node_set_cover — an earlier
    definition with the same name exists in this file; confirm which one is
    intended to win.

    Input: G -- networkx Graph object; each node carries a 'color'
               attribute ('red' or anything else, treated as blue)
    budget -- fraction of population that needs to be influenced in both groups
    gamma_a, gamma_b -- timing discounts used by type_algo 2 / 3
    type_algo -- 1: plain set cover; 2: timed with (gamma_a, gamma_b);
                 3: timed with gamma_a for both groups
    Output: (influenced, influenced_a, influenced_b, seeds_a, seeds_b) --
            per-iteration totals and per-group seed id lists
    '''
    #import time
    #start = time.time()
    #R = 200 # number of times to run Random Cascade
    stats = ut.graph_stats(G, print_stats=False)
    # Append a type-specific suffix so each configuration caches separately.
    if type_algo == 1:
        filename = filename + f'_set_cover_reach_{budget}_'
    elif type_algo == 2:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
    elif type_algo == 3:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'
    reach = 0.0
    S = []  # set of selected nodes
    # add node to S if achieves maximum propagation for current chosen + this node
    influenced = []
    influenced_a = []
    influenced_b = []
    seeds_a = []
    seeds_b = []
    try:
        # Resume from previously written result files if they exist.
        influenced, influenced_a, influenced_b, seeds_a, seeds_b = ut.read_files(
            filename)
        # Reach is the budget-capped sum of each group's influenced fraction.
        reach = min(influenced_a[-1] / stats['group_a'], budget) + min(
            influenced_b[-1] / stats['group_b'], budget)
        S = seeds_a[-1] + seeds_b[-1]
        if reach >= budget:
            #ut.write_files(filename,influenced, influenced_a, influenced_b, seeds_a, seeds_b)
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print(f" reach: {reach}")
            ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                              stats['group_a'], stats['group_b'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b])
            return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)
    except FileNotFoundError:
        print(f'(unknown) not Found ')
    i = 0
    # Each group's reach is capped at `budget`, so the loop ends once both caps are met.
    while reach < 2 * budget:
        # cannot parallellize the outer selection loop; each round's
        # per-candidate evaluation runs in a pool instead.
        pool = multiprocessing.Pool(multiprocessing.cpu_count() - 1)
        if type_algo == 1:
            results = pool.map(map_select_next_seed_set_cover,
                               ((G, S, v) for v in G.nodes()))
        elif type_algo == 2:
            results = pool.map(map_IC_timing,
                               ((G, S, v, gamma_a, gamma_b) for v in G.nodes()))
        elif type_algo == 3:
            results = pool.map(map_IC_timing,
                               ((G, S, v, gamma_a, gamma_a) for v in G.nodes()))
        pool.close()
        pool.join()
        s = PQ()  # priority queue; negated priorities make it a max-heap
        for v, p, p_a, p_b in results:  #
            s.add_task(
                v, -(min(p_a / stats['group_a'], budget) +
                     min(p_b / stats['group_b'], budget)))
        node, priority = s.pop_item()
        #priority = -priority # as the current priority is negative fraction
        S.append(node)
        I, I_a, I_b = map_fair_IC((G, S))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        # Partition the current seed set by group.
        S_red = []
        S_blue = []
        group = G.nodes[node]['color']
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            else:
                S_blue.append(n)
        seeds_a.append(
            S_red)  # id's of the seeds so the influence can be recreated
        seeds_b.append(S_blue)
        #reach += -priority both are fine
        reach_a = I_a / stats['group_a']
        reach_b = I_b / stats['group_b']
        reach = (min(reach_a, budget) + min(reach_b, budget))
        print(
            f'{i+1} Node ID {node} group {group} Ia = {I_a} Ib {I_b} reach: {reach} reach_a {reach_a} reach_b {reach_b}'
        )
        #print(i, k, time.time() - start)
        i += 1
    ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                      stats['group_a'], stats['group_b'],
                      [len(S_a) for S_a in seeds_a],
                      [len(S_b) for S_b in seeds_b])
    ut.write_files(filename, influenced, influenced_a, influenced_b, seeds_a,
                   seeds_b)
    return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)