def brute_force_opposing_views(graph, pickle_name, verbose):
    """
    Brute-force every single-edge addition between opposing-opinion nodes
    (attribute value == 1 vs value == -1) and record how much each addition
    decreases the network polarization. The result is also serialized to a
    pickle file under ../pickles/.

    #todo 1. maybe implement 3,4,.. edge additions, too much time for larger networks (even small ones)
    #todo 2. implement it for graphs that have intermediate values not only -1 and 1 (also need dataset?)

    :param graph: networkx graph
    :param pickle_name: name of the file that the results will be stored
    :param verbose: =1 prints result to terminal, =0 hides result from terminal
    :return: dict keyed by the polarization decrease of each addition,
             e.g. {0.01938319984207737: {'addition': '15->20'}, ...}
    """
    values = nx.get_node_attributes(graph, 'value')
    positives = [node for node, value in values.items() if value == 1]
    negatives = [node for node, value in values.items() if value == -1]

    # Every positive/negative pair, de-duplicated as undirected edges
    # (sorting makes (a, b) and (b, a) collapse to one candidate).
    candidate_edges = list({tuple(sorted((p, n))) for p in positives for n in negatives})

    initial_polarization, converged_opinions = get_polarization(graph)

    # Maps decrease -> info about the addition that produced it.
    # NOTE: two additions with an identical decrease share a key, so the
    # later one overwrites the earlier one (original behavior kept).
    difference = {}
    for u, v in tqdm(candidate_edges):
        trial = graph.copy()
        trial.add_edge(u, v)
        new_pol, converged_opinions = get_polarization(trial)
        difference[abs(initial_polarization - new_pol)] = {'addition': f"{u}->{v}"}

    # store data (serialize) into pickle file
    with open(f"../pickles/{pickle_name}", 'wb') as handle:
        pickle.dump(difference, handle, protocol=pickle.HIGHEST_PROTOCOL)

    if verbose:
        for key in sorted(difference):
            print("%s: %s" % (key, difference[key]))

    return difference
def example_increase_that_confirms_intuition():
    """
    Search for a counter-intuitive example: enumerate opinion assignments on
    the 'intuition_graph' topology and look for a non-existing edge between
    opposing-opinion nodes whose addition *increases* polarization. Prints the
    first hit found and returns.
    """
    graph, size = get_graph_type('intuition_graph')
    node_data = dict(graph.nodes(data=True))

    # 4 negative opinions drawn (order-insensitive) from ten candidates.
    negative_combos = list(itertools.combinations(
        [-0.1, -0.2, -0.3, -0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1], 4))
    print(len(negative_combos))
    print(negative_combos)

    # 3 positive opinions drawn (order-sensitive) from seven candidates.
    positive_combos = list(itertools.permutations(
        [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1], 3))
    print(len(positive_combos))
    print(positive_combos)

    # Cross-product of the two, flattened to one opinion vector per case.
    paired = list(itertools.product(negative_combos, positive_combos))
    opinion_list = [list(itertools.chain(*pair)) for pair in paired]
    print(len(opinion_list))
    print(opinion_list)
    print("ddd")

    for opinions in tqdm(opinion_list):
        graph = attach_values_from_list_to_graph(graph, opinions)
        candidate_edges = nx.non_edges(graph)
        original_polarization, converged_opinions = get_polarization(graph)
        for edge in candidate_edges:
            val_a = node_data[edge[0]]['value']
            val_b = node_data[edge[1]]['value']
            trial = graph.copy()
            gap = abs(val_a - val_b)
            product = val_a * val_b
            # Only mid-distance, opposing-sign pairs are of interest here.
            if 0.8 <= gap <= 1.2 and product < 0:
                trial.add_edges_from([edge])
                polarization_after_addition, converged_opinions = get_polarization(trial)
                if polarization_after_addition > original_polarization:
                    print("found one:")
                    print(edge)
                    print(nx.info(trial))
                    print(opinion_list)
                    return
def brute_force_all_edges_removal(graph, pickle_name, verbose):
    """
    Brute-force the REMOVAL of every existing edge (one at a time) and record
    the polarization decrease for each removal. Much cheaper than the addition
    variant because only existing edges are examined, not all possible pairs.
    Results are also serialized to a pickle file under ../pickles/.

    :param graph: networkx graph
    :param pickle_name: name of the file that the results will be stored
    :param verbose: =1 prints result to terminal, =0 hides result from terminal
    :return: dict keyed by the polarization decrease of each removal, with the
             removed edge plus the product/sum of its endpoints' opinion values
    """
    graph_polarization, converged_opinions = get_polarization(graph)
    node_data = dict(graph.nodes(data=True))

    # Maps decrease -> info about the removal that produced it (equal
    # decreases share a key, so later removals overwrite earlier ones).
    difference = {}
    for edge in tqdm(graph.edges()):
        trial = graph.copy()
        # *edge unpacks the (u, v) tuple into remove_edge's two arguments.
        trial.remove_edge(*edge)
        new_polarization, converged_opinions = get_polarization(trial)

        value_a = node_data[edge[0]]['value']
        value_b = node_data[edge[1]]['value']
        difference[graph_polarization - new_polarization] = {
            'edge_removal': edge,
            'multiplication': value_a * value_b,
            'addition': value_a + value_b
        }

    # store data (serialize) into pickle file
    with open(f"../pickles/{pickle_name}", 'wb') as handle:
        pickle.dump(difference, handle, protocol=pickle.HIGHEST_PROTOCOL)

    if verbose:
        print("Difference with edge removals:")
        print("==============================")
        for key in sorted(difference):
            print("%s: %s" % (key, difference[key]))

    return difference
def random_edge_addition(k, graph_in, num_samples=20):
    """
    Random baseline: add k randomly chosen non-existing edges and measure the
    resulting polarization, averaged over ``num_samples`` independent draws.

    :param k: list of top-k edge-addition counts to examine, e.g. [5, 10, 15, 20]
    :param graph_in: networkx graph to examine (copied per draw, never mutated)
    :param num_samples: number of random draws to average over; default 20
                        preserves the original behavior
    :return: 1) the edge list of the LAST draw (kept for interface
                compatibility — it is not the averaged selection),
             2) the averaged polarization for each k,
             3) the total elapsed time repeated once per k
    """
    edges_to_add_list = []
    polarizations = [0] * len(k)
    # Candidate pool is fixed, so build it once outside the sampling loop.
    candidate_edges = list(nx.non_edges(graph_in))

    start = time.time()
    # Sample num_samples times and average, to smooth out random variance.
    for _ in range(num_samples):
        for j, k_edge in enumerate(k):
            edges_to_add_list = random.sample(candidate_edges, k_edge)
            g_copy = graph_in.copy()
            g_copy.add_edges_from(edges_to_add_list)
            # get_polarization returns (polarization, converged_opinions).
            polarizations[j] += get_polarization(g_copy)[0]
    elapsed = time.time() - start

    averaged_polarizations = [total / num_samples for total in polarizations]
    return edges_to_add_list, averaged_polarizations, [elapsed] * len(k)
def first_top_greedy(k, graph_in, expected_p_z_mode, probabilities_dictionary):
    """
    Greedy variant that, at each of max(k) steps, scores only the edges
    between the top-max(k) most positive and most negative (converged)
    opinions, adds the single best-scoring edge, and repeats.

    :param k: List that contains all the top-k edge additions we want to
              examine, e.g [5, 10, 15, 20]
    :param graph_in: Networkx graph that we want to examine (never mutated;
                     the algorithm works on a copy)
    :param expected_p_z_mode: Expected problem function definition. Available
        modes: 'common_neighbors', 'Jaccard_coefficient', 'Adamic_addar_index',
        'Embeddings'
    :param probabilities_dictionary: probabilities coming from embeddings
    :return: 1) k_items: the proposed edges in the order they were chosen,
             2) polarizations: polarization after adding the first k proposed
                edges, for each k,
             3) times: total elapsed time repeated once per k,
             4) addition_return_info: (edge, score) pair chosen at each step
    """
    k_items = []
    addition_return_info = []
    working_graph = graph_in.copy()  # keep the caller's graph untouched
    top_k = max(k)

    start = time.time()
    for _ in tqdm(range(top_k), ascii="~~~~~~~~~~~~~~~#"):
        polarization, converged_opinions = get_polarization(working_graph)
        positive_nodes, negative_nodes = get_first_top_k_positive_and_negative_opinions(
            top_k, converged_opinions)
        addition_info = iterate_over_different_opinions(
            working_graph, positive_nodes, negative_nodes, polarization,
            converged_opinions, 'Not expressed', expected_p_z_mode,
            probabilities_dictionary, True)
        # max() returns the first maximal item, matching a stable descending
        # sort followed by taking element [0].
        best_edge, best_score = max(addition_info.items(), key=lambda item: item[1])
        working_graph.add_edges_from([best_edge])
        k_items.append(best_edge)
        addition_return_info.append((best_edge, best_score))
    elapsed = time.time() - start

    polarizations = [
        add_edges_and_count_polarization(k_items[:k_edge], graph_in)
        for k_edge in k
    ]
    times = [elapsed] * len(k)
    return k_items, polarizations, times, addition_return_info
def random_edge_addition_different(k, graph_in, num_samples=40):
    """
    Random baseline restricted to edges whose endpoints hold different-signed
    (converged) opinions: k such edges are drawn at random and the resulting
    polarization is averaged over ``num_samples`` independent draws.

    :param k: list of top-k edge-addition counts to examine, e.g. [5, 10, 15, 20]
    :param graph_in: networkx graph to examine (copied per draw, never mutated)
    :param num_samples: number of random draws to average over; default 40
                        preserves the original behavior
    :return: 1) the edge list of the LAST draw (kept for interface
                compatibility — it is not the averaged selection),
             2) the averaged polarization for each k,
             3) the total elapsed time (candidate generation included)
                repeated once per k
    """
    edges_to_add_list = []
    polarizations = [0] * len(k)
    candidate_edges = []

    start = time.time()
    initial_polarization, converged_opinions = get_polarization(graph_in)
    # Passing len(converged_opinions) yields ALL positive and negative nodes.
    positive_nodes, negative_nodes = get_first_top_k_positive_and_negative_opinions(
        len(converged_opinions), converged_opinions)

    # Build the pool of opposing-opinion edges that do not already exist.
    for node_pos in tqdm(positive_nodes, ascii="~~~~~~~~~~~~~~~#"):
        for node_neg in negative_nodes:
            edge_to_add = (node_pos, node_neg)
            # skip edge if the edge exists in the original graph
            if graph_in.has_edge(*edge_to_add):
                continue
            candidate_edges.append(edge_to_add)

    # Sample num_samples times (default 40 — the original comment claimed 20
    # but the code always ran 40) and average the results.
    for _ in range(num_samples):
        for j, k_edge in enumerate(k):
            edges_to_add_list = random.sample(candidate_edges, k_edge)
            g_copy = graph_in.copy()
            g_copy.add_edges_from(edges_to_add_list)
            polarizations[j] += get_polarization(g_copy)[0]
    elapsed = time.time() - start

    averaged_polarizations = [total / num_samples for total in polarizations]
    return edges_to_add_list, averaged_polarizations, [elapsed] * len(k)
def force_example(graph):
    """
    Scan all non-existing edges between opposing-opinion nodes whose values
    are 0.8 to 1.5 apart and print any addition that INCREASES polarization.
    """
    node_data = dict(graph.nodes(data=True))
    candidate_edges = nx.non_edges(graph)
    original_polarization, converged_opinions = get_polarization(graph)

    for edge in candidate_edges:
        val_a = node_data[edge[0]]['value']
        val_b = node_data[edge[1]]['value']
        trial = graph.copy()
        gap = abs(val_a - val_b)
        product = val_a * val_b
        # Opposing signs (product < 0) with a moderate opinion gap.
        if 0.8 <= gap <= 1.5 and product < 0:
            trial.add_edges_from([edge])
            polarization_after_addition, converged_opinions = get_polarization(trial)
            if polarization_after_addition > original_polarization:
                print("found one:")
                print(edge)
                print(nx.info(trial))
def p_reduction(k, graph_in, mode, expected_p_z_mode, probabilities_dictionary):
    """
    Batch heuristic that ranks all candidate opposing-opinion edges once
    (scored by ``iterate_over_different_opinions``) and then adds the top-k.

    :param k: list of top-k edge-addition counts to examine, e.g. [5, 10, 15, 20]
    :param graph_in: networkx graph to examine (never mutated; a copy is used)
    :param mode: scoring mode forwarded to iterate_over_different_opinions;
                 'Distance' and 'pReduction' rank scores in descending order,
                 anything else ascending
    :param expected_p_z_mode: expected problem function definition, e.g.
        'common_neighbors', 'Jaccard_coefficient', 'Adamic_addar_index',
        'Embeddings', or 'Ignore'
    :param probabilities_dictionary: probabilities coming from embeddings
    :return: 1) the top max(k) proposed edges,
             2) polarization after adding the top-k edges, for each k,
             3) elapsed time repeated once per k
    """
    polarizations = []
    # Descending order for score-the-higher-the-better modes.
    reverse_flag = mode in ("Distance", "pReduction")

    edges_to_add_list = []
    start = time.time()
    g_copy = graph_in.copy()
    initial_polarization, converged_opinions = get_polarization(g_copy)
    # Passing len(converged_opinions) yields ALL positive and negative nodes.
    positive_nodes, negative_nodes = get_first_top_k_positive_and_negative_opinions(
        len(converged_opinions), converged_opinions)
    addition_info = iterate_over_different_opinions(
        g_copy, positive_nodes, negative_nodes, initial_polarization,
        converged_opinions, mode, expected_p_z_mode,
        probabilities_dictionary, True)
    # (Leftover debug prints of addition_info/sorted_edges removed; the
    # otherwise-parallel expressed_batch never printed them.)
    sorted_edges = sorted(addition_info.items(),
                          key=lambda x: x[1],
                          reverse=reverse_flag)
    # consider only edges that do not already exist
    for edge in sorted_edges:
        if not g_copy.has_edge(*edge[0]):
            edges_to_add_list.append(edge[0])
    end = time.time()

    for k_edge in k:
        polarizations.append(
            add_edges_and_count_polarization(edges_to_add_list[:k_edge],
                                             graph_in))
    max_edges_added = edges_to_add_list[:max(k)]
    return max_edges_added, polarizations, [end - start] * len(k)
def dataset_statistics_driver(datasets, verbose):
    """
    Load each dataset from ../datasets/<name>.gml and collect its polarization
    together with networkx's textual graph summary.

    :param datasets: list of dataset name strings (without the .gml suffix)
    :param verbose: truthy to also print the stats to the terminal
    :return: dict mapping dataset name -> {'polarization': ..., 'info': ...}
    """
    info = {}
    for name in datasets:
        graph = load_graph(f'../datasets/{name}.gml')
        polarization, converged_opinions = get_polarization(graph)
        summary = nx.info(graph)
        info[name] = {'polarization': polarization, 'info': summary}
        if verbose:
            print(summary)
            print("Polarization:", end='')
            print(polarization)
            print("---------------------")
    return info
def expressed_batch(k, graph_in, mode, expected_p_z_mode, probabilities_dictionary):
    """
    Batch 'expressed' heuristic: score every candidate opposing-opinion edge
    once using the expressed opinion values, then add the top-k scored edges.

    :param k: List that contains all the top-k edge additions we want to
              examine, e.g [5, 10, 15, 20]
    :param graph_in: Networkx graph that we want to examine
    :param mode: 'Distance' for absolute distance, 'Multiplication' for
                 multiplication
    :param expected_p_z_mode: Expected problem function definition. Available
        modes: 'common_neighbors', 'Jaccard_coefficient', 'Adamic_addar_index',
        'Embeddings'; pass 'Ignore' to not consider it.
    :param probabilities_dictionary: probabilities coming from embeddings
    :return: 1) the top max(k) proposed edges,
             2) polarization after adding the top-k edges, for each k,
             3) elapsed time repeated once per k,
             4) the top max(k) (edge, score) pairs
    """
    # 'Distance' scores are better when larger -> descending sort.
    reverse_flag = mode == "Distance"

    start = time.time()
    g_copy = graph_in.copy()
    initial_polarization, converged_opinions = get_polarization(g_copy)
    # Passing len(converged_opinions) yields ALL positive and negative nodes.
    positive_nodes, negative_nodes = get_first_top_k_positive_and_negative_opinions(
        len(converged_opinions), converged_opinions)
    addition_info = iterate_over_different_opinions(
        g_copy, positive_nodes, negative_nodes, initial_polarization,
        converged_opinions, mode, expected_p_z_mode,
        probabilities_dictionary, True)
    sorted_edges = sorted(addition_info.items(),
                          key=lambda item: item[1],
                          reverse=reverse_flag)
    # keep only edges that are not already present in the graph
    edges_to_add_list = [edge for edge, _ in sorted_edges
                         if not g_copy.has_edge(*edge)]
    end = time.time()

    polarizations = [
        add_edges_and_count_polarization(edges_to_add_list[:k_edge], graph_in)
        for k_edge in k
    ]
    max_edges_added = edges_to_add_list[:max(k)]
    edges_to_return_with_info = sorted_edges[:max(k)]
    return max_edges_added, polarizations, [end - start] * len(k), edges_to_return_with_info
def greedy_batch(k, graph_in, expected_p_z_mode, verbose, probabilities_dictionary):
    """
    Score every candidate opposing-opinion edge by its polarization decrease
    in a single batch pass, then propose the best-scoring edges. Also used as
    the inner step of the greedy algorithm, and supports the expected-problem
    variants (weighting by e.g. common neighbors).

    :param k: List that contains all the top-k edge additions we want to
              examine, e.g [5, 10, 15, 20]
    :param graph_in: Networkx graph that we want to examine
    :param expected_p_z_mode: Expected problem function definition. Available
        modes: 'common_neighbors', 'Jaccard_coefficient', 'Adamic_addar_index',
        'Embeddings'; pass 'Ignore' to not consider it.
    :param verbose: forwarded to the inner progress bar's disable flag; the
        greedy algorithm uses it to silence the inner bar. Do not change.
    :param probabilities_dictionary: probabilities coming from embeddings
    :return: 1) the top max(k) proposed edges sorted by (expected) decrease,
             2) polarization after adding the top-k edges, for each k,
             3) elapsed scoring time repeated once per k,
             4) the top max(k) (edge, score) pairs
    """
    g_copy = graph_in.copy()
    original_polarization, converged_opinions = get_polarization(g_copy)
    # Passing len(converged_opinions) yields ALL positive and negative nodes.
    positive_nodes, negative_nodes = get_first_top_k_positive_and_negative_opinions(
        len(converged_opinions), converged_opinions)

    start = time.time()
    addition_info = iterate_over_different_opinions(
        g_copy, positive_nodes, negative_nodes, original_polarization,
        converged_opinions, 'Not expressed', expected_p_z_mode,
        probabilities_dictionary, verbose)
    elapsed = time.time() - start

    sorted_edges = sorted(addition_info.items(),
                          key=lambda item: item[1],
                          reverse=True)
    # keep only edges that are not already present in the graph
    edges_to_add_list = [edge for edge, _ in sorted_edges
                         if not g_copy.has_edge(*edge)]

    # polarization after adding the first k_edge proposals, for every k given
    polarizations = [
        add_edges_and_count_polarization(edges_to_add_list[:k_edge], graph_in)
        for k_edge in k
    ]
    max_edges_added = [edge for edge, _ in sorted_edges[:max(k)]]
    max_edges_with_info = sorted_edges[:max(k)]
    return max_edges_added, polarizations, [elapsed] * len(k), max_edges_with_info
def check_graph_permutations(number_of_vertices, graph):
    """
    Exhaustively test every opinion assignment (from a fixed value palette)
    against every feasible set of edge additions on ``graph``, recording the
    polarization change per scenario and printing any scenario where adding
    opposing-opinion edges INCREASES polarization. Finally prints the largest
    decrease observed and its scenario.

    :param number_of_vertices: number of nodes in ``graph`` (nodes are assumed
        to be labeled 0..number_of_vertices-1)
    :param graph: networkx graph whose edges define the base topology
    """
    # make a list of nodes e.g. for 3 nodes returns [1,2,3]
    lst_nodes = [node for node in range(number_of_vertices)]
    # get all possible permutations for the values: one value from the palette
    # per node (cartesian product, so palette values may repeat across nodes)
    value_permutations = []
    for i in itertools.product([-0.8, -0.5, 0.1, 0, -0.9, 0.8, -0.1, 0.5],
                               repeat=number_of_vertices):
        value_permutations.append(list(i))
    # all undirected node pairs, i.e. every possible edge
    edge_permutations = [i for i in itertools.combinations(lst_nodes, 2)]
    # creates all possible pairs, pairs of two, of three etc.. up to number of vertices-1
    possible_combs = [
        i for i in itertools.combinations(edge_permutations,
                                          number_of_vertices - 1)
    ]
    # wrap the edges addition in a tuple for compatibility issues bellow
    # (zip of a single iterable yields 1-tuples, so single-edge additions get
    # the same ((u, v),) shape as the multi-edge combinations above)
    additions_tuple = list(zip(edge_permutations))
    # join all the possible edge additions in one list
    possible_combs.extend(additions_tuple)
    # check all value and edge combinations
    decrease = {}
    for perm in value_permutations:
        # NOTE(review): `perm` is never attached to `graph` before this call,
        # so initial_polarization looks identical across all permutations —
        # presumably get_polarization reads values set elsewhere; confirm.
        initial_polarization, converged_opinions = get_polarization(graph)
        for edge_additions in possible_combs:
            # re-init graph to check different edge scenario
            g = nx.Graph()
            g.add_edges_from(graph.edges())
            # check if the addition already exist in the graph, every addition must NOT be
            # an edge that exists inside the graph beforehand.
            # exist = True : all edge_additions does not exist in the current graph
            # exist = False: at least one edge addition in edge_additions exist in the current graph
            exist = all(x not in graph.edges() for x in edge_additions)
            # check that all the connections are with different opinions
            # (product of endpoint values is negative only for opposite signs)
            all_connections_different_opinions = all(
                perm[y[0]] * perm[y[1]] < 0 for y in edge_additions)
            if exist and all_connections_different_opinions:
                for edge_perm in edge_additions:
                    g.add_edge(edge_perm[0], edge_perm[1])
                new_pol, converged_opinions = get_polarization(g)
                # keyed by decrease magnitude; equal decreases overwrite
                decrease[abs(initial_polarization - new_pol)] = {
                    'graph': graph.name,
                    'values': perm,
                    'edge_additions': edge_additions
                }
                # report counter-intuitive cases (polarization went UP)
                if new_pol > initial_polarization:
                    print(nx.info(graph))
                    print(nx.info(g))
                    print("===================")
                    print("graph topology:", graph.name)
                    print("values", perm)
                    print("initial:", initial_polarization)
                    print("after:", new_pol)
                    print("addition", edge_additions)
                    print("==============")
    # best (largest) decrease found across all scenarios
    print(max(decrease))
    print(decrease[max(decrease)])
def iterate_over_different_opinions(graph_in, positive_nodes, negative_nodes,
                                    original_polarization,
                                    converged_opinions, mode,
                                    expected_p_z_mode,
                                    probabilities_dictionary, verbose):
    """
    Score every candidate edge between a positive-opinion node and a
    negative-opinion node that does not already exist in ``graph_in``.

    :param graph_in: networkx graph (copied when polarization is re-computed)
    :param positive_nodes: nodes with the top positive converged opinions
    :param negative_nodes: nodes with the top negative converged opinions
    :param original_polarization: polarization of ``graph_in`` before any addition
    :param converged_opinions: converged opinion per node, indexable by node id
    :param mode: 'Distance' (absolute opinion distance),
        'Multiplication' (product of opinions),
        'pReduction' (embedding probability of the edge),
        anything else -> exact polarization decrease of the addition
    :param expected_p_z_mode: when 'Embeddings' (and mode != 'pReduction'),
        each score is weighted by the edge's embedding probability
    :param probabilities_dictionary: edge tuple -> probability (from embeddings)
    :param verbose: passed to tqdm's ``disable`` flag (True hides the bar)
    :return: dict mapping candidate edge (pos_node, neg_node) -> score
    """
    addition_info = {}
    for node_pos in tqdm(positive_nodes,
                         ascii="~~~~~~~~~~~~~~~#",
                         disable=verbose):
        for node_neg in negative_nodes:
            edge_to_add = (node_pos, node_neg)
            # skip edge if the edge exists in the original graph
            if graph_in.has_edge(*edge_to_add):
                continue
            if mode == "Distance":
                value = abs(converged_opinions[node_pos] -
                            converged_opinions[node_neg])
            elif mode == "Multiplication":
                value = converged_opinions[node_pos] * converged_opinions[
                    node_neg]
            elif mode == "pReduction":
                try:
                    value = probabilities_dictionary[edge_to_add]
                # some edges were stored in reverse??
                except KeyError:
                    value = probabilities_dictionary[(edge_to_add[1],
                                                      edge_to_add[0])]
            else:
                # check how much the polarization was reduced in comparison with the original graph
                g_copy = graph_in.copy()
                g_copy.add_edges_from([edge_to_add])
                polarization_after_addition, converged_opinions = get_polarization(
                    g_copy)
                if polarization_after_addition < original_polarization:
                    value = original_polarization - polarization_after_addition
                else:
                    # polarization increased
                    # NOTE(review): callers sort these scores descending, so a
                    # 999999 sentinel would rank FIRST — presumably increases
                    # are not expected to occur here; confirm intent.
                    value = 999999
            # addition_info is computed differently if we considering
            # the expected addition problem
            if expected_p_z_mode == 'Embeddings' and mode != 'pReduction':
                try:
                    probability = probabilities_dictionary[edge_to_add]
                # some edges were stored in reverse??
                except KeyError:
                    probability = probabilities_dictionary[(edge_to_add[1],
                                                            edge_to_add[0])]
                addition_info[edge_to_add] = value * probability
            else:
                # considering the initial problem of just the polarization decrease
                addition_info[edge_to_add] = value
    return addition_info
def algorithms_driver(k, datasets, algorithms, expected_mode, experiment_comment):
    """
    Run every requested algorithm on every requested dataset, collect the
    polarization decreases, persist everything to a pickle, and plot the
    per-dataset comparison.

    The result list is the max edges that each algorithm adds, for example if
    the experiment is conducted with the following list [5,10,15,20] the
    results will have the 20 edges that have been added. For later use you can
    just pick the first 5 if you need to check something on the 5 edges
    addition.

    :param experiment_comment: free-text tag stored with the pickled results
    :param expected_mode: expected-problem mode forwarded to the algorithms
        (e.g. 'Embeddings'); 'Ignore' skips the embedding probabilities
    :param k: list that contains all the different top-k additions we want to
        add to the graph
    :param datasets: a list containing the string names of the datasets we
        want to examine
    :param algorithms: a list containing the string names of the algorithms we
        wan to run on the datasets
    :return: dictionary info that has all the information about every
        experiment. It uses this key to store information --->
        info[{algorithm}_{dataset}_{edges}] = {...}
    """
    info = {}
    print("====================================================")
    for ds in datasets:
        graph = load_graph(f'../datasets/{ds}.gml')
        total_decreases = []
        total_times = []
        polarizations = []
        results = []
        time_list = []
        # holds probability for each edge that does not exist
        # key-> edge, value-> prob
        probabilities_dictionary = {}
        # embeddings are only computed when some algorithm will need them
        if expected_mode != 'Ignore' or 'pReduction' in algorithms:
            results, probabilities = graph_embeddings(ds, 0)
            probabilities_dictionary = {
                results[i]: probabilities[i]
                for i in range(len(results))
            }
        for algorithm in algorithms:
            print(f'\r Now in --> Dataset: {ds}, algorithm: {algorithm}')
            # small pause so the progress-bar output stays readable
            time.sleep(1)
            # append initial polarization for the graph output
            pol, converged_opinions = get_polarization(graph)
            decrease_list = [pol]
            # dispatch to the selected algorithm; all return
            # (results, polarizations, time_list[, addition_info])
            if algorithm == 'Greedy':
                results, polarizations, time_list, addition_info = greedy(
                    k, graph, expected_mode, probabilities_dictionary)
            elif algorithm == 'GBatch':
                results, polarizations, time_list, addition_info = greedy_batch(
                    k, graph, expected_mode, False, probabilities_dictionary)
            elif algorithm == 'FTGreedy':
                results, polarizations, time_list, addition_info = first_top_greedy(
                    k, graph, expected_mode, probabilities_dictionary)
            elif algorithm == 'FTGreedyBatch':
                results, polarizations, time_list, addition_info = first_top_greedy_batch(
                    k, graph, expected_mode, probabilities_dictionary)
            elif algorithm == 'Expressed Distance':
                results, polarizations, time_list, addition_info = expressed(
                    k, graph, 'Distance', expected_mode,
                    probabilities_dictionary)
            elif algorithm == 'Expressed Multiplication':
                results, polarizations, time_list, addition_info = expressed(
                    k, graph, 'Multiplication', expected_mode,
                    probabilities_dictionary)
            elif algorithm == 'BExpressed Distance':
                results, polarizations, time_list, addition_info = expressed_batch(
                    k, graph, 'Distance', expected_mode,
                    probabilities_dictionary)
            elif algorithm == 'BExpressed Multiplication':
                results, polarizations, time_list, addition_info = expressed_batch(
                    k, graph, 'Multiplication', expected_mode,
                    probabilities_dictionary)
            elif algorithm == 'pReduction':
                results, polarizations, time_list = p_reduction(
                    k, graph, 'pReduction', expected_mode,
                    probabilities_dictionary)
            elif algorithm == 'Random':
                results, polarizations, time_list = random_edge_addition(
                    k, graph)
            elif algorithm == 'Random different':
                results, polarizations, time_list = random_edge_addition_different(
                    k, graph)
            # decrease_list[0] is the initial polarization, the rest follow k
            decrease_list = decrease_list + polarizations
            for i, k_edges in enumerate(k):
                index = f'{algorithm}_{ds}_{k_edges}'
                info[index] = {
                    'result_dictionary': results[:k_edges],
                    'time': time_list[i],
                    'polarization': decrease_list[i + 1]
                }
            total_decreases.append(decrease_list)
            total_times.append(time_list)
        decreases_checked, labels_checked = check_for_same_results(
            total_decreases, algorithms, 1)
        # persist every collected artifact for this dataset
        save_data_to_pickle(data_to_write=[
            total_decreases, algorithms, decreases_checked, labels_checked,
            info, probabilities_dictionary, k
        ],
                            atr_list=[
                                'decreases_pol', 'labels_pol',
                                'decreases_checked_pol', 'labels_checked_pol',
                                'info', 'probabilities_dictionary',
                                'k_edge_number_addition_list'
                            ],
                            ds=ds,
                            experiment_comment=experiment_comment)
        # prepend 0 added edges so the plot starts at the initial polarization
        k_copy = k.copy()
        k_copy.insert(0, 0)
        vis_graphs_heuristics(k_copy, decreases_checked, labels_checked,
                              f"{ds} Polarization Decrease",
                              "Number of Edges Added", "π(z)", 0)
    return info