import pandas as pd
from networkx import shortest_path_length


def distanceMatrix(G, R=None):
    if R:
        # Set up so that the resolving set R gives the column labels and the
        # rest of the graph gives the rows
        dist = {}
        for r in R:
            dist.update({r: dict(shortest_path_length(G, source=r))})
    else:
        dist = dict(shortest_path_length(G))
    dist_mat = pd.DataFrame.from_dict(dist)
    return dist_mat
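
# Usage sketch (not from the original source): distances to a resolving set R
# come back as a |V| x |R| DataFrame with the members of R as columns.
# _demo_G is a hypothetical example graph.
import networkx as nx
_demo_G = nx.path_graph(5)
print(distanceMatrix(_demo_G).shape)            # (5, 5): all-pairs distances
print(distanceMatrix(_demo_G, R=[0, 4]).shape)  # (5, 2): columns are the set R
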
import numpy as np
from networkx import shortest_path_length
from scipy.sparse import coo_matrix, csr_matrix
from sklearn.metrics import pairwise_distances


def compress_graph_from_hard_partition_ts(G, nodes, features, p, partition,
                                          node_subset):
    """
    Obtain a sparse tall-skinny matrix and new probabilities from a hard
    partition of a graph. For each point, we only find the distance to its
    anchor, not to all other anchors.

    Parameters
    ----------
    G : NetworkX graph
    nodes : sorted list of graph nodes
    features : feature matrix with one row per sorted node
    p : probability vector of sorted nodes
    partition : list of sets containing node labels
    node_subset : sorted list of anchor node labels

    Returns
    -------
    dists : |nodes| x |node_subset| sparse matrix of graph distances from
        each point to the anchor of its block
    fdists : |nodes| x |node_subset| sparse matrix of feature-vector
        distances from each point to the anchor of its block
    membership : |nodes| x |node_subset| membership matrix
    p_compressed : vector of aggregated probabilities on anchors
    dists_subset : |node_subset| x |node_subset| matrix of pairwise anchor
        distances
    """
    # Distances between anchors
    dists_subset = np.zeros((len(node_subset), len(node_subset)))
    for i in range(len(node_subset)):
        for j in range(i + 1, len(node_subset)):
            dists_subset[i, j] = shortest_path_length(G, node_subset[i],
                                                      node_subset[j])
    dists_subset = dists_subset + dists_subset.T

    # Sparse tall-skinny matrix of distances and feature-vector distances
    # from points to their own anchors.
    # Also, tall-skinny membership matrix and mass-compression matrix
    row_idx, col_idx, dist_data, mass_data, fdist_data = [], [], [], [], []
    for (aidx, anchor) in enumerate(node_subset):
        bidx = [anchor in v for v in partition].index(True)  # block containing the current anchor point
        block = partition[bidx]
        for b in block:
            idx = nodes.index(b)
            d = shortest_path_length(G, nodes[idx], anchor)
            fd = pairwise_distances(
                features[nodes.index(anchor), :].reshape(1, -1),
                features[idx, :].reshape(1, -1))[0][0]
            row_idx.append(idx)
            col_idx.append(aidx)
            dist_data.append(d)
            mass_data.append(p[idx])
            fdist_data.append(fd)
    dists = coo_matrix((dist_data, (row_idx, col_idx)),
                       shape=(len(nodes), len(node_subset)))
    fdists = coo_matrix((fdist_data, (row_idx, col_idx)),
                        shape=(len(nodes), len(node_subset)))
    membership = coo_matrix(([1 for v in row_idx], (row_idx, col_idx)),
                            shape=(len(nodes), len(node_subset)))
    # coup = coo_matrix((mass_data, (row_idx, col_idx)),
    #                   shape=(len(nodes), len(node_subset)))
    p_subset = csr_matrix.dot(p, membership)
    return dists.tocsr(), fdists.tocsr(), membership.tocsr(), p_subset, \
        dists_subset
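
# Usage sketch (not from the original source; all demo names are invented).
# Two blocks of three nodes each with one anchor per block: the aggregated
# mass on the anchors should be [0.5, 0.5] under a uniform p.
import networkx as nx
import numpy as np
_G = nx.path_graph(6)
_nodes = sorted(_G.nodes)
_features = np.random.rand(6, 3)        # toy feature vectors
_p = np.full(6, 1 / 6)                  # uniform node probabilities
_partition = [{0, 1, 2}, {3, 4, 5}]
_anchors = [1, 4]
_d, _fd, _m, _p_sub, _d_sub = compress_graph_from_hard_partition_ts(
    _G, _nodes, _features, _p, _partition, _anchors)
print(_p_sub)                           # [0.5 0.5]
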
import numpy as np
from networkx import (betweenness_centrality, closeness_centrality,
                      clustering, degree_centrality, diameter, radius,
                      shortest_path_length)


def get_topological_features(G, nodes=None):
    N_ = len(G.nodes)
    if nodes is None:
        nodes = G.nodes
    # Degree centrality
    d_c = get_features(degree_centrality(G).values())
    print('a')
    # Betweenness centrality
    b_c = get_features(betweenness_centrality(G).values())
    print('b')
    # Closeness centrality
    c_c = get_features(closeness_centrality(G).values())
    print('c')
    # Clustering
    c = get_features(clustering(G).values())
    print('d')
    d = diameter(G)
    r = radius(G)
    s_p_average = []
    for s in shortest_path_length(G):
        dic = s[1]
        lengths = dic.values()
        s_p_average += [sum(lengths) / float(N_)]
    s_p_average = get_features(s_p_average)
    features = np.concatenate((d_c, b_c, c_c, c, s_p_average, [d], [r]),
                              axis=0)
    return features
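
# Usage sketch (not from the original source). get_features is defined
# elsewhere in the codebase; a plausible stand-in that summarizes a value
# distribution is assumed here purely for illustration.
import numpy as np
import networkx as nx
def get_features(values):
    vals = np.fromiter(values, dtype=float)
    return np.array([vals.mean(), vals.std(), vals.min(), vals.max()])
# 5 measure blocks x 4 summary stats + diameter + radius = 22 features
print(get_topological_features(nx.karate_club_graph()).shape)  # (22,)
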
from networkx import ancestors, descendants, shortest_path_length


def outgraph(g, source, distance=None):
    outs = descendants(g, source)
    if distance is not None:
        outs = {
            o for o in outs
            if shortest_path_length(g, source=source, target=o) <= distance
        }
    return g.subgraph(outs | {source})


def ingraph(g, target, distance=None):
    ins = ancestors(g, target)
    if distance is not None:
        ins = {
            i for i in ins
            if shortest_path_length(g, source=i, target=target) <= distance
        }
    return g.subgraph(ins | {target})
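
# Quick check of outgraph/ingraph on a toy DAG (not from the original source):
import networkx as nx
_dag = nx.DiGraph([(1, 2), (2, 3), (3, 4)])
print(sorted(outgraph(_dag, 1, distance=2).nodes))  # [1, 2, 3]
print(sorted(ingraph(_dag, 4, distance=1).nodes))   # [3, 4]
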
def _find_pivot_lang(self, langs):
    langs = [l.lower() for l in langs]
    lang_graph = self.seg_map.get_lang_graph()
    path_len = shortest_path_length(lang_graph, langs[0], langs[1])
    if path_len != 2:
        return None
    # Find shortest path
    path = shortest_path(lang_graph, langs[0], langs[1])
    assert len(path) == 3
    # Get a pivot language and scan all pivot segments
    return path[1]
import operator

from networkx import shortest_path_length


def get_dist_to_center(pattern):
    center_dist = {}
    center = get_center_nodes(pattern)
    for n in pattern.nodes():
        min_length = len(pattern.nodes())
        for center_node in center:
            length = shortest_path_length(pattern, n, center_node)
            if length < min_length:
                min_length = length
        center_dist[n] = min_length
    return sorted(center_dist.items(), key=operator.itemgetter(1))
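
# Usage sketch (not from the original source). get_center_nodes is defined
# elsewhere; networkx's center() is assumed as a plausible stand-in here.
import networkx as nx
def get_center_nodes(pattern):
    return nx.center(pattern)
# On a 5-node path the center is node 2, so it sorts first with distance 0.
print(get_dist_to_center(nx.path_graph(5)))
# [(2, 0), (1, 1), (3, 1), (0, 2), (4, 2)]
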
from networkx import shortest_path_length


def calculate_distance(graph, sourceset, targetset, weight):
    res = 0
    n = len(sourceset)
    for s in sourceset:
        ds = []
        for t in targetset:
            d = shortest_path_length(graph, s, t)
            if d == 0:
                # the target is one of the disease genes
                ds.append(d - weight[t])
            else:
                # the target is not a disease gene
                ds.append(d)
        res += min(ds)
    distance = res / n
    return distance
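
# Illustrative call (not from the original source): the average, over the
# source set, of each source's minimum score-adjusted distance to the targets.
import networkx as nx
_g = nx.path_graph(4)
_weights = {0: 0.5, 3: 0.2}  # hypothetical per-target weights
print(calculate_distance(_g, [0, 1], [0, 3], _weights))  # 0.25
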
def get_correct_path(self, relations, correct_tails, verbose=False,
                     return_graph=False):
    correct_batch, correct_nodes = self.find_correct_tails(
        self.cog_graph.node_lists, correct_tails)
    graphs = self.cog_graph.to_networkx()
    if return_graph:
        reason_list = [{} for _ in range(len(correct_tails))]
    else:
        reason_list = [[] for _ in range(len(correct_tails))]
    for batch_id, node_id in zip(correct_batch, correct_nodes):
        if verbose:
            print("{}: Query relation: {}".format(
                batch_id, self.id2relation[relations[batch_id]]))
        correct_tail = self.id2entity[
            self.cog_graph.node_lists[batch_id][node_id]]
        head = self.id2entity[self.cog_graph.node_lists[batch_id][0]]
        graph = graphs[batch_id]
        if return_graph:
            nodes = shortest_path_length(graph, target=correct_tail)
            neighbor_dict = {}
            for node in nodes:
                neighbor_dict[node] = []
                for e1, e2, r in graph.edges(node, keys=True):
                    if e2 in nodes:
                        neighbor_dict[node].append((e1, e2, r))
            reason_list[batch_id] = neighbor_dict
        else:
            paths = list(networkx.algorithms.all_simple_paths(
                graphs[batch_id], head, correct_tail))
            reason_paths = []
            for path in paths:
                reason_path = [path[0]]
                last_node = path[0]
                for node in path[1:]:
                    relation = list(map(
                        lambda x: x[2],
                        filter(lambda x: x[1] == node,
                               graph.edges(last_node, keys=True))))
                    last_node = node
                    reason_path.append((node, relation))
                reason_paths.append(reason_path)
            reason_list[batch_id] = reason_paths
    return reason_list
def act(self, defState, defNode, eps=0):
    # add in the chase part
    if defState.nodes[defNode]["isDef"] != 1:
        raise ValueError("def location doesn't match")
    if defState.graph["isFound"]:
        # if the position of the attacker is known: chase
        attNode = defState.graph["attNode"]
        neighbors = list(defState.neighbors(defNode))
        bestNode = neighbors[0]
        minDist = 1000000000
        for n in neighbors:
            curDist = shortest_path_length(defState, n, attNode)
            if curDist < minDist:
                bestNode = n
                minDist = curDist
        return (defNode, bestNode)
    else:
        # else do parameterized random walk
        neighbors = list(defState.neighbors(defNode))
        neighborRewards = []
        neighborDegrees = []
        for n in neighbors:
            neighborRewards.append(defState.nodes[n]["r"])
            neighborDegrees.append(len(list(defState.neighbors(n))))
        neighborRewards = np.array(neighborRewards)
        neighborDegrees = np.array(neighborDegrees)
        comb = self.w1 * neighborRewards + self.w2 * neighborDegrees
        softmaxProb = np.exp(comb) / sum(np.exp(comb))
        nxtNode = np.random.choice(neighbors, p=softmaxProb)
        return (defNode, nxtNode)
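
# Toy run (not from the original source): a 3-node line where the attacker's
# position is known, so the defender chases through the closer neighbor.
import networkx as nx


class _DemoDefender:
    w1, w2 = 1.0, 0.0
    act = act  # bind the function defined above as a method (illustrative)


_s = nx.path_graph(3)
nx.set_node_attributes(_s, 0, "isDef")
_s.nodes[0]["isDef"] = 1
_s.graph.update(isFound=True, attNode=2)
print(_DemoDefender().act(_s, 0))  # (0, 1): step toward the attacker
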
def get_fact_dist(self, ignore_relation=True):
    graph = self.kg.to_networkx(multi=True, neighbor_limit=256)
    fact_dist = {}
    for relation, pairs in tqdm(self.trainer.train_query.items()):
        deleted_edges = []
        if ignore_relation:
            reverse_relation = self.reverse_relation[relation]
            for head, tail in itertools.chain(
                    pairs,
                    self.trainer.train_support[relation],
                    self.trainer.train_query[reverse_relation],
                    self.trainer.train_support[reverse_relation]):
                try:
                    graph.remove_edge(head, tail, relation)
                    deleted_edges.append((head, tail, relation))
                except NetworkXError:
                    pass
                try:
                    graph.remove_edge(head, tail, reverse_relation)
                    deleted_edges.append((head, tail, reverse_relation))
                except NetworkXError:
                    pass
        for head, tail in itertools.chain(
                self.trainer.train_query[relation],
                self.trainer.train_support[relation]):
            delete_edge = False
            try:
                graph.remove_edge(head, tail, relation)
                delete_edge = True
            except NetworkXError:
                pass
            try:
                dist = shortest_path_length(graph, head, tail)
            except (NetworkXNoPath, KeyError):
                dist = -1
            fact_dist[(head, relation, tail)] = dist
            if delete_edge:
                graph.add_edge(head, tail, relation)
        graph.add_edges_from(deleted_edges)
    return fact_dist
import numpy as np
from networkx import (adjacency_matrix, average_shortest_path_length,
                      betweenness_centrality, clustering,
                      shortest_path_length)


def graph_stats(G):
    """
    Compute all the graph-related statistics in the features.

    Note that since the graph is always fully connected, all of these are
    the weighted versions. For this reason, many of these functions use the
    implementations in bctpy rather than NetworkX.
    """
    # Local measures
    clustering_dict = clustering(G, weight='weight')
    adjacency = np.array(adjacency_matrix(G).todense())  # computed but unused below
    betweenness_centrality_dict = betweenness_centrality(G, weight='weight')
    paths = shortest_path_length(G, weight='weight')
    eccentricities = [max(dists.values()) for (source, dists) in sorted(paths)]
    local_measures = np.concatenate(
        [[v for (k, v) in sorted(clustering_dict.items())],
         [v for (k, v) in sorted(betweenness_centrality_dict.items())],
         eccentricities])
    graph_diameter = max(eccentricities)
    graph_radius = min(eccentricities)
    aspl = average_shortest_path_length(G, weight='weight')
    global_measures = np.array([graph_diameter, graph_radius, aspl])
    return np.concatenate([local_measures, global_measures])
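
# Sketch on a small fully connected weighted graph (not from the original
# source); per node there are 3 local measures, plus 3 global ones.
import random
import networkx as nx
_G = nx.complete_graph(5)
for _u, _v in _G.edges:
    _G[_u][_v]['weight'] = random.uniform(0.1, 1.0)
print(graph_stats(_G).shape)  # (18,) for 5 nodes: 3*5 local + 3 global
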
def _build_core_nodes(self):
    """
    It builds the list of core nodes
    """
    from networkx.algorithms.shortest_paths.generic import \
        shortest_path_length

    # Force core to contain constrained atoms
    core_nodes = [index for index in self.constraint_indices]

    # Calculate graph distances according to weight values
    weighted_distances = dict(shortest_path_length(self, weight="weight"))

    # Also add all atoms at zero distance with respect to a constrained
    # atom into the core
    for node in self.nodes:
        for constraint_index in self.constraint_indices:
            d = weighted_distances[constraint_index][node]
            if d == 0 and node not in core_nodes:
                core_nodes.append(node)

    self._core_nodes = core_nodes
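
# Minimal sketch (not from the original source): the surrounding class is
# assumed to be a networkx.Graph subclass carrying constraint_indices, so the
# method above can be exercised by binding it to a demo class.
import networkx as nx


class _DemoMolGraph(nx.Graph):
    constraint_indices = [0]
    _build_core_nodes = _build_core_nodes  # the function defined above


_g = _DemoMolGraph()
_g.add_edge(0, 1, weight=0)  # zero-weight edge: node 1 joins the core
_g.add_edge(1, 2, weight=1)
_g._build_core_nodes()
print(_g._core_nodes)        # [0, 1]
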
def get_dist_dict(self, mode='test', by_relation=True):
    self.graph = self.kg.to_networkx(multi=False)
    global_dist_count = defaultdict(int)
    fact_dist = {}
    if mode == 'test':
        relations = self.trainer.test_relations
    elif mode == 'valid':
        relations = self.trainer.validate_relations
    else:
        raise NotImplementedError
    for relation in relations:
        dist_count = defaultdict(int)
        for head, tail in self.trainer.task_ground[relation]:
            try:
                dist = shortest_path_length(self.graph, head, tail)
            except networkx.NetworkXNoPath:
                dist = -1
            dist_count[dist] += 1
            global_dist_count[dist] += 1
            fact_dist[(head, relation, tail)] = dist
        if by_relation:
            print(relation, sorted(dist_count.items(), key=lambda x: x[0]))
    print(sorted(global_dist_count.items(), key=lambda x: x[0]))
    return fact_dist, global_dist_count
def loop(episode, feature):
    with open(f'data/starwars-episode-{episode}-{feature}.json') as f:
        data = json.load(f)
    with open('character_side.json') as d:
        sides = json.load(d)
    characters = {
        "Light Side": ["FINN", "OBI-WAN", "YODA", "PADME", "LUKE"],
        "Dark Side": ["EMPEROR", "DARTH VADER", "PIETT", "GENERAL HUX",
                      "NUTE GUNRAY"]
    }
    probability = 0.4

    # Task 2 Hypothesis 1
    connections, interactions = get_info(data)
    homophily = get_homophily(data, characters)
    classification = light_dark_classification(data, sides)
    Graph = get_graph(data, episode)

    # Task 2 Hypothesis 2
    betweenness = sort(centrality.betweenness_centrality(Graph))[:5] + sort(
        centrality.betweenness_centrality(Graph))[-5:]
    degree_centrality = sort(connections)[:5] + sort(connections)[-5:]

    # Task 3
    cliquishness = sort(nx.clustering(Graph))[:5] + sort(
        nx.clustering(Graph))[-5:]
    path_length = list(nxpath.shortest_path_length(Graph))
    randomness = get_randomness(Graph, probability)
    # Uncomment to display randomness graphs and save them
    # for graph in range(1, len(randomness[0]), 10):
    #     plt.figure(figsize=(30, 15))
    #     plt.subplot(121)
    #     nx.draw(randomness[0][graph], with_labels=True)
    #     plt.savefig(f'task3_results/Episode{episode}_{graph}.png')
    #     plt.close()

    # Task 4, 5
    temp = set()
    for i, j in zip(connections.items(), interactions.items()):
        temp.add((i[0], i[1], j[1], get_value(i[0], data["nodes"])))
    temp = sorted(temp, key=lambda x: x[1])[::-1]

    # Uncomment block for visualizations
    plt.figure(figsize=(25, 10))
    plt.title(f'Episode-{episode} {feature}')
    plt.plot(
        list(zip(*temp))[0], list(zip(*temp))[1],
        list(zip(*temp))[0], list(zip(*temp))[2],
        list(zip(*temp))[0], list(zip(*temp))[3])
    plt.xticks(list(zip(*temp))[0][::1], rotation='vertical')
    plt.savefig(f'task4_results/images/Episode_{episode}_{feature}.png')
    plt.close()

    return (homophily, classification), (betweenness, degree_centrality), (
        cliquishness, path_length, randomness[1]), (connections, interactions)
from collections import defaultdict

from networkx import shortest_path_length


def strategy_b(contexts):
    """
    A group is a tuple with the following contents
    (set((src-activity, dest-activity, distance)), set(contexts))
    """
    group_collector = defaultdict(set)
    for name, context_graph, nodes, filtered_nodes in contexts:
        print(f"Adding to groups for {name}")
        # print(f"Unfiltered Nodes = {filtered_nodes}")
        # print("\n")
        # print(set([label for label in nodes_in_shortest_path(context_graph) if heuristic_filter(label)]))
        # print("\n")
        # plot.figure()
        # draw_spring(context_graph, with_labels=True)
        # plot.show()
        for source, target_dict in shortest_path_length(context_graph):
            if source not in filtered_nodes:
                continue
            for destination, length in target_dict.items():
                if destination not in filtered_nodes:
                    continue
                # print(f"{name}: ({source}, {destination}, {length})")
                if length == 0 or length > 8:
                    continue
                group_edge = frozenset([(source, destination, length)])
                group_collector[group_edge].add(name)
    print()
    initial_groups = set()
    for left, right in group_collector.items():
        initial_groups.add((left, frozenset(right)))
    # for i, group in enumerate(initial_groups):
    #     print(f"Group #{i}")
    #     print(f"Contexts = {group[1]}")
    #     print(f"Edges = {group[0]}")
    #     print()
    all_groups = set()
    print("Processing groups...")
    for g1 in initial_groups:
        new_group = g1
        new_score = strategy_b_score(new_group)
        for g2 in initial_groups:
            if g1 == g2:
                continue
            if strategy_b_can_merge(new_group, g2):
                potential_group = strategy_b_merge(new_group, g2)
                potential_score = strategy_b_score(potential_group)
                if potential_score > new_score:
                    new_group = potential_group
                    new_score = potential_score
        all_groups.add(new_group)
    print("Done")
    print()
    print("Scoring groups")
    scored_groups = []
    for group in all_groups:
        scored_groups.append((group, strategy_b_score(group)))
    print("Sorting Groups")
    sorted_groups = sorted(scored_groups, key=lambda a: a[1], reverse=True)
    found = set()
    for i, (group, score) in enumerate(sorted_groups):
        contexts = group[1]
        activities = set(strategy_b_activities_in_group(group))
        if len(contexts) == 1:
            continue
        found_record = (frozenset(contexts), frozenset(activities))
        if found_record in found:
            continue
        found.add(found_record)
        print(f"Group #{i} - Score = {score}")
        print(f"Contexts = {contexts}")
        print(f"Activities = {activities}")
        print()
    print(len(found))
    print(len(initial_groups))
from networkx import shortest_path_length


def nodes_in_shortest_path(graph):
    for source, target_dict in shortest_path_length(graph):
        for destination, length in target_dict.items():
            yield source
            yield destination
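
# Every endpoint of some finite shortest path is yielded (with repeats), so on
# a connected graph this is just the node set (illustration, not from source):
import networkx as nx
print(set(nodes_in_shortest_path(nx.path_graph(3))))  # {0, 1, 2}
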
def bond_distance(self, a1, a2):
    return shortest_path_length(self.graph_representation, a1, a2)
def has_langs(self, langs):
    return shortest_path_length(self.seg_map.get_lang_graph(),
                                langs[0], langs[1]) == 1
import string
from collections import defaultdict, namedtuple

import networkx as nx
from networkx import shortest_path_length


def solve(inp=input_data):  # input_data is expected to hold the raw puzzle text
    grid = defaultdict(int)
    grid.update({complex(x, y): v
                 for y, row in enumerate(inp.splitlines())
                 for x, v in enumerate(row)})
    w, h = len(inp.splitlines()[0]), len(inp.splitlines())
    G = nx.Graph()
    G.add_nodes_from(k for k, v in grid.items() if v == ".")
    portals_outer = defaultdict(int)
    portals_inner = defaultdict(int)
    portals = defaultdict(int)
    for n, v in grid.items():
        if v != ".":
            continue
        for i in [n - 1, n + 1, n - 1j, n + 1j]:
            if i in grid:
                if grid[i] == ".":
                    G.add_edge(n, i)
                elif grid[i] in string.ascii_uppercase:
                    second = [k for k in [i - 1, i + 1, i - 1j, i + 1j]
                              if k in grid
                              and grid[k] in string.ascii_uppercase][0]
                    portal = tuple(sorted([grid[i], grid[second]]))
                    if portal == tuple(sorted("AA")):
                        start = n
                    elif portal == tuple(sorted("ZZ")):
                        end = n
                    else:
                        portals[i] = portal
                        if (i.real < 2 or i.real > w - 3
                                or i.imag < 2 or i.imag > h - 3):
                            portals_outer[portal] = (i, n)
                        else:
                            portals_inner[portal] = (i, n)
    G.add_edges_from((c[1], portals_outer[p][1])
                     for p, c in portals_inner.items())
    portal_cs_inner = list(zip(*portals_inner.values()))[0]
    portal_cs_outer = list(zip(*portals_outer.values()))[0]
    tiles = [k for k, i in grid.items() if i == "."]
    Node = namedtuple("Node", ["pos", "last", "level"])
    nodes = [Node(start,
                  [start + 1j**i for i in range(4)
                   if grid[start + 1j**i] in string.ascii_uppercase][0],
                  0)]
    counter = 0
    while True:
        old_nodes = nodes.copy()
        nodes.clear()
        print(counter)
        for n in old_nodes:
            if n.pos == end and n.level == 0:
                return shortest_path_length(G, start, end), counter
            if n.level > 25:
                # credit to some anon for this speed-up hint.
                # runs unusably slow without this
                continue
            adjacent = [n.pos + 1j**i for i in range(4)]
            adjacent.remove(n.last)
            for i in adjacent:
                if i in tiles:
                    nodes.append(Node(i, n.pos, n.level))
                elif i in portal_cs_outer and n.level != 0:
                    nodes.append(Node(portals_inner[portals[i]][1],
                                      portals_inner[portals[i]][0],
                                      n.level - 1))
                elif i in portal_cs_inner:
                    nodes.append(Node(portals_outer[portals[i]][1],
                                      portals_outer[portals[i]][0],
                                      n.level + 1))
        counter += 1
import itertools

import networkx as nx
from networkx import shortest_path_length


def connect_graph(G):
    """
    Add additional edges to the provided graph until all components are
    connected. Performed iteratively, connecting each subsequent graph to
    the largest.
    """
    # connected_component_subgraphs() was removed in NetworkX 2.4; build the
    # component subgraphs explicitly instead.
    graphs = [G.subgraph(c).copy() for c in nx.connected_components(G)]
    if len(graphs) > 1:
        combinations = itertools.combinations(graphs, 2)
        shortest = {}
        links = []
        for g0, g1 in combinations:
            # For each node in the graph, compare vs. all nodes in the
            # giant-list. The difference x,y must be 1:1 (diagonal), then
            # find the shortest + link.
            g_ix = (g0, g1)
            for n1 in g1.nodes:
                for n0 in g0.nodes:
                    d = euclidean_distance_45deg(n1, n0)
                    if d:
                        link = d, n0, n1
                        # Find global short links
                        if (shortest.get(g_ix) is None
                                or d < shortest.get(g_ix)[0]):
                            shortest[g_ix] = link
                        if d < SHORT_LINK_LEN:
                            # Sub-graph link which is short, can be used to shortcut.
                            links.append(link)
        # We have a list of the shortest graph-graph connections. We want to
        # find the shortest connections necessary to connect *all* graphs.
        # If we connect A-B and B-C then A-B-C are all connected. Start from
        # the largest graph and the shortest links and work backwards.
        shortlinks = sorted(shortest.items(), key=lambda x: x[1][0])  # Sort by link data x[1], first part [0] = d
        connected = [max(graphs, key=len)]  # Our current connected graphs
        while len(connected) < len(graphs):
            shortest = None
            # Find the shortest link for any graph in connected to any graph
            # that isn't.
            for (g0, g1), link in shortlinks:
                d, n0, n1 = link
                if ((g0 in connected and g1 not in connected)
                        and (shortest is None or d < shortest[1][0])):
                    shortest = g1, link
                if ((g1 in connected and g0 not in connected)
                        and (shortest is None or d < shortest[1][0])):
                    shortest = g0, link
            if shortest:
                g, (d, n0, n1) = shortest
                G.add_edge(n0, n1, weight=d)
                connected.append(g)
        # Use detected short links to minimise the path lengths
        for link in sorted(links, key=lambda x: x[0]):
            d, n0, n1 = link
            if shortest_path_length(G, n0, n1, weight='weight') >= SHORT_LINK_THRESHOLD:
                G.add_edge(n0, n1, weight=d)
    return G
def _build_core_nodes(self):
    """
    It builds the list of core nodes
    """
    def get_all_nrot_neighbors(self, atom_id, visited_neighbors):
        """
        A recursive function that hierarchically visits all atom neighbors
        in the graph.

        Parameters
        ----------
        atom_id : int
            It is both the id of the graph's node and the index of the
            corresponding atom
        visited_neighbors : set[int]
            The ids of the nodes that have already been visited

        Returns
        -------
        visited_neighbors : set[int]
            The updated set that contains the ids of the nodes that have
            already been visited
        """
        if atom_id in visited_neighbors:
            return visited_neighbors
        visited_neighbors.add(atom_id)
        nrot_neighbors = self.nodes[atom_id]['nrot_neighbors']
        for nrot_neighbor in nrot_neighbors:
            visited_neighbors = get_all_nrot_neighbors(
                self, nrot_neighbor, visited_neighbors)
        return visited_neighbors

    from networkx.algorithms.shortest_paths.generic import \
        shortest_path_length
    from networkx.algorithms.distance_measures import eccentricity

    # Calculate graph distances according to weight values
    weighted_distances = dict(shortest_path_length(self, weight="weight"))

    # Calculate eccentricities using weighted distances
    eccentricities = eccentricity(self, sp=weighted_distances)

    # Group nodes by eccentricity
    nodes_by_eccentricities = defaultdict(list)
    for node, ecc in eccentricities.items():
        nodes_by_eccentricities[ecc].append(node)

    # Core atoms must have the minimum eccentricity
    _, centered_nodes = sorted(nodes_by_eccentricities.items())[0]

    # Construct nrot groups with centered nodes
    # already_visited = set()
    centered_node_groups = list()
    for node in centered_nodes:
        # if node in already_visited:
        #     continue
        centered_node_groups.append(
            get_all_nrot_neighbors(self, node, set()))

    # In case of more than one group, core will be the largest
    core_nodes = sorted(centered_node_groups, key=len, reverse=True)[0]

    # To do: think on what to do with the code below
    """
    # Core can hold a maximum of one rotatable bond <- Not true!
    # Get all core's neighbors
    neighbor_candidates = set()
    for node in core_nodes:
        neighbors = self.neighbors(node)
        for neighbor in neighbors:
            if neighbor not in core_nodes:
                neighbor_candidates.add(neighbor)

    # If any core's neighbor, get the deepest one and include it to
    # the core
    if len(neighbor_candidates) > 0:
        branch_graph = deepcopy(self)
        for node in core_nodes:
            branch_graph.remove_node(node)
        branch_groups = list(nx.connected_components(branch_graph))
        rot_bonds_per_group = self._get_rot_bonds_per_group(branch_groups)
        best_group = sorted(rot_bonds_per_group, key=len, reverse=True)[0]
        for neighbor in neighbor_candidates:
            if any([neighbor in rot_bond for rot_bond in best_group]):
                deepest_neighbor = neighbor
                break
        else:
            raise Exception('Inconsistent graph')
        deepest_neighbors = get_all_nrot_neighbors(self, deepest_neighbor,
                                                   set())
        for neighbor in deepest_neighbors:
            core_nodes.add(neighbor)
    """

    self._core_nodes = core_nodes