def modifiedDijkstra(G: nx.Graph, sourceNode, distances):
    """
    Run a Dijkstra-style search from ``sourceNode`` and record, for every
    node that gets relaxed, its tentative cost and the path that led to it.

    Based on: https://gist.github.com/kachayev/5990802

    :param G: Graph to search. An edge whose attribute dict is empty counts
        as cost 1; otherwise its ``'weight'`` attribute is used.
    :param sourceNode: Node the search starts from.
    :param distances: Mapping indexed by node. A node that already has a
        tentative cost is relaxed again only when the new cost is lower than
        ``distances[node]``.
    :return: ``(mins, paths)``. ``mins`` maps each reached node to the last
        cost recorded for it. ``paths`` maps each reached node (except the
        source) to the list of nodes leading to it, nearest predecessor
        first and ``sourceNode`` last.
    """
    seen = set()
    frontier = [(0, sourceNode, [])]  # (cost, node, path to node)
    mins = {sourceNode: 0}
    paths = {}
    while frontier:
        cost, v1, path = heappop(frontier)
        if v1 in seen:
            # Stale heap entry: v1 was already settled with a lower cost.
            continue
        seen.add(v1)
        path = [v1] + path
        for v2 in G.neighbors(v1):
            if v2 in seen:
                continue
            edge_data = G.get_edge_data(v1, v2)
            step = 1 if edge_data == {} else edge_data['weight']
            candidate = cost + step
            # First visit always records; later visits are gated by the
            # caller-supplied ``distances`` bound.
            if mins.get(v2) is None or candidate < distances[v2]:
                mins[v2] = candidate
                paths[v2] = path
                heappush(frontier, (candidate, v2, path))
    return mins, paths
def merge_mwvc_constraints(agt1: str, G1: nx.Graph, agt2: str,
                           G2: nx.Graph) -> (nx.Graph, nx.Graph):
    """
    Merge the weights associated to the nodes of type 'dec_var' that have the
    same 'name' in both gadget graphs.

    Each shared node is kept by one agent only: agent 1 when
    ``agt1 <= agt2``, agent 2 otherwise. The keeper's node ``'weight'`` is
    increased by the other graph's weight, every edge incident to the node in
    the other graph is copied over with its ``'w'`` attribute, and the node is
    then removed from the other graph. Both graphs are modified in place.

    :param agt1: The name of agent 1
    :param G1: The gadget graph associated to agent 1
    :param agt2: The name of agent 2
    :param G2: The gadget graph associated to agent 2
    :return: The pairs of gadget (gadget1 and gadget2) associated to agents 1
        and 2, resp. processed after the merging operation.
    """
    # Membership test instead of a nested scan over both node sets.
    shared_dec_vars = [
        n for n in G1.nodes()
        if G2.has_node(n) and G1.nodes[n]['type'] == 'dec_var'
    ]
    # The agent names do not change, so the keeper is decided once.
    if agt1 <= agt2:
        keeper, donor = G1, G2
    else:
        keeper, donor = G2, G1
    for u in shared_dec_vars:
        keeper.nodes[u]['weight'] += donor.nodes[u]['weight']
        for e in donor.edges(u):
            keeper.add_edge(e[0], e[1], w=donor.get_edge_data(*e)['w'])
        donor.remove_node(u)
    return G1, G2
def dependent_weight(graph: nx.Graph, prev_edge: Optional[Edge],
                     cur_edge: Edge) -> float:
    """
    Edge based weight function, which returns a weight value for the given
    edge.

    Without a previous edge the current edge's ``'weight'`` is returned
    unchanged. Otherwise the result is the current edge's weight divided by
    the previous edge's weight.
    """
    def weight_of(edge):
        return graph.get_edge_data(*edge)['weight']

    if prev_edge is not None:
        denominator = weight_of(prev_edge)
        return weight_of(cur_edge) / denominator
    return weight_of(cur_edge)
def getPathCost(S: nx.Graph, path: list):
    """
    Return the total ``'weight'`` of the edges joining consecutive nodes of
    ``path`` in ``S``. A path with fewer than two nodes costs 0.
    """
    return sum(
        S.get_edge_data(tail, head)['weight']
        for tail, head in zip(path, path[1:])
    )
def create_src_tokengraph(dataset, vocab, G: nx.Graph = None,
                          window_size: int = 2):
    """ Given a corpus create a token Graph.

    Append to graph G if provided.

    Every token id in ``vocab['str2idx_map']`` becomes a node with attributes
    ``node_txt``, ``s_co`` (taken from ``vocab['freqs']``) and ``t_co=0``.
    Tokens that co-occur inside a sliding window are joined by an edge whose
    ``s_pair`` attribute accumulates the co-occurrence counts; ``t_pair`` is
    set to 0 on every edge that is written.

    :param window_size: Sliding window size. ``None``, or a value larger than
        a text, means "use the whole text" for that text only.
    :param G: Existing graph to extend; a new ``nx.Graph`` is created if None.
    :param dataset: Iterable of token sequences.
    :param vocab: Mapping with keys ``'str2idx_map'`` and ``'freqs'``.
    :return: The graph that was created or extended.
    """
    ## Create graph if not exist:
    if G is None:
        G = nx.Graph()

    str2idx = vocab['str2idx_map']

    ## Add token's id as node to the graph
    for token_txt, token_id in str2idx.items():
        G.add_node(token_id, node_txt=token_txt,
                   s_co=vocab['freqs'][token_txt], t_co=0)

    ## Add edges based on token co-occurrence within a sliding window:
    for txt_toks in dataset:
        txt_len = len(txt_toks)
        # Clamp the window per text. Overwriting ``window_size`` itself would
        # let one short text shrink the window for every text after it.
        if window_size is None or window_size > txt_len:
            win = txt_len
        else:
            win = window_size

        for begin in range(txt_len - win + 1):
            ## Co-occurrence in tweet:
            occurrences = find_cooccurrences(txt_toks[begin:begin + win])

            ## Add edges with attribute:
            for token_pair, wt in occurrences.items():
                node1 = str2idx[token_pair[0]]
                node2 = str2idx[token_pair[1]]
                if G.has_edge(node1, node2):
                    wt = G.get_edge_data(node1, node2)['s_pair'] + wt
                G.add_edge(node1, node2, s_pair=wt, t_pair=0)

    return G
def L_P_WCN(network: nx.Graph, num_add):
    """
    Score every unordered node pair by weighted common neighbours and pick
    ``num_add`` pairs with probability proportional to that score.

    The score of a pair ``(a, b)`` is the sum, over each common neighbour
    ``z``, of ``weight(a, z) + weight(z, b)``. A pair whose score cannot be
    computed (for example an edge without a ``'weight'`` attribute) is
    skipped.

    :param network: Graph whose node pairs are scored. It is not modified.
    :param num_add: Number of pairs to select; passed to
        ``calculate_param.prob_select``.
    :return: Whatever ``calculate_param.prob_select`` returns for the scored
        pairs ``(node_a, node_b, score)`` and their probabilities.
    """
    nodes = list(network.nodes())
    nodes_pair = []    # (node_a, node_b, score) for every scored pair
    total_score = 0.0  # sum of all pair scores, used for normalisation

    # calculate the score of each pair of nodes
    for i, elei in enumerate(nodes):
        for elej in nodes[i + 1:]:
            score = 0.0
            try:
                for z in nx.common_neighbors(network, elei, elej):
                    w_elei_z = network.get_edge_data(elei, z).get('weight')
                    w_z_elej = network.get_edge_data(z, elej).get('weight')
                    score += w_elei_z + w_z_elej
            except (TypeError, AttributeError, nx.NetworkXError):
                # Missing weight or unusable pair: skip it, but let
                # KeyboardInterrupt and unrelated errors propagate.
                continue
            total_score += score
            nodes_pair.append((elei, elej, score))

    # calculate the probabilities of edges to be added
    if total_score:
        probability_add = [score / total_score for _, _, score in nodes_pair]
    else:
        # No pair has a common neighbour: use a uniform distribution rather
        # than dividing by zero.
        probability_add = [1.0 / len(nodes_pair) for _ in nodes_pair]

    # select edges to be added according to probabilities
    edges_add = calculate_param.prob_select(nodes_pair, probability_add,
                                            num_add)
    return edges_add
def LTM(graph: networkx.Graph, patients_0: List, iterations: int) -> Set:
    """
    Simulate a threshold contagion for ``iterations`` rounds.

    In every round a healthy node ``v`` becomes infected when
    ``CONTAGION * (sum of 'w' over edges to infected neighbours)`` is at
    least ``1 + concern(v)``. All infections of a round are decided against
    the state at the start of that round and applied together. Afterwards
    the ``'concern'`` attribute of every still-healthy node is set to the
    fraction of its neighbours that are infected.

    :param graph: Graph with edge attribute ``'w'``. Its node attribute
        ``'concern'`` is reset to 0 and then updated in place.
    :param patients_0: Initially infected nodes.
    :param iterations: Number of rounds to simulate.
    :return: Set of all infected nodes after the last round.
    """
    total_infected = set(patients_0)
    not_infected = set(graph.nodes).difference(total_infected)

    for v in graph.nodes:
        graph.nodes[v]['concern'] = 0

    for _ in range(iterations):
        # Step of New Infected. Collect first and apply afterwards: removing
        # from ``not_infected`` while iterating it raises RuntimeError.
        newly_infected = set()
        for v in not_infected:
            edges_w_sum = sum(
                graph.get_edge_data(v, neighbor)['w']
                for neighbor in graph[v]
                if neighbor in total_infected
            )
            if CONTAGION * edges_w_sum >= 1 + graph.nodes[v]['concern']:
                newly_infected.add(v)
        total_infected |= newly_infected
        not_infected -= newly_infected

        # Update concern of the nodes that are still healthy.
        for v in not_infected:
            degree = graph.degree[v]
            sick_neighbors_count = sum(
                1 for neighbor in graph[v] if neighbor in total_infected
            )
            # An isolated node has no infected neighbours to worry about.
            graph.nodes[v]['concern'] = (
                sick_neighbors_count / degree if degree else 0
            )

    return total_infected
def cal_s(g: nx.Graph):
    """
    Compute ``1 / (s * L)`` for the graph.

    ``s`` is the sum, over all nodes, of the mean inverse edge ``'weight'``
    of that node's incident edges. ``L`` is the value returned by
    ``nx.average_shortest_path_length(g)``.

    :param g: graphs generated
    :return: the efficiency value ``1 / (s * L)``
    """
    total = 0
    for node in g.nodes:
        neighbours = list(g.neighbors(node))
        inverse_weight_sum = 0
        for other in neighbours:
            inverse_weight_sum += 1 / g.get_edge_data(node, other)['weight']
        # mean inverse weight of the edges around this node
        total += inverse_weight_sum * (1 / len(neighbours))

    mean_path_length = nx.average_shortest_path_length(g)
    return 1 / (total * mean_path_length)
def obj_fun(solution: List, dest_mat, route: nx.Graph, ticket_cost,
            fuel_cost, start_cost):
    """
    Objective function: profit of a set of bus lines.

    Profit is ticket income minus fuel cost minus the start-up cost of every
    bus. Each (source, destination) demand in ``dest_mat`` is served at most
    once: after a bus collects it, the demand is zeroed in a private copy so
    later buses cannot count it again.

    Side effects: increments the module-level counter ``num_of_obj`` and
    appends the result to the module-level list ``iter_stats``.

    :param solution: List of bus lines, each a list of 1-based stop ids in
        travel order.
    :param dest_mat: Demand matrix indexed ``[source - 1][destination - 1]``.
        It is not modified.
    :param route: Graph whose edge attribute ``'weight'`` is the length of
        the connection between two consecutive stops.
    :param ticket_cost: Income per passenger.
    :param fuel_cost: Cost per unit of route weight.
    :param start_cost: Fixed cost of starting one bus.
    :return: The profit of ``solution``.
    """
    global num_of_obj, iter_stats

    sol_cost = 0
    num_of_passengers = 0
    route_weight = 0
    dest_mat_temp = deepcopy(dest_mat)

    for bus in solution:
        sol_cost -= start_cost  # cost of starting the bus
        for src_idx in range(len(bus) - 1):
            source = bus[src_idx]
            # sum of the weights of the edges forming the route
            route_weight += route.get_edge_data(
                source, bus[src_idx + 1])['weight']
            # All source -> destination pairs: the destinations are the
            # stops AFTER the source. Indexing from the start of the line
            # produced (stop, stop) pairs and backward pairs.
            for destination in bus[src_idx + 1:]:
                num_of_passengers += dest_mat_temp[source - 1][destination - 1]
                dest_mat_temp[source - 1][destination - 1] = 0

    sol_cost += num_of_passengers * ticket_cost  # ticket income
    sol_cost -= route_weight * fuel_cost

    num_of_obj += 1
    iter_stats.append(sol_cost)
    return sol_cost
def remove_point_triangulation(affected_triangles: List[Triangle],
                               p: Point) -> Polygon:
    """
    Removes a point from affected triangles, return the resulting polygon
    that fills the gap.

    :param affected_triangles: list of the triangles containing the point to
        be removed
    :param p: point to be removed
    :return: polygon created by merging the affected triangles
    """
    # Map every remaining corner to the indices of the triangles that use it.
    point2triangles = defaultdict(set)
    for tri_index, tri in enumerate(affected_triangles):
        outer = set(tri.pts)
        outer.remove(p)
        first, second = outer
        point2triangles[first].add(tri_index)
        point2triangles[second].add(tri_index)

    # Two triangles sharing a corner are adjacent; remember that corner on
    # the edge joining them.
    graph = Graph()
    for shared_point, (tri_a, tri_b) in point2triangles.items():
        graph.add_edge(tri_a, tri_b, point=shared_point)

    # Walk around the triangles to get the new outer boundary.
    # TODO: Remember to make this work. DFS visits all nodes not all edges.
    # I think find_cycle works.
    new_boundary = []
    for tri_a, tri_b in nx.find_cycle(graph):
        new_boundary.append(graph.get_edge_data(tri_a, tri_b)["point"])
    return Polygon(new_boundary)
def calculate_max_cut_cost(graph: nx.Graph) -> float:
    """Brute force MaxCut calculation.

    Enumerates every assignment of the nodes to two sides and returns the
    largest total weight of edges whose endpoints are on different sides.
    Runtime grows as 2**(number of nodes).

    Args:
        graph: Weighted graph; every edge needs a ``'weight'`` attribute.

    Returns:
        max_cost: Maximum cost (``0.0`` for a graph without edges).
    """
    node_to_idx = {node: idx for idx, node in enumerate(graph.nodes)}
    # Resolve endpoints and weights once instead of once per assignment.
    weighted_edges = [
        (node_to_idx[edge[0]], node_to_idx[edge[1]],
         graph.get_edge_data(*edge)['weight'])
        for edge in graph.edges
    ]

    max_cost = 0.0
    for mask in product((0, 1), repeat=len(node_to_idx)):
        cost = 0.0
        for left, right, weight in weighted_edges:
            cost += weight * (mask[left] != mask[right])
        if cost > max_cost:
            max_cost = cost
    return max_cost
def _travel_length(path: [str], state_map: nx.Graph) -> float: result = 0 start = path[0] for town in path[1:len(path)]: result += state_map.get_edge_data(start, town)['weight'] start = deepcopy(town) return -result
def append_subgraph(self, original_subgraph: nx.Graph,
                    additional_subgraph: nx.Graph) -> nx.Graph:
    """
    Copy every positive edge weight of ``additional_subgraph`` into
    ``original_subgraph``.

    An edge that already exists keeps its other attributes and only has its
    ``'weight'`` replaced; an edge that does not exist yet is created.
    ``self._received_new_data`` is set to True as soon as one weight has been
    written.

    :param original_subgraph: Graph that is updated in place.
    :param additional_subgraph: Graph providing the new weights; it is not
        modified.
    :return: ``original_subgraph``.
    """
    for u, v, data in additional_subgraph.edges(data=True):
        weight = data['weight']
        if weight > 0:
            # add_edge updates an existing edge or creates a missing one.
            # Passing the whole (u, v, data) triple to get_edge_data handed
            # ``data`` in as the ``default`` argument, so a missing edge
            # silently wrote the weight back into the source graph instead.
            original_subgraph.add_edge(u, v, weight=weight)
            self._received_new_data = True
    return original_subgraph
def perEdgeMap(self, gamma: list, p: int, graph: nx.Graph, barrier=False, initial_Hadamard=False):
    """
    Build one circuit per layer and, during the first layer, accumulate the
    full Hamiltonian expression.

    Resets ``self.full_hamiltonian`` to 0 and ``self.quantum_circuit`` to an
    empty list, recomputes ``self.qubit_map``, then appends ``p`` circuits to
    ``self.quantum_circuit``.

    :param gamma: One parameter per layer; ``len(gamma)`` must equal ``p``.
    :param p: Number of layers (circuits) to build.
    :param graph: Graph whose edges (two-variable case) or nodes (otherwise)
        drive the substitution of symbols in ``self.Hamil_exp``.
    :param barrier: When true, ``cir.barrier()`` is called on every layer.
    :param initial_Hadamard: When true, ``cir.h`` is applied to every qubit
        of the first layer only.
    :raises AssertionError: If ``p != len(gamma)``.

    NOTE(review): the block nesting below was reconstructed from a source
    without indentation -- confirm against the original file.
    """
    assert p == len(gamma)
    self.__add_defaultWeights(graph)
    self.full_hamiltonian = 0
    self.quantum_circuit = []
    self.qubit_map = cir_build.circuit_builder.map_qubits(
        self.variables, 0, graph)
    no_qubits = len(self.qubit_map.values())
    for i in range(p):
        cir = QuantumCircuit(no_qubits)
        # Hadamard on every qubit, first layer only.
        if i == 0 and initial_Hadamard == True:
            for j in range(no_qubits):
                cir.h(j)
        if len(self.variables) == 2:
            # Two-variable expression: one term and one sub-circuit per edge.
            for e in graph.edges:
                # The Hamiltonian is accumulated once, on the first layer.
                if i == 0:
                    temp = self.Hamil_exp
                    l = 0
                    for sym in self.Hamil_exp.free_symbols:
                        if not (sym == I):
                            # Substitute the symbols alternately with the
                            # Z symbols of the edge's two endpoints.
                            temp = temp.subs(sym, symbols('Z_{}'.format(e[l])))
                            l = (l + 1) % 2
                    self.full_hamiltonian += graph.get_edge_data(
                        e[0], e[1])["weight"] * temp
                cir += cir_build.circuit_builder.generate_Zcircuit(
                    self.quanCir_list, gamma[i], self.qubit_map, e)
        else:
            # Otherwise: one (unweighted) term per node on the first layer.
            if i == 0:
                for v in graph.nodes:
                    temp = self.Hamil_exp
                    for sym in self.Hamil_exp.free_symbols:
                        if not (sym == I):
                            temp = temp.subs(sym, symbols('Z_{}'.format(v)))
                    self.full_hamiltonian += temp
            # presumably edge=(-1, -1) tells the builder there is no specific
            # edge -- TODO confirm against circuit_builder.generate_Zcircuit
            cir += cir_build.circuit_builder.generate_Zcircuit(
                self.quanCir_list, gamma[i], self.qubit_map, edge=(-1, -1))
        if barrier == True:
            cir.barrier()
        self.quantum_circuit.append(cir)
def a_star(self, graph: nx.Graph, start_node, goal_node):
    """
    Greedy heuristic walk from ``start_node`` towards ``goal_node`` that can
    back out of dead ends.

    At every step the neighbour with the lowest
    ``heuristic + edge weight + cost so far`` is chosen. Neighbours listed in
    ``self.closed`` are skipped; if that leaves no candidate, the earliest
    closed neighbour is reopened. A node with a single neighbour is closed as
    soon as it is expanded. Every expanded node is appended to
    ``self.visited`` and ``self.endSearch`` is set once the goal is reached.

    Nodes are compared with ``==``: equal node values (for example two equal
    strings) are not guaranteed to be the same object.

    :param graph: Graph whose edges carry a ``'weight'`` attribute.
    :param start_node: Node to start from.
    :param goal_node: Node to reach.
    :raises Exception: "No Path to Destination Exists!" when no neighbour
        can be expanded.
    """
    if start_node == goal_node:
        self.endSearch = True
        self.visited.append(start_node)
        return

    queue = [start_node]
    cost_so_far = {start_node: 0}
    while queue and not self.endSearch:
        fn = {}
        s = queue.pop(0)
        self.visited.append(s)
        print("Drive to", s, " Estate", end="\n")

        neighbours = list(graph.neighbors(s))
        first_closed_index = -1  # lowest self.closed index among neighbours
        for i in neighbours:
            if i in self.closed:
                closed_index = self.closed.index(i)
                if first_closed_index == -1 or closed_index < first_closed_index:
                    first_closed_index = closed_index
                continue
            if len(neighbours) == 1:
                # Dead end: never come back to s once we have left it.
                self.closed.append(s)
            current_weight = graph.get_edge_data(s, i).get('weight')
            fn[i] = float(self.heuristics.get(i)) + float(
                current_weight) + cost_so_far.get(s)

        if not fn and first_closed_index > -1:
            # Every neighbour is closed: reopen the earliest closed one.
            key = self.closed.pop(first_closed_index)
            current_weight = graph.get_edge_data(s, key).get('weight')
            fn[key] = float(self.heuristics.get(key)) + float(
                current_weight) + cost_so_far.get(s)

        if not fn:
            raise Exception("No Path to Destination Exists!")

        min_key = self.get_min_key(fn)
        if min_key in self.visited:
            # Walking back onto a visited node: close the nodes recorded
            # around its first visit.
            index = self.visited.index(min_key)
            self.closed.append(self.visited[index + 1])
            self.closed.append(self.visited[index - 1])

        cost_so_far[min_key] = float(cost_so_far.get(s)) + float(
            graph.get_edge_data(s, min_key).get('weight'))
        queue.append(min_key)

        if min_key == goal_node:
            self.endSearch = True
            self.visited.append(min_key)
            break
def graph_weights_histogram(graph: nx.Graph):
    """
    Display a 50-bin histogram of the ``'weight'`` attribute of every edge
    in ``graph``.
    """
    ax = plt.subplot()
    rows = [
        (src, dst, graph.get_edge_data(src, dst)['weight'])
        for src, dst in graph.edges
    ]
    frame = pd.DataFrame(rows, columns=['src', 'dst', 'weight'])
    frame.weight.hist(bins=50)
    ax.set_title('edge weight histogram')
    ax.set_xlabel('weight')
    plt.show()
def a_star(self, graph: nx.Graph, start_node, goal_node):
    """
    Greedy heuristic walk from ``start_node`` towards ``goal_node``.

    At every step the neighbour with the lowest
    ``heuristic + edge weight + cost so far`` is chosen and becomes the next
    node to expand. Every expanded node is appended to ``self.visited``;
    ``self.endSearch`` is set when the goal is chosen.

    The goal test uses ``==``: equal node values (for example two equal
    strings) are not guaranteed to be the same object.

    :param graph: Graph whose edges carry a ``'weight'`` attribute.
    :param start_node: Node to start from.
    :param goal_node: Node to reach.
    """
    queue = [start_node]
    cost_so_far = 0
    while queue and not self.endSearch:
        s = queue.pop(0)
        self.visited.append(s)
        print("Drive to", s, " Estate", end="\n")

        # f(n) = h(n) + weight(s, n) + g(s) for every neighbour n of s
        fn = {
            i: float(self.heuristics.get(i))
            + float(graph.get_edge_data(s, i).get('weight'))
            + cost_so_far
            for i in graph[s]
        }
        min_key = self.get_min_key(fn)
        cost_so_far += float(graph.get_edge_data(s, min_key).get('weight'))
        queue.append(min_key)

        print("Goal Node: ", goal_node, "\nCurrent Node: ", min_key)
        print('cost so far', cost_so_far)

        if min_key == goal_node:
            self.endSearch = True
            self.visited.append(min_key)
            break
def getMaximumWeightedEdge(S: nx.Graph, path: list):
    """
    Find the heaviest edge between consecutive nodes of ``path``.

    :param S: Graph whose edges carry a ``'weight'`` attribute.
    :param path: Node sequence with at least two nodes.
    :return: ``(weight, u, v)`` for the heaviest edge ``(u, v)``; on ties the
        edge that comes first along the path is returned.
    :raises ValueError: If ``path`` has fewer than two nodes, i.e. no edge.
    """
    if len(path) < 2:
        raise ValueError("path must contain at least two nodes")

    best_weight = float('-inf')
    best_edge = None
    for tail, head in zip(path, path[1:]):
        edge_weight = S.get_edge_data(tail, head)['weight']
        # Accept the first edge unconditionally so a result always exists.
        if best_edge is None or edge_weight > best_weight:
            best_weight = edge_weight
            best_edge = (tail, head)
    return best_weight, best_edge[0], best_edge[1]
def a_star(self, graph: nx.Graph, start_node, goal_node):
    """
    Greedy lowest-cost walk from ``start_node`` towards ``goal_node``.

    At every step the neighbour with the lowest ``edge weight + cost so far``
    is chosen (no heuristic term is used here). Every expanded node is
    appended to ``self.visited``; ``self.endSearch`` is set when the goal is
    chosen.

    The goal test uses ``==``: equal node values (for example two equal
    strings) are not guaranteed to be the same object.

    :param graph: Graph whose edges carry a ``'weight'`` attribute.
    :param start_node: Node to start from.
    :param goal_node: Node to reach.
    :raises Exception: "No Path to Destination Exists!" when the expanded
        node has no neighbours.
    """
    queue = [start_node]
    cost_so_far = {start_node: 0}
    while queue and not self.endSearch:
        s = queue.pop(0)
        self.visited.append(s)
        print("Drive to", s, " Estate", end="\n")

        fn = {}
        for i in graph.neighbors(s):
            current_weight = graph.get_edge_data(s, i).get('weight')
            fn[i] = float(current_weight) + cost_so_far.get(s)

        if not fn:
            raise Exception("No Path to Destination Exists!")

        min_key = self.get_min_key(fn)
        cost_so_far[min_key] = float(cost_so_far.get(s)) + float(
            graph.get_edge_data(s, min_key).get('weight'))
        queue.append(min_key)

        if min_key == goal_node:
            self.endSearch = True
            self.visited.append(min_key)
            break
def uc_search(graph: nx.Graph, start: str, goal: str):
    """
    Uniform-cost (uninformed) search for the cheapest route between two
    cities.

    :param graph: Graph whose edges carry a ``'distance'`` attribute. As a
        side effect the node attribute ``'predecessor'`` is written for every
        node that gets relaxed; the returned route does not depend on it.
    :param start: city name
    :param goal: city name
    :return: ``(cost, route)``. ``route`` lists the legs in travel order, e.g.
        ``(455, [{'from': 'Bremen', 'to': 'Dortmund', 'distance': 234},
                 {'from': 'Dortmund', 'to': 'Frankfurt', 'distance': 221}])``.
        ``(float('inf'), [])`` is returned when the goal cannot be reached
        and ``(0, [{'from': start, 'to': goal, 'distance': 0}])`` when
        ``start == goal``.
    """
    if start == goal:
        return 0, [{'from': start, 'to': goal, 'distance': 0}]

    best_cost = float('inf')
    reached = {start: 0}
    # Predecessors are tracked per call. Reading them back from graph node
    # attributes picked up stale links left by earlier searches.
    predecessors = {}
    frontier = queue.PriorityQueue()
    frontier.put((0, start))

    while not frontier.empty():
        parent_cost, parent = frontier.get()
        if parent_cost >= best_cost:
            # Nothing left in the frontier can improve on the known route.
            break
        for child in graph.neighbors(parent):
            cost = parent_cost + graph.get_edge_data(parent, child)['distance']
            if child not in reached or cost < reached[child]:
                reached[child] = cost
                predecessors[child] = parent
                # Kept for callers that inspect this attribute afterwards.
                graph.nodes[child]['predecessor'] = parent
                frontier.put((cost, child))
                if child == goal:
                    best_cost = cost

    # Walk back from the goal. A simple path has at most
    # len(predecessors) legs, which bounds the loop.
    route = []
    node = goal
    steps_left = len(predecessors)
    while node in predecessors and steps_left > 0:
        pred = predecessors[node]
        route.append({
            'from': pred,
            'to': node,
            'distance': graph.get_edge_data(pred, node)['distance'],
        })
        node = pred
        steps_left -= 1
    route.reverse()
    return best_cost, route
def add_pearson_edge_attributes(graph: nx.Graph,
                                video_path: Path,
                                attribute_name: str = "Pearson") -> nx.Graph:
    """adds an attribute to each edge which is the Pearson correlation
    coefficient between the traces of the two pixels (nodes) associated
    with that edge.

    The input graph is left untouched; nodes, edges and their existing
    attributes are copied into a new graph.

    Parameters
    ----------
    graph: nx.Graph
        a graph with nodes like (row, col) and edges connecting them
    video_path: Path
        path to an hdf5 video file, assumed to have a dataset "data"
        nframes x nrow x ncol
    attribute_name: str
        name set on each edge for this calculated value

    Returns
    -------
    new_graph: nx.Graph
        an undirected networkx graph, with attribute added to edges

    """
    new_graph = nx.Graph()
    # copies over node attributes
    new_graph.add_nodes_from(graph.nodes(data=True))

    if graph.number_of_nodes() == 0:
        # Nothing to correlate, and no bounding box to read from the video.
        return new_graph

    # load the section of data that encompasses this graph
    rows, cols = np.array(graph.nodes).T
    with h5py.File(video_path, "r") as f:
        data = f["data"][:,
                         rows.min():(rows.max() + 1),
                         cols.min():(cols.max() + 1)]
    offset = np.array([rows.min(), cols.min()])

    for node1 in graph:
        # Only handle neighbours whose edge is not in the new graph yet, so
        # every edge is computed once.
        neighbors = list(
            set(graph.neighbors(node1)) - set(new_graph.neighbors(node1)))
        if len(neighbors) == 0:
            continue
        nrow, ncol = np.array(node1) - offset
        irows, icols = (np.array(neighbors) - offset).T
        # cdist "correlation" is 1 - Pearson r, hence the 1.0 - ...
        weights = 1.0 - cdist(
            [data[:, nrow, ncol]],
            [data[:, r, c] for r, c in zip(irows, icols)],
            metric="correlation")[0]
        for node2, weight in zip(neighbors, weights):
            # get_edge_data returns the graph's own dict; copy it so the
            # input graph is not modified.
            attr = dict(graph.get_edge_data(node1, node2))
            attr[attribute_name] = weight
            new_graph.add_edge(node1, node2, **attr)

    return new_graph
def dijkstra(graph: nx.Graph, start, goal):
    """
    Find a shortest path from ``start`` to ``goal`` by edge ``'weight'``.

    A node is finalised only when it is popped from the priority queue, and
    its parent is replaced whenever a cheaper route to it is found. Marking
    nodes on first discovery and returning as soon as the goal was first
    seen gave a path that was not necessarily the shortest.

    :param graph: Graph whose edges carry a non-negative ``'weight'``.
    :param start: Node to start from.
    :param goal: Node to reach.
    :return: The reversed output of ``backtrack(parents, start, goal)``, or
        ``None`` when ``goal`` cannot be reached.

    NOTE(review): assumes ``PriorityQue.pop()`` returns the entry with the
    smallest priority as ``(priority, item)`` and that ``backtrack`` walks
    the child -> parent mapping from ``goal`` back to ``start`` -- confirm
    against their definitions.
    """
    frontier = PriorityQue()
    frontier.push(0, start)
    best = {start: 0}  # cheapest known cost per discovered node
    parents = dict()   # child -> parent on the cheapest known route
    settled = set()    # nodes whose cost is final

    while len(frontier):
        sofar, current = frontier.pop()
        if current in settled:
            continue  # stale queue entry
        settled.add(current)
        if current == goal:
            return list(backtrack(parents, start, goal))[::-1]
        for n in graph.neighbors(current):
            if n in settled:
                continue
            candidate = sofar + graph.get_edge_data(current, n)['weight']
            if n not in best or candidate < best[n]:
                best[n] = candidate
                parents[n] = current
                frontier.push(candidate, n)
    return None
def remove_insignificant_edges(graph: nx.Graph, threshold: float) -> nx.Graph:
    """Removes all edges from a given graph with a score below the threshold

    Arguments:
        graph: -- The graph to work on; it is modified in place. Every edge
            needs a ``'combined_score'`` attribute.
        threshold: -- The threshold below which edges are removed. Edges
            whose score equals the threshold are kept.

    Returns:
        A reference to the same graph that has been passed
    """
    # Decide first, remove afterwards: deleting edges while iterating the
    # edge view raises "dictionary changed size during iteration".
    insignificant = [
        (u, v)
        for u, v, attrs in graph.edges(data=True)
        if attrs['combined_score'] < threshold
    ]
    graph.remove_edges_from(insignificant)
    return graph
def minCut(G: nx.Graph):
    """
    Return the edge with the smallest density-per-node ratio.

    For every (node, neighbor) pair the edge's ``'density'`` is divided by
    the size of the smaller of the two sides that the edge separates:
    ``countNodes(G, neighbor) + 1`` nodes on one side and the remaining nodes
    on the other.

    :param G: Graph whose edges carry a ``'density'`` attribute.
    :return: ``(node, neighbor)`` of the edge with the lowest ratio (the
        first one found on ties), or ``None`` if the graph has no edges.
    """
    # Start from infinity so that any real ratio, however large, is accepted;
    # a fixed sentinel such as 9999 hid every edge whose ratio exceeded it.
    minimum = float('inf')
    edge = None
    for node in list(G.nodes):
        for neighbor in list(G.neighbors(node)):
            after = countNodes(G, neighbor) + 1
            before = G.number_of_nodes() - after
            density = G.get_edge_data(node, neighbor)['density']
            dcut = density / min(after, before)
            if dcut < minimum:
                minimum = dcut
                edge = (node, neighbor)
    return edge
def get_paths_bottlenecks(
        graph: nx.Graph,
        paths: Dict[MacPair, List[Path]]) -> Dict[MacPair, List[float]]:
    """
    Compute the bottleneck of every path between each (src, dst) pair.

    The bottleneck of a path is the smallest ``'weight'`` among the edges
    joining its consecutive switches. A path with at most one switch gets the
    value ``1000000.0``. Each (src, dst) entry starts as a list of ``PATHS``
    zeros and is filled by path index.

    :param graph: Graph whose edges carry a ``'weight'`` attribute.
    :param paths: For each (src, dst) pair, a list of paths; each path is a
        sequence of ``(switch, in_port, out_port)`` triples.
    :return: Mapping from (src, dst) to the list of bottleneck values.
    """
    bottlenecks = defaultdict(lambda: PATHS * [0.0])
    for (src, dst), path_list in paths.items():
        for path_idx, path in enumerate(path_list):
            switches = [switch for switch, _in_port, _out_port in path]
            if len(switches) <= 1:
                # No link is traversed, so nothing limits this path.
                bottlenecks[src, dst][path_idx] = 1000000.0
                continue
            hops = zip(switches, switches[1:])
            bottlenecks[src, dst][path_idx] = min(
                graph.get_edge_data(near, far)['weight']
                for near, far in hops)
    return bottlenecks
def dijkstra(graph: Graph, source: int, target: int):
    """
    Compute the shortest distance from ``source`` to ``target``.

    The search works on the node attribute ``"distance"`` and CONSUMES the
    graph: every node is removed once it has been processed.

    :param graph: Graph with edge attribute ``"weight"``. Every node's
        ``"distance"`` attribute is read, so it must already be set;
        ``source`` is set to 0 here.
    :param source: Node the search starts from.
    :param target: Node whose distance is wanted.
    :return: The ``"distance"`` of ``target`` once it is selected, or the
        (by then empty) graph if all nodes are consumed first.
    """
    node_data = graph.nodes
    node_data[source]["distance"] = 0

    while node_data:
        closest = get_node_with_minimal_distance(graph)
        if target == closest:
            return node_data[target]["distance"]

        for neighbour in graph.neighbors(closest):
            step = graph.get_edge_data(closest, neighbour)["weight"]
            candidate = node_data[closest]["distance"] + step
            if candidate < node_data[neighbour]["distance"]:
                node_data[neighbour]["distance"] = candidate

        # Processed nodes are dropped so they are never selected again.
        graph.remove_node(closest)

    return graph
def find_route_min_max(santas_route: nx.Graph) -> Tuple[int, int]:
    """
    Find the length of the shortest & longest paths in Santa's route that
    visit every location once.

    Every ordering of the nodes is tried, so the runtime grows factorially
    with the number of locations. Only the running minimum and maximum are
    kept; the routes themselves are not stored.

    :param santas_route: Graph whose edges carry a ``"distance"`` attribute.
    :return: ``(shortest, longest)`` total distance.
    :raises IndexError: If no ordering of the nodes forms a path.
    """
    shortest = None
    longest = None
    for route in permutations(santas_route.nodes):
        if not nx.is_simple_path(santas_route, route):
            continue
        route_distance = sum(
            santas_route.get_edge_data(near, far)["distance"]
            for near, far in zip(route, route[1:])
        )
        if shortest is None or route_distance < shortest:
            shortest = route_distance
        if longest is None or route_distance > longest:
            longest = route_distance

    if shortest is None:
        # Same exception type that indexing the empty result list raised.
        raise IndexError("no route visits every location exactly once")
    return shortest, longest
def generate_layered_graph(graph: Graph,
                           machine: StateMachine) -> LayeredGraph:
    """
    Build the layered graph of ``graph`` under the state machine.

    There is one layer per element of ``machine.elements`` except the last.
    For every edge ``(u, v, key)`` and every layer, the layer's element is
    combined with the edge's ``'arc_type'`` through ``machine.apply``. Unless
    the result is ``machine.forbidden``, an edge is added from ``u`` in the
    current layer to ``v`` in the layer of the resulting element. A node's
    id in a layer is its original id plus ``layer * max_node``.

    :param graph: Multigraph with integer nodes and edge attribute
        ``'arc_type'``.
    :param machine: State machine providing ``elements``, ``apply`` and
        ``forbidden``.
    :return: The populated layered graph. Each new edge carries the original
        edge attributes plus ``origin_edge`` (the original edge key).
    """
    layers = len(machine.elements) - 1
    layered_graph = LayeredGraph(graph, layers)

    for u, v, key in graph.edges(keys=True):
        for level in range(layers):
            edge_data = graph.get_edge_data(u, v, key)
            operation_result = machine.apply(machine.elements[level],
                                             edge_data['arc_type'])
            if operation_result is machine.forbidden:
                continue
            target_level = machine.elements.index(operation_result)
            layered_graph.add_edge(
                u + level * layered_graph.max_node,
                v + target_level * layered_graph.max_node,
                origin_edge=key,
                **edge_data)

    return layered_graph
def FBCA(G: net.Graph, user, k: int):
    """
    Recommend up to ``k`` locations that ``user`` has not visited yet.

    Nodes whose name starts with ``'l'`` are locations, nodes starting with
    ``'u'`` are users. The score of a location is the sum, over every other
    user who visited it, of that user's ``BCA`` score multiplied by the
    number of visits (edge attribute ``'weight'``).

    :param G: User/location graph with edge attribute ``'weight'``.
    :param user: The user to recommend for.
    :param k: Maximum number of recommendations.
    :return: List of ``(location, score)`` pairs, best first. It is shorter
        than ``k`` when fewer unvisited locations exist.
    """
    # locations user has visited
    visited_by_user = {n for n in G.neighbors(user) if n.startswith('l')}

    # initializing scores of locations that user has not visited
    scores = {
        n: 0
        for n in G.nodes
        if n.startswith('l') and n not in visited_by_user
    }

    # compute PPR for all users
    PPR = BCA(G, user)

    for other in G.nodes:
        if not other.startswith('u') or other == user:
            continue
        for location in G.neighbors(other):
            # Only locations the target user has NOT visited are scored;
            # anything else has no entry in ``scores``.
            if location in scores:
                n_visits = G.get_edge_data(other, location)['weight']
                scores[location] += PPR[other] * n_visits

    # sorting scores
    sorted_scores = sorted(scores.items(), reverse=True,
                           key=lambda kv: kv[1])

    # Slicing already copes with k exceeding the number of scored locations.
    return sorted_scores[:k]
def dijkstra(graph: nx.Graph, start):
    """
    Compute the shortest-path tree rooted at ``start``.

    Nodes must be the integers ``0 .. number_of_nodes - 1`` because they are
    used as list indices.

    :param graph: Graph whose edges carry a ``'weight'`` attribute.
    :param start: Root node.
    :return: List ``parent`` where ``parent[v]`` is the predecessor of ``v``
        on a shortest path from ``start`` (``None`` for ``start`` itself and
        for unreachable nodes).
    """
    node_count = graph.number_of_nodes()
    vertex_mark = [math.inf] * node_count  # best known distance per node
    parent_node = [None] * node_count

    # The list fixes the order in which equally distant nodes are picked;
    # the set gives constant-time membership tests in the inner loop.
    unvisited = list(graph.nodes())
    pending = set(unvisited)

    vertex_mark[start] = 0
    current_node = start
    while True:
        for neighbor in graph.neighbors(current_node):
            if neighbor in pending:
                weight = graph.get_edge_data(current_node, neighbor)['weight']
                candidate = vertex_mark[current_node] + weight
                if candidate < vertex_mark[neighbor]:
                    vertex_mark[neighbor] = candidate
                    parent_node[neighbor] = current_node

        unvisited.remove(current_node)
        pending.discard(current_node)
        if not unvisited:
            return parent_node
        # Closest unvisited node; the first one in list order wins ties.
        current_node = min(unvisited, key=vertex_mark.__getitem__)
class LangGraph(object):
    """
    A graph of all the relationships in a document and/or sentence

    Nodes are instances; every edge stores a set of type labels under the
    ``TYPES`` attribute. Besides the graph itself, the instances are kept in
    narrative order (``temporal``) together with the reverse mapping from
    instance to narrative index (``temporalMap``).
    """

    def __init__(self, directed=False):
        """
        Builds an empty graph, directed or undirected
        """
        self.isDirected = directed

        #a graph that is meant to be full of class Instance
        if self.isDirected:
            self.graph = DiGraph()
        else:
            self.graph = Graph()

        self.start = None #an Instance

        #keep the graph also according to temporal, redundant probably needs
        #refactoring
        self.temporal = None
        self.temporalMap = None

    def setStart(self, start):
        """
        Sets the starting instance, also builds the temporal ordering of the
        graph
        """
        self.start = start
        self.temporal = self.narrativeOrder()
        self.temporalMap = self.narrativeMapping()

    def indexToInst(self, index):
        """
        Returns the instance corresponding to the given index; anything that
        is not an int is returned unchanged
        """
        #if the index is an int, lookup the instance associated with it
        if isinstance(index, int):
            return self.temporal[index]

        return index

    def instToIndex(self, instance):
        """
        Return the index associated with the instance
        """
        return self.temporalMap[instance]

    def narrativeOrder(self):
        """
        Returns the instances in narrative order, found by following the
        WORD_EDGE links from the start instance
        """
        results = []
        seen = set()
        node = self.start
        prev = None

        #while there are more nodes, keep adding them; stop when the chain
        #closes into a cycle instead of walking it forever
        while node is not None and node not in seen:

            #record the current node
            results.append(node)
            seen.add(node)

            #get the connected nodes, ignoring the one we just came from
            fringe = [n for n in self.adj(node, WORD_EDGE) if n != prev]
            nextNode = fringe[0] if fringe else None

            #advance to the next node
            prev = node
            node = nextNode

        return results

    def narrativeMapping(self):
        """
        Makes the mapping from instances to their narrative index
        """
        return {inst:i for i,inst in enumerate(self.temporal)}

    def addNode(self, node):
        """
        Adds a node to the graph
        """
        self.graph.add_node(node)

    def addEdge(self, start, end, type):
        """
        Adds an edge between the two instances
        """
        #if the edge exists, just add the type
        if self.graph.has_edge(start, end):
            self.addType(start, end, type)
        else:
            #store the set under the TYPES constant, the key every other
            #method reads, rather than under the literal name 'TYPES'
            self.graph.add_edge(start, end, **{TYPES: {type}})

    def removeEdge(self, start, end, edgeType):
        """
        Removes a type from the edge and drops the edge itself once no types
        are left
        """
        #remove the type
        self.removeType(start, end, edgeType)

        #if there are no types, remove the edge itself
        types = self.edgeTypes(start, end)

        #remove the edge
        if not len(types) and self.graph.has_edge(start, end):
            self.graph.remove_edge(start, end)

    def addType(self, start, end, type):
        """
        Adds a type to an existing edge
        """
        #look for existing types
        types = self.graph[start][end].get(TYPES, set())

        #add the new type
        types.add(type)
        self.graph[start][end][TYPES] = types

    def removeType(self, start, end, edgeType):
        """
        Removes the type on the edge
        """
        #types are stored without the parent/child prefixes
        for prefix in [PARENT, CHILD]:
            edgeType = removePrefix(prefix, edgeType)

        types = self.graph[start][end][TYPES]

        #if the types contains the edge, remove
        if edgeType in types:
            types.remove(edgeType)

    def hasType(self, start, end, type):
        """
        Returns true if the edge between the two nodes has the given type
        """
        return type in self.edgeTypes(start, end)

    def singleEdgeTypes(self, start, end):
        """
        Returns the types stored on the edge start -> end, or an empty set
        if there is no such edge
        """
        #make sure we are using instances rather than indexes
        start = self.indexToInst(start)
        end = self.indexToInst(end)

        data = self.graph.get_edge_data(start, end)

        #if there is data, get the types
        if data is None:
            return set()

        return data.get(TYPES, set())

    def edgeTypes(self, start, end):
        """
        Returns the types on the edge if any, or an empty set is returned.

        For a directed graph the types of end -> start are returned with the
        PARENT prefix and the types of start -> end with the CHILD prefix.
        """
        if self.isDirected:
            parent = addPrefixes(PARENT, self.singleEdgeTypes(end, start))
            child = addPrefixes(CHILD, self.singleEdgeTypes(start, end))
            types = parent.union(child)
        else:
            types = self.singleEdgeTypes(start, end)

        return types

    def allEdgeTypes(self):
        """
        Returns all the edge types
        """
        results = set()

        #collect all the edges with all the types
        for s,e,types in self.allEdges():

            #look up the edge types to make sure everything is covered
            results.update(types)

            #add in the reverse types
            results.update(self.edgeTypes(e,s))

        return results

    def allEdges(self):
        """
        Yield all the edges in the graph as (start, end, types)
        """
        for start, end in self.graph.edges():
            yield start, end, self.edgeTypes(start, end)

    def contains(self, instance):
        """
        Returns true if the graph contains the instance
        """
        return self.graph.has_node(instance)

    def instances(self):
        """
        Return all the instances in the graph
        """
        return self.graph.nodes()

    def edges(self, instance):
        """
        Returns the nodes connected to this instance (for a directed graph
        both successors and predecessors)
        """
        inst = self.indexToInst(instance)

        #get the directed edges in both directions
        if self.isDirected:
            results = [t for _, t in self.graph.out_edges(inst)] + [t for t, _ in self.graph.in_edges(inst)]
        else:
            results = self.graph.adj[inst]

        return results

    def docType(self):
        """
        Returns the document type (String)
        """
        return self.temporal[0].event.docType

    def adj(self, instance, type=None):
        """
        Returns the adjacent nodes connected with the given type, or all
        adjacent nodes if no type is given
        """
        return [other for other in self.edges(instance)
            if type is None or self.hasType(instance, other, type)]

    def nonNarrativeAdj(self, instance, returnIndex=False):
        """
        Returns the adjacent nodes that are connected by at least one
        non-narrative type, as (node or index, types) pairs
        """
        results = []

        #add each node if it has a non-narrative (temporal) connection
        for node in self.edges(instance):

            #get the non narrative types
            edgeTypes = nonNarrativeTypes(self.edgeTypes(instance, node))

            #if there is a non-narrative edge, add it
            if edgeTypes:

                #lookup the index of the node
                nodeMarker = self.instToIndex(node) if returnIndex else node

                results.append((nodeMarker, edgeTypes))

        return results

    def words(self):
        """
        Returns the words in narrative order
        """
        return [t.word for t in self.tokens()]

    def tokens(self):
        """
        Returns the tokens in narrative order
        """
        return [i.token for i in self.temporal]

    def labels(self):
        """
        Returns the sequence of labels for the instances
        """
        return [i.event.type for i in self.temporal]

    def removeAny(self, blackList):
        """
        Removes any nodes/tokens/instances that match the words in the
        blacklist
        """
        #if a token or its lemma match any of the words in the blacklist
        #mark it for removal
        toRemove = {inst.token for inst in self.temporal
            if inst.token.word.lower() in blackList
            or inst.token.lemma.lower() in blackList}

        self.removeNodes(toRemove)

    def removeNodes(self, tokens):
        """
        Removes the instances of the given tokens from the graph, linking
        the remaining instances so the narrative chain stays connected
        """
        #indexes refer to the narrative order, so count that, not the graph
        startLen = len(self.temporal)

        #mark all the instances/indexes to remove
        instances = {inst:i for inst,i in self.temporalMap.items()
            if inst.token in tokens}

        #determine the remaining nodes
        remaining = sorted(set(range(startLen)) - set(instances.values()))

        #add in all the bypasses
        for startIndex, endIndex in iterPairs(remaining):
            start = self.temporal[startIndex]
            end = self.temporal[endIndex]
            self.addEdge(start, end, WORD_EDGE)

        #remove the nodes (and with them their edges)
        for inst in instances:
            self.graph.remove_node(inst)

        #if there are remaining nodes then reset the temporal mapping
        if remaining:
            startIndex = min(remaining)
            self.start = self.temporal[startIndex]

            #redo narrative order
            self.temporal = self.narrativeOrder()
            self.temporalMap = self.narrativeMapping()
        else:
            self.start = None
            self.temporal = []
            self.temporalMap = {}

    def copy(self):
        """
        Performs a shallow copy of the graph: every instance is rebuilt
        around a shallow copy of its token and the same event object
        """
        #the constructor only takes the directed flag
        newGraph = LangGraph(self.isDirected)

        #create new instances for every node, connected or not
        newInst = {i:me.Instance(copy(i.token), i.event)
            for i in self.graph.nodes()}

        for inst in newInst.values():
            newGraph.addNode(inst)

        #add in all the edges with their stored (unprefixed) types; the
        #prefixed view from edgeTypes would mix in the reverse direction
        for start, end in self.graph.edges():
            for eType in self.singleEdgeTypes(start, end):
                newGraph.addEdge(newInst[start], newInst[end], eType)

        if self.start is not None:
            newGraph.setStart(newInst[self.start])
        elif self.temporal is not None:
            #mirror an emptied graph, which has no start but empty orderings
            newGraph.temporal = []
            newGraph.temporalMap = {}

        return newGraph

    def graphString(self):
        """
        Returns the graph as a string
        """
        return " ".join([t.word for t in self.tokens()])

    def __len__(self):
        """
        Returns the number of nodes (tokens) in the graph
        """
        return len(self.graph)

    def __repr__(self):
        """
        Returns a summary string of the graph
        """
        return "LangGraph {} nodes, {} edges".format(len(self.graph.nodes()),
            len(self.graph.edges()))