def create_rr_set(g: nx.Graph): free_nodes = g.graph['free'] free_nodes_set = set(free_nodes) v_index = np.random.randint(len(g.graph["free"])) v = g.graph["free"][v_index] q = [v] rr_set = {v} while q: node = q.pop(0) for edge in g.in_edges(node, data=True): u = edge[0] if (u not in rr_set) and (u in free_nodes_set): r = np.random.random() if r < edge[2]['new_weight']: q.append(u) rr_set.add(u) return rr_set
def normalization(g: nx.Graph): for i in g.nodes(): sum_new_weight = 0 in_edges = g.in_edges(i, data=True) for j in in_edges: sum_new_weight += g.edge[j[0]][j[1]]['new_weight'] for k in in_edges: g.edge[k[0]][k[1]]['new_weight'] = g.edge[k[0]][ k[1]]['new_weight'] / sum_new_weight
def _clean_unit(processor: Graph, unit: object) -> None: """Clean the given unit properties. `processor` is the processor containing the unit. `unit` is the unit to clean whose properties. The function restricts the capabilities of the given unit to only those supported by its predecessors. It also removes incoming edges coming from a predecessor unit having no capabilities in common with the given unit. """ processor.nodes[unit][UNIT_CAPS_KEY] = frozenset( processor.nodes[unit][UNIT_CAPS_KEY]) pred_caps = map(lambda edge: _chk_edge(processor, edge), tuple(processor.in_edges(unit))) processor.nodes[unit][UNIT_CAPS_KEY] = typing.cast( FrozenSet[ICaseString], frozenset()).union(*pred_caps)
def create_rr_set_(g: nx.Graph, v): free_nodes = g.graph['free'] free_nodes_set = set(free_nodes) q = [v] rr_set = {v} while q: node = q.pop(0) for edge in g.in_edges(node, data=True): u = edge[0] if (u not in rr_set) and (u in free_nodes_set): r = np.random.random() if r < edge[2]['w']: q.append(u) rr_set.add(u) return rr_set
def _random_rr_set(g: nx.Graph, sb): free_nodes = g.graph['free'] free_nodes_set = set(free_nodes) - set(sb) q = list(sb) B = set(sb) visited = set() while q: node = q.pop(0) B.add(node) for edge in g.out_edges(node, data=True): u = edge[0] if u not in visited: r = np.random.random() if r < edge[2]['w']: q.append(u) visited.add(u) v = random.sample(free_nodes_set, 1)[0] q = [v] visited = set() rr_set = set() while q: node = q.pop(0) rr_set.add(node) for edge in g.in_edges(node, data=True): u = edge[0] if (u not in rr_set) and (u not in B) and (u not in visited): r = np.random.random() if r < edge[2]['w']: q.append(u) visited.add(u) return rr_set
class LangGraph(object): """ A graph of all the relationships in a document and/or sentence """ def __init__(self, directed=False): """ Builds a graph out of the given document """ self.isDirected = directed #a graph that is meant to be full of class Instance if self.isDirected: self.graph = DiGraph() else: self.graph = Graph() self.start = None #an Instance #keep the graph also according to temporal, redundant probably needs #refactoring self.temporal = None self.temporalMap = None def setStart(self, start): """ Sets the starting instance, also builds the temporal ordering of the graph """ self.start = start self.temporal = self.narrativeOrder() self.temporalMap = self.narrativeMapping() def indexToInst(self, index): """ Returns the instance corresponding to the given index """ result = index #if the index is an int, lookup the instance associated with it if type(index) == int: result = self.temporal[index] return result def instToIndex(self, instance): """ Return the index associated with the instance """ return self.temporalMap[instance] def narrativeOrder(self): """ Returns the instances in narrative order """ results = [] node = self.start prev = None #while there are more nodes, keep adding them while node is not None: #record the current node results.append(node) #get the connected nodes fringe = [n for n in self.adj(node, WORD_EDGE) if n != prev] nextNode = fringe[0] if fringe else None #advance to the next node prev = node node = nextNode return results def narrativeMapping(self): """ Makes the mapping from instances to their narrative index """ return {inst:i for i,inst in enumerate(self.temporal)} def addNode(self, node): """ Adds a node to the graph """ self.graph.add_node(node) def addEdge(self, start, end, type): """ Adds an edge between the two instances """ #if the edge exists, just add the type if self.graph.has_edge(start, end): self.addType(start, end, type) else: self.graph.add_edge(start, end, TYPES=set([type])) def removeEdge(self, start, end, edgeType): """ Removes an edge with a given type from the edge type """ #remove the type self.removeType(start, end, edgeType) #if there are no types, remove the edge itself types = self.edgeTypes(start, end) #remove the edge if not len(types) and self.graph.has_edge(start, end): self.graph.remove_edge(start, end) def addType(self, start, end, type): """ Adds a type between the edges """ #look for existing types types = self.graph[start][end].get(TYPES, set()) #add the new type types.add(type) self.graph[start][end][TYPES] = types def removeType(self, start, end, edgeType): """ Removes the type on the edge """ for prefix in [PARENT, CHILD]: edgeType = removePrefix(prefix, edgeType) types = self.graph[start][end][TYPES] #if the types contains the edge, remove if edgeType in types: types.remove(edgeType) def hasType(self, start, end, type): """ Returns true if the edge between the two nodes has the given type """ return type in self.edgeTypes(start, end) def singleEdgeTypes(self, start, end): """ Returns the types on the edge if any, or an empty set is returned """ #make sure we are using instances rather than indexes start = self.indexToInst(start) end = self.indexToInst(end) data = self.graph.get_edge_data(start,end) result = set() #if there is data, get the types if data is not None: result = data.get(TYPES, set()) return result def edgeTypes(self, start, end): """ Returns the types on the edge if any, or an empty set is returned """ if self.isDirected: parent = addPrefixes(PARENT, self.singleEdgeTypes(end, start)) child = addPrefixes(CHILD, self.singleEdgeTypes(start, end)) types = parent.union(child) else: types = self.singleEdgeTypes(start, end) return types def allEdgeTypes(self): """ Returns all the edge types """ results = set() #collect all the edges with all the types for s,e,types in self.allEdges(): #look up the edge types to make sure everything is covered for edgeType in types: results.add(edgeType) #add in the reverse types for edgeType in self.edgeTypes(e,s): results.add(edgeType) return results def allEdges(self): """ Yield all the edges in the graph """ for start, end in self.graph.edges(): yield start, end, self.edgeTypes(start, end) def contains(self, instance): """ Returns true if the graph contains the instance """ return self.graph.has_node(instance) def instances(self): """ Return all the instances in the graph """ return self.graph.nodes() def edges(self, instance): """ Returns all the edges connected to this instance """ inst = self.indexToInst(instance) #make get the directed edges if self.isDirected: results = [t for _, t in self.graph.out_edges(inst)] + [t for t, _ in self.graph.in_edges(inst)] else: results = self.graph.adj[inst] return results def docType(self): """ Returns the document type (String) """ return self.temporal[0].event.docType def adj(self, instance, type=None): """ Returns the adjancent node with a given type """ return [other for other in self.edges(instance) if self.hasType(instance, other, type) or type is None] def nonNarrativeAdj(self, instance, returnIndex=False): """ Returns the nodes that are not adjancent to the given instance """ results = [] #add each node if it has a non-narrative (temporal) connection for node in self.edges(instance): #get the non narrative types edgeTypes = nonNarrativeTypes(self.edgeTypes(instance, node)) #if there is a non-narrative edge, add it if edgeTypes: #lookup the index of the node nodeMarker = self.instToIndex(node) if returnIndex else node results.append((nodeMarker, edgeTypes)) return results def words(self): """ Returns the words in narrative order """ return [t.word for t in self.tokens()] def tokens(self): """ Returns the tokens in narrative order """ return [i.token for i in self.temporal] def labels(self): """ Returns the sequence of labels for the instances """ return [i.event.type for i in self.temporal] def removeAny(self, blackList): """ Removes any nodes/tokens/instances that match the words in the blacklist """ #if a token or its lemma match any of the words in the blacklist #mark it for removal toRemove = {inst.token for inst in self.temporal if inst.token.word.lower() in blackList or inst.token.lemma.lower() in blackList} self.removeNodes(toRemove) def removeNodes(self, tokens): """ Removes the token from the graph """ startLen = len(self) #mark all the instances/indexes to remove instances = {inst:i for inst,i in self.temporalMap.items() if inst.token in tokens} #determine the remaining nodes remaining = sorted(list(set(range(startLen)) - {i for i in instances.values()})) #add in all the bypasses for startIndex, endIndex in iterPairs(remaining): start = self.temporal[startIndex] end = self.temporal[endIndex] self.addEdge(start, end, WORD_EDGE) #remove the edges for inst in instances: self.graph.remove_node(inst) #if there are remaining nodes then reset the temporal mapping if remaining: startIndex = min(remaining) self.start = self.temporal[startIndex] #redo narrative order self.temporal = self.narrativeOrder() self.temporalMap = self.narrativeMapping() else: self.start = None self.temporal = [] self.temporalMap = {} def copy(self): """ Performs a shallow copy of the graph """ newGraph = LangGraph(self.isDirected, self.entEdges) #create new instances newInst = {i:me.Instance(copy(i.token), i.event) for i in self.temporal} #add in all the edges for start, end in self.graph.edges(): for eType in self.edgeTypes(start, end): newGraph.addEdge(newInst[start], newInst[end], eType) newGraph.setStart(newInst[self.start]) return newGraph def graphString(self): """ Returns the graph as a string """ return " ".join([t.word for t in self.tokens()]) def __len__(self): """ Returns the number of nodes (tokens) in the graph """ return len(self.graph) def __repr__(self): """ Returns a summary string of the graph """ return "LangGraph {} nodes, {} edges".format(len(self.graph.nodes()), len(self.graph.edges()))