def _CheckEdge(self, node1id, relation, parentid):
    """Tell whether an edge carrying ``relation`` links the two given nodes.

    Each edge in ``self.graph`` is compared by position: item 0 and item 2
    against the node ids, item 3 against the relation's feature id.  A
    leading "~" on ``relation`` is stripped and swaps the roles of
    ``node1id`` and ``parentid`` for the lookup.

    An edge matches when its relation id equals the requested id, or when
    the requested id is among the ``ancestors`` of the ontology node found
    for the edge's relation id.

    Returns:
        True as soon as one candidate edge matches, otherwise False.
    """
    is_reversed = relation[0] == "~"
    if is_reversed:
        logging.debug("_CheckEdge: Reverse! {}".format(relation))
        relation = relation[1:]
    wanted_id = FeatureOntology.GetFeatureID(relation)

    # A reversed relation is looked up from parentid towards node1id.
    if is_reversed:
        source_id, target_id = parentid, node1id
    else:
        source_id, target_id = node1id, parentid

    candidates = [
        e for e in self.graph if e[0] == source_id and e[2] == target_id
    ]
    candidates.sort(key=operator.itemgetter(2, 1, 0))

    for candidate in candidates:
        if wanted_id == candidate[3]:
            return True
        ontology_node = FeatureOntology.SearchFeatureOntology(candidate[3])
        if ontology_node and wanted_id in ontology_node.ancestors:
            if logging.root.isEnabledFor(logging.DEBUG):
                logging.debug(" Found ontology ancesstor relation!")
            return True
    return False
def _AddEdge(self, node1id, relation, parentid):
    """Add the edge ``(node1id, relation, parentid, relation id)`` to the graph.

    After the edge is added, any edge between the same two nodes whose
    relation is an ontology ancestor of ``relation`` is dropped, so only the
    newly added relation remains for that ancestor chain.  Finally the
    "has<relation>" feature is applied to the parent node; if that feature
    is unknown (negative id) an error is logged instead.

    Args:
        node1id: id stored in position 0 of the edge tuple.
        relation: relation name; also used to build the "has" feature name.
        parentid: id stored in position 2 of the edge tuple and key into
            ``self.nodes`` for the node that receives the "has" feature.
    """
    # Resolve the relation once; the id is needed for the edge tuple and
    # for the ontology lookup.
    relationid = FeatureOntology.GetFeatureID(relation)

    # Find the right relation to add, if we already have a relation for the
    # same nodes: the new edge goes in, edges carrying one of its ancestors
    # come out.
    self.graph.add((node1id, relation, parentid, relationid))

    # Use the ontology to find the ancestors of the relation.
    ontologynode = FeatureOntology.SearchFeatureOntology(relationid)
    if ontologynode:
        for ancestor in ontologynode.ancestors:
            ancestorname = FeatureOntology.GetFeatureName(ancestor)
            # discard() is a no-op when the ancestor edge is absent.
            self.graph.discard((node1id, ancestorname, parentid, ancestor))

    # Set the parent to have the relation.
    hasFeatureID = FeatureOntology.GetFeatureID("has" + relation)
    if hasFeatureID >= 0:
        self.nodes[parentid].ApplyFeature(hasFeatureID)
    else:
        logging.error(
            "There is no has{} feature in the feature.txt!".format(relation))
def _RemoveEdge(self, node1id, relation, parentid):
    """Remove the edges from ``node1id`` to ``parentid`` that carry ``relation``.

    A leading "~" on ``relation`` means the edge is stored the other way
    round: the call is repeated with the two node ids swapped and the "~"
    stripped.

    An edge is removed when its relation id (item 3) equals the id of
    ``relation``, or when that id is among the ``ancestors`` of the ontology
    node found for the edge's relation.  Edges whose relation has no
    ontology node are only removed on an exact id match.
    """
    if relation[0] == "~":  # reverted relation: swap the end points
        self._RemoveEdge(parentid, relation[1:], node1id)
        return

    relationid = FeatureOntology.GetFeatureID(relation)
    # Iterate over a snapshot because matching edges are removed from
    # self.graph inside the loop.
    for edge in [
            e for e in self.graph if e[0] == node1id and e[2] == parentid
    ]:
        if relationid == edge[3]:
            self.graph.remove(edge)
            continue
        # The ontology lookup may find nothing; guard before reading
        # .ancestors instead of raising AttributeError half-way through.
        edgerelationnode = FeatureOntology.SearchFeatureOntology(edge[3])
        if edgerelationnode and relationid in edgerelationnode.ancestors:
            self.graph.remove(edge)
def newnode(self, start, count, compound=False):
    """Build one combined node out of ``count`` consecutive nodes.

    The nodes are walked from ``self.get(start)`` through ``.next``.  Their
    ``text``, ``norm`` and ``atom`` are concatenated; between two nodes the
    separator is "_" when ``compound`` is truthy, otherwise as many spaces
    as the gap between the previous node's ``EndOffset`` and the current
    node's ``StartOffset``.  For every member whose ``UpperRelationship`` is
    set and is not 'H', the matching "has<relationship>" feature is applied
    to the new node.

    Returns:
        A tuple ``(new node, first member, last member)``.

    Raises:
        RuntimeError: when the list has no head, or when ``start + count``
            exceeds ``self.size``.
    """
    if not self.head:
        raise RuntimeError(
            "This SentenceLinkedList is null! Can't combine.")
    if start + count > self.size:
        logging.error(self.__str__())
        raise RuntimeError("Can't get " + str(count) +
                           " items start from " + str(start) +
                           " from the sentence!")

    startnode = self.get(start)
    endnode = self.get(start + count - 1)

    members = []
    text_parts = []
    norm_parts = []
    atom_parts = []
    has_relation_ids = []

    current = startnode
    previous_end = current.StartOffset
    for position in range(count):
        if position == 0:
            gap = ""
        elif compound:
            gap = "_"
        else:
            gap = " " * (current.StartOffset - previous_end)
        previous_end = current.EndOffset

        text_parts.append(gap + current.text)
        norm_parts.append(gap + current.norm)
        atom_parts.append(gap + current.atom)

        relationship = current.UpperRelationship
        if relationship and relationship != 'H':
            has_relation_ids.append(
                FeatureOntology.GetFeatureID("has" + relationship))
        members.append(current)
        current = current.next

    combined = SentenceNode("".join(text_parts))
    combined.norm = "".join(norm_parts)
    combined.atom = "".join(atom_parts)
    combined.sons = members
    combined.StartOffset = startnode.StartOffset
    combined.EndOffset = endnode.EndOffset
    Lexicon.ApplyWordLengthFeature(combined)
    for has_relation_id in has_relation_ids:
        combined.ApplyFeature(has_relation_id)
    return combined, startnode, endnode
def InitGlobalFeatureID():
    """Fill the module-level ``FeatureID_*`` globals and the ontology id tables.

    Returns immediately when ``FeatureID_JS2`` is already truthy.  Otherwise
    every ``FeatureID_*`` global is looked up by name through
    ``FeatureOntology.GetFeatureID`` and ``FeatureOntology.BarTagIDs``,
    ``BarTagIDSet`` and ``SentimentTagIDSet`` are rebuilt from
    ``BarTags`` / ``SentimentTags``.
    """
    global FeatureID_JS, FeatureID_JS2, FeatureID_JM2, FeatureID_JM, FeatureID_0
    global FeatureID_CD, FeatureID_punc, FeatureID_SYM, FeatureID_NNP, FeatureID_External
    global FeatureID_OOV, FeatureID_CM, FeatureID_NEW, FeatureID_SpaceQ, FeatureID_SpaceH, FeatureID_FULLSTRING
    global FeatureID_VB, FeatureID_Ved, FeatureID_Ving
    global FeatureID_H, FeatureID_Subj, FeatureID_Obj, FeatureID_Pred
    global FeatureID_AC, FeatureID_NC, FeatureID_VC, FeatureID_comPair
    global FeatureID_HIT, FeatureID_HIT2, FeatureID_HIT3

    # NOTE(review): the guard is a truthiness test, so an id of 0 for "JS2"
    # would re-run the initialization on every call -- confirm ids are non-zero.
    if FeatureID_JS2:
        return

    import FeatureOntology
    lookup = FeatureOntology.GetFeatureID

    FeatureID_JS = lookup("JS")
    FeatureID_JS2 = lookup("JS2")
    FeatureID_JM2 = lookup("JM2")
    FeatureID_JM = lookup("JM")
    FeatureID_0 = lookup("0")
    FeatureID_CD = lookup("CD")
    FeatureID_punc = lookup("punc")
    FeatureID_SYM = lookup("SYM")
    FeatureID_NNP = lookup("NNP")
    FeatureID_External = lookup("External")
    FeatureID_OOV = lookup("OOV")
    FeatureID_CM = lookup("CM")
    FeatureID_NEW = lookup("NEW")
    FeatureID_SpaceQ = lookup("spaceQ")
    FeatureID_SpaceH = lookup("spaceH")
    FeatureID_FULLSTRING = lookup("FULLSTRING")
    FeatureID_VB = lookup("VB")
    FeatureID_Ved = lookup("Ved")
    FeatureID_Ving = lookup("Ving")
    FeatureID_H = lookup("H")
    FeatureID_Subj = lookup("Subj")
    FeatureID_Obj = lookup("Obj")
    FeatureID_Pred = lookup("Pred")
    FeatureID_AC = lookup("AC")
    FeatureID_NC = lookup("NC")
    FeatureID_VC = lookup("VC")
    FeatureID_HIT = lookup("HIT")
    FeatureID_HIT2 = lookup("HIT2")
    FeatureID_HIT3 = lookup("HIT3")
    FeatureID_comPair = lookup("comPair")

    # Translate the bar-tag name table into ids, row by row, and collect
    # every id into the flat set as well.
    FeatureOntology.BarTagIDs = [[lookup(tag) for tag in tag_row]
                                 for tag_row in FeatureOntology.BarTags]
    for id_row in FeatureOntology.BarTagIDs:
        FeatureOntology.BarTagIDSet.update(id_row)

    FeatureOntology.SentimentTagIDSet = {
        lookup(tag) for tag in FeatureOntology.SentimentTags
    }
def OutputStringTokens_onelinerSA_ben(dag):
    """Serialize the sentiment key/value pairs of ``dag`` as a JSON array.

    For every edge (visited in the order given by edge items 2, 0, 1) the
    two end nodes are paired when one carries the "Key" feature and the
    other the "Value" feature; both orientations are checked and each match
    yields its own entry.  Afterwards every node that carries any sentiment
    feature but appears in no entry is emitted as a value under the
    placeholder key "_Emo" with key id -1.

    Each entry has the keys ``keyid``, ``key``, ``keyfeatures``,
    ``valueid``, ``value`` and ``valuefeatures``; the feature lists contain
    the names of the node's sentiment features only.

    Returns:
        The JSON text produced by ``json.dumps`` (keys sorted, non-ASCII
        characters kept as-is).
    """
    sentimentfeature = [
        "Target", "Pro", "Con", "PosEmo", "NegEmo", "Neutral", "Needed",
        "Key", "Value"
    ]
    featureids = {
        name: FeatureOntology.GetFeatureID(name) for name in sentimentfeature
    }
    sentimentfeatureidset = set(featureids.values())
    FeatureID_Key = featureids["Key"]
    FeatureID_Value = featureids["Value"]

    def sentimentnames(node):
        # Names of the sentiment features present on this node.
        return [
            FeatureOntology.GetFeatureName(f) for f in node.features
            if f in sentimentfeatureidset
        ]

    def pair(keynode, valuenode):
        # A fresh dict per pair, so two matches on one edge never share state.
        return {
            "keyid": keynode.ID,
            "key": keynode.text,
            "keyfeatures": sentimentnames(keynode),
            "valueid": valuenode.ID,
            "value": valuenode.text,
            "valuefeatures": sentimentnames(valuenode),
        }

    nodes = dag.nodes
    outputdict = []
    for edge in sorted(dag.graph, key=operator.itemgetter(2, 0, 1)):
        node1 = nodes.get(edge[2])
        node2 = nodes.get(edge[0])
        if FeatureID_Key in node1.features and FeatureID_Value in node2.features:
            outputdict.append(pair(node1, node2))
        if FeatureID_Key in node2.features and FeatureID_Value in node1.features:
            outputdict.append(pair(node2, node1))

    # Sentiment-bearing nodes that were not part of any key/value pair.
    for node in nodes.values():
        if not sentimentfeatureidset.intersection(node.features):
            continue
        if any(snode["keyid"] == node.ID or snode["valueid"] == node.ID
               for snode in outputdict):
            continue
        outputdict.append({
            "keyid": -1,
            "key": "_Emo",
            "keyfeatures": [],
            "valueid": node.ID,
            "value": node.text,
            "valuefeatures": sentimentnames(node),
        })

    return json.dumps(outputdict,
                      default=lambda o: o.__dict__,
                      sort_keys=True,
                      ensure_ascii=False)
def ApplyActions(self, actinstring):
    """Apply a whitespace-separated action string to this token.

    Before the per-action pass, "NEW" anywhere in the string resets
    ``self.features`` to an empty set and "NEUTRAL" runs
    ``FeatureOntology.ProcessSentimentTags`` on the features; both words are
    still handled as ordinary actions afterwards.

    Each action is then dispatched on its shape (first match wins):

    * ends with "-" and starts with "^": relationship removal request
    * ends with "-": remove the named feature if present
    * ends with "+++": apply ``utils.FeatureID_0``, clear ``sons`` and
      ``Head0Text``
    * ends with "++": apply the named feature and request bar-tag processing
    * ends with "+": drop the bar-0 tag ids and AC/NC/VC, then apply the
      named feature
    * starts with "^": set ``UpperRelationship`` (and apply it as a feature
      when the action contains a dot)
    * starts with "'", "%" or "/": set ``norm`` / ``pnorm`` / ``atom`` to the
      action without its first and last character
    * anything else: apply it as a feature name, warning when the id is -1

    ``FeatureOntology.ProcessBarTags`` runs at the end only if an "X++"
    action was seen.
    """
    # self.FailedRuleTokens.clear()
    tokens = actinstring.split()
    # logging.debug("Word:" + self.text)
    if "NEW" in tokens:
        self.features = set()
    if "NEUTRAL" in tokens:
        FeatureOntology.ProcessSentimentTags(self.features)

    needs_bartag_pass = False
    for token in tokens:
        if token[-1] == "-":
            if token[0] != "^":
                stale_id = FeatureOntology.GetFeatureID(token.strip("-"))
                if stale_id in self.features:
                    self.features.remove(stale_id)
            elif "." not in token:
                logging.warning("This Action is not right:" + token)
            # Remove UpperRelationship.
            # NOTE(review): [1][-1] is the LAST CHARACTER of the text after
            # the first dot, which in this branch is always "-"; the
            # comparison therefore looks like it can never match a relation
            # name.  Kept as-is -- confirm the intent before changing it,
            # since a match deletes the attribute outright.
            elif self.UpperRelationship == token.split(".", 1)[1][-1]:
                # TODO: actually break the token. not just delattr
                delattr(self, "UpperRelationship")
                logging.warning(
                    " TODO: actually break the token. not just delattr Remove Relationship:"
                    + token)

        elif token[-1] == "+":
            if token[-2] != "+":
                # "X+"
                # MajorPOSFeatures = ["A", "N", "P", "R", "RB", "X", "V"]
                # Feb 20, 2018: use the BarTagIDs[0] as the MajorPOSFeatures.
                replaced_ids = list(FeatureOntology.BarTagIDs[0]) + [
                    utils.FeatureID_AC, utils.FeatureID_NC, utils.FeatureID_VC
                ]
                for replaced_id in replaced_ids:
                    if replaced_id in self.features:
                        self.features.remove(replaced_id)
                self.ApplyFeature(
                    FeatureOntology.GetFeatureID(token.strip("+")))
            elif token[-3] == "+":
                # "+++"
                self.ApplyFeature(utils.FeatureID_0)
                self.sons = []  # remove the sons of this
                self.Head0Text = ''  # remove Head0Text.
            else:
                # "X++": this should be in a chunk, only apply to the new node
                needs_bartag_pass = True
                self.ApplyFeature(
                    FeatureOntology.GetFeatureID(token.strip("++")))

        elif token[0] == "^":
            if "." in token:
                self.UpperRelationship = token.split(".")[-1]
                relation_id = FeatureOntology.GetFeatureID(
                    self.UpperRelationship)
                if relation_id == -1:
                    logging.warning("Wrong Relation Action to apply:" +
                                    self.UpperRelationship +
                                    " in action string: " + actinstring)
                else:
                    self.ApplyFeature(relation_id)
            else:
                logging.error(
                    "The Action is wrong: It does not have dot to link to proper pointer"
                )
                logging.error(" actinstring:" + actinstring)
                self.UpperRelationship = token[1:]

        elif token[0] == '\'':
            # Make the norm of the token to this key
            self.norm = token[1:-1]
        elif token[0] == '%':
            # Make the pnorm of the token to this key
            self.pnorm = token[1:-1]
        elif token[0] == '/':
            # Make the atom of the token to this key
            self.atom = token[1:-1]

        else:
            action_id = FeatureOntology.GetFeatureID(token)
            if action_id == -1:
                logging.warning("Wrong Action to apply:" + token +
                                " in action string: " + actinstring)
            else:
                self.ApplyFeature(action_id)

    # Only process bartags if there is a new bar tag, or trunking (in the
    # combine() function).
    if needs_bartag_pass:
        FeatureOntology.ProcessBarTags(self.features)
def ApplyDagActions(self, OpenNode, node, actinstring, rule):
    """Apply a rule's action string to ``node`` inside this graph.

    Steps, in order:

    1. A ``#...#`` span, if present, is cut out of the string and handed to
       ``self.ApplyDagActions_IEPair``.
    2. Every action containing "---" removes all edges between ``node`` and
       the node its pointer resolves to ("~---" removes the edges stored in
       the opposite direction).  These actions are then dropped.
    3. The remaining actions are processed sorted by their last character:
       "^pointer.relation" adds an edge (or removes it when the action ends
       with "-"), "X-" removes a feature, "X+" / "X++" apply features,
       a leading "'", "%" or "/" sets ``norm`` / ``pnorm`` / ``atom``, and
       anything else is applied as a feature name.

    The method returns early, without processing further actions, as soon as
    a pointer cannot be resolved by ``self.FindPointerNode``.
    """
    ie_match = re.search("(#.*#)", actinstring)
    if ie_match:
        ie_action = ie_match.group(1)
        actinstring = actinstring.replace(ie_action, '')
        self.ApplyDagActions_IEPair(OpenNode, node, rule, ie_action)

    pending = actinstring.split()

    # Pass 1: bulk edge removals ("---").  Iterate over a copy because the
    # handled actions are taken out of ``pending``.
    for token in list(pending):
        if "---" not in token:
            continue
        # The pointer is everything up to the last dot ".".
        pointer = token[:token.rfind('.')]
        parent_id = self.FindPointerNode(OpenNode.ID, pointer, rule, node.ID)
        if not parent_id:
            return

        if "~---" in token:
            from_id, to_id = parent_id, node.ID
        else:
            from_id, to_id = node.ID, parent_id
        self.graph = {
            edge for edge in self.graph
            if edge[0] != from_id or edge[2] != to_id
        }
        logging.debug("Dag Action {}: Removed all edge from {} to {}".format(
            token, parent_id, node.ID))
        pending.remove(token)

    # Pass 2: everything else, ordered by the last character of the action.
    for token in sorted(pending, key=lambda d: (d[-1])):
        if token[0] == '^':
            last_dot = token.rfind('.')
            # The pointer is everything up to the last dot ".".
            parent_id = self.FindPointerNode(OpenNode.ID, token[:last_dot],
                                             rule, node.ID)
            if not parent_id:
                return

            if token[-1] == "-":  # remove
                self._RemoveEdge(node.ID, token[last_dot + 1:-1], parent_id)
            else:
                relation = token[last_dot + 1:]
                if logging.root.isEnabledFor(logging.DEBUG):
                    logging.debug("DAG Action:Adding new edge: {}".format(
                        [node.ID, relation, parent_id]))
                self._AddEdge(node.ID, relation, parent_id)
                relation_id = FeatureOntology.GetFeatureID(relation)
                if relation_id == -1:
                    logging.warning(
                        "Wrong Relation Action to apply: {} in action string: {}"
                        .format(relation, actinstring))
                else:
                    node.ApplyFeature(relation_id)

        elif token[-1] == "-":
            stale_id = FeatureOntology.GetFeatureID(token.strip("-"))
            if stale_id in node.features:
                node.features.remove(stale_id)

        elif token[-1] == "+":
            if token[-2] != "+":
                # "X+": drop the bar-0 tags and AC/NC/VC before applying X.
                replaced_ids = list(FeatureOntology.BarTagIDs[0]) + [
                    utils.FeatureID_AC, utils.FeatureID_NC, utils.FeatureID_VC
                ]
                for replaced_id in replaced_ids:
                    if replaced_id in node.features:
                        node.features.remove(replaced_id)
                node.ApplyFeature(
                    FeatureOntology.GetFeatureID(token.strip("+")))
            elif token[-3] == "+":
                # "+++"
                logging.error("There should be no +++ operation in DAG.")
            else:
                # "X++"
                node.ApplyFeature(
                    FeatureOntology.GetFeatureID(token.strip("++")))

        elif token[0] == '\'':
            # Make the norm of the token to this key
            node.norm = token[1:-1]
        elif token[0] == '%':
            # Make the pnorm of the token to this key
            node.pnorm = token[1:-1]
        elif token[0] == '/':
            # Make the atom of the token to this key
            node.atom = token[1:-1]

        else:
            action_id = FeatureOntology.GetFeatureID(token)
            if action_id == -1:
                logging.warning("Wrong Action to apply:" + token +
                                " in action string: " + actinstring)
            else:
                node.ApplyFeature(action_id)

            if token == "NEUTRAL":
                FeatureOntology.ProcessSentimentTags(node.features)
def FindPointerNode(self, openID, SubtreePointer, rule, CurrentNodeID):
    """Resolve a pointer expression to a node id of this graph.

    After an optional leading "^" is stripped, the expression is split on
    "+" and each part (optionally prefixed with "!" for negation) has the
    form ``pointer[.relation[.relation...]]``:

    * empty pointer -> ``openID``
    * "~"           -> ``CurrentNodeID``
    * digits        -> ``rule.Tokens[n].MatchedNodeID``
    * anything else -> the first node (in sorted id order) whose
      ``TempPointer`` equals "^" + pointer

    Each relation then moves from the current node to the node at item 0 of
    the first edge (sorted by items 2, 1, 0) that ends at the current node
    and carries that relation or a relation having it as ontology ancestor.
    If no such edge exists the lookup fails, unless the part is negated, in
    which case the current node is kept.

    Returns:
        The node id reached by the last "+" part, or None when it cannot be
        resolved.  Truthy results are stored in ``FindPointerNode_Cache``.
    """
    if logging.root.isEnabledFor(logging.DEBUG):
        logging.debug("Dag.FindPointerNode for {}".format(SubtreePointer))

    # NOTE(review): the cache is probed with the pointer exactly as passed
    # in, but filled (at the end) with the pointer after the leading "^" has
    # been stripped, so a "^"-prefixed pointer is stored under a different
    # key than it is looked up with.  Behavior kept -- confirm the intent.
    probe_key = (openID, SubtreePointer, rule.ID)
    if probe_key in self.FindPointerNode_Cache:
        # logging.debug("FindPointerNode_Cache: hit!")
        return self.FindPointerNode_Cache[probe_key]

    if SubtreePointer.startswith('^'):
        SubtreePointer = SubtreePointer[1:]

    if "+" in SubtreePointer:
        logging.warning(
            "There is + sign in SubtreePointer of FindPointerNode(): {} ".
            format(rule))

    nodeID = None
    for condition in SubtreePointer.split("+"):
        negated = len(condition) > 1 and condition[0] == "!"
        if negated:
            condition = condition[1:]

        # Note: here the pointer does not have "^".
        pointer, _, relations = condition.partition(".")

        if pointer == '':
            nodeID = openID
        elif pointer == '~':
            nodeID = CurrentNodeID
        elif pointer.isdigit():
            token_index = int(pointer)
            try:
                nodeID = rule.Tokens[token_index].MatchedNodeID
            except AttributeError as e:
                # 'RuleToken' object has no attribute 'MatchedNodeID'
                logging.error(e)
                logging.error(
                    "FindPointerNode: The rule is written error, because the reference token is not yet matched. Please rewrite!"
                )
                logging.info(rule)
                return None
            except IndexError as e:
                logging.error(e)
                logging.error(
                    "FindPointerNode: The rule is written error, failed to find pointer {} IndexError!"
                    .format(token_index))
                logging.info(rule)
                return None
        else:
            temp_pointer = "^" + pointer
            nodeID = next(
                (candidate_id for candidate_id in sorted(self.nodes)
                 if self.nodes[candidate_id].TempPointer == temp_pointer),
                None)

        if not (nodeID and relations):
            continue

        for relation in relations.split("."):
            relationid = FeatureOntology.GetFeatureID(relation)
            for edge in sorted(self.graph, key=operator.itemgetter(2, 1, 0)):
                if edge[2] != nodeID:
                    continue
                if relationid == edge[3]:
                    nodeID = edge[0]
                    if logging.root.isEnabledFor(logging.DEBUG):
                        logging.debug(" Found!")
                    break
                edge_ontology_node = FeatureOntology.SearchFeatureOntology(
                    edge[3])
                if edge_ontology_node and relationid in edge_ontology_node.ancestors:
                    nodeID = edge[0]
                    if logging.root.isEnabledFor(logging.DEBUG):
                        logging.debug(" Found ontology ancesstor relation!")
                    break
            else:
                # No edge carried this relation: give up unless negated.
                if not negated:
                    return None

    if not nodeID:
        logging.warning("Can't find {} pointer in this rule{}".format(
            SubtreePointer, rule))
        return None

    self.FindPointerNode_Cache[(openID, SubtreePointer, rule.ID)] = nodeID
    return nodeID
def GetFeatureID(word):
    """Return the ontology feature id of ``word``, encoded with jsonpickle."""
    feature_id = FeatureOntology.GetFeatureID(word)
    return jsonpickle.encode(feature_id)