def _CheckEdge(self, node1id, relation, parentid):
    """Report whether the graph holds a ``relation`` edge between two nodes.

    Edges in ``self.graph`` are indexed here as ``e[0]`` (one node id),
    ``e[2]`` (the other node id) and ``e[3]`` (relation feature id).  An
    edge matches when its feature id equals the id of ``relation``, or when
    the ontology entry of the edge's relation lists ``relation`` among its
    ancestors.

    Args:
        node1id: node id expected in slot 0 of the edge.
        relation: relation name; a leading ``~`` swaps the roles of
            ``node1id`` and ``parentid`` before matching.
        parentid: node id expected in slot 2 of the edge.

    Returns:
        True when a matching edge exists, otherwise False.
    """
    if relation[0] == "~":
        logging.debug("_CheckEdge: Reverse! {}".format(relation))
        relation = relation[1:]
        slot0_id, slot2_id = parentid, node1id
    else:
        slot0_id, slot2_id = node1id, parentid
    relationid = FeatureOntology.GetFeatureID(relation)

    candidates = [
        e for e in self.graph if e[0] == slot0_id and e[2] == slot2_id
    ]
    # Deterministic order, so the debug trace does not depend on set order.
    candidates.sort(key=operator.itemgetter(2, 1, 0))
    for candidate in candidates:
        if relationid == candidate[3]:
            return True
        ontology_entry = FeatureOntology.SearchFeatureOntology(candidate[3])
        if not ontology_entry or relationid not in ontology_entry.ancestors:
            continue
        if logging.root.isEnabledFor(logging.DEBUG):
            logging.debug(" Found ontology ancesstor relation!")
        return True
    return False
def CleanOutput(self, KeepOriginFeature=False):
    """Build a trimmed ``JsonClass`` copy of this node and its sons.

    ``norm`` and ``atom`` are copied only when they differ from ``text``;
    ``pnorm``, ``iepair``, ``Head0Text`` and ``UpperRelationship`` only when
    truthy.  Feature ids are exported as sorted feature names.

    Args:
        KeepOriginFeature: when true, every feature is exported; otherwise
            ids listed in ``FeatureOntology.NotShowList`` are left out.

    Returns:
        The populated ``JsonClass`` instance.
    """
    exported = JsonClass()
    exported.ID = self.ID
    exported.text = self.text
    if self.norm != self.text:
        exported.norm = self.norm
    if self.pnorm:
        exported.pnorm = self.pnorm
    if self.iepair:
        exported.iepair = self.iepair
    if self.atom != self.text:
        exported.atom = self.atom

    if KeepOriginFeature:
        visible_ids = self.features
    else:
        visible_ids = [
            f for f in self.features if f not in FeatureOntology.NotShowList
        ]
    exported.features = sorted(
        FeatureOntology.GetFeatureName(f) for f in visible_ids)

    if self.Head0Text:
        exported.Head0Text = self.Head0Text
    exported.StartOffset = self.StartOffset
    exported.EndOffset = self.EndOffset
    if self.UpperRelationship:
        exported.UpperRelationship = self.UpperRelationship

    # not to export lower than 0
    if self.sons and utils.FeatureID_0 not in self.features:
        exported.sons = [
            son.CleanOutput(KeepOriginFeature) for son in self.sons
        ]
    return exported
def _RemoveEdge(self, node1id, relation, parentid):
    """Remove every ``relation`` edge that links ``node1id`` to ``parentid``.

    An edge ``e`` is removed when ``e[0] == node1id``, ``e[2] == parentid``
    and either ``e[3]`` equals the feature id of ``relation`` or the
    ontology entry for ``e[3]`` lists ``relation`` among its ancestors.

    Args:
        node1id: node id expected in slot 0 of the edge.
        relation: relation name; a leading ``~`` means the edge is stored in
            the opposite direction, so the two node ids are swapped.
        parentid: node id expected in slot 2 of the edge.
    """
    if relation.startswith("~"):  # revert
        self._RemoveEdge(parentid, relation[1:], node1id)
        return

    relationid = FeatureOntology.GetFeatureID(relation)
    # Snapshot the matches first: self.graph is mutated inside the loop.
    matches = [
        e for e in self.graph if e[0] == node1id and e[2] == parentid
    ]
    for edge in matches:
        if relationid == edge[3]:
            self.graph.remove(edge)
            continue
        # The edge's relation id may have no ontology entry; the sibling
        # lookups (_CheckEdge, FindPointerNode) guard against that, and so
        # must this one, otherwise ".ancestors" raises AttributeError.
        ontology_entry = FeatureOntology.SearchFeatureOntology(edge[3])
        if ontology_entry and relationid in ontology_entry.ancestors:
            self.graph.remove(edge)
def CleanOutput_FeatureLeave(self):
    """Build a ``JsonClass`` copy where each leaf feature is an attribute.

    The ids returned by ``Lexicon.CopyFeatureLeaves(self.features)`` that
    are not in ``FeatureOntology.NotShowList`` are turned into feature
    names, and each name becomes an attribute of the result whose value is
    the empty string.  Sons are exported recursively unless this node
    carries ``utils.FeatureID_0``.

    Returns:
        The populated ``JsonClass`` instance.
    """
    exported = JsonClass()
    exported.text = self.text
    if self.norm != self.text:
        exported.norm = self.norm
    if self.pnorm:
        exported.pnorm = self.pnorm
    if self.iepair:
        exported.iepair = self.iepair
    if self.atom != self.text:
        exported.atom = self.atom

    leaf_ids = Lexicon.CopyFeatureLeaves(self.features)
    leaf_names = [
        FeatureOntology.GetFeatureName(fid) for fid in leaf_ids
        if fid not in FeatureOntology.NotShowList
    ]
    for leaf_name in leaf_names:
        setattr(exported, leaf_name, '')

    exported.StartOffset = self.StartOffset
    exported.EndOffset = self.EndOffset
    if self.UpperRelationship:
        exported.UpperRelationship = self.UpperRelationship
    if self.sons and utils.FeatureID_0 not in self.features:
        exported.sons = [son.CleanOutput_FeatureLeave() for son in self.sons]
    return exported
def GetFeatures(self):
    """Return the visible feature names of this node as one sorted string.

    Ids in ``FeatureOntology.NotShowList`` are skipped.  An id whose name
    lookup yields a falsy value is reported with a warning and left out.

    Returns:
        The feature names sorted and joined with ``","``.
    """
    names = []
    for fid in self.features:
        if fid in FeatureOntology.NotShowList:
            continue
        name = FeatureOntology.GetFeatureName(fid)
        if not name:
            logging.warning("Can't get feature name of " + self.text +
                            " for id " + str(fid))
            continue
        names.append(name)
    names.sort()
    return ",".join(names)
def CleanOutput_Propagate(self, propogate_features=None):
    """Build a ``JsonClass`` copy, pushing Subj/Obj/Pred down to head sons.

    The Subj/Obj/Pred feature ids present on this node are handed to every
    son.  When this node carries ``utils.FeatureID_H`` and received a
    non-empty ``propogate_features``, the names of those features are
    appended to its exported feature list and the ids are passed further
    down as well.

    Args:
        propogate_features: feature ids handed down by the parent, or None.

    Returns:
        The populated ``JsonClass`` instance.
    """
    propagatable = {
        utils.FeatureID_Subj, utils.FeatureID_Obj, utils.FeatureID_Pred
    }
    handed_down = propagatable.intersection(self.features)

    exported = JsonClass()
    exported.text = self.text
    if self.norm != self.text:
        exported.norm = self.norm
    if self.pnorm:
        exported.pnorm = self.pnorm
    if self.iepair:
        exported.iepair = self.iepair
    if self.atom != self.text:
        exported.atom = self.atom

    exported.features = [
        FeatureOntology.GetFeatureName(fid) for fid in self.features
        if fid not in FeatureOntology.NotShowList
    ]
    is_head = utils.FeatureID_H in self.features
    if is_head and propogate_features:
        for fid in propogate_features:
            exported.features.append(FeatureOntology.GetFeatureName(fid))
        handed_down.update(propogate_features)

    exported.StartOffset = self.StartOffset
    exported.EndOffset = self.EndOffset
    if self.UpperRelationship:
        exported.UpperRelationship = self.UpperRelationship
    if self.sons and utils.FeatureID_0 not in self.features:
        exported.sons = [
            son.CleanOutput_Propagate(handed_down) for son in self.sons
        ]
    return exported
def should_merge(self):
    """Decide from feature names and text length whether to merge this node.

    The length is that of ``self.text`` with spaces removed.  The node
    qualifies when its feature names overlap
    ``FeatureOntology.MergeTokenList`` and the length is 2..5, or when it
    has feature ``mn`` or ``NP`` and the length is 2..4.

    Returns:
        True when the node qualifies, otherwise False.
    """
    feature_names = [
        FeatureOntology.GetFeatureName(fid) for fid in self.features
    ]
    text_len = len(self.text.replace(' ', ''))
    logging.info('feature_names:' + str(feature_names) + ' node len:' +
                 str(text_len))

    in_merge_list = utils.has_overlap(feature_names,
                                      FeatureOntology.MergeTokenList) > 0
    if in_merge_list and 2 <= text_len <= 5:
        return True
    is_nominal = 'mn' in feature_names or 'NP' in feature_names
    return is_nominal and 2 <= text_len <= 4
def get_chunk_label(self):
    """Return the display label of a chunk node, or ``''`` without bar tag.

    The label is the bar tag that ``utils.LastItemIn2DArray`` picks from the
    visible feature names, followed by a space.  It is prefixed with
    ``SYM_PAIR_HEAD[1]`` when the upper relationship equals
    ``SYM_PAIR_HEAD[0]``, or with the relationship plus ``SYM_HYPHEN`` when
    another relationship is set.
    """
    visible_names = [
        FeatureOntology.GetFeatureName(fid) for fid in self.features
        if fid not in FeatureOntology.NotShowList
    ]
    bar_tag = utils.LastItemIn2DArray(visible_names, FeatureOntology.BarTags)
    if not bar_tag:
        return ''

    if self.UpperRelationship == SYM_PAIR_HEAD[0]:
        return SYM_PAIR_HEAD[1] + bar_tag + ' '
    if self.UpperRelationship:
        return self.UpperRelationship + SYM_HYPHEN + bar_tag + ' '
    return bar_tag + ' '
def newnode(self, start, count, compound=False):
    """Create a node that spans ``count`` consecutive nodes from ``start``.

    Text, norm and atom of the members are concatenated.  Between members
    the separator is ``"_"`` when ``compound`` is true, otherwise as many
    spaces as the offset gap between the previous member's ``EndOffset``
    and the next member's ``StartOffset``.  For every member whose
    ``UpperRelationship`` is set and is not ``'H'``, the feature
    ``"has" + relationship`` is applied to the new node.

    Args:
        start: index of the first member, as accepted by ``self.get``.
        count: number of members to combine.
        compound: join members with ``"_"`` instead of offset-based spaces.

    Returns:
        Tuple ``(new node, first member, last member)``.

    Raises:
        RuntimeError: the list is empty, or ``start + count`` exceeds
            ``self.size``.
    """
    if not self.head:
        raise RuntimeError("This SentenceLinkedList is null! Can't combine.")
    if start + count > self.size:
        logging.error(self.__str__())
        raise RuntimeError("Can't get " + str(count) + " items start from " +
                           str(start) + " from the sentence!")

    startnode = self.get(start)
    endnode = self.get(start + count - 1)

    text_parts, norm_parts, atom_parts = [], [], []
    members = []
    has_relation_ids = []
    previous_end = startnode.StartOffset
    current = startnode
    for position in range(count):
        if position == 0:
            gap = ""
        elif compound:
            gap = "_"
        else:
            gap = " " * (current.StartOffset - previous_end)
        previous_end = current.EndOffset

        text_parts.append(gap + current.text)
        norm_parts.append(gap + current.norm)
        atom_parts.append(gap + current.atom)

        if current.UpperRelationship and current.UpperRelationship != 'H':
            has_relation_ids.append(
                FeatureOntology.GetFeatureID("has" +
                                             current.UpperRelationship))
        members.append(current)
        current = current.next

    merged = SentenceNode("".join(text_parts))
    merged.norm = "".join(norm_parts)
    merged.atom = "".join(atom_parts)
    merged.sons = members
    merged.StartOffset = startnode.StartOffset
    merged.EndOffset = endnode.EndOffset
    Lexicon.ApplyWordLengthFeature(merged)
    for relation_id in has_relation_ids:
        merged.ApplyFeature(relation_id)
    return merged, startnode, endnode
def get_leaf_label(self):
    """Return the display label prefix of a leaf node.

    With a bar tag the label ends in ``"/"``: the bar tag alone when no
    upper relationship is set, ``SYM_PAIR_HEAD[1]`` plus the bar tag for a
    head, and just the relationship for any other role (the POS is left out
    on purpose there).  Without a bar tag the label is ``SYM_PAIR_HEAD[1]``
    for a head and the raw relationship value otherwise.  A node with empty
    text yields ``''``.
    """
    if not self.text:  # deal with empty node
        return ''

    visible_names = [
        FeatureOntology.GetFeatureName(fid) for fid in self.features
        if fid not in FeatureOntology.NotShowList
    ]
    bar_tag = utils.LastItemIn2DArray(visible_names, FeatureOntology.BarTags)

    if bar_tag:
        if not self.UpperRelationship:  # syntactic role is empty
            return bar_tag + "/"
        if self.UpperRelationship == SYM_PAIR_HEAD[0]:  # role is HEAD
            return SYM_PAIR_HEAD[1] + bar_tag + "/"
        # Role is not HEAD.  Wei note: POS left out.  For leaf label POS,
        # we do not really care in tracking as long as Relationship is right.
        return self.UpperRelationship + "/"

    if self.UpperRelationship == SYM_PAIR_HEAD[0]:
        return SYM_PAIR_HEAD[1]
    return self.UpperRelationship
def _AddEdge(self, node1id, relation, parentid):
    """Add a ``relation`` edge from ``node1id`` to ``parentid``.

    The edge is stored as ``(node1id, relation, parentid, relation id)``.
    Edges between the same two nodes that carry an ontology ancestor of
    ``relation`` are dropped, so only the more specific relation remains.
    The parent node then receives the feature ``"has" + relation``; when
    that feature is unknown (negative id) an error is logged instead.

    Args:
        node1id: id of the node the edge starts from.
        relation: relation name.
        parentid: id of the node the edge points to.
    """
    relationid = FeatureOntology.GetFeatureID(relation)
    self.graph.add((node1id, relation, parentid, relationid))

    # Use ontology to find the ancestors of the relation.
    ontology_entry = FeatureOntology.SearchFeatureOntology(relationid)
    if ontology_entry:
        for ancestor in ontology_entry.ancestors:
            ancestorname = FeatureOntology.GetFeatureName(ancestor)
            self.graph.discard((node1id, ancestorname, parentid, ancestor))

    # Set the parent to have the relation.
    hasFeatureID = FeatureOntology.GetFeatureID("has" + relation)
    if hasFeatureID < 0:
        logging.error(
            "There is no has{} feature in the feature.txt!".format(relation))
        return
    self.nodes[parentid].ApplyFeature(hasFeatureID)
def OutputStringTokens_onelinerSA_ben(dag):
    """Serialize the sentiment key/value pairs of ``dag`` as a JSON array.

    For every edge (visited in sorted order) whose two end nodes carry the
    ``Key`` and ``Value`` features, one record is emitted with the keys
    ``keyid``, ``key``, ``keyfeatures``, ``valueid``, ``value`` and
    ``valuefeatures``.  Both orientations of the edge are checked and each
    match yields its own record.  Every node that has a sentiment feature
    but appears in no record is then emitted as the value of a synthetic
    ``_Emo`` key with ``keyid`` -1.

    Args:
        dag: object exposing ``nodes`` (dict of node id -> node) and
            ``graph`` (iterable of edge tuples; slot 0 and slot 2 hold the
            node ids).

    Returns:
        The JSON text of the record list.
    """
    sentimentfeature = [
        "Target", "Pro", "Con", "PosEmo", "NegEmo", "Neutral", "Needed",
        "Key", "Value"
    ]
    sentimentfeatureidset = {
        FeatureOntology.GetFeatureID(f) for f in sentimentfeature
    }
    FeatureID_Key = FeatureOntology.GetFeatureID("Key")
    FeatureID_Value = FeatureOntology.GetFeatureID("Value")
    nodes = dag.nodes

    def sentiment_names(node):
        # Names of the node's features that belong to the sentiment set.
        return [
            FeatureOntology.GetFeatureName(f) for f in node.features
            if f in sentimentfeatureidset
        ]

    def pair_record(keynode, valuenode):
        # One output record.  Every record uses the same key spelling
        # ("valueid"), which the orphan scan below relies on.
        return {
            "keyid": keynode.ID,
            "key": keynode.text,
            "keyfeatures": sentiment_names(keynode),
            "valueid": valuenode.ID,
            "value": valuenode.text,
            "valuefeatures": sentiment_names(valuenode),
        }

    outputdict = []
    for edge in sorted(dag.graph, key=operator.itemgetter(2, 0, 1)):
        node1 = nodes.get(edge[2])
        node2 = nodes.get(edge[0])
        if FeatureID_Key in node1.features \
                and FeatureID_Value in node2.features:
            outputdict.append(pair_record(node1, node2))
        if FeatureID_Key in node2.features \
                and FeatureID_Value in node1.features:
            outputdict.append(pair_record(node2, node1))

    for node in nodes.values():
        if not sentimentfeatureidset.intersection(node.features):
            continue
        already_listed = any(
            record["keyid"] == node.ID or record["valueid"] == node.ID
            for record in outputdict)
        if already_listed:
            continue
        outputdict.append({
            "keyid": -1,
            "key": "_Emo",
            "keyfeatures": [],
            "valueid": node.ID,
            "value": node.text,
            "valuefeatures": sentiment_names(node),
        })

    return json.dumps(outputdict,
                      default=lambda o: o.__dict__,
                      sort_keys=True,
                      ensure_ascii=False)
def ApplyDagActions(self, OpenNode, node, actinstring, rule):
    """Apply the whitespace-separated actions of ``actinstring`` to ``node``.

    Processing order:
      1. A ``#...#`` segment, if present, is cut out of the string and
         handed to ``ApplyDagActions_IEPair``.
      2. Every action containing ``---`` removes all edges between ``node``
         and the node its pointer resolves to, and is then dropped from the
         action list.
      3. The remaining actions are handled in order of their last character:
         ``^pointer.relation`` adds an edge (a trailing ``-`` removes it),
         ``X-`` removes feature X, ``X+`` / ``X++`` apply feature X,
         ``'...'`` / ``%...%`` / ``/.../`` set norm / pnorm / atom, and
         anything else is applied as a plain feature name.

    The method returns early (None) as soon as a pointer cannot be resolved.

    NOTE(review): the source was collapsed onto single lines; the nesting
    below is reconstructed.  In particular the final ``NEUTRAL`` check is
    assumed to sit at loop level -- confirm against the upstream file.
    """
    # Information-extraction pair, e.g. "#...#": handled separately.
    iepairmatch = re.search("(#.*#)", actinstring)
    if iepairmatch:
        ieaction = iepairmatch.group(1)
        actinstring = actinstring.replace(iepairmatch.group(1), '')
        self.ApplyDagActions_IEPair(OpenNode, node, rule, ieaction)
    Actions = actinstring.split()
    # Iterate over a copy because handled actions are popped from Actions.
    for Action in copy.copy(Actions):
        if "---" in Action:
            ParentPointer = Action[:Action.rfind(
                '.')]  #find pointer up the the last dot "."
            parentnodeid = self.FindPointerNode(OpenNode.ID, ParentPointer,
                                                rule, node.ID)
            # NOTE(review): a falsy id (e.g. 0) is treated as "not found".
            if not parentnodeid:
                return
            if "~---" in Action:
                # Reverse direction: drop edges stored as (parent -> node).
                self.graph = set([
                    edge for edge in self.graph
                    if edge[0] != parentnodeid or edge[2] != node.ID
                ])
                logging.debug(
                    "Dag Action {}: Removed all edge from {} to {}".format(
                        Action, parentnodeid, node.ID))
            else:
                # Drop edges stored as (node -> parent).  The debug text is
                # the same as in the reverse branch.
                self.graph = set([
                    edge for edge in self.graph
                    if edge[0] != node.ID or edge[2] != parentnodeid
                ])
                logging.debug(
                    "Dag Action {}: Removed all edge from {} to {}".format(
                        Action, parentnodeid, node.ID))
            Actions.pop(Actions.index(Action))
    # Sorted by last character, so "+" and "-" suffixed actions run before
    # actions that end in a letter or digit.
    for Action in sorted(Actions, key=lambda d: (d[-1])):
        if Action[0] == '^':
            ParentPointer = Action[:Action.rfind(
                '.')]  #find pointer up the the last dot "."
            parentnodeid = self.FindPointerNode(OpenNode.ID, ParentPointer,
                                                rule, node.ID)
            if not parentnodeid:
                return
            #logging.warning("DAG Action: This action {} to apply, parent id={}".format(Action, parentnodeid))
            if Action[-1] == "-":  # remove
                relation = Action[Action.rfind('.') + 1:-1]
                self._RemoveEdge(node.ID, relation, parentnodeid)
            else:
                relation = Action[Action.rfind('.') + 1:]
                newedge = [node.ID, relation, parentnodeid]
                if logging.root.isEnabledFor(logging.DEBUG):
                    logging.debug(
                        "DAG Action:Adding new edge: {}".format(newedge))
                self._AddEdge(node.ID, relation, parentnodeid)
                # The relation name is also applied to the node as a feature.
                RelationActionID = FeatureOntology.GetFeatureID(relation)
                if RelationActionID != -1:
                    node.ApplyFeature(RelationActionID)
                else:
                    logging.warning(
                        "Wrong Relation Action to apply: {} in action string: {}"
                        .format(relation, actinstring))
            continue
        if Action[-1] == "-":
            # "X-": remove feature X if present.
            FeatureID = FeatureOntology.GetFeatureID(Action.strip("-"))
            if FeatureID in node.features:
                node.features.remove(FeatureID)
            continue
        if Action[-1] == "+":
            # NOTE(review): Action[-2] / Action[-3] raise IndexError for the
            # bare tokens "+" and "++".
            if Action[-2] == "+":
                if Action[-3] == "+":  #"+++"
                    logging.error(
                        "There should be no +++ operation in DAG.")
                else:  #"X++":
                    # str.strip treats its argument as a character set, so
                    # this removes every leading/trailing "+".
                    FeatureID = FeatureOntology.GetFeatureID(
                        Action.strip("++"))
                    node.ApplyFeature(FeatureID)
            else:  #"X+"
                # Clear the first row of bar tags and the AC/NC/VC features
                # before applying the new one.
                for bar0id in FeatureOntology.BarTagIDs[0]:
                    if bar0id in node.features:
                        node.features.remove(bar0id)
                for bar0id in [
                        utils.FeatureID_AC, utils.FeatureID_NC,
                        utils.FeatureID_VC
                ]:
                    if bar0id in node.features:
                        node.features.remove(bar0id)
                FeatureID = FeatureOntology.GetFeatureID(Action.strip("+"))
                node.ApplyFeature(FeatureID)
            continue
        if Action[0] == '\'':
            #Make the norm of the token to this key
            node.norm = Action[1:-1]
            continue
        if Action[0] == '%':
            # Make the pnorm of the token to this key
            node.pnorm = Action[1:-1]
            continue
        if Action[0] == '/':
            #Make the atom of the token to this key
            node.atom = Action[1:-1]
            continue
        # Anything else is a plain feature name.
        ActionID = FeatureOntology.GetFeatureID(Action)
        if ActionID != -1:
            node.ApplyFeature(ActionID)
        else:
            logging.warning("Wrong Action to apply:" + Action
                            + " in action string: " + actinstring)
        if Action == "NEUTRAL":
            FeatureOntology.ProcessSentimentTags(node.features)
def FindPointerNode(self, openID, SubtreePointer, rule, CurrentNodeID):
    """Resolve a pointer expression to a node id.

    A pointer has the form ``[^]start[.relation[.relation...]]``:
      * ``start`` empty  -> ``openID``
      * ``start`` ``~``  -> ``CurrentNodeID``
      * ``start`` digits -> ``rule.Tokens[n].MatchedNodeID``
      * otherwise        -> the first node (by sorted id) whose
        ``TempPointer`` equals ``"^" + start``
    Each ``relation`` then moves from the current node to the slot-0 node
    of an edge whose slot 2 is the current node and whose relation id
    matches directly or through the ontology ancestors.

    Returns:
        The resolved node id, or None when it cannot be resolved.

    NOTE(review): the cache is looked up with the pointer as passed in but
    written with the pointer after the leading ``^`` was stripped, and the
    key does not include ``CurrentNodeID`` although ``~`` depends on it --
    confirm whether that is intended.
    NOTE(review): the source was collapsed onto single lines; the final
    ``if nodeID`` is assumed to follow the ``for AndCondition`` loop.
    """
    if logging.root.isEnabledFor(logging.DEBUG):
        logging.debug("Dag.FindPointerNode for {}".format(SubtreePointer))
    if (openID, SubtreePointer, rule.ID) in self.FindPointerNode_Cache:
        #logging.debug("FindPointerNode_Cache: hit!")
        return self.FindPointerNode_Cache[(openID, SubtreePointer, rule.ID)]
    # Drop the leading "^" marker.
    if len(SubtreePointer) >= 1 and SubtreePointer[0] == '^':
        SubtreePointer = SubtreePointer[1:]
    nodeID = None
    if "+" in SubtreePointer:
        logging.warning(
            "There is + sign in SubtreePointer of FindPointerNode(): {} ".
            format(rule))
    for AndCondition in SubtreePointer.split("+"):
        # A leading "!" only relaxes the "relation not found" exit below.
        Negation = False
        if len(AndCondition) > 1 and AndCondition[0] == "!":
            #logging.warning("FindPointerNode: Negation! {}".format(SubtreePointer))
            Negation = True
            AndCondition = AndCondition[1:]
        # Split into the start pointer and the dotted relation path.
        if "." in AndCondition:
            pointer, relations = AndCondition.split(".", 1)
        else:
            pointer, relations = [AndCondition, ""]
        #pointers = SubtreePointer.split(".") # Note: here Pointer (subtreepointer) does not have "^"
        #logging.debug("tree:{}".format(pointers))
        # if len(pointers) <=1:
        #     #logging.error("Should have more than 1 pointers! Can't find {} in graph {}".format(SubtreePointer, self.graph))
        #     return openID
        nodeID = None
        if pointer == '':
            nodeID = openID
        elif pointer == '~':
            #logging.warning("THIS POINTER in {}".format(rule))
            nodeID = CurrentNodeID
        else:
            if pointer.isdigit():
                # Numeric pointer: index into the rule's tokens.
                pointer_num = int(pointer)
                try:
                    nodeID = rule.Tokens[pointer_num].MatchedNodeID
                except AttributeError as e:
                    #AttributeError: 'RuleToken' object has no attribute 'MatchedNodeID'
                    logging.error(e)
                    logging.error(
                        "FindPointerNode: The rule is written error, because the reference token is not yet matched. Please rewrite!"
                    )
                    logging.info(rule)
                    return None
                except IndexError as e:
                    logging.error(e)
                    logging.error(
                        "FindPointerNode: The rule is written error, failed to find pointer {} IndexError!"
                        .format(pointer_num))
                    logging.info(rule)
                    return None
            else:
                # Named pointer: search the nodes for a matching TempPointer.
                pointer = "^" + pointer
                #logging.info("Finding pointer node {} from TempPointer".format(pointer))
                for nodeid in sorted(self.nodes):
                    #logging.debug("DAG.FindPointerNode: evaluating temppointer {} in {} with pointer {}".format(self.nodes[nodeid].TempPointer, self.nodes[nodeid].text, pointer))
                    if self.nodes[nodeid].TempPointer == pointer:
                        #logging.debug("Matched nodeid {}".format(nodeid))
                        nodeID = nodeid
                        break
        #logging.warning("after looping over the nodes, nodeID={}".format(nodeID))
        if nodeID and relations:
            # Walk the relation path one step at a time.
            for relation in relations.split("."):
                Found = False
                # if relation == "LEFT":
                #     nodeID = self.LinearNodeOffset(nodeID, -1)
                #     if nodeID:
                #         Found = True
                # elif relation == "RIGHT":
                #     nodeID = self.LinearNodeOffset(nodeID, 1)
                #     if nodeID:
                #         Found = True
                # else:
                relationid = FeatureOntology.GetFeatureID(relation)
                # Sorted so that the edge picked does not depend on set order.
                for edge in sorted(self.graph,
                                   key=operator.itemgetter(2, 1, 0)):
                    #logging.debug("Evaluating edge{} with relation {}, node {}".format(edge, relation, nodeID))
                    if edge[2] == nodeID:
                        if relationid == edge[
                                3]:  # or relationid in FeatureOntology.SearchFeatureOntology(edge[3]):
                            nodeID = edge[0]
                            Found = True
                            if logging.root.isEnabledFor(logging.DEBUG):
                                logging.debug(" Found!")
                            break
                        else:
                            # Accept an edge whose relation has the requested
                            # relation among its ontology ancestors.
                            edgerelationnode = FeatureOntology.SearchFeatureOntology(
                                edge[3])
                            if edgerelationnode and relationid in edgerelationnode.ancestors:
                                nodeID = edge[0]
                                Found = True
                                if logging.root.isEnabledFor(
                                        logging.DEBUG):
                                    logging.debug(
                                        " Found ontology ancesstor relation!"
                                    )
                                break
                if not Found:
                    if not Negation:
                        return None
                    #logging.warning("Failed to find pointer {} in graph {}".format(SubtreePointer, self))
                    #return None #Can't find the pointers.
        #logging.info("Found this node {} for these pointers:{}".format(nodeID, pointers))
    if nodeID:
        self.FindPointerNode_Cache[(openID, SubtreePointer,
                                    rule.ID)] = nodeID
        return nodeID
    else:
        logging.warning("Can't find {} pointer in this rule{}".format(
            SubtreePointer, rule))
        return None
Tokenize('響著錄中文规则很长 very long , 为啥是不?') # def LoopTest2(n): # for _ in range(n): # old_Tokenize_cn('響著錄中文规则很长 very long , 为啥是不?') if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG, format='%(asctime)s [%(levelname)s] %(message)s') logging.info("Start") # import ProcessSentence # ProcessSentence.LoadCommon() # too heavy to load for debugging FeatureOntology.LoadFeatureOntology('../../fsa/Y/feature.txt') Lexicon.LoadSegmentLexicon() XLocation = '../../fsa/X/' Lexicon.LoadExtraReference(XLocation + 'CuobieziX.txt', Lexicon._LexiconCuobieziDict) Lexicon.LoadExtraReference(XLocation + 'Fanti.txt', Lexicon._LexiconFantiDict) main_x = Tokenize('科普:。,?带你看懂蜀绣冰壶比赛') #old_Tokenize_cn('很少有科普:3 minutes 三分钟带你看懂蜀绣冰壶比赛') import cProfile, pstats cProfile.run("LoopTest1(100)", 'restatslex') pstat = pstats.Stats('restatslex') pstat.sort_stats('time').print_stats(10)
def ApplyActions(self, actinstring):
    """Apply the whitespace-separated actions of ``actinstring`` to this node.

    Supported actions:
      * ``NEW``      reset ``self.features`` to an empty set (done first).
      * ``NEUTRAL``  run ``FeatureOntology.ProcessSentimentTags`` (done
        first, after ``NEW``).
      * ``^ptr.R-``  clear ``UpperRelationship`` when it currently is ``R``.
      * ``X-``       remove feature X.
      * ``X+``       clear the first row of bar tags and AC/NC/VC, then
        apply feature X.
      * ``X++``      apply feature X and, after all actions, run
        ``FeatureOntology.ProcessBarTags``.
      * ``X+++``     apply ``utils.FeatureID_0`` and drop the sons and
        ``Head0Text``.
      * ``^ptr.R``   set ``UpperRelationship`` to ``R`` and apply feature R.
      * ``'...'`` / ``%...%`` / ``/.../``  set norm / pnorm / atom.
      * anything else is applied as a plain feature name (``NEW`` and
        ``NEUTRAL`` also reach this step).

    Unknown feature or relation names are reported with a warning.

    Args:
        actinstring: the action string of a rule.
    """
    Actions = actinstring.split()
    if "NEW" in Actions:
        self.features = set()
    if "NEUTRAL" in Actions:
        FeatureOntology.ProcessSentimentTags(self.features)

    HasBartagAction = False
    for Action in Actions:
        if Action.endswith("-"):
            if Action.startswith("^"):  # Remove UpperRelationship
                if "." in Action:
                    # Relation name = last dotted segment without the
                    # trailing "-".  (Comparing against the last character
                    # would always compare against "-" and never match.)
                    target = Action.rsplit(".", 1)[1].rstrip("-")
                    if self.UpperRelationship == target:
                        # TODO: actually break the token.
                        # Cleared rather than deleted, so the methods that
                        # read self.UpperRelationship keep working.
                        self.UpperRelationship = ''
                        logging.warning(
                            " TODO: actually break the token. not just delattr Remove Relationship:"
                            + Action)
                else:
                    logging.warning("This Action is not right:" + Action)
                continue
            FeatureID = FeatureOntology.GetFeatureID(Action.strip("-"))
            if FeatureID in self.features:
                self.features.remove(FeatureID)
            continue

        if Action.endswith("+"):
            # endswith() instead of negative indexing: the bare tokens "+"
            # and "++" no longer raise IndexError.
            if Action.endswith("+++"):
                self.ApplyFeature(utils.FeatureID_0)
                self.sons = []  # remove the sons of this
                self.Head0Text = ''  # remove Head0Text.
            elif Action.endswith("++"):
                # "X++": this should be in a chunk, only apply to the new node
                HasBartagAction = True
                FeatureID = FeatureOntology.GetFeatureID(Action.strip("+"))
                self.ApplyFeature(FeatureID)
            else:
                # "X+": the first bar-tag row serves as the major POS set
                # and is cleared together with AC/NC/VC.
                for bar0id in FeatureOntology.BarTagIDs[0]:
                    if bar0id in self.features:
                        self.features.remove(bar0id)
                for bar0id in [
                        utils.FeatureID_AC, utils.FeatureID_NC,
                        utils.FeatureID_VC
                ]:
                    if bar0id in self.features:
                        self.features.remove(bar0id)
                FeatureID = FeatureOntology.GetFeatureID(Action.strip("+"))
                self.ApplyFeature(FeatureID)
            continue

        if Action.startswith("^"):
            if "." in Action:
                self.UpperRelationship = Action.split(".")[-1]
                RelationActionID = FeatureOntology.GetFeatureID(
                    self.UpperRelationship)
                if RelationActionID != -1:
                    self.ApplyFeature(RelationActionID)
                else:
                    logging.warning("Wrong Relation Action to apply:" +
                                    self.UpperRelationship +
                                    " in action string: " + actinstring)
            else:
                logging.error(
                    "The Action is wrong: It does not have dot to link to proper pointer"
                )
                logging.error(" actinstring:" + actinstring)
                self.UpperRelationship = Action[1:]
            continue

        if Action[0] == '\'':
            # Make the norm of the token to this key
            self.norm = Action[1:-1]
            continue
        if Action[0] == '%':
            # Make the pnorm of the token to this key
            self.pnorm = Action[1:-1]
            continue
        if Action[0] == '/':
            # Make the atom of the token to this key
            self.atom = Action[1:-1]
            continue

        ActionID = FeatureOntology.GetFeatureID(Action)
        if ActionID != -1:
            self.ApplyFeature(ActionID)
        else:
            logging.warning("Wrong Action to apply:" + Action +
                            " in action string: " + actinstring)

    # Only process bartags if there is a new bar tag.
    if HasBartagAction:
        FeatureOntology.ProcessBarTags(self.features)
def ApplyFeature(self, featureID):
    """Add ``featureID`` and all of its ontology ancestors to this node.

    Args:
        featureID: id of the feature to add to ``self.features``.
    """
    self.features.add(featureID)
    ontology_entry = FeatureOntology.SearchFeatureOntology(featureID)
    if not ontology_entry:
        return
    inherited = ontology_entry.ancestors
    if inherited:
        self.features.update(inherited)
def OutputStringTokens_onelinerSA(dag):
    """Serialize the sentiment view of ``dag`` as one JSON object string.

    The object has three members:
      * ``nodes``: one entry per node (ordered by ``Index``) with
        ``nodeID``, ``text`` and the sorted sentiment feature names.
      * ``edges``: the distinct ``{key, value}`` node-id pairs of edges
        that link a ``Key`` node with a ``Value`` node.  The member is left
        out when there is no such pair.
      * ``sentence``: the concatenated node texts, JSON-escaped so that
        quotes or backslashes in a token cannot break the output.

    Args:
        dag: object exposing ``nodes`` (dict of node id -> node) and
            ``graph`` (iterable of edge tuples; slot 0 and slot 2 hold the
            node ids).

    Returns:
        The JSON text.
    """
    sentimentfeature = frozenset([
        "Target", "Pro", "Con", "PosEmo", "NegEmo", "Neutral", "Needed",
        "Key", "Value"
    ])
    nodes = dag.nodes

    def visible_names(node):
        # Names of the node's features that are not hidden.
        return [
            FeatureOntology.GetFeatureName(f) for f in node.features
            if f not in FeatureOntology.NotShowList
        ]

    node_chunks = []
    sentence_parts = []
    for node in sorted(nodes.values(), key=operator.attrgetter("Index")):
        sentence_parts.append(node.text)
        jsondict = {
            "nodeID": node.ID,
            "text": node.text,
            "features": [
                f for f in sorted(visible_names(node))
                if f in sentimentfeature
            ],
        }
        node_chunks.append(
            json.dumps(jsondict,
                       default=lambda o: o.__dict__,
                       sort_keys=True,
                       ensure_ascii=False))

    edge_chunks = []
    seen_pairs = set()
    for edge in sorted(dag.graph, key=operator.itemgetter(2, 0, 1)):
        slot2_names = visible_names(nodes.get(edge[2]))
        if "Key" in slot2_names:
            # Slot-2 node is the key; the slot-0 node must be a value.
            if "Value" not in visible_names(nodes.get(edge[0])):
                continue
            pair = (edge[2], edge[0])
        elif "Value" in slot2_names:
            # Slot-2 node is the value; the slot-0 node must be a key.
            if "Key" not in visible_names(nodes.get(edge[0])):
                continue
            pair = (edge[0], edge[2])
        else:
            continue
        if pair in seen_pairs:
            continue
        seen_pairs.add(pair)
        edge_chunks.append('{{ "key":{}, "value":{}}}'.format(*pair))

    output = '{ "nodes": [' + ", ".join(node_chunks)
    if edge_chunks:
        output += '], "edges": [' + ", ".join(edge_chunks)
    output += '], "sentence": ' + json.dumps("".join(sentence_parts),
                                             ensure_ascii=False) + '}'
    return output
def GetFeatureName(FeatureID):
    """Return the jsonpickle encoding of the name of a feature id.

    Args:
        FeatureID: the feature id in any form accepted by ``int()``.
    """
    numeric_id = int(FeatureID)
    feature_name = FeatureOntology.GetFeatureName(numeric_id)
    return jsonpickle.encode(feature_name)
def InitGlobalFeatureID():
    """Resolve the frequently used feature ids into module-level globals.

    Does nothing when ``FeatureID_JS2`` is already truthy.  Otherwise every
    ``FeatureID_*`` global listed below is set from
    ``FeatureOntology.GetFeatureID``, and ``FeatureOntology.BarTagIDs``,
    ``BarTagIDSet`` and ``SentimentTagIDSet`` are filled from ``BarTags``
    and ``SentimentTags``.
    """
    if FeatureID_JS2:
        return

    # Imported here rather than at module level, as in the original.
    import FeatureOntology

    # (suffix of the FeatureID_* global, feature name), in lookup order.
    id_table = (
        ("JS", "JS"),
        ("JS2", "JS2"),
        ("JM2", "JM2"),
        ("JM", "JM"),
        ("0", "0"),
        ("CD", "CD"),
        ("punc", "punc"),
        ("SYM", "SYM"),
        ("NNP", "NNP"),
        ("External", "External"),
        ("OOV", "OOV"),
        ("CM", "CM"),
        ("NEW", "NEW"),
        ("SpaceQ", "spaceQ"),
        ("SpaceH", "spaceH"),
        ("FULLSTRING", "FULLSTRING"),
        ("VB", "VB"),
        ("Ved", "Ved"),
        ("Ving", "Ving"),
        ("H", "H"),
        ("Subj", "Subj"),
        ("Obj", "Obj"),
        ("Pred", "Pred"),
        ("AC", "AC"),
        ("NC", "NC"),
        ("VC", "VC"),
        ("HIT", "HIT"),
        ("HIT2", "HIT2"),
        ("HIT3", "HIT3"),
        ("comPair", "comPair"),
    )
    module_scope = globals()
    for suffix, feature_name in id_table:
        module_scope["FeatureID_" + suffix] = FeatureOntology.GetFeatureID(
            feature_name)

    FeatureOntology.BarTagIDs = [[
        FeatureOntology.GetFeatureID(tag) for tag in row
    ] for row in FeatureOntology.BarTags]
    for id_row in FeatureOntology.BarTagIDs:
        FeatureOntology.BarTagIDSet.update(id_row)

    FeatureOntology.SentimentTagIDSet = {
        FeatureOntology.GetFeatureID(tag)
        for tag in FeatureOntology.SentimentTags
    }
def GetFeatureID(word):
    """Return the jsonpickle encoding of the feature id of ``word``.

    Args:
        word: feature name passed to ``FeatureOntology.GetFeatureID``.
    """
    feature_id = FeatureOntology.GetFeatureID(word)
    return jsonpickle.encode(feature_id)