Ejemplo n.º 1
0
    def _CheckEdge(self, node1id, relation, parentid):
        """Return True when the graph holds a matching edge between two nodes.

        A leading "~" on ``relation`` reverses the direction: the edge is then
        looked up from ``parentid`` to ``node1id`` instead of the other way
        round. An edge matches when its relation id equals the requested one,
        or when the requested id is listed among the ancestors of the edge's
        relation in the feature ontology.
        """
        if relation[0] == "~":
            logging.debug("_CheckEdge: Reverse! {}".format(relation))
            relation = relation[1:]
            source_id, target_id = parentid, node1id
        else:
            source_id, target_id = node1id, parentid

        wanted_id = FeatureOntology.GetFeatureID(relation)

        # Edges are tuples of (child id, relation name, parent id, relation id).
        matching = [
            e for e in self.graph if e[0] == source_id and e[2] == target_id
        ]
        matching.sort(key=operator.itemgetter(2, 1, 0))

        for candidate in matching:
            if wanted_id == candidate[3]:
                return True
            onto_node = FeatureOntology.SearchFeatureOntology(candidate[3])
            if not onto_node or wanted_id not in onto_node.ancestors:
                continue
            if logging.root.isEnabledFor(logging.DEBUG):
                logging.debug("   Found ontology ancesstor relation!")
            return True
        return False
Ejemplo n.º 2
0
    def CleanOutput(self, KeepOriginFeature=False):
        """Build a trimmed JsonClass copy of this node and, recursively, its sons.

        Optional fields (norm, pnorm, iepair, atom, UpperRelationship, sons)
        are only copied when they carry information. With
        ``KeepOriginFeature`` set, every feature name is exported (including
        those in ``FeatureOntology.NotShowList``) and ``Head0Text`` is kept
        when it is non-empty.
        """
        to_name = FeatureOntology.GetFeatureName

        cleaned = JsonClass()
        cleaned.ID = self.ID
        cleaned.text = self.text
        if self.norm != self.text:
            cleaned.norm = self.norm
        if self.pnorm:
            cleaned.pnorm = self.pnorm
        if self.iepair:
            cleaned.iepair = self.iepair
        if self.atom != self.text:
            cleaned.atom = self.atom

        visible_names = [
            to_name(fid) for fid in self.features
            if fid not in FeatureOntology.NotShowList
        ]
        visible_names.sort()
        cleaned.features = visible_names

        if KeepOriginFeature:
            # Replace the filtered list with the complete one.
            cleaned.features = sorted(to_name(fid) for fid in self.features)
            if self.Head0Text:
                cleaned.Head0Text = self.Head0Text

        cleaned.StartOffset = self.StartOffset
        cleaned.EndOffset = self.EndOffset
        if self.UpperRelationship:
            cleaned.UpperRelationship = self.UpperRelationship

        # Nodes carrying FeatureID_0 do not export their sons.
        if self.sons and utils.FeatureID_0 not in self.features:
            cleaned.sons = [
                child.CleanOutput(KeepOriginFeature) for child in self.sons
            ]

        return cleaned
Ejemplo n.º 3
0
    def _RemoveEdge(self, node1id, relation, parentid):
        """Remove the edges from ``node1id`` to ``parentid`` matching ``relation``.

        A leading "~" on ``relation`` means the edge is stored in the reverse
        direction, so the call is repeated with the endpoints swapped. An edge
        matches when its relation id equals the requested one, or when the
        requested id is among the ancestors of the edge's relation in the
        feature ontology. Nothing happens when no edge matches.
        """
        if relation[0] == "~":  # reversed relation: swap the endpoints
            self._RemoveEdge(parentid, relation[1:], node1id)
            return

        relationid = FeatureOntology.GetFeatureID(relation)

        # Snapshot the candidates first so self.graph can be mutated in the loop.
        candidates = [
            e for e in self.graph if e[0] == node1id and e[2] == parentid
        ]
        for edge in candidates:
            if relationid == edge[3]:
                self.graph.remove(edge)
                continue
            # The ontology lookup may find nothing for this relation id; guard
            # it the same way _CheckEdge does instead of dereferencing None.
            edgerelationnode = FeatureOntology.SearchFeatureOntology(edge[3])
            if edgerelationnode and relationid in edgerelationnode.ancestors:
                self.graph.remove(edge)
Ejemplo n.º 4
0
    def CleanOutput_FeatureLeave(self):
        """Build a JsonClass copy where each leaf feature is its own attribute.

        The features returned by ``Lexicon.CopyFeatureLeaves`` (minus those in
        ``FeatureOntology.NotShowList``) are each set on the result as an
        attribute named after the feature, with an empty string as value.
        Sons are converted recursively unless this node carries FeatureID_0.
        """
        leaf = JsonClass()
        leaf.text = self.text
        if self.norm != self.text:
            leaf.norm = self.norm
        if self.pnorm:
            leaf.pnorm = self.pnorm
        if self.iepair:
            leaf.iepair = self.iepair
        if self.atom != self.text:
            leaf.atom = self.atom

        # Resolve all names first, then attach them as attributes.
        leaf_names = []
        for fid in Lexicon.CopyFeatureLeaves(self.features):
            if fid in FeatureOntology.NotShowList:
                continue
            leaf_names.append(FeatureOntology.GetFeatureName(fid))
        for name in leaf_names:
            setattr(leaf, name, '')

        leaf.StartOffset = self.StartOffset
        leaf.EndOffset = self.EndOffset
        if self.UpperRelationship:
            leaf.UpperRelationship = self.UpperRelationship
        if self.sons and utils.FeatureID_0 not in self.features:
            leaf.sons = [child.CleanOutput_FeatureLeave() for child in self.sons]

        return leaf
Ejemplo n.º 5
0
 def GetFeatures(self):
     """Return the node's visible feature names, sorted and comma-joined.

     Features in ``FeatureOntology.NotShowList`` are skipped. A feature id
     without a name is reported with a warning and left out.
     """
     names = []
     for fid in self.features:
         if fid not in FeatureOntology.NotShowList:
             name = FeatureOntology.GetFeatureName(fid)
             if not name:
                 logging.warning("Can't get feature name of " + self.text +
                                 " for id " + str(fid))
             else:
                 names.append(name)
     names.sort()
     return ",".join(names)
Ejemplo n.º 6
0
    def CleanOutput_Propagate(self, propogate_features=None):
        """Build a JsonClass copy, pushing Subj/Obj/Pred features down to sons.

        Whichever of the Subj, Obj and Pred features this node carries are
        handed to its sons. A node that has FeatureID_H and receives a
        non-empty ``propogate_features`` adds those feature names to its own
        exported list and passes them on as well.
        """
        to_name = FeatureOntology.GetFeatureName
        passed_down = {
            utils.FeatureID_Subj, utils.FeatureID_Obj, utils.FeatureID_Pred
        }.intersection(self.features)

        out = JsonClass()
        out.text = self.text
        if self.norm != self.text:
            out.norm = self.norm
        if self.pnorm:
            out.pnorm = self.pnorm
        if self.iepair:
            out.iepair = self.iepair
        if self.atom != self.text:
            out.atom = self.atom

        names = []
        for fid in self.features:
            if fid not in FeatureOntology.NotShowList:
                names.append(to_name(fid))
        out.features = names

        if utils.FeatureID_H in self.features and propogate_features:
            received = [to_name(fid) for fid in propogate_features]
            out.features.extend(received)
            passed_down.update(propogate_features)

        out.StartOffset = self.StartOffset
        out.EndOffset = self.EndOffset
        if self.UpperRelationship:
            out.UpperRelationship = self.UpperRelationship

        if self.sons and utils.FeatureID_0 not in self.features:
            out.sons = [
                child.CleanOutput_Propagate(passed_down) for child in self.sons
            ]

        return out
Ejemplo n.º 7
0
 def should_merge(self):
     """Tell whether this node qualifies for merging, based on features and length.

     The length is the text length with spaces removed. The node qualifies
     when its feature names overlap ``FeatureOntology.MergeTokenList`` and the
     length is 2 to 5, or when it has the 'mn' or 'NP' feature and the length
     is 2 to 4.
     """
     names = list(map(FeatureOntology.GetFeatureName, self.features))
     compact_len = len(self.text.replace(' ', ''))
     logging.info('feature_names:' + str(names) + ' node len:' +
                  str(compact_len))

     in_merge_list = utils.has_overlap(
         names, FeatureOntology.MergeTokenList) > 0 and 2 <= compact_len <= 5
     if in_merge_list:
         return True

     is_nominal = 'mn' in names or 'NP' in names
     return is_nominal and 2 <= compact_len <= 4
Ejemplo n.º 8
0
 def get_chunk_label(self):
     """Return the label for a chunk node, or '' when it has no bar feature.

     The label is the bar feature followed by a space. It is prefixed with
     ``SYM_PAIR_HEAD[1]`` when the upper relationship equals
     ``SYM_PAIR_HEAD[0]``, or with the upper relationship plus ``SYM_HYPHEN``
     for any other non-empty relationship.
     """
     visible_names = []
     for fid in self.features:
         if fid not in FeatureOntology.NotShowList:
             visible_names.append(FeatureOntology.GetFeatureName(fid))

     bar = utils.LastItemIn2DArray(visible_names, FeatureOntology.BarTags)
     if not bar:
         return ''

     relation = self.UpperRelationship
     if relation == SYM_PAIR_HEAD[0]:
         return SYM_PAIR_HEAD[1] + bar + ' '
     if relation:
         return relation + SYM_HYPHEN + bar + ' '
     return bar + ' '
Ejemplo n.º 9
0
    def newnode(self, start, count, compound=False):
        """Create a parent node covering ``count`` nodes starting at ``start``.

        The text, norm and atom of the covered nodes are concatenated. Between
        two nodes the separator is "_" when ``compound`` is set, otherwise as
        many spaces as the gap between their offsets. For every covered node
        with an upper relationship other than 'H', the matching
        "has<relationship>" feature is applied to the new node.

        Returns a tuple of (new node, first covered node, last covered node).
        Raises RuntimeError when the list is empty or the requested range
        goes beyond the size of the list.
        """
        if not self.head:
            raise RuntimeError(
                "This SentenceLinkedList is null! Can't combine.")
        if start + count > self.size:
            logging.error(self.__str__())
            raise RuntimeError("Can't get " + str(count) +
                               " items start from " + str(start) +
                               " from the sentence!")

        startnode = self.get(start)
        endnode = self.get(start + count - 1)

        cursor = startnode
        last_end = cursor.StartOffset
        covered = []
        text_parts, norm_parts, atom_parts = [], [], []
        has_relation_ids = []
        for position in range(count):
            if position == 0:
                gap = ""
            elif compound:
                gap = "_"
            else:
                gap = " " * (cursor.StartOffset - last_end)
            last_end = cursor.EndOffset

            text_parts.append(gap + cursor.text)
            norm_parts.append(gap + cursor.norm)
            atom_parts.append(gap + cursor.atom)

            relation = cursor.UpperRelationship
            if relation and relation != 'H':
                has_relation_ids.append(
                    FeatureOntology.GetFeatureID("has" + cursor.UpperRelationship))
            covered.append(cursor)
            cursor = cursor.next

        NewNode = SentenceNode("".join(text_parts))
        NewNode.norm = "".join(norm_parts)
        NewNode.atom = "".join(atom_parts)
        NewNode.sons = covered
        NewNode.StartOffset = startnode.StartOffset
        NewNode.EndOffset = endnode.EndOffset
        Lexicon.ApplyWordLengthFeature(NewNode)
        for feature_id in has_relation_ids:
            NewNode.ApplyFeature(feature_id)
        return NewNode, startnode, endnode
Ejemplo n.º 10
0
 def get_leaf_label(self):
     """Return the label prefix for a leaf node.

     An empty node yields ''. With a bar feature the label ends in "/": the
     bar feature alone when there is no upper relationship,
     ``SYM_PAIR_HEAD[1]`` plus the bar feature for a head, or just the
     relationship otherwise (the POS is deliberately left out there, since
     only the relationship matters when tracking leaf labels). Without a bar
     feature the label is ``SYM_PAIR_HEAD[1]`` for a head, else the
     relationship itself.
     """
     if not self.text:  # deal with empty node
         return ''

     visible_names = []
     for fid in self.features:
         if fid not in FeatureOntology.NotShowList:
             visible_names.append(FeatureOntology.GetFeatureName(fid))
     bar = utils.LastItemIn2DArray(visible_names, FeatureOntology.BarTags)

     relation = self.UpperRelationship
     if bar:
         if not relation:  # syntactic role is empty
             return bar + "/"
         if relation == SYM_PAIR_HEAD[0]:  # syntactic role is HEAD
             return SYM_PAIR_HEAD[1] + bar + "/"
         return relation + "/"  # syntactic role is not HEAD

     if relation == SYM_PAIR_HEAD[0]:
         return SYM_PAIR_HEAD[1]
     return relation
Ejemplo n.º 11
0
    def _AddEdge(self, node1id, relation, parentid):
        """Add an edge from ``node1id`` to ``parentid`` labelled ``relation``.

        Edges between the same two nodes that carry an ontology ancestor of
        ``relation`` are dropped, so only the more specific relation remains.
        The parent node then receives the "has<relation>" feature; an error
        is logged when that feature does not exist.
        """
        relation_id = FeatureOntology.GetFeatureID(relation)
        self.graph.add((node1id, relation, parentid, relation_id))

        # Use the ontology to drop edges that only state an ancestor relation.
        onto_node = FeatureOntology.SearchFeatureOntology(relation_id)
        if onto_node:
            for ancestor_id in onto_node.ancestors:
                stale_edge = (node1id,
                              FeatureOntology.GetFeatureName(ancestor_id),
                              parentid, ancestor_id)
                self.graph.discard(stale_edge)

        # Mark the parent as having this relation.
        has_feature_id = FeatureOntology.GetFeatureID("has" + relation)
        if has_feature_id < 0:
            logging.error(
                "There is no has{} feature in the feature.txt!".format(
                    relation))
        else:
            self.nodes[parentid].ApplyFeature(has_feature_id)
Ejemplo n.º 12
0
def OutputStringTokens_onelinerSA_ben(dag):
    """Serialize the sentiment key/value pairs of ``dag`` as a JSON array.

    For every edge whose two endpoints carry the "Key" and "Value" features
    (in either direction), one entry is produced with the ids, texts and
    sentiment feature names of both nodes. Every other node that carries a
    sentiment feature but appears in no entry is reported against the
    placeholder key "_Emo" with key id -1.

    Args:
        dag: object exposing ``nodes`` (mapping of node id to node) and
            ``graph`` (iterable of edge tuples whose item 0 and item 2 are
            node ids).

    Returns:
        A JSON string holding the list of entries. Each entry has the keys
        "keyid", "key", "keyfeatures", "valueid", "value" and "valuefeatures".
    """
    sentimentfeature = [
        "Target", "Pro", "Con", "PosEmo", "NegEmo", "Neutral", "Needed", "Key",
        "Value"
    ]
    sentimentfeatureidset = {
        FeatureOntology.GetFeatureID(f) for f in sentimentfeature
    }
    FeatureID_Key = FeatureOntology.GetFeatureID("Key")
    FeatureID_Value = FeatureOntology.GetFeatureID("Value")
    nodes = dag.nodes

    def sentiment_names(node):
        # Names of the sentiment features carried by the node.
        return [
            FeatureOntology.GetFeatureName(f) for f in node.features
            if f in sentimentfeatureidset
        ]

    def pair_entry(keynode, valuenode):
        # A fresh dict per pair, so two matches on one edge stay separate.
        return {
            "keyid": keynode.ID,
            "key": keynode.text,
            "keyfeatures": sentiment_names(keynode),
            "valueid": valuenode.ID,
            "value": valuenode.text,
            "valuefeatures": sentiment_names(valuenode),
        }

    outputdict = []
    reported_ids = set()  # every keyid/valueid already present in outputdict

    def emit(entry):
        outputdict.append(entry)
        reported_ids.add(entry["keyid"])
        reported_ids.add(entry["valueid"])

    for edge in sorted(dag.graph, key=operator.itemgetter(2, 0, 1)):
        node1 = nodes.get(edge[2])
        node2 = nodes.get(edge[0])

        if FeatureID_Key in node1.features and FeatureID_Value in node2.features:
            emit(pair_entry(node1, node2))

        if FeatureID_Key in node2.features and FeatureID_Value in node1.features:
            emit(pair_entry(node2, node1))

    for nid in dag.nodes:
        node = dag.nodes[nid]
        if not sentimentfeatureidset.intersection(node.features):
            continue
        if node.ID in reported_ids:
            continue
        # Sentiment node that is part of no key/value pair.
        emit({
            "keyid": -1,
            "key": "_Emo",
            "keyfeatures": [],
            "valueid": node.ID,
            "value": node.text,
            "valuefeatures": sentiment_names(node),
        })

    return json.dumps(outputdict,
                      default=lambda o: o.__dict__,
                      sort_keys=True,
                      ensure_ascii=False)
Ejemplo n.º 13
0
    def ApplyDagActions(self, OpenNode, node, actinstring, rule):
        """Apply the whitespace-separated actions in ``actinstring`` to ``node``.

        Processing order:

        1. A ``#...#`` span, if present, is cut out of the string and handed to
           ``ApplyDagActions_IEPair``.
        2. Actions containing "---" remove every edge between ``node`` and the
           node addressed by the pointer in front of the last "."; with "~---"
           the edges running from that node to ``node`` are removed instead.
        3. The remaining actions are handled sorted by their last character:
           - "^pointer.relation" adds an edge from ``node`` to the pointed
             node and applies the relation feature to ``node``; with a
             trailing "-" the edge is removed instead.
           - "X-" removes feature X from ``node``.
           - "X++" applies feature X; "X+" first removes the features listed
             in ``FeatureOntology.BarTagIDs[0]`` and the AC/NC/VC features,
             then applies X; "+++" is logged as an error.
           - Actions starting with a single quote, "%" or "/" set ``norm``,
             ``pnorm`` or ``atom`` to the action text without its first and
             last character.
           - Anything else is applied as a feature name; unknown names are
             logged with a warning.

        The method returns early, leaving the remaining actions unapplied,
        as soon as ``FindPointerNode`` yields no node for a pointer.
        """
        # Pull out the "#...#" part; it is handled separately.
        iepairmatch = re.search("(#.*#)", actinstring)
        if iepairmatch:
            ieaction = iepairmatch.group(1)
            actinstring = actinstring.replace(iepairmatch.group(1), '')
            self.ApplyDagActions_IEPair(OpenNode, node, rule, ieaction)

        Actions = actinstring.split()

        # First pass: bulk edge removals. Iterate over a copy because handled
        # actions are popped from Actions inside the loop.
        for Action in copy.copy(Actions):
            if "---" in Action:
                ParentPointer = Action[:Action.rfind(
                    '.')]  # the pointer is everything up to the last dot "."
                parentnodeid = self.FindPointerNode(OpenNode.ID, ParentPointer,
                                                    rule, node.ID)
                if not parentnodeid:
                    return

                if "~---" in Action:
                    # Reverse direction: drop edges from the pointed node to this node.
                    self.graph = set([
                        edge for edge in self.graph
                        if edge[0] != parentnodeid or edge[2] != node.ID
                    ])
                    logging.debug(
                        "Dag Action {}: Removed all edge from {} to {}".format(
                            Action, parentnodeid, node.ID))
                else:
                    # Drop edges from this node to the pointed node.
                    # NOTE(review): the debug message below prints the ids in
                    # the same order as the reverse branch, although the
                    # removed edges run from node.ID to parentnodeid - confirm.
                    self.graph = set([
                        edge for edge in self.graph
                        if edge[0] != node.ID or edge[2] != parentnodeid
                    ])
                    logging.debug(
                        "Dag Action {}: Removed all edge from {} to {}".format(
                            Action, parentnodeid, node.ID))
                Actions.pop(Actions.index(Action))

        # Second pass: remaining actions, ordered by their last character.
        for Action in sorted(Actions, key=lambda d: (d[-1])):
            if Action[0] == '^':
                ParentPointer = Action[:Action.rfind(
                    '.')]  # the pointer is everything up to the last dot "."
                parentnodeid = self.FindPointerNode(OpenNode.ID, ParentPointer,
                                                    rule, node.ID)
                if not parentnodeid:
                    return

                #logging.warning("DAG Action: This action {} to apply, parent id={}".format(Action, parentnodeid))
                if Action[-1] == "-":  # remove
                    relation = Action[Action.rfind('.') + 1:-1]
                    self._RemoveEdge(node.ID, relation, parentnodeid)
                else:
                    relation = Action[Action.rfind('.') + 1:]
                    newedge = [node.ID, relation, parentnodeid]
                    if logging.root.isEnabledFor(logging.DEBUG):
                        logging.debug(
                            "DAG Action:Adding new edge: {}".format(newedge))

                    self._AddEdge(node.ID, relation, parentnodeid)

                    # The child node also carries the relation as a feature.
                    RelationActionID = FeatureOntology.GetFeatureID(relation)
                    if RelationActionID != -1:
                        node.ApplyFeature(RelationActionID)
                    else:
                        logging.warning(
                            "Wrong Relation Action to apply: {} in action string: {}"
                            .format(relation, actinstring))
                continue

            if Action[-1] == "-":
                # "X-": remove feature X (strip removes every leading/trailing "-").
                FeatureID = FeatureOntology.GetFeatureID(Action.strip("-"))
                if FeatureID in node.features:
                    node.features.remove(FeatureID)
                continue

            if Action[-1] == "+":
                # Action[-2] / Action[-3] assume the action is long enough;
                # a bare "+" or "++" would raise IndexError here.
                if Action[-2] == "+":
                    if Action[-3] == "+":  #"+++"
                        logging.error(
                            "There should be no +++ operation in DAG.")
                    else:  #"X++":
                        FeatureID = FeatureOntology.GetFeatureID(
                            Action.strip("++"))
                        node.ApplyFeature(FeatureID)
                else:  #"X+"
                    # Clear the BarTagIDs[0] features and AC/NC/VC before applying X.
                    for bar0id in FeatureOntology.BarTagIDs[0]:
                        if bar0id in node.features:
                            node.features.remove(bar0id)

                    for bar0id in [
                            utils.FeatureID_AC, utils.FeatureID_NC,
                            utils.FeatureID_VC
                    ]:
                        if bar0id in node.features:
                            node.features.remove(bar0id)

                    FeatureID = FeatureOntology.GetFeatureID(Action.strip("+"))
                    node.ApplyFeature(FeatureID)
                continue

            if Action[0] == '\'':
                # Make the norm of the token to this key
                node.norm = Action[1:-1]
                continue
            if Action[0] == '%':
                # Make the pnorm of the token to this key
                node.pnorm = Action[1:-1]
                continue
            if Action[0] == '/':
                # Make the atom of the token to this key
                node.atom = Action[1:-1]
                continue
            # Plain feature name.
            ActionID = FeatureOntology.GetFeatureID(Action)
            if ActionID != -1:
                node.ApplyFeature(ActionID)
            else:
                logging.warning("Wrong Action to apply:" + Action +
                                " in action string: " + actinstring)

            # Only reached for plain feature actions (all others `continue` above).
            if Action == "NEUTRAL":
                FeatureOntology.ProcessSentimentTags(node.features)
Ejemplo n.º 14
0
    def FindPointerNode(self, openID, SubtreePointer, rule, CurrentNodeID):
        """Resolve ``SubtreePointer`` to a node id, or return None.

        The pointer (an optional leading "^" is ignored) is split on "+" into
        conditions; a warning is logged when "+" is present. Each condition
        may start with "!" (negation) and has the form
        "pointer.relation.relation...":

        - an empty pointer means ``openID``;
        - "~" means ``CurrentNodeID``;
        - digits index ``rule.Tokens`` and take that token's ``MatchedNodeID``;
        - any other text is matched, with "^" prepended, against the
          ``TempPointer`` of the nodes.

        From that start node each relation is followed in turn: the first
        edge (in sorted order) that ends at the current node and whose
        relation id equals the requested one, or has it among its ontology
        ancestors, moves the current node to the edge's item 0. A relation
        that cannot be followed returns None, unless the condition is
        negated, in which case the current node is kept.

        The node id reached by the last condition is cached and returned.
        None is returned (with a log entry) when nothing is found or the
        rule references a token that is unmatched or out of range.
        """
        if logging.root.isEnabledFor(logging.DEBUG):
            logging.debug("Dag.FindPointerNode for {}".format(SubtreePointer))
        if (openID, SubtreePointer, rule.ID) in self.FindPointerNode_Cache:
            #logging.debug("FindPointerNode_Cache: hit!")
            return self.FindPointerNode_Cache[(openID, SubtreePointer,
                                               rule.ID)]

        # NOTE(review): the cache is read above with the pointer as passed in,
        # but written at the end with the pointer after this "^" strip, so a
        # "^"-prefixed pointer looks like it never hits its own entry - confirm.
        if len(SubtreePointer) >= 1 and SubtreePointer[0] == '^':
            SubtreePointer = SubtreePointer[1:]

        nodeID = None
        if "+" in SubtreePointer:
            logging.warning(
                "There is + sign in SubtreePointer of FindPointerNode(): {} ".
                format(rule))
        for AndCondition in SubtreePointer.split("+"):
            Negation = False
            if len(AndCondition) > 1 and AndCondition[0] == "!":
                #logging.warning("FindPointerNode: Negation! {}".format(SubtreePointer))
                Negation = True
                AndCondition = AndCondition[1:]

            # Split into the start pointer and the dotted chain of relations.
            if "." in AndCondition:
                pointer, relations = AndCondition.split(".", 1)
            else:
                pointer, relations = [AndCondition, ""]
            #pointers = SubtreePointer.split(".")  # Note: here Pointer (subtreepointer) does not have "^"
            #logging.debug("tree:{}".format(pointers))
            # if len(pointers) <=1:
            #     #logging.error("Should have more than 1 pointers! Can't find {} in graph {}".format(SubtreePointer, self.graph))
            #     return openID
            # nodeID is reset per condition, so only the last condition's
            # result survives the loop.
            nodeID = None
            if pointer == '':
                nodeID = openID
            elif pointer == '~':
                #logging.warning("THIS POINTER in {}".format(rule))
                nodeID = CurrentNodeID
            else:
                if pointer.isdigit():
                    # Numeric pointer: position of a token in the rule.
                    pointer_num = int(pointer)
                    try:
                        nodeID = rule.Tokens[pointer_num].MatchedNodeID
                    except AttributeError as e:  #AttributeError: 'RuleToken' object has no attribute 'MatchedNodeID'
                        logging.error(e)
                        logging.error(
                            "FindPointerNode: The rule is written error, because the reference token is not yet matched. Please rewrite!"
                        )
                        logging.info(rule)
                        return None
                    except IndexError as e:
                        logging.error(e)
                        logging.error(
                            "FindPointerNode: The rule is written error, failed to find pointer {} IndexError!"
                            .format(pointer_num))
                        logging.info(rule)
                        return None

                else:
                    # Named pointer: first node (by sorted id) whose TempPointer matches.
                    pointer = "^" + pointer
                    #logging.info("Finding pointer node {} from TempPointer".format(pointer))
                    for nodeid in sorted(self.nodes):
                        #logging.debug("DAG.FindPointerNode: evaluating temppointer {} in {} with pointer {}".format(self.nodes[nodeid].TempPointer, self.nodes[nodeid].text, pointer))
                        if self.nodes[nodeid].TempPointer == pointer:
                            #logging.debug("Matched nodeid {}".format(nodeid))
                            nodeID = nodeid
                            break
                #logging.warning("after looping over the nodes, nodeID={}".format(nodeID))
            # NOTE(review): a node id of 0 is falsy and would be treated as
            # "not found" here and at the end - confirm ids never are 0.
            if nodeID and relations:
                for relation in relations.split("."):
                    Found = False
                    # if relation == "LEFT":
                    #     nodeID = self.LinearNodeOffset(nodeID, -1)
                    #     if nodeID:
                    #         Found = True
                    # elif relation == "RIGHT":
                    #     nodeID = self.LinearNodeOffset(nodeID, 1)
                    #     if nodeID:
                    #         Found = True
                    # else:
                    relationid = FeatureOntology.GetFeatureID(relation)
                    # Follow the first edge ending at nodeID that carries the
                    # relation, exactly or through an ontology ancestor.
                    for edge in sorted(self.graph,
                                       key=operator.itemgetter(2, 1, 0)):
                        #logging.debug("Evaluating edge{} with relation {}, node {}".format(edge, relation, nodeID))
                        if edge[2] == nodeID:
                            if relationid == edge[
                                    3]:  # or relationid in FeatureOntology.SearchFeatureOntology(edge[3]):
                                nodeID = edge[0]
                                Found = True
                                if logging.root.isEnabledFor(logging.DEBUG):
                                    logging.debug("   Found!")
                                break
                            else:
                                edgerelationnode = FeatureOntology.SearchFeatureOntology(
                                    edge[3])
                                if edgerelationnode and relationid in edgerelationnode.ancestors:
                                    nodeID = edge[0]
                                    Found = True
                                    if logging.root.isEnabledFor(
                                            logging.DEBUG):
                                        logging.debug(
                                            "   Found ontology ancesstor relation!"
                                        )
                                    break

                    if not Found:
                        # A missing relation fails the lookup unless negated;
                        # when negated, nodeID stays where it was.
                        if not Negation:
                            return None
                        #logging.warning("Failed to find pointer {} in graph {}".format(SubtreePointer, self))
                        #return None     #Can't find the pointers.

                    #logging.info("Found this node {} for these pointers:{}".format(nodeID, pointers))

        if nodeID:
            self.FindPointerNode_Cache[(openID, SubtreePointer,
                                        rule.ID)] = nodeID
            return nodeID
        else:
            logging.warning("Can't find {} pointer in this rule{}".format(
                SubtreePointer, rule))
            return None
Ejemplo n.º 15
0
        Tokenize('響著錄中文规则很长 very long , 为啥是不?')


# def LoopTest2(n):
#     for _ in range(n):
#         old_Tokenize_cn('響著錄中文规则很长 very long , 为啥是不?')

# Script entry point: load the resources Tokenize needs, tokenize one sample
# sentence, then profile LoopTest1 and print the ten most expensive entries.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s [%(levelname)s] %(message)s')

    logging.info("Start")
    # import ProcessSentence
    # ProcessSentence.LoadCommon()  # too heavy to load for debugging

    # Load only the feature ontology and lexicon data instead of the full
    # common setup above; paths are relative to the working directory.
    FeatureOntology.LoadFeatureOntology('../../fsa/Y/feature.txt')
    Lexicon.LoadSegmentLexicon()
    XLocation = '../../fsa/X/'
    Lexicon.LoadExtraReference(XLocation + 'CuobieziX.txt',
                               Lexicon._LexiconCuobieziDict)
    Lexicon.LoadExtraReference(XLocation + 'Fanti.txt',
                               Lexicon._LexiconFantiDict)

    main_x = Tokenize('科普:。,?带你看懂蜀绣冰壶比赛')
    #old_Tokenize_cn('很少有科普:3 minutes 三分钟带你看懂蜀绣冰壶比赛')

    import cProfile, pstats

    # Profile 100 iterations, dump the stats to the file 'restatslex', then
    # print the top 10 entries sorted by internal time.
    cProfile.run("LoopTest1(100)", 'restatslex')
    pstat = pstats.Stats('restatslex')
    pstat.sort_stats('time').print_stats(10)
Ejemplo n.º 16
0
    def ApplyActions(self, actinstring):
        """Apply the whitespace-separated actions in ``actinstring`` to this node.

        Before the per-action loop, "NEW" resets the feature set and
        "NEUTRAL" triggers ``FeatureOntology.ProcessSentimentTags``. Both
        tokens are still visited by the loop afterwards and fall through to
        the plain-feature branch at its end.

        Per action:
        - "^....X-" is meant to remove the upper relationship; other "X-"
          actions remove feature X.
        - "+++" applies FeatureID_0 and clears ``sons`` and ``Head0Text``;
          "X++" applies feature X and marks that bar tags need processing;
          "X+" removes the features in ``FeatureOntology.BarTagIDs[0]`` and
          AC/NC/VC, then applies X.
        - "^pointer.R" sets ``UpperRelationship`` to R (the text after the
          last ".") and applies R as a feature; without a "." an error is
          logged and the text after "^" is used as the relationship.
        - Actions starting with a single quote, "%" or "/" set ``norm``,
          ``pnorm`` or ``atom`` to the action text without its first and
          last character.
        - Anything else is applied as a feature name; unknown names are
          logged with a warning.

        ``FeatureOntology.ProcessBarTags`` runs at the end only when an "X++"
        action was seen.
        """
        #self.FailedRuleTokens.clear()
        Actions = actinstring.split()
        #logging.debug("Word:" + self.text)

        if "NEW" in Actions:
            self.features = set()
        if "NEUTRAL" in Actions:
            FeatureOntology.ProcessSentimentTags(self.features)

        HasBartagAction = False
        for Action in Actions:
            # if Action == "NEW":
            #     continue  # already process before.
            # if Action == "NEUTRAL":
            #     continue  # already process before.

            if Action[-1] == "-":
                if Action[0] == "^":  # Remove UpperRelationship
                    if "." in Action:
                        # NOTE(review): [1][-1] is the last character of the
                        # text after the first ".", which is always "-" here
                        # because the action ends with "-"; this looks like it
                        # was meant to be [1][:-1] (the relation name) - confirm.
                        if self.UpperRelationship == Action.split(".",
                                                                  1)[1][-1]:
                            # TODO:  actually break the token. not just delattr
                            delattr(self, "UpperRelationship")
                            logging.warning(
                                " TODO:  actually break the token. not just delattr Remove Relationship:"
                                + Action)
                    else:
                        logging.warning("This Action is not right:" + Action)
                    continue

                # "X-": remove feature X (strip removes every leading/trailing "-").
                FeatureID = FeatureOntology.GetFeatureID(Action.strip("-"))
                if FeatureID in self.features:
                    self.features.remove(FeatureID)
                continue

            if Action[-1] == "+":
                # Action[-2] / Action[-3] assume the action is long enough;
                # a bare "+" or "++" would raise IndexError here.
                if Action[-2] == "+":
                    if Action[-3] == "+":  #"+++"
                        self.ApplyFeature(utils.FeatureID_0)
                        self.sons = []  # remove the sons of this
                        self.Head0Text = ''  # remove Head0Text.

                    else:  #"X++":
                        # this should be in a chunk, only apply to the new node
                        HasBartagAction = True
                        FeatureID = FeatureOntology.GetFeatureID(
                            Action.strip("++"))
                        self.ApplyFeature(FeatureID)
                else:  #"X+"
                    #MajorPOSFeatures = ["A", "N", "P", "R", "RB", "X", "V"]
                    # Feb 20, 2018: use the BarTagIDs[0] as the MajorPOSFeatures.
                    for bar0id in FeatureOntology.BarTagIDs[0]:
                        if bar0id in self.features:
                            self.features.remove(bar0id)

                    for bar0id in [
                            utils.FeatureID_AC, utils.FeatureID_NC,
                            utils.FeatureID_VC
                    ]:
                        if bar0id in self.features:
                            self.features.remove(bar0id)

                    FeatureID = FeatureOntology.GetFeatureID(Action.strip("+"))
                    self.ApplyFeature(FeatureID)
                continue

            if Action[0] == "^":
                if "." in Action:
                    # The relationship is the text after the last ".".
                    self.UpperRelationship = Action.split(".")[-1]
                    RelationActionID = FeatureOntology.GetFeatureID(
                        self.UpperRelationship)
                    if RelationActionID != -1:
                        self.ApplyFeature(RelationActionID)
                    else:
                        logging.warning("Wrong Relation Action to apply:" +
                                        self.UpperRelationship +
                                        " in action string: " + actinstring)
                    # apply this "has" to the parent (new) node (chunk)
                    # RelationActionID = FeatureOntology.GetFeatureID("has" + self.UpperRelationship)
                    # if RelationActionID != -1:
                    #     self.ApplyFeature(RelationActionID)
                    # else:
                    #     logging.warning("Wrong Relation Action to apply:" + self.UpperRelationship + " in action string: " + actinstring)

                else:
                    logging.error(
                        "The Action is wrong: It does not have dot to link to proper pointer"
                    )
                    logging.error("  actinstring:" + actinstring)
                    # Still record the relationship, taken as the text after "^".
                    self.UpperRelationship = Action[1:]
                continue

            if Action[0] == '\'':
                # Make the norm of the token to this key
                self.norm = Action[1:-1]
                continue
            if Action[0] == '%':
                # Make the pnorm of the token to this key
                self.pnorm = Action[1:-1]
                continue
            if Action[0] == '/':
                # Make the atom of the token to this key
                self.atom = Action[1:-1]
                continue
            # Plain feature name.
            ActionID = FeatureOntology.GetFeatureID(Action)
            if ActionID != -1:
                self.ApplyFeature(ActionID)
            else:
                logging.warning("Wrong Action to apply:" + Action +
                                " in action string: " + actinstring)

                # strtokens[StartPosition + i + GoneInStrTokens].features.add(ActionID)
        if HasBartagAction:  # only process bartags if there is new bar tag, or trunking (in the combine() function)
            FeatureOntology.ProcessBarTags(self.features)
Ejemplo n.º 17
0
 def ApplyFeature(self, featureID):
     """Add ``featureID`` to this node's features, plus its ontology ancestors.

     The ancestors are only added when the ontology has a node for the
     feature and that node lists any.
     """
     self.features.add(featureID)
     onto_node = FeatureOntology.SearchFeatureOntology(featureID)
     if not onto_node:
         return
     inherited = onto_node.ancestors
     if inherited:
         self.features.update(inherited)
Ejemplo n.º 18
0
def OutputStringTokens_onelinerSA(dag):
    """Render the sentiment view of ``dag`` as a one-line JSON string.

    The result has the members:

    * ``nodes`` -- every value of ``dag.nodes`` ordered by its ``Index``
      attribute, each serialized with ``nodeID``, ``text`` and the sorted
      sentiment feature names it carries. Features listed in
      ``FeatureOntology.NotShowList`` are dropped before filtering.
    * ``edges`` -- one ``{"key": id, "value": id}`` entry per distinct
      key/value pair. An edge yields a pair when one of its endpoints
      (``edge[0]`` / ``edge[2]``) carries the "Key" feature and the other
      carries the "Value" feature, in either direction. The member is
      omitted entirely when no pair is found.
    * ``sentence`` -- the node texts concatenated in output order.

    Args:
        dag: object exposing ``nodes`` (mapping of node ID to node object
            with ``ID``, ``text``, ``features`` and ``Index``) and ``graph``
            (iterable of edge sequences; items 0 and 2 are node IDs).

    Returns:
        The JSON document as a ``str``.
    """
    sentiment_names = frozenset((
        "Target", "Pro", "Con", "PosEmo", "NegEmo", "Neutral", "Needed", "Key",
        "Value"
    ))
    nodes = dag.nodes

    def visible_names(node):
        # Names of the node's features, minus those flagged as hidden.
        return [
            FeatureOntology.GetFeatureName(f) for f in node.features
            if f not in FeatureOntology.NotShowList
        ]

    node_chunks = []
    texts = []
    for node in sorted(nodes.values(), key=operator.attrgetter("Index")):
        texts.append(node.text)
        record = {
            "nodeID": node.ID,
            "text": node.text,
            "features": sorted(name for name in visible_names(node)
                               if name in sentiment_names),
        }
        node_chunks.append(
            json.dumps(record,
                       default=lambda o: o.__dict__,
                       sort_keys=True,
                       ensure_ascii=False))

    pair_chunks = []
    seen_pairs = set()
    for edge in sorted(dag.graph, key=operator.itemgetter(2, 0, 1)):
        node0_id, node2_id = edge[0], edge[2]
        names2 = visible_names(nodes.get(node2_id))
        if "Key" in names2:
            needed_on_other, pair = "Value", (node2_id, node0_id)
        elif "Value" in names2:
            needed_on_other, pair = "Key", (node0_id, node2_id)
        else:
            continue
        # The opposite endpoint is only inspected once this one qualifies.
        if needed_on_other not in visible_names(nodes.get(node0_id)):
            continue
        if pair in seen_pairs:
            # The same key/value pair may be linked by several edges.
            continue
        seen_pairs.add(pair)
        pair_chunks.append('{{ "key":{}, "value":{}}}'.format(*pair))

    parts = ['{  "nodes": [', ", ".join(node_chunks)]
    if pair_chunks:
        parts.append('],  "edges": [')
        parts.append(", ".join(pair_chunks))
    parts.append('],  "sentence": ')
    # Serialize the sentence through json so quotes, backslashes and control
    # characters in the text are escaped instead of breaking the document.
    parts.append(json.dumps("".join(texts), ensure_ascii=False))
    parts.append("}")
    return "".join(parts)
Ejemplo n.º 19
0
def GetFeatureName(FeatureID):
    """Return the jsonpickle-encoded feature name for ``FeatureID``.

    ``FeatureID`` is converted with ``int()`` before the ontology lookup,
    so numeric strings are accepted.
    """
    numeric_id = int(FeatureID)
    feature_name = FeatureOntology.GetFeatureName(numeric_id)
    return jsonpickle.encode(feature_name)
Ejemplo n.º 20
0
def InitGlobalFeatureID():
    """Fill the module-level ``FeatureID_*`` globals from the ontology.

    Does nothing when ``FeatureID_JS2`` is already truthy. Otherwise each
    global is set to the ID returned by ``FeatureOntology.GetFeatureID``
    for its feature name, and three derived tables are written onto the
    ``FeatureOntology`` module: ``BarTagIDs`` (IDs for every row of
    ``BarTags``), ``BarTagIDSet`` (updated with all of those IDs) and
    ``SentimentTagIDSet`` (set of IDs for ``SentimentTags``).
    """
    global FeatureID_JS, FeatureID_JS2, FeatureID_JM2, FeatureID_JM, FeatureID_0
    global FeatureID_CD, FeatureID_punc, FeatureID_SYM, FeatureID_NNP, FeatureID_External
    global FeatureID_OOV, FeatureID_CM, FeatureID_NEW, FeatureID_SpaceQ, FeatureID_SpaceH, FeatureID_FULLSTRING
    global FeatureID_VB, FeatureID_Ved, FeatureID_Ving
    global FeatureID_H, FeatureID_Subj, FeatureID_Obj, FeatureID_Pred
    global FeatureID_AC, FeatureID_NC, FeatureID_VC, FeatureID_comPair
    global FeatureID_HIT, FeatureID_HIT2, FeatureID_HIT3

    if FeatureID_JS2:
        # Already initialized by an earlier call.
        return

    # NOTE(review): imported here rather than at module top, presumably to
    # avoid an import cycle -- confirm before moving it.
    import FeatureOntology
    id_of = FeatureOntology.GetFeatureID

    FeatureID_JS = id_of("JS")
    FeatureID_JS2 = id_of("JS2")
    FeatureID_JM2 = id_of("JM2")
    FeatureID_JM = id_of("JM")
    FeatureID_0 = id_of("0")
    FeatureID_CD = id_of("CD")
    FeatureID_punc = id_of("punc")
    FeatureID_SYM = id_of("SYM")
    FeatureID_NNP = id_of("NNP")
    FeatureID_External = id_of("External")
    FeatureID_OOV = id_of("OOV")
    FeatureID_CM = id_of("CM")
    FeatureID_NEW = id_of("NEW")
    # The ontology spells these two names with a lowercase "space" prefix.
    FeatureID_SpaceQ = id_of("spaceQ")
    FeatureID_SpaceH = id_of("spaceH")
    FeatureID_FULLSTRING = id_of("FULLSTRING")
    FeatureID_VB = id_of("VB")
    FeatureID_Ved = id_of("Ved")
    FeatureID_Ving = id_of("Ving")

    FeatureID_H = id_of("H")
    FeatureID_Subj = id_of("Subj")
    FeatureID_Obj = id_of("Obj")
    FeatureID_Pred = id_of("Pred")

    FeatureID_AC = id_of("AC")
    FeatureID_NC = id_of("NC")
    FeatureID_VC = id_of("VC")

    FeatureID_HIT = id_of("HIT")
    FeatureID_HIT2 = id_of("HIT2")
    FeatureID_HIT3 = id_of("HIT3")

    FeatureID_comPair = id_of("comPair")

    # Translate the tag-name tables into ID tables, row by row.
    FeatureOntology.BarTagIDs = [
        [id_of(tag) for tag in tag_row] for tag_row in FeatureOntology.BarTags
    ]
    for row_ids in FeatureOntology.BarTagIDs:
        FeatureOntology.BarTagIDSet.update(row_ids)
    FeatureOntology.SentimentTagIDSet = {
        id_of(tag) for tag in FeatureOntology.SentimentTags
    }
Ejemplo n.º 21
0
def GetFeatureID(word):
    """Return the jsonpickle-encoded ontology feature ID for ``word``."""
    feature_id = FeatureOntology.GetFeatureID(word)
    return jsonpickle.encode(feature_id)