Ejemplo n.º 1
0
    def CleanOutput(self, KeepOriginFeature=False):
        """Build a trimmed, serialisable copy of this node and its subtree.

        ID, text, StartOffset and EndOffset are always copied.  norm and
        atom are copied only when they differ from text; pnorm, iepair and
        UpperRelationship only when they are truthy.

        Args:
            KeepOriginFeature: when False (the default), features found in
                FeatureOntology.NotShowList are left out of ``features``.
                When True, every feature is exported and Head0Text is
                copied as well if it is set.

        Returns:
            A JsonClass instance.  ``sons`` is filled in recursively (with
            the same KeepOriginFeature flag) unless this node has no sons
            or carries utils.FeatureID_0.
        """
        a = JsonClass()
        a.ID = self.ID
        a.text = self.text
        if self.norm != self.text:
            a.norm = self.norm
        if self.pnorm:
            a.pnorm = self.pnorm
        if self.iepair:
            a.iepair = self.iepair
        if self.atom != self.text:
            a.atom = self.atom

        # Pick the feature ids to export once, instead of building the
        # filtered list and then discarding it when everything is kept.
        if KeepOriginFeature:
            exported = self.features
        else:
            hidden = FeatureOntology.NotShowList
            exported = [f for f in self.features if f not in hidden]
        a.features = sorted(
            FeatureOntology.GetFeatureName(f) for f in exported)

        if KeepOriginFeature and self.Head0Text:
            a.Head0Text = self.Head0Text

        a.StartOffset = self.StartOffset
        a.EndOffset = self.EndOffset
        if self.UpperRelationship:
            a.UpperRelationship = self.UpperRelationship

        # Nodes carrying FeatureID_0 are exported without their sons.
        if self.sons and utils.FeatureID_0 not in self.features:
            a.sons = [s.CleanOutput(KeepOriginFeature) for s in self.sons]

        return a
Ejemplo n.º 2
0
    def CleanOutput_FeatureLeave(self):
        """Export this node with each displayable leaf feature as an attribute.

        The feature ids come from Lexicon.CopyFeatureLeaves(self.features);
        ids listed in FeatureOntology.NotShowList are skipped.  Every
        remaining feature name becomes an attribute of the result whose
        value is the empty string.

        Returns:
            A JsonClass instance holding text, the optional norm / pnorm /
            iepair / atom / UpperRelationship fields, the offsets, and
            (unless the node carries utils.FeatureID_0) the recursively
            exported sons.
        """
        out = JsonClass()
        out.text = self.text
        if self.norm != self.text:
            out.norm = self.norm
        if self.pnorm:
            out.pnorm = self.pnorm
        if self.iepair:
            out.iepair = self.iepair
        if self.atom != self.text:
            out.atom = self.atom

        leaf_names = []
        for fid in Lexicon.CopyFeatureLeaves(self.features):
            if fid in FeatureOntology.NotShowList:
                continue
            leaf_names.append(FeatureOntology.GetFeatureName(fid))
        # Each feature name is exported as an attribute with an empty value.
        for leaf_name in leaf_names:
            setattr(out, leaf_name, '')

        out.StartOffset = self.StartOffset
        out.EndOffset = self.EndOffset
        if self.UpperRelationship:
            out.UpperRelationship = self.UpperRelationship

        has_exportable_sons = bool(self.sons) \
            and utils.FeatureID_0 not in self.features
        if has_exportable_sons:
            out.sons = [son.CleanOutput_FeatureLeave() for son in self.sons]

        return out
Ejemplo n.º 3
0
 def GetFeatures(self):
     """Return this node's displayable feature names as one string.

     Features listed in FeatureOntology.NotShowList are skipped.  An id
     whose name lookup yields a falsy value is logged as a warning and
     left out.  The remaining names are sorted and joined with commas.
     """
     names = []
     for fid in self.features:
         if fid not in FeatureOntology.NotShowList:
             name = FeatureOntology.GetFeatureName(fid)
             if not name:
                 logging.warning("Can't get feature name of " + self.text +
                                 " for id " + str(fid))
             else:
                 names.append(name)
     names.sort()
     return ",".join(names)
Ejemplo n.º 4
0
    def CleanOutput_Propagate(self, propogate_features=None):
        """Export this subtree, pushing Subj/Obj/Pred features down to heads.

        Args:
            propogate_features: feature ids handed down by the parent node,
                or None.  They are applied only when this node carries
                utils.FeatureID_H; in that case their names are appended
                to ``features`` and they are passed on to the sons as well.

        Returns:
            A JsonClass instance.  ``features`` holds the names of the
            node's own features that are not in
            FeatureOntology.NotShowList (unsorted), followed by any
            propagated names.  ``sons`` is filled recursively unless the
            node carries utils.FeatureID_0.
        """
        out = JsonClass()
        out.text = self.text
        if self.norm != self.text:
            out.norm = self.norm
        if self.pnorm:
            out.pnorm = self.pnorm
        if self.iepair:
            out.iepair = self.iepair
        if self.atom != self.text:
            out.atom = self.atom

        names = []
        for fid in self.features:
            if fid not in FeatureOntology.NotShowList:
                names.append(FeatureOntology.GetFeatureName(fid))
        out.features = names

        # Features this node itself contributes to its descendants.
        candidates = {
            utils.FeatureID_Subj, utils.FeatureID_Obj, utils.FeatureID_Pred
        }
        for_sons = candidates.intersection(self.features)

        if utils.FeatureID_H in self.features:
            if propogate_features:
                out.features += [
                    FeatureOntology.GetFeatureName(fid)
                    for fid in propogate_features
                ]
                for_sons.update(propogate_features)

        out.StartOffset = self.StartOffset
        out.EndOffset = self.EndOffset
        if self.UpperRelationship:
            out.UpperRelationship = self.UpperRelationship

        if self.sons and utils.FeatureID_0 not in self.features:
            out.sons = [
                son.CleanOutput_Propagate(for_sons) for son in self.sons
            ]

        return out
Ejemplo n.º 5
0
 def should_merge(self):
     """Tell whether this node qualifies for merging.

     The length used is that of self.text with spaces removed.  Returns
     True when the node's feature names overlap
     FeatureOntology.MergeTokenList and the length is 2-5, or when the
     node has the 'mn' or 'NP' feature and the length is 2-4; otherwise
     returns False.
     """
     names = [FeatureOntology.GetFeatureName(fid) for fid in self.features]
     compact_len = len(self.text.replace(' ', ''))
     logging.info('feature_names:' + str(names) + ' node len:' +
                  str(compact_len))

     overlaps = utils.has_overlap(names, FeatureOntology.MergeTokenList) > 0
     if overlaps and 2 <= compact_len <= 5:
         return True

     is_nominal = 'mn' in names or 'NP' in names
     return is_nominal and 2 <= compact_len <= 4
Ejemplo n.º 6
0
 def get_chunk_label(self):
     """Return the label for this chunk, or '' when it has no bar tag.

     The bar tag is whatever utils.LastItemIn2DArray picks from the
     node's displayable feature names given FeatureOntology.BarTags.
     The label is that tag followed by a space, prefixed with
     SYM_PAIR_HEAD[1] when UpperRelationship equals SYM_PAIR_HEAD[0], or
     with the relationship plus SYM_HYPHEN for any other truthy
     relationship.
     """
     names = []
     for fid in self.features:
         if fid not in FeatureOntology.NotShowList:
             names.append(FeatureOntology.GetFeatureName(fid))

     bar_tag = utils.LastItemIn2DArray(names, FeatureOntology.BarTags)
     if not bar_tag:
         return ''

     relation = self.UpperRelationship
     if relation == SYM_PAIR_HEAD[0]:
         return SYM_PAIR_HEAD[1] + bar_tag + ' '
     if relation:
         return relation + SYM_HYPHEN + bar_tag + ' '
     return bar_tag + ' '
Ejemplo n.º 7
0
 def get_leaf_label(self):
     """Return the label prefix for a leaf node.

     An empty-text node yields ''.  Otherwise the result depends on the
     bar tag (picked by utils.LastItemIn2DArray from the displayable
     feature names) and on UpperRelationship:

     * bar tag, no relationship       -> tag + "/"
     * bar tag, HEAD relationship     -> SYM_PAIR_HEAD[1] + tag + "/"
     * bar tag, other relationship    -> relationship + "/" (tag omitted)
     * no bar tag, HEAD relationship  -> SYM_PAIR_HEAD[1]
     * no bar tag, other relationship -> the relationship value unchanged
       (which may itself be falsy)
     """
     if not self.text:  # deal with empty node
         return ''

     hidden = FeatureOntology.NotShowList
     names = [
         FeatureOntology.GetFeatureName(fid) for fid in self.features
         if fid not in hidden
     ]
     bar_tag = utils.LastItemIn2DArray(names, FeatureOntology.BarTags)
     relation = self.UpperRelationship
     is_head = relation == SYM_PAIR_HEAD[0]

     if bar_tag:
         if not relation:  # syntactic role is empty
             return bar_tag + "/"
         if is_head:  # syntactic role is HEAD
             return SYM_PAIR_HEAD[1] + bar_tag + "/"
         # Non-HEAD role: the POS tag is deliberately left out, only the
         # relationship matters for leaf labels.
         return relation + "/"

     if is_head:
         return SYM_PAIR_HEAD[1]
     return relation
Ejemplo n.º 8
0
    def _AddEdge(self, node1id, relation, parentid):
        """Add a labelled edge to self.graph and flag the parent node.

        The edge is stored as the tuple
        ``(node1id, relation, parentid, relation_feature_id)``.  Any edge
        between the same two nodes that is labelled with an ancestor of
        ``relation`` in the feature ontology is removed.  The parent node
        then receives the ``"has" + relation`` feature; if that feature
        does not exist (negative id) an error is logged instead.

        Args:
            node1id: id stored in the first slot of the edge tuple.
            relation: name of the relation feature labelling the edge.
            parentid: key into self.nodes for the node that receives the
                ``has<relation>`` feature.
        """
        # Look the relation up once; it is needed for the edge tuple and
        # for the ontology search below.
        relationid = FeatureOntology.GetFeatureID(relation)
        self.graph.add((node1id, relation, parentid, relationid))

        # Find the right relation to keep: if a more general (ancestor)
        # relation already links the same nodes, drop it.
        ontologynode = FeatureOntology.SearchFeatureOntology(relationid)
        if ontologynode:
            for ancestor in ontologynode.ancestors:
                stale = (node1id, FeatureOntology.GetFeatureName(ancestor),
                         parentid, ancestor)
                if stale in self.graph:
                    self.graph.remove(stale)

        # Set the parent to have the relation.
        hasFeatureID = FeatureOntology.GetFeatureID("has" + relation)
        if hasFeatureID >= 0:
            self.nodes[parentid].ApplyFeature(hasFeatureID)
        else:
            logging.error(
                "There is no has{} feature in the feature.txt!".format(
                    relation))
Ejemplo n.º 9
0
def _displayable_feature_names(node):
    """Return the names of *node*'s features not in NotShowList, as a list."""
    hidden = FeatureOntology.NotShowList
    return [
        FeatureOntology.GetFeatureName(f) for f in node.features
        if f not in hidden
    ]


def OutputStringTokens_onelinerSA(dag):
    """Serialise the sentiment view of *dag* as a one-line JSON string.

    The result has the shape::

        {  "nodes": [...],  "edges": [...],  "sentence": "..."}

    * ``nodes``: one object per node, ordered by ``Index``, with
      ``nodeID``, ``text`` and ``features`` (the node's displayable
      feature names restricted to the sentiment features, sorted).
    * ``edges``: one ``{ "key":K, "value":V}`` object for each distinct
      pair of edge endpoints where one end has the "Key" feature and the
      other the "Value" feature.  The member is omitted entirely when no
      such pair exists.
    * ``sentence``: the node texts concatenated in ``Index`` order,
      JSON-escaped so that quotes, backslashes and control characters in
      the text cannot break the document.

    Args:
        dag: object exposing ``nodes`` (mapping of id to node) and
            ``graph`` (iterable of edge tuples indexed at 0, 1 and 2).

    Returns:
        The JSON document as a str.
    """
    sentimentfeature = [
        "Target", "Pro", "Con", "PosEmo", "NegEmo", "Neutral", "Needed", "Key",
        "Value"
    ]
    nodes = dag.nodes

    node_chunks = []
    texts = []
    for node in sorted(nodes.values(), key=operator.attrgetter("Index")):
        texts.append(node.text)
        jsondict = {
            "nodeID": node.ID,
            "text": node.text,
            "features": sorted(
                name for name in _displayable_feature_names(node)
                if name in sentimentfeature),
        }
        node_chunks.append(
            json.dumps(jsondict,
                       default=lambda o: o.__dict__,
                       sort_keys=True,
                       ensure_ascii=False))
    sentence = "".join(texts)

    edge_chunks = []
    keyvalueset = set()
    for edge in sorted(dag.graph, key=operator.itemgetter(2, 0, 1)):
        # The node at edge[2] decides the direction: if it is a Key the
        # other end must be a Value, and vice versa.
        names_at_2 = _displayable_feature_names(nodes.get(edge[2]))
        if "Key" in names_at_2:
            keyid, valueid, needed = edge[2], edge[0], "Value"
        elif "Value" in names_at_2:
            keyid, valueid, needed = edge[0], edge[2], "Key"
        else:
            continue
        if needed not in _displayable_feature_names(nodes.get(edge[0])):
            continue

        marker = str(keyid) + "\t" + str(valueid)
        if marker in keyvalueset:
            continue  # the same pair was already emitted for another edge
        keyvalueset.add(marker)
        edge_chunks.append('{{ "key":{}, "value":{}}}'.format(
            keyid, valueid))

    parts = ['{  "nodes": [', ", ".join(node_chunks)]
    if edge_chunks:
        parts.append('],  "edges": [')
        parts.append(", ".join(edge_chunks))
    # json.dumps supplies the surrounding quotes and escapes the content.
    parts.append('],  "sentence": ')
    parts.append(json.dumps(sentence, ensure_ascii=False))
    parts.append('}')
    return "".join(parts)
Ejemplo n.º 10
0
def OutputStringTokens_onelinerSA_ben(dag):
    """Serialise the Key/Value sentiment pairs of *dag* as a JSON array.

    For every edge (visited in order of items 2, 0, 1) whose two endpoint
    nodes carry the "Key" and "Value" features, one entry is produced
    with ``keyid``, ``key``, ``keyfeatures``, ``valueid``, ``value`` and
    ``valuefeatures``.  Both directions are checked independently, so one
    edge can yield two separate entries.

    Afterwards every node that carries at least one sentiment feature but
    appears in no entry (neither as key nor as value) is added with the
    placeholder key ``"_Emo"`` and ``keyid`` -1.

    Args:
        dag: object exposing ``nodes`` (mapping of id to node) and
            ``graph`` (iterable of edge tuples indexed at 0, 1 and 2).

    Returns:
        The JSON array as a str.
    """
    sentimentfeature = [
        "Target", "Pro", "Con", "PosEmo", "NegEmo", "Neutral", "Needed", "Key",
        "Value"
    ]
    sentimentfeatureidset = {
        FeatureOntology.GetFeatureID(f) for f in sentimentfeature
    }
    FeatureID_Key = FeatureOntology.GetFeatureID("Key")
    FeatureID_Value = FeatureOntology.GetFeatureID("Value")
    nodes = dag.nodes

    def sentiment_names(node):
        # Names of the node's features that are sentiment features.
        return [
            FeatureOntology.GetFeatureName(f) for f in node.features
            if f in sentimentfeatureidset
        ]

    def make_entry(keyid, key, keyfeatures, valuenode):
        # A fresh dict per entry, so entries never alias each other.
        return {
            "keyid": keyid,
            "key": key,
            "keyfeatures": keyfeatures,
            "valueid": valuenode.ID,
            "value": valuenode.text,
            "valuefeatures": sentiment_names(valuenode),
        }

    outputdict = []
    for edge in sorted(dag.graph, key=operator.itemgetter(2, 0, 1)):
        node1 = nodes.get(edge[2])
        node2 = nodes.get(edge[0])

        if FeatureID_Key in node1.features \
                and FeatureID_Value in node2.features:
            outputdict.append(
                make_entry(node1.ID, node1.text, sentiment_names(node1),
                           node2))

        if FeatureID_Key in node2.features \
                and FeatureID_Value in node1.features:
            outputdict.append(
                make_entry(node2.ID, node2.text, sentiment_names(node2),
                           node1))

    for node in nodes.values():
        if not sentimentfeatureidset.intersection(node.features):
            continue
        already_listed = any(
            entry["keyid"] == node.ID or entry["valueid"] == node.ID
            for entry in outputdict)
        if not already_listed:
            # Sentiment node without a Key/Value partner.
            outputdict.append(make_entry(-1, "_Emo", [], node))

    return json.dumps(outputdict,
                      default=lambda o: o.__dict__,
                      sort_keys=True,
                      ensure_ascii=False)
Ejemplo n.º 11
0
def GetFeatureName(FeatureID):
    """Return the jsonpickle-encoded name of the feature with id *FeatureID*.

    *FeatureID* is converted with int() before the lookup, so a numeric
    string is accepted.
    """
    feature_name = FeatureOntology.GetFeatureName(int(FeatureID))
    return jsonpickle.encode(feature_name)