예제 #1
0
    def diff(self, goldInstance=NLPInstance, guessInstance=NLPInstance):
        """Build an instance whose edges are tagged as FN, FP or Match.

        Render type, split points and tokens are copied from ``goldInstance``.
        Edge identities of both instances (from ``self.createIdentities``) are
        compared as sets: identities only in gold get ``":FN"`` appended to
        their edge type, identities only in guess get ``":FP"``, and shared
        identities get ``":Match"``.

        :param goldInstance: the reference instance.
        :param guessInstance: the instance compared against the reference.
        :return: a new NLPInstance holding the re-typed edges.
        """
        result = NLPInstance()
        result.renderType = goldInstance.renderType
        # Iterate over a snapshot of the gold split points while appending.
        for point in tuple(goldInstance.splitPoints):
            result.splitPoints.append(point)
        result.addTokens(goldInstance.tokens)

        gold = set(self.createIdentities(goldInstance.getEdges()))
        guess = set(self.createIdentities(guessInstance.getEdges()))

        # Emission order is kept: false negatives, false positives, matches.
        partitions = (
            (gold - guess, ":FN"),
            (guess - gold, ":FP"),
            (gold & guess, ":Match"),
        )
        for identities, suffix in partitions:
            for identity in identities:
                source = identity.edge
                tagged = source.type + suffix
                result.addEdge(edge=Edge(From=source.From,
                                         To=source.To,
                                         label=source.label,
                                         note=source.note,
                                         Type=tagged,
                                         renderType=source.renderType,
                                         description=source.description))
        return result
예제 #2
0
    def create(rows):
        """Build an NLPInstance from a header line plus dependency lines.

        The first line is split on whitespace; its first two fields (sentence
        start marker and dependency count) are skipped and every remaining
        field is read as ``word|tag|category``.  Every line whose first field
        is not a sentence start/end marker is read as a dependency with the
        columns ``to from label_a label_b``.

        :param rows: sequence of text lines; ``rows[0]`` is the header line.
        :return: the populated NLPInstance.
        """
        instance = NLPInstance()
        # Split the header into fields first: indexing the raw line would walk
        # it one character at a time.
        sentence = rows[0].strip().split()
        # Skip <s> and dep count
        for i in range(2, len(sentence)):
            w_t_c = sentence[i].split("|")  # plain string split, not a regex
            (instance.addToken()
                .addProperty(name="Word", value=w_t_c[0])
                .addProperty(name="Tag", value=w_t_c[1])
                .addProperty(name="Category", value=w_t_c[2])
                .addProperty(name="Index", value=str(i - 1)))

        mod = 1
        for row in rows:
            row = row.strip().split()
            # Sentence delimiters carry no dependency.  The end marker is the
            # literal text "<", backslash, "s", ">" and is compared as a plain
            # string (a "\s" regex can never match a whitespace-split field).
            if row[0] in ("<s>", "<\\s>"):
                continue
            try:
                instance.addEdge(From=int(row[1]),
                                 to=int(row[0]),
                                 label=row[2] + "_" + row[3],
                                 type="dep")
            except Exception:
                # Best effort: report the bad row and flag a token instead of
                # aborting; KeyboardInterrupt/SystemExit still propagate.
                print("Can't parse dependency", file=sys.stderr)
                instance.tokens[mod].addProperty("DepMissing", "missing")
            mod += 1
        return instance
예제 #3
0
    def create(rows):
        """Build an NLPInstance with per-token pos, chunk and ner spans.

        Each line is split on whitespace; column 0 is the word, columns 1-3
        are attached as single-token spans typed "pos", "chunk (BIO)" and
        "ner (BIO)".  Afterwards ``TabFormat.extractSpan03`` is run over
        columns 2 and 3 to add "chunk" and "ner" spans.

        :param rows: iterable of text lines, one token per line.
        :return: the populated NLPInstance.
        """
        instance = NLPInstance()
        for index, row in enumerate(rows):
            row = row.strip().split()
            (instance.addToken()
                .addProperty(name="Word", value=row[0])
                .addProperty(name="Index", value=str(index)))

            instance.addSpan(index, index, row[1], "pos")
            # The label used to lack its closing parenthesis ("chunk (BIO").
            instance.addSpan(index, index, row[2], "chunk (BIO)")
            instance.addSpan(index, index, row[3], "ner (BIO)")

        tabformat = TabFormat(object)  # TODO: object = MainWindow?
        tabformat.extractSpan03(rows=rows,
                                column=2,
                                type="chunk",
                                instance=instance)
        tabformat.extractSpan03(rows=rows,
                                column=3,
                                type="ner",
                                instance=instance)

        return instance
예제 #4
0
 def create(rows):
     """Build an NLPInstance from whitespace-separated token lines.

     A "-Root-" token is added first.  For each line, columns 0-5 are stored
     on a new token as Index, Word (column 1), Lemma, CPos, Pos and Feats.
     In a second pass column 6 (head) and column 7 (label) are added as a
     "dep" dependency pointing at the token numbered by column 0.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     :raises ValueError: if column 0 of a line is not an integer.
     """
     instance = NLPInstance()
     instance.addToken().addProperty("Word", "-Root-")
     for row in rows:
         row = row.strip().split()
         (instance.addToken()
             .addProperty(name="Word", value=row[1])
             .addProperty(name="Index", value=row[0])
             .addProperty(name="Lemma", value=row[2])
             .addProperty(name="CPos", value=row[3])
             .addProperty(name="Pos", value=row[4])
             .addProperty(name="Feats", value=row[5]))
     for row in rows:
         row = row.strip().split()
         # dependency
         mod = int(row[0])
         try:
             instance.addDependency(From=row[6],
                                    to=str(mod),
                                    label=row[7],
                                    type="dep")
         except Exception:
             # Best effort: report the bad row and flag the token instead of
             # aborting; KeyboardInterrupt/SystemExit still propagate.
             print("Can't parse dependency", file=sys.stderr)
             instance.tokens[mod].addProperty("DepMissing", "missing")
     return instance
예제 #5
0
 def create(rows):
     """Build an NLPInstance from ``word pos head label`` lines.

     A "-Root-" token is added first; each line then yields one token whose
     Index is its 1-based position.  In a second pass column 2 (head) and
     column 3 (label) are added as a "dep" dependency pointing at the token
     with the same 1-based position.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken().addProperty(name="Word", value="-Root-")
     for index, row in enumerate(rows, 1):
         row = row.strip().split()
         (instance.addToken()
             .addProperty(name="Word", value=row[0])
             .addProperty(name="Index", value=str(index))
             .addProperty(name="Pos", value=row[1]))
     for mod, row in enumerate(rows, 1):
         row = row.strip().split()
         # dependency
         try:
             instance.addDependency(From=row[2],
                                    to=str(mod),
                                    label=row[3],
                                    type="dep")
         except Exception:
             # Best effort: report the bad row and flag the token instead of
             # aborting; KeyboardInterrupt/SystemExit still propagate.
             print("Can't parse dependency", file=sys.stderr)
             instance.tokens[mod].addProperty("DepMissing", "missing")
     return instance
예제 #6
0
    def create(self, rows):
        """Build an NLPInstance with one token and one ner span per line.

        Lines equal to ``"\\n"`` are ignored.  Column 0 becomes the token's
        Word, the running position its Index, and column 1 is attached as a
        single-token span typed "ner (BIO)".

        :param rows: iterable of text lines.
        :return: the populated NLPInstance.
        """
        instance = NLPInstance()
        content = (line.split() for line in rows if line != "\n")
        for position, fields in enumerate(content):
            (instance.addToken()
                .addProperty(property="Word", value=fields[0])
                .addProperty(property="Index", value=str(position)))
            instance.addSpan(position, position, fields[1], "ner (BIO)")

        # TODO: TabFormat.extractSpan00(rows, 1, "ner", instance)

        return instance
예제 #7
0
    def create(rows):
        """Build an NLPInstance with one token and one ner span per line.

        Column 0 of each whitespace-split line becomes the token's Word, the
        0-based line position its Index, and column 1 a single-token span
        typed "ner (BIO)".  ``TabFormat.extractSpan00`` is then run over
        column 1 with type "ner".

        :param rows: iterable of text lines, one token per line.
        :return: the populated NLPInstance.
        """
        instance = NLPInstance()
        for position, line in enumerate(rows):
            fields = line.strip().split()
            (instance.addToken()
                .addProperty(name="Word", value=fields[0])
                .addProperty(name="Index", value=str(position)))
            instance.addSpan(position, position, fields[1], "ner (BIO)")

        extractor = TabFormat(object)  # TODO: object = MainWindow?
        extractor.extractSpan00(rows=rows, column=1, type="ner", instance=instance)

        return instance
예제 #8
0
 def create(self, rows):
     """Build an NLPInstance from whitespace-separated token lines.

     A "-Root-" token is added first.  Lines equal to ``"\\n"`` are ignored.
     Columns 0-5 are stored on a new token as Index, Word (column 1), Lemma,
     CPos, Pos and Feats.  In a second pass column 6 (head) and column 7
     (label) are added as a "dep" edge pointing at column 0.

     :param rows: iterable of text lines.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken().addProperty("Word", "-Root-")
     for row in rows:
         if row == "\n":
             continue
         row = row.split()
         (instance.addToken()
             .addProperty(property="Word", value=row[1])
             .addProperty(property="Index", value=row[0])
             .addProperty(property="Lemma", value=row[2])
             .addProperty(property="CPos", value=row[3])
             .addProperty(property="Pos", value=row[4])
             .addProperty(property="Feats", value=row[5]))
     for row in rows:
         if row == "\n":
             continue
         row = row.split()
         # dependency
         mod = row[0]
         try:
             instance.addEdge(From=row[6], to=mod, label=row[7], type="dep")
         except Exception:
             # Best effort: report the bad row and flag the token instead of
             # aborting; KeyboardInterrupt/SystemExit still propagate.
             print("Can't parse dependency")
             # The token collection is indexed by position, so the textual
             # column value has to be converted before the lookup.
             instance.tokens[int(mod)].addProperty("DepMissing", "missing")
     return instance
예제 #9
0
 def create(self, rows):
     """Build an NLPInstance from ``word pos head label`` lines.

     A "-Root-" token is added first.  Lines equal to ``"\\n"`` are ignored
     and do not advance the position counters.  Each remaining line yields a
     token whose Index is its 1-based position; in a second pass column 2
     (head) and column 3 (label) are added as a "dep" edge pointing at that
     position.

     :param rows: iterable of text lines.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken().addProperty("Word", "-Root-")
     index = 1
     for row in rows:
         if row == "\n":
             continue
         row = row.split()
         (instance.addToken()
             .addProperty(property="Word", value=row[0])
             .addProperty(property="Index", value=str(index))
             .addProperty(property="Pos", value=row[1]))
         index += 1
     mod = 1
     for row in rows:
         if row == "\n":
             continue
         row = row.split()
         # dependency
         try:
             instance.addEdge(From=row[2], to=str(mod), label=row[3], type="dep")
         except Exception:
             # Best effort: report the bad row and flag the token instead of
             # aborting; KeyboardInterrupt/SystemExit still propagate.
             print("Can't parse dependency")
             instance.tokens[mod].addProperty("DepMissing", "missing")
         mod += 1
     return instance
예제 #10
0
    def create(rows):
        """Build an NLPInstance from a header line plus dependency lines.

        The header (``rows[0]``) is split on whitespace.  Its first two
        fields, the sentence start marker and the dependency count, are
        ignored; each further field must have the form ``word|tag|category``
        and becomes one token.  All lines that do not start with a sentence
        start/end marker are parsed as ``to from label_a label_b`` and added
        as "dep" edges.

        :param rows: sequence of text lines; ``rows[0]`` is the header line.
        :return: the populated NLPInstance.
        """
        instance = NLPInstance()
        # The header must be tokenised before indexing; subscripting the raw
        # string yields single characters rather than fields.
        sentence = rows[0].strip().split()
        # Skip <s> and dep count
        for i in range(2, len(sentence)):
            w_t_c = sentence[i].split("|")  # plain string split, not a regex
            (instance.addToken()
                .addProperty(name="Word", value=w_t_c[0])
                .addProperty(name="Tag", value=w_t_c[1])
                .addProperty(name="Category", value=w_t_c[2])
                .addProperty(name="Index", value=str(i - 1)))

        # Start marker and end marker (literally "<", backslash, "s", ">").
        # Compared as plain strings: a "\s" regex only matches whitespace,
        # which a whitespace-split field never contains.
        delimiters = ("<s>", "<\\s>")
        mod = 1
        for row in rows:
            row = row.strip().split()
            if row[0] in delimiters:
                continue
            # dependency
            try:
                instance.addEdge(From=int(row[1]),
                                 to=int(row[0]),
                                 label=row[2] + "_" + row[3],
                                 type="dep")
            except Exception:
                # Best effort: report the bad row and flag a token instead of
                # aborting; KeyboardInterrupt/SystemExit still propagate.
                print("Can't parse dependency", file=sys.stderr)
                instance.tokens[mod].addProperty("DepMissing", "missing")
            mod += 1
        return instance
예제 #11
0
    def create(rows):
        """Build an NLPInstance with tokens, sense spans and role spans.

        First pass: one token per line, Word from column 0 and Index from the
        0-based line position.  Second pass: whenever column 9 is not "-", a
        single-token "sense" span labelled ``column10.column9`` is added and
        ``TabFormat.extractSpan05`` is run on column ``11 + n``, where ``n``
        counts the predicates seen so far.

        :param rows: iterable of text lines, one token per line.
        :return: the populated NLPInstance.
        """
        instance = NLPInstance()
        for position, line in enumerate(rows):
            fields = line.strip().split()
            (instance.addToken()
                .addProperty(name="Word", value=fields[0])
                .addProperty(name="Index", value=str(position)))

        predicateCount = 0
        for position, line in enumerate(rows):
            fields = line.strip().split()
            # TODO: can a row have fewer than 10 columns here?
            if fields[9] == "-":
                continue
            sense = fields[10] + "." + fields[9]
            instance.addSpan(position, position, sense, "sense")

            tabformat = TabFormat(object)  # TODO: object = MainWindow?
            tabformat.extractSpan05(rows, 11 + predicateCount, "role", sense + ":", instance)

            predicateCount += 1
        return instance
예제 #12
0
 def create(self, rows):
     """Build an NLPInstance with tokens and per-predicate sense spans.

     Lines equal to ``"\\n"`` are ignored and do not advance the position.
     First pass: one token per line, Word from column 0 and Index from the
     0-based position.  Second pass: whenever column 9 is not "-", a
     single-token "sense" span labelled ``column10.column9`` is added.

     :param rows: iterable of text lines.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     for position, fields in enumerate(r.split() for r in rows if r != "\n"):
         (instance.addToken()
             .addProperty(property="Word", value=fields[0])
             .addProperty(property="Index", value=str(position)))

     predicateCount = 0
     for position, fields in enumerate(r.split() for r in rows if r != "\n"):
         # TODO: can a row have fewer than 10 columns here?
         if fields[9] == "-":
             continue
         sense = fields[10] + "." + fields[9]
         instance.addSpan(position, position, sense, "sense")
         # TODO: TabFormat.extractSpan05(rows, 11 + predicateCount, "role", sense + ":", instance)
         predicateCount += 1
     return instance
예제 #13
0
 def create(self, rows):
     """Build an NLPInstance with one token plus pos and chunk spans per line.

     Lines equal to ``"\\n"`` are ignored.  Column 0 becomes the token's
     Word and the 0-based position its Index; column 1 is attached as a
     "pos" span and column 2 as a "Chunk (BIO)" span over that single token.

     :param rows: iterable of text lines.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     content = (line.split() for line in rows if line != "\n")
     for position, fields in enumerate(content):
         # Read the chunk column before the token is created, as before.
         chunk_tag = fields[2]
         (instance.addToken()
             .addProperty(property="Word", value=fields[0])
             .addProperty(property="Index", value=str(position)))
         instance.addSpan(position, position, fields[1], "pos")
         instance.addSpan(position, position, chunk_tag, "Chunk (BIO)")
     return instance
예제 #14
0
 def createOpen(self, rows):
     """Build an NLPInstance from lines of open-format columns.

     An empty token is added first.  Columns 0-2 of each line are stored on
     a new token under the property names ``self.ne``, ``self.bbn`` and
     ``self.wn``.  In a second pass column 3 (head, as an integer) and
     column 4 (label) are added as a "malt" edge pointing at the token's
     1-based position.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     :raises ValueError: if column 3 of a line is not an integer.
     """
     instance = NLPInstance()
     instance.addToken()
     for row in rows:
         row = row.strip().split()
         (instance.addToken()
             .addProperty(name=self.ne, value=row[0])
             .addProperty(name=self.bbn, value=row[1])
             .addProperty(name=self.wn, value=row[2]))
     for index, row in enumerate(rows, 1):
         row = row.strip().split()
         # dependency
         instance.addEdge(From=int(row[3]),
                          to=index,
                          label=row[4],
                          type="malt")
     # Return the instance that was built; the loop counter used to be
     # returned here, which discarded all of the work above.
     return instance
예제 #15
0
 def createOpen(self, rows):
     """Build an NLPInstance from lines of open-format columns.

     Starts with one empty token, then adds a token per line carrying
     columns 0, 1 and 2 under the property names held in ``self.ne``,
     ``self.bbn`` and ``self.wn``.  A second pass adds one "malt" edge per
     line, from ``int(column 3)`` to the line's 1-based position, labelled
     with column 4.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     :raises ValueError: if column 3 of a line is not an integer.
     """
     instance = NLPInstance()
     instance.addToken()
     for row in rows:
         row = row.strip().split()
         (instance.addToken()
             .addProperty(name=self.ne, value=row[0])
             .addProperty(name=self.bbn, value=row[1])
             .addProperty(name=self.wn, value=row[2]))
     index = 1
     for row in rows:
         row = row.strip().split()
         # dependency
         instance.addEdge(From=int(row[3]), to=index, label=row[4], type="malt")
         index += 1
     # The built instance is the result; returning the counter ``index``
     # threw it away.
     return instance
예제 #16
0
 def createOpen(self, rows):
     """Build an NLPInstance from lines of open-format columns.

     An empty token is added first.  Lines equal to ``"\\n"`` are ignored
     and do not advance the position counter.  Columns 0-2 are stored on a
     new token under the property names ``self.ne``, ``self.bbn`` and
     ``self.wn``; a second pass adds a "malt" edge from column 3 to the
     line's 1-based position, labelled with column 4.

     :param rows: iterable of text lines.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken()
     for row in rows:
         if row == "\n":
             continue
         row = row.split()
         (instance.addToken()
             .addProperty(property=self.ne, value=row[0])
             .addProperty(property=self.bbn, value=row[1])
             .addProperty(property=self.wn, value=row[2]))
     index = 1
     for row in rows:
         if row == "\n":
             continue
         row = row.split()
         # dependency
         instance.addEdge(From=row[3], to=index, label=row[4], type="malt")
         index += 1
     # Hand back the instance that was built rather than the loop counter.
     return instance
예제 #17
0
 def create(rows):
     """Build an NLPInstance from whitespace-separated token lines.

     Adds a "-Root-" token, then one token per line with the properties
     Index (column 0), Word (1), Lemma (2), CPos (3), Pos (4) and Feats (5).
     A second pass adds a "dep" dependency per line from column 6 to the
     token numbered by column 0, labelled with column 7.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     :raises ValueError: if column 0 of a line is not an integer.
     """
     instance = NLPInstance()
     instance.addToken().addProperty("Word", "-Root-")
     for row in rows:
         row = row.strip().split()
         (instance.addToken()
             .addProperty(name="Word", value=row[1])
             .addProperty(name="Index", value=row[0])
             .addProperty(name="Lemma", value=row[2])
             .addProperty(name="CPos", value=row[3])
             .addProperty(name="Pos", value=row[4])
             .addProperty(name="Feats", value=row[5]))
     for row in rows:
         row = row.strip().split()
         # dependency
         mod = int(row[0])
         try:
             instance.addDependency(From=row[6], to=str(mod), label=row[7], type="dep")
         except Exception:
             # Best effort: report the bad row and flag the token instead of
             # aborting; KeyboardInterrupt/SystemExit still propagate.
             print("Can't parse dependency", file=sys.stderr)
             instance.tokens[mod].addProperty("DepMissing", "missing")
     return instance
예제 #18
0
 def create(rows):
     """Build an NLPInstance from ``word pos head label`` lines.

     Adds a "-Root-" token, then one token per line with Word (column 0),
     Index (1-based line position) and Pos (column 1).  A second pass adds a
     "dep" dependency per line from column 2 to the line's 1-based position,
     labelled with column 3.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken().addProperty(name="Word", value="-Root-")
     for index, row in enumerate(rows, 1):
         row = row.strip().split()
         (instance.addToken()
             .addProperty(name="Word", value=row[0])
             .addProperty(name="Index", value=str(index))
             .addProperty(name="Pos", value=row[1]))
     for mod, row in enumerate(rows, 1):
         row = row.strip().split()
         # dependency
         try:
             instance.addDependency(From=row[2], to=str(mod), label=row[3], type="dep")
         except Exception:
             # Best effort: report the bad row and flag the token instead of
             # aborting; KeyboardInterrupt/SystemExit still propagate.
             print("Can't parse dependency", file=sys.stderr)
             instance.tokens[mod].addProperty("DepMissing", "missing")
     return instance
예제 #19
0
    def create(rows):
        """Build an NLPInstance with tokens, sense spans and role spans.

        Every line contributes a token (Word = column 0, Index = 0-based line
        position).  Afterwards, each line whose column 9 differs from "-"
        gets a single-token "sense" span labelled ``column10.column9``, and
        ``TabFormat.extractSpan05`` is invoked for column ``11 + k``, ``k``
        being the number of predicates handled before it.

        :param rows: iterable of text lines, one token per line.
        :return: the populated NLPInstance.
        """
        instance = NLPInstance()
        for row_no, raw in enumerate(rows):
            cells = raw.strip().split()
            (instance.addToken()
                .addProperty(name="Word", value=cells[0])
                .addProperty(name="Index", value=str(row_no)))

        seen_predicates = 0
        for row_no, raw in enumerate(rows):
            cells = raw.strip().split()
            has_predicate = cells[9] != "-"  # TODO: can a row have fewer than 10 columns here?
            if has_predicate:
                sense = cells[10] + "." + cells[9]
                instance.addSpan(row_no, row_no, sense, "sense")

                role_column = 11 + seen_predicates
                role_prefix = sense + ":"
                tabformat = TabFormat(object)  # TODO: object = MainWindow?
                tabformat.extractSpan05(rows, role_column, "role", role_prefix, instance)

                seen_predicates += 1
        return instance
예제 #20
0
    def diff(self, goldInstance=NLPInstance, guessInstance=NLPInstance):
        """Compare two instances edge by edge and return the annotated result.

        The returned instance takes its render type, split points and tokens
        from ``goldInstance``.  Identities produced by
        ``self.createIdentities`` for both edge collections are compared as
        sets, and a copy of each underlying edge is added with its type
        suffixed by ``":FN"`` (gold only), ``":FP"`` (guess only) or
        ``":Match"`` (present in both).

        :param goldInstance: the reference instance.
        :param guessInstance: the instance compared against the reference.
        :return: a new NLPInstance holding the suffixed edges.
        """
        merged = NLPInstance()
        merged.renderType = goldInstance.renderType
        # Work on a snapshot of the gold split points while appending.
        for splitPoint in tuple(goldInstance.splitPoints):
            merged.splitPoints.append(splitPoint)
        merged.addTokens(goldInstance.tokens)

        goldIdentities = set(self.createIdentities(goldInstance.getEdges()))
        guessIdentities = set(self.createIdentities(guessInstance.getEdges()))
        missed = goldIdentities - guessIdentities
        spurious = guessIdentities - goldIdentities
        shared = goldIdentities & guessIdentities

        def emit(identities, suffix):
            # Copy every edge behind ``identities`` with a suffixed type.
            for identity in identities:
                original = identity.edge
                new_type = original.type + suffix
                copy = Edge(From=original.From,
                            To=original.To,
                            label=original.label,
                            note=original.note,
                            Type=new_type,
                            renderType=original.renderType,
                            description=original.description)
                merged.addEdge(edge=copy)

        emit(missed, ":FN")
        emit(spurious, ":FP")
        emit(shared, ":Match")
        return merged
예제 #21
0
 def create(rows):
     """Build an NLPInstance with tokens, senses, dependencies and roles.

     A "-Root-" token is added first.  Columns 0-7 of every line are stored
     on a new token (Index, Word, Lemma, PLemma, PoS, PPoS, Feat, PFeat).  A
     column 13 other than "_" marks a predicate: its index is remembered and
     a "sense" span is added.  The second pass adds a "dep" dependency from
     column 8 (label column 10), a "pdep" dependency from column 9 (label
     column 11), and for every column from 14 on that is not "_" a "role"
     dependency from the matching remembered predicate.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken().addProperty(name="Word", value="-Root-")
     predicates = []
     for line in rows:
         cols = line.strip().split()
         (instance.addToken()
             .addProperty(name="Word", value=cols[1])
             .addProperty(name="Index", value=cols[0])
             .addProperty(name="Lemma", value=cols[2])
             .addProperty(name="PLemma", value=cols[3])
             .addProperty(name="PoS", value=cols[4])
             .addProperty(name="PPoS", value=cols[5])
             .addProperty(name="Feat", value=cols[6])
             .addProperty(name="PFeat", value=cols[7]))
         if cols[13] == "_":
             continue
         index = int(cols[0])
         predicates.append(index)
         instance.addSpan(str(index), str(index), cols[13], "sense")

     for line in rows:
         cols = line.strip().split()
         # dependency
         if cols[8] != "_":
             instance.addDependency(From=str(cols[8]), to=str(cols[0]),
                                    label=cols[10], type="dep")
         if cols[9] != "_":
             instance.addDependency(From=str(cols[9]), to=str(cols[0]),
                                    label=cols[11], type="pdep")
         # role: the n-th extra column belongs to the n-th predicate
         for offset, label in enumerate(cols[14:]):
             if label == "_":
                 continue
             pred = predicates[offset]
             arg = int(cols[0])
             instance.addDependency(From=str(pred), to=str(arg),
                                    label=label, type="role")
     return instance
예제 #22
0
 def create(rows):
     """Build an NLPInstance with tokens, senses, dependencies and roles.

     A "-Root-" token is added first.  Each line yields a token with Index
     (column 0), Word (1), Lemma (2), Pos (3), Split Form (5), Split Lemma
     (6) and Split PoS (7).  A column 10 other than "_" marks a predicate:
     its index is remembered and a "sense" span is added.  The second pass
     adds a "dep" dependency from column 8 labelled with column 9, and for
     every column from 11 on that is not "_" a "role" edge from the matching
     remembered predicate to the current token.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken().addProperty("Word", "-Root-")
     predicates = []
     for row in rows:
         row = row.strip().split()
         # Column 6 is read by subscript; the previous ``row(6)`` tried to
         # call the list and raised TypeError on the first line.
         (instance.addToken()
             .addProperty(name="Word", value=row[1])
             .addProperty(name="Index", value=row[0])
             .addProperty(name="Lemma", value=row[2])
             .addProperty(name="Pos", value=row[3])
             .addProperty(name="Split Form", value=row[5])
             .addProperty(name="Split Lemma", value=row[6])
             .addProperty(name="Split PoS", value=row[7]))
         if row[10] != "_":
             index = int(row[0])
             predicates.append(index)
             instance.addSpan(str(index), str(index), row[10], "sense")
     for row in rows:
         row = row.strip().split()
         # dependency
         if row[8] != "_":
             instance.addDependency(row[8], row[0], row[9], "dep")
         # role: the n-th extra column belongs to the n-th predicate
         for col in range(11, len(row)):
             label = row[col]
             if label != "_":
                 pred = predicates[col - 11]
                 arg = int(row[0])
                 instance.addEdge(From=pred,
                                  to=arg,
                                  label=label,
                                  type="role")
     return instance
예제 #23
0
 def create(rows):
     """Build an NLPInstance with tokens, senses, dependencies and roles.

     Adds a "-Root-" token, then one token per line carrying Index (column
     0), Word (1), Lemma (2), Pos (3), Split Form (5), Split Lemma (6) and
     Split PoS (7).  Lines whose column 10 is not "_" are predicates: the
     index is collected and a "sense" span is added.  A second pass adds a
     "dep" dependency from column 8 (label column 9) and, for each column
     from 11 on that is not "_", a "role" edge from the corresponding
     collected predicate to the current token.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken().addProperty("Word", "-Root-")
     predicates = []
     for row in rows:
         row = row.strip().split()
         # ``row[6]`` replaces ``row(6)``, which called the list object and
         # therefore raised TypeError before any token was completed.
         (instance.addToken()
             .addProperty(name="Word", value=row[1])
             .addProperty(name="Index", value=row[0])
             .addProperty(name="Lemma", value=row[2])
             .addProperty(name="Pos", value=row[3])
             .addProperty(name="Split Form", value=row[5])
             .addProperty(name="Split Lemma", value=row[6])
             .addProperty(name="Split PoS", value=row[7]))
         if row[10] != "_":
             index = int(row[0])
             predicates.append(index)
             instance.addSpan(str(index), str(index), row[10], "sense")
     for row in rows:
         row = row.strip().split()
         # dependency
         if row[8] != "_":
             instance.addDependency(row[8], row[0], row[9], "dep")
         # role: the n-th extra column belongs to the n-th predicate
         for col in range(11, len(row)):
             label = row[col]
             if label != "_":
                 pred = predicates[col - 11]
                 arg = int(row[0])
                 instance.addEdge(From=pred, to=arg, label=label, type="role")
     return instance
예제 #24
0
 def create(rows):
     """Build an NLPInstance with tokens, senses, dependencies and roles.

     After a leading "-Root-" token, each line adds a token with the
     properties Index, Word, Lemma, PLemma, PoS, PPoS, Feat and PFeat taken
     from columns 0-7.  Lines with a column 13 other than "_" register a
     predicate index and add a "sense" span.  In the second pass, column 8
     (label 10) gives a "dep" dependency, column 9 (label 11) a "pdep"
     dependency, and each column from 14 on that is not "_" a "role"
     dependency from the predicate registered at the same offset.

     :param rows: iterable of text lines, one token per line.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken().addProperty(name="Word", value="-Root-")
     predicates = []
     for raw in rows:
         fields = raw.strip().split()
         token = instance.addToken()
         (token
             .addProperty(name="Word", value=fields[1])
             .addProperty(name="Index", value=fields[0])
             .addProperty(name="Lemma", value=fields[2])
             .addProperty(name="PLemma", value=fields[3])
             .addProperty(name="PoS", value=fields[4])
             .addProperty(name="PPoS", value=fields[5])
             .addProperty(name="Feat", value=fields[6])
             .addProperty(name="PFeat", value=fields[7]))
         sense = fields[13]
         if sense != "_":
             index = int(fields[0])
             predicates.append(index)
             instance.addSpan(str(index), str(index), sense, "sense")

     for raw in rows:
         fields = raw.strip().split()
         head, phead = fields[8], fields[9]
         # dependency
         if head != "_":
             instance.addDependency(From=str(head), to=str(fields[0]), label=fields[10], type="dep")
         if phead != "_":
             instance.addDependency(From=str(phead), to=str(fields[0]), label=fields[11], type="pdep")
         # role: the n-th extra column belongs to the n-th predicate
         for slot, label in enumerate(fields[14:]):
             if label == "_":
                 continue
             pred = predicates[slot]
             arg = int(fields[0])
             instance.addDependency(From=str(pred), to=str(arg), label=label, type="role")
     return instance
예제 #25
0
 def filter(self, original=NLPInstance):
     """Return a new instance whose edges went through ``self.filterEdges``.

     Tokens, render type and split points are passed on from ``original``
     unchanged; only the edge collection is filtered.

     :param original: the instance to filter.
     :return: a new NLPInstance built from the pieces above.
     """
     # Gather the constructor arguments in the original evaluation order.
     tokens = original.tokens
     kept_edges = self.filterEdges(original.getEdges())
     render_type = original.renderType
     split_points = original.splitPoints
     return NLPInstance(tokens=tokens,
                        edges=kept_edges,
                        renderType=render_type,
                        splitPoints=split_points)
예제 #26
0
 def create(self, rows):
     """Build an NLPInstance with tokens, senses, dependencies and roles.

     A "-Root-" token is added first.  Lines equal to ``"\\n"`` are ignored.
     Columns 0-7 of each line are stored on a new token (Index, Word, Lemma,
     PLemma, PoS, PPoS, Feat, PFeat).  A column 13 other than "_" marks a
     predicate: its index is remembered and a "sense" span is added.  The
     second pass adds a "dep" edge from column 8 (label column 10), a "pdep"
     edge from column 9 (label column 11), and for every column from 14 on
     that is not "_" a "role" edge from the matching remembered predicate.

     :param rows: iterable of text lines.
     :return: the populated NLPInstance.
     """
     instance = NLPInstance()
     instance.addToken().addProperty(property="Word", value="-Root-")
     predicates = []
     for row in rows:
         if row == "\n":
             continue
         row = row.split()
         (instance.addToken()
             .addProperty(property="Word", value=row[1])
             .addProperty(property="Index", value=row[0])
             .addProperty(property="Lemma", value=row[2])
             .addProperty(property="PLemma", value=row[3])
             .addProperty(property="PoS", value=row[4])
             .addProperty(property="PPoS", value=row[5])
             .addProperty(property="Feat", value=row[6])
             .addProperty(property="PFeat", value=row[7]))
         if row[13] != "_":
             index = row[0]
             predicates.append(index)
             instance.addSpan(index, index, row[13], "sense")
     for row in rows:
         if row == "\n":
             continue
         row = row.split()
         # dependency: guard on the head column itself (column 8), in the
         # same way the "pdep" edge below is guarded by column 9.  The guard
         # used to test column 0, so "_" heads were added as edges.
         if row[8] != "_":
             instance.addEdge(From=row[8], to=row[0], label=row[10], type="dep")
         if row[9] != "_":
             instance.addEdge(From=row[9], to=row[0], label=row[11], type="pdep")
         # role: the n-th extra column belongs to the n-th predicate
         for col in range(14, len(row)):
             label = row[col]
             if label != "_":
                 pred = predicates[col - 14]
                 arg = row[0]
                 instance.addEdge(From=pred, to=arg, label=label, type="role")
     return instance