Ejemplo n.º 1
0
def loadConll(inFile):
    """
        Read the tab-separated CoNLL file 'data/<inFile>'.

        A line with at least 10 tab-separated fields is a token line; its
        word (field 1), POS tag (field 4), head index (field 6, parsed as
        an int) and dependency label (field 7) are kept. Any line with
        fewer than 10 fields closes the sentence collected so far.

        Returns a pair (sents, trees):
            sents -- one list per sentence of token dicts with the keys
                     'word', 'POS', 'head' and 'depType'
            trees -- one DependencyTree per sentence, filled by calling
                     tree.add(head, depType) once per token, in file order

        Raises ValueError if the head field of a token line is not an
        integer.
    """
    sents = []
    trees = []
    # Text mode: the lines are split on the str '\t', which raises a
    # TypeError on the bytes objects that binary mode yields on Python 3.
    with open('data/' + inFile, 'r') as fin:
        sentenceTokens = []
        tree = DependencyTree()
        for line in fin:
            fields = line.strip().split('\t')
            if len(fields) < 10:
                # Sentence boundary; consecutive short lines add nothing.
                if sentenceTokens:
                    trees.append(tree)
                    sents.append(sentenceTokens)
                    tree = DependencyTree()
                    sentenceTokens = []
            else:
                word = fields[1]
                pos = fields[4]
                head = int(fields[6])
                depType = fields[7]

                sentenceTokens.append({
                    'word': word,
                    'POS': pos,
                    'head': head,
                    'depType': depType,
                })

                tree.add(head, depType)

        # Keep the last sentence when the file does not end with a blank
        # (or otherwise short) line.
        if sentenceTokens:
            trees.append(tree)
            sents.append(sentenceTokens)

    return sents, trees
Ejemplo n.º 2
0
def loadConll(inFile):
    """
        Read the tab-separated CoNLL file 'data/<inFile>'.

        A line with at least 10 tab-separated fields is a token line; its
        word (field 1), POS tag (field 4), head index (field 6, parsed as
        an int) and dependency label (field 7) are kept. Any line with
        fewer than 10 fields closes the sentence collected so far.

        Token lines whose head field is not an integer are skipped
        entirely: they are added neither to the sentence nor to the tree.

        Returns a pair (sents, trees):
            sents -- one list per sentence of token dicts with the keys
                     'word', 'POS', 'head' and 'depType'
            trees -- one DependencyTree per sentence, filled by calling
                     tree.add(head, depType) once per kept token
    """
    sents = []
    trees = []
    with open('data/' + inFile, 'r') as fin:
        sentenceTokens = []
        tree = DependencyTree()
        for line in fin:
            fields = line.strip().split('\t')
            if len(fields) < 10:
                # Sentence boundary; consecutive short lines add nothing.
                if sentenceTokens:
                    trees.append(tree)
                    sents.append(sentenceTokens)
                    tree = DependencyTree()
                    sentenceTokens = []
                continue

            # Parsing the head is the only step that can fail here, so only
            # ValueError is caught; other exceptions propagate.
            try:
                head = int(fields[6])
            except ValueError:
                continue
            depType = fields[7]

            sentenceTokens.append({
                'word': fields[1],
                'POS': fields[4],
                'head': head,
                'depType': depType,
            })
            tree.add(head, depType)

        # Keep the last sentence when the file does not end with a blank
        # (or otherwise short) line.
        if sentenceTokens:
            trees.append(tree)
            sents.append(sentenceTokens)

    return sents, trees
Ejemplo n.º 3
0
    def __init__(self, sentence):
        """
            Set up an empty stack, an empty buffer and a new DependencyTree
            for the given sentence.
        """
        self.stack, self.buffer = [], []

        self.sentence = sentence
        self.tree = DependencyTree()
Ejemplo n.º 4
0
class Configuration:
    """
        Parser state for one sentence: a stack and a buffer of token
        indices plus the dependency tree built so far.

        getWord / getPOS treat index 0 as the root and map index k >= 1 to
        sentence[k - 1]. Missing positions are reported with the Config
        constants (Config.NONEXIST, Config.NULL), which are defined outside
        this class.
    """

    def __init__(self, sentence):
        """
            Start with an empty stack, an empty buffer and a new
            DependencyTree for the given sentence.
        """
        self.stack = []
        self.buffer = []

        self.tree = DependencyTree()
        self.sentence = sentence

    def shift(self):
        """
            Move the first buffer element onto the top of the stack.
            Returns False (and changes nothing) if getBuffer(0) is
            Config.NONEXIST, True otherwise.
        """
        k = self.getBuffer(0)
        if k == Config.NONEXIST:
            return False
        self.buffer.pop(0)
        self.stack.append(k)
        return True

    def removeSecondTopStack(self):
        """
            Remove the element directly below the top of the stack.
            Returns False if the stack has fewer than two elements.
        """
        nStack = self.getStackSize()
        if nStack < 2:
            return False
        self.stack.pop(-2)
        return True

    def removeTopStack(self):
        """
            Remove the top element of the stack.
            Returns False if the stack has one element or none, so the last
            remaining element is never removed.
        """
        nStack = self.getStackSize()
        if nStack <= 1:
            return False
        self.stack.pop()
        return True

    def getStackSize(self):
        """
            Number of elements on the stack.
        """
        return len(self.stack)

    def getBufferSize(self):
        """
            Number of elements on the buffer.
        """
        return len(self.buffer)

    def getSentenceSize(self):
        """
            Number of tokens in the sentence.
        """
        return len(self.sentence)

    def getHead(self, k):
        """
            Head of token k as stored in the tree.
        """
        return self.tree.getHead(k)

    def getLabel(self, k):
        """
            Label of token k as stored in the tree.
        """
        return self.tree.getLabel(k)

    def getStack(self, k):
        """
            Get the token index of the kth word on the stack, counted from
            the top (k = 0 is the top of the stack).
            If stack doesn't have an element at this index, return Config.NONEXIST
        """
        nStack = self.getStackSize()
        if 0 <= k < nStack:
            return self.stack[nStack - 1 - k]
        else:
            return Config.NONEXIST

    def getBuffer(self, k):
        """
            Get the token index of the kth word on the buffer (k = 0 is the
            front of the buffer).
            If buffer doesn't have an element at this index, return Config.NONEXIST
        """
        if 0 <= k < self.getBufferSize():
            return self.buffer[k]
        else:
            return Config.NONEXIST

    def getWord(self, k):
        """
            Get the word at index k.
            Index 0 yields Config.ROOT, index k >= 1 yields the 'word' of
            sentence[k - 1], and any other index yields Config.NULL.
        """
        if k == 0:
            return Config.ROOT
        else:
            k -= 1

        if k < 0 or k >= len(self.sentence):
            return Config.NULL
        else:
            return self.sentence[k]['word']

    def getPOS(self, k):
        """
            Get the POS at index k.
            Index 0 yields Config.ROOT, index k >= 1 yields the 'POS' of
            sentence[k - 1], and any other index yields Config.NULL.
        """
        if k == 0:
            return Config.ROOT
        else:
            k -= 1

        if k < 0 or k >= len(self.sentence):
            return Config.NULL
        else:
            return self.sentence[k]['POS']

    def addArc(self, h, t, l):
        """
            Add an arc with the label l from the head node h to the dependent node t.
        """
        self.tree.set(t, h, l)

    def getLeftChild(self, k, cnt):
        """
            Get cnt-th leftmost child of k.
            (i.e., if cnt = 1, the leftmost child of k will be returned,
                   if cnt = 2, the 2nd leftmost child of k will be returned.)
            Only tokens 1 .. k - 1 are considered. Returns Config.NONEXIST
            if k is outside 0 .. tree.n or there is no such child.
        """
        if k < 0 or k > self.tree.n:
            return Config.NONEXIST

        c = 0
        for i in range(1, k):
            if self.tree.getHead(i) == k:
                c += 1
                if c == cnt:
                    return i
        return Config.NONEXIST

    def getRightChild(self, k, cnt):
        """
            Get cnt-th rightmost child of k.
            (i.e., if cnt = 1, the rightmost child of k will be returned,
                   if cnt = 2, the 2nd rightmost child of k will be returned.)
            Only tokens k + 1 .. tree.n are considered. Returns
            Config.NONEXIST if k is outside 0 .. tree.n or there is no such
            child.
        """
        if k < 0 or k > self.tree.n:
            return Config.NONEXIST

        c = 0
        for i in range(self.tree.n, k, -1):
            if self.tree.getHead(i) == k:
                c += 1
                if c == cnt:
                    return i
        return Config.NONEXIST

    def hasOtherChild(self, k, goldTree):
        """
            Return True if some token i in 1 .. tree.n has head k in
            goldTree but a different head in the current tree.
        """
        for i in range(1, self.tree.n + 1):
            if goldTree.getHead(i) == k and self.tree.getHead(i) != k:
                return True
        return False

    def getStr(self):
        """
            Returns a string that concatenates all elements on the stack and buffer, and head / label
            of every token 1 .. tree.n, in the form
            "[S]s0,s1[B]b0,b1[H]head1(label1),head2(label2)".
        """
        # Stack / buffer entries and heads are token indices, so they are
        # converted with str() before joining; plain '+' concatenation
        # raises TypeError for ints.
        stackPart = ",".join(str(k) for k in self.stack)
        bufferPart = ",".join(str(k) for k in self.buffer)
        arcPart = ",".join(
            "{}({})".format(self.getHead(i), self.getLabel(i))
            for i in range(1, self.tree.n + 1)
        )
        return "[S]" + stackPart + "[B]" + bufferPart + "[H]" + arcPart