예제 #1
0
 def test_GetExtension(self):
     # Each raw file name is paired with the extension getExtension() is
     # expected to report for it.
     expectedExtensions = (
         ("1234.train", "train"),
         ("0000.test", "test"),
         ("0003.dev", "dev"),
     )
     for rawFileName, extension in expectedExtensions:
         fileDescription = FileDescription("mypath", rawFileName)
         self.assertEqual(extension, fileDescription.getExtension())
 def testGenerateAnnotatedSentence2(self):
     # Each tree file in ../trees2/ is paired with the annotated English
     # sentence its parse tree is expected to produce.
     expectedSentences = [
         ("0000.dev",
          "{english=The}{posTag=DT} {english=complicated}{posTag=VBN} {english=language}{posTag=NN} {english=in}{posTag=IN} {english=the}{posTag=DT} {english=huge}{posTag=JJ} {english=new}{posTag=JJ} {english=law}{posTag=NN} {english=has}{posTag=VBZ} {english=muddied}{posTag=VBN} {english=the}{posTag=DT} {english=fight}{posTag=NN} {english=.}{posTag=.}"),
         ("0001.dev",
          "{english=The}{posTag=DT} {english=Ways}{posTag=NNP} {english=and}{posTag=CC} {english=Means}{posTag=NNP} {english=Committee}{posTag=NNP} {english=will}{posTag=MD} {english=hold}{posTag=VB} {english=a}{posTag=DT} {english=hearing}{posTag=NN} {english=on}{posTag=IN} {english=the}{posTag=DT} {english=bill}{posTag=NN} {english=next}{posTag=IN} {english=Tuesday}{posTag=NNP} {english=.}{posTag=.}"),
         ("0002.dev",
          "{english=We}{posTag=PRP} {english='re}{posTag=VBP} {english=about}{posTag=IN} {english=to}{posTag=TO} {english=see}{posTag=VB} {english=if}{posTag=IN} {english=advertising}{posTag=NN} {english=works}{posTag=VBZ} {english=.}{posTag=.}"),
         ("0003.dev",
          "{english=This}{posTag=DT} {english=time}{posTag=NN} {english=around}{posTag=RP} {english=,}{posTag=,} {english=they}{posTag=PRP} {english='re}{posTag=VBP} {english=moving}{posTag=VBG} {english=even}{posTag=RB} {english=faster}{posTag=RBR} {english=.}{posTag=.}"),
         ("0004.dev",
          "{english=Shearson}{posTag=NNP} {english=Lehman}{posTag=NNP} {english=Hutton}{posTag=NNP} {english=Inc.}{posTag=NNP} {english=by}{posTag=IN} {english=yesterday}{posTag=NN} {english=afternoon}{posTag=NN} {english=had}{posTag=VBD} {english=already}{posTag=RB} {english=written}{posTag=VBN} {english=new}{posTag=JJ} {english=TV}{posTag=NN} {english=ads}{posTag=NNS} {english=.}{posTag=.}"),
         ("0005.dev",
          "{english=This}{posTag=DT} {english=time}{posTag=NN} {english=,}{posTag=,} {english=the}{posTag=DT} {english=firms}{posTag=NNS} {english=were}{posTag=VBD} {english=ready}{posTag=JJ} {english=.}{posTag=.}"),
         ("0006.dev",
          "{english=``}{posTag=``} {english=To}{posTag=TO} {english=maintain}{posTag=VB} {english=that}{posTag=DT} {english=dialogue}{posTag=NN} {english=is}{posTag=VBZ} {english=absolutely}{posTag=RB} {english=crucial}{posTag=JJ} {english=.}{posTag=.}"),
         ("0007.dev",
          "{english=It}{posTag=PRP} {english=would}{posTag=MD} {english=have}{posTag=VB} {english=been}{posTag=VBN} {english=too}{posTag=RB} {english=late}{posTag=JJ} {english=to}{posTag=TO} {english=think}{posTag=VB} {english=about}{posTag=IN} {english=on}{posTag=IN} {english=Friday}{posTag=NNP} {english=.}{posTag=.}"),
         ("0008.dev",
          "{english=We}{posTag=PRP} {english=had}{posTag=VBD} {english=to}{posTag=TO} {english=think}{posTag=VB} {english=about}{posTag=IN} {english=it}{posTag=PRP} {english=ahead}{posTag=RB} {english=of}{posTag=IN} {english=time}{posTag=NN} {english=.}{posTag=.} {english=''}{posTag=''}"),
         ("0009.dev",
          "{english=It}{posTag=PRP} {english=goes}{posTag=VBZ} {english=on}{posTag=RB} {english=to}{posTag=TO} {english=plug}{posTag=VB} {english=a}{posTag=DT} {english=few}{posTag=JJ} {english=diversified}{posTag=JJ} {english=Fidelity}{posTag=NNP} {english=funds}{posTag=NNS} {english=by}{posTag=IN} {english=name}{posTag=NN} {english=.}{posTag=.}"),
     ]
     for rawFileName, expected in expectedSentences:
         # self.tree is reassigned for every file, exactly as a sequence of
         # individual loads would leave it.
         self.tree = ParseTreeDrawable(
             FileDescription("../trees2/", rawFileName))
         annotated = self.tree.generateAnnotatedSentence("english")
         self.assertEqual(expected, str(annotated))
예제 #3
0
 def __init__(self, fileDescription, path: str = None):
     """Load the tree from the file identified by the arguments.

     fileDescription -- a FileDescription or, when path is None, a plain
         file name string.
     path -- optional folder. When given, a new FileDescription built
         from path and the extension and index of fileDescription is
         stored on the tree.

     When fileDescription is a plain string, no file description is
     stored by this constructor.
     """
     if path is not None:
         relocated = FileDescription(path,
                                     fileDescription.getExtension(),
                                     fileDescription.getIndex())
         self.__fileDescription = relocated
         # The file name is resolved against the path of the description
         # that was passed in, not against the new path.
         sourcePath = fileDescription.getPath()
         self.readFromFile(relocated.getFileName(sourcePath))
         return
     if isinstance(fileDescription, FileDescription):
         self.__fileDescription = fileDescription
         sourcePath = fileDescription.getPath()
         self.readFromFile(fileDescription.getFileName(sourcePath))
     elif isinstance(fileDescription, str):
         self.readFromFile(fileDescription)
예제 #4
0
 def test_GetFileName(self):
     # Covers getFileName() with and without an explicit path, then
     # getFileNameWithIndex() with the description's own extension and
     # with an explicit "dev" extension.
     fileDescription = FileDescription("mypath", "0003.train")
     self.assertEqual("mypath/0003.train", fileDescription.getFileName())
     self.assertEqual("newpath/0003.train",
                      fileDescription.getFileName("newpath"))
     # (index, expected zero-padded form of that index in the file name)
     paddedIndexes = ((0, "0000"), (20, "0020"), (103, "0103"))
     for index, padded in paddedIndexes:
         self.assertEqual(
             "newpath/" + padded + ".train",
             fileDescription.getFileNameWithIndex("newpath", index))
     for index, padded in paddedIndexes:
         self.assertEqual(
             "newpath/" + padded + ".dev",
             fileDescription.getFileNameWithIndex("newpath", index, "dev"))
 def setUp(self) -> None:
     """Load 0000.dev .. 0009.dev from ../trees/ into self.tree0 .. self.tree9."""
     for treeNo in range(10):
         rawFileName = "000" + str(treeNo) + ".dev"
         drawableTree = ParseTreeDrawable(
             FileDescription("../trees/", rawFileName))
         setattr(self, "tree" + str(treeNo), drawableTree)
예제 #6
0
 def __init__(self, folder: str = None, pattern: str = None):
     """Collect the parse trees stored in the files under a folder.

     folder -- directory that is walked recursively with os.walk. When it
         is None no files are read and parseTrees stays empty.
     pattern -- optional substring; when given, only files whose joined
         path (root + file name) contains it are loaded.

     Only files whose name starts with one or more digits followed by a
     dot are considered. A file whose tree has no root after loading is
     skipped. Every kept tree gets the file path as its name and a
     FileDescription built from its directory and file name.
     """
     self.parseTrees = []
     # The guard must test the argument; testing the builtin ``str`` is
     # always true and lets os.walk(None) raise TypeError.
     if folder is None:
         return
     for root, _dirs, files in os.walk(folder):
         for file in files:
             fileName = os.path.join(root, file)
             if pattern is not None and pattern not in fileName:
                 continue
             # re.match anchors at the start of the bare file name.
             if not re.match(r"\d+\.", file):
                 continue
             parseTree = ParseTreeDrawable(fileName)
             if parseTree.getRoot() is None:
                 continue
             parseTree.setName(fileName)
             parseTree.setFileDescription(FileDescription(root, file))
             self.parseTrees.append(parseTree)
예제 #7
0
 def test_GetIndex(self):
     # Each raw file name is paired with the integer index getIndex() is
     # expected to report for it.
     expectedIndexes = (
         ("1234.train", 1234),
         ("0000.test", 0),
         ("0003.dev", 3),
         ("0020.train", 20),
         ("0304.dev", 304),
     )
     for rawFileName, index in expectedIndexes:
         fileDescription = FileDescription("mypath", rawFileName)
         self.assertEqual(index, fileDescription.getIndex())
예제 #8
0
class ParseTreeDrawable(ParseTree):
    """A ParseTree whose root is a ParseNodeDrawable loaded from a file.

    The tree is built from the first line of a file. The FileDescription
    kept with the tree is what reload(), nextTree(), previousTree(),
    save() and saveWithPath() use to locate that file.
    """

    __fileDescription: FileDescription
    __name: str

    def __init__(self, fileDescription, path: str = None):
        """Load the tree from the file identified by the arguments.

        fileDescription -- a FileDescription or, when path is None, a
            plain file name string.
        path -- optional folder. When given, a new FileDescription built
            from path and the extension and index of fileDescription is
            stored on the tree.

        When fileDescription is a plain string, no file description is
        stored here; call setFileDescription() before using the methods
        that depend on it.
        """
        if path is None:
            if isinstance(fileDescription, FileDescription):
                self.__fileDescription = fileDescription
                self.readFromFile(
                    self.__fileDescription.getFileName(
                        fileDescription.getPath()))
            elif isinstance(fileDescription, str):
                self.readFromFile(fileDescription)
        else:
            self.__fileDescription = FileDescription(
                path, fileDescription.getExtension(),
                fileDescription.getIndex())
            # The file name is resolved against the path of the description
            # that was passed in, not against the new path.
            self.readFromFile(
                self.__fileDescription.getFileName(fileDescription.getPath()))

    def setFileDescription(self, fileDescription: FileDescription):
        """Replace the file description stored with this tree."""
        self.__fileDescription = fileDescription

    def getFileDescription(self) -> FileDescription:
        """Return the file description stored with this tree."""
        return self.__fileDescription

    def reload(self):
        """Read the tree again from the file named by the file description."""
        self.readFromFile(
            self.__fileDescription.getFileName(
                self.__fileDescription.getPath()))

    def readFromFile(self, fileName: str):
        """Build the root node from the first line of a UTF-8 file.

        If the line contains both "(" and ")", the text between the first
        "(" and the last ")" is stripped and handed to ParseNodeDrawable.
        Otherwise the root is set to None.
        """
        # The context manager closes the file even if reading raises.
        with open(fileName, encoding="utf8") as inputFile:
            line = inputFile.readline().strip()
        if "(" in line and ")" in line:
            line = line[line.index("(") + 1:line.rindex(")")].strip()
            self.root = ParseNodeDrawable(None, line, False, 0)
        else:
            self.root = None

    def setName(self, name: str):
        """Store a name for this tree."""
        self.__name = name

    def getName(self) -> str:
        """Return the name stored with setName()."""
        return self.__name

    def nextTree(self, count: int):
        """Move count files forward and reload.

        Nothing happens unless the file description's nextFileExists(count)
        is true.
        """
        if self.__fileDescription.nextFileExists(count):
            self.__fileDescription.addToIndex(count)
            self.reload()

    def previousTree(self, count: int):
        """Move count files backward and reload.

        Nothing happens unless the file description's
        previousFileExists(count) is true.
        """
        if self.__fileDescription.previousFileExists(count):
            self.__fileDescription.addToIndex(-count)
            self.reload()

    def __writeTo(self, fileName: str):
        """Write the tree as "( <tree> )" plus a newline to a UTF-8 file."""
        # Serialise before opening: mode 'w' truncates the file, so a
        # failure while building the text must not destroy existing content.
        content = "( " + self.__str__() + " )\n"
        with open(fileName, mode='w', encoding="utf8") as outputFile:
            outputFile.write(content)

    def save(self):
        """Write the tree to the file named by the file description."""
        self.__writeTo(self.__fileDescription.getFileName())

    def saveWithPath(self, newPath: str):
        """Write the tree to the file description's file name under newPath."""
        self.__writeTo(self.__fileDescription.getFileName(newPath))

    def maxDepth(self) -> int:
        """Return the root's maxDepth().

        Returns None when the root is not a ParseNodeDrawable (for example
        when the tree is empty).
        """
        if isinstance(self.root, ParseNodeDrawable):
            return self.root.maxDepth()

    def moveLeft(self, node: ParseNode):
        """Ask the root to move node left; no-op for the root or an empty tree."""
        # readFromFile() can leave the root as None, so guard before
        # delegating, as the layer methods do.
        if self.root is not None and self.root != node:
            self.root.moveLeft(node)

    def moveRight(self, node: ParseNode):
        """Ask the root to move node right; no-op for the root or an empty tree."""
        if self.root is not None and self.root != node:
            self.root.moveRight(node)

    def layerExists(self, viewLayerType: ViewLayerType) -> bool:
        """Return the root's layerExists(viewLayerType).

        Returns False when the root is not a ParseNodeDrawable.
        """
        if isinstance(self.root, ParseNodeDrawable):
            return self.root.layerExists(viewLayerType)
        return False

    def layerAll(self, viewLayerType: ViewLayerType) -> bool:
        """Return the root's layerAll(viewLayerType).

        Returns False when the root is not a ParseNodeDrawable.
        """
        if isinstance(self.root, ParseNodeDrawable):
            return self.root.layerAll(viewLayerType)
        return False

    def clearLayer(self, viewLayerType: ViewLayerType):
        """Call the root's clearLayer(viewLayerType) if the root is drawable."""
        if isinstance(self.root, ParseNodeDrawable):
            self.root.clearLayer(viewLayerType)

    def generateAnnotatedSentence(self,
                                  language: str = None) -> AnnotatedSentence:
        """Build an AnnotatedSentence from nodes collected from the tree.

        language -- when None, nodes matching IsTurkishLeafNode are
            collected and every word of each node's layer info is added
            through toAnnotatedWord(). Otherwise nodes matching
            IsEnglishLeafNode are collected and each contributes one word
            "{<language>=<node name>}{posTag=<parent name>}".
        """
        sentence = AnnotatedSentence()
        if language is None:
            leafList = NodeDrawableCollector(
                self.root, IsTurkishLeafNode()).collect()
            for parseNode in leafList:
                if isinstance(parseNode, ParseNodeDrawable):
                    layers = parseNode.getLayerInfo()
                    for i in range(layers.getNumberOfWords()):
                        sentence.addWord(layers.toAnnotatedWord(i))
        else:
            leafList = NodeDrawableCollector(
                self.root, IsEnglishLeafNode()).collect()
            for parseNode in leafList:
                if isinstance(parseNode, ParseNodeDrawable):
                    # The tag is taken from the parent node's data.
                    newWord = AnnotatedWord(
                        "{" + language + "=" + parseNode.getData().getName() +
                        "}{posTag=" +
                        parseNode.getParent().getData().getName() + "}")
                    sentence.addWord(newWord)
        return sentence

    def extractNodesWithVerbs(self, wordNet: WordNet) -> list:
        """Return the nodes collected with the IsVerbNode(wordNet) condition."""
        nodeDrawableCollector = NodeDrawableCollector(self.root,
                                                      IsVerbNode(wordNet))
        return nodeDrawableCollector.collect()

    def extractNodesWithPredicateVerbs(self, wordNet: WordNet) -> list:
        """Return the nodes collected with IsPredicateVerbNode(wordNet)."""
        nodeDrawableCollector = NodeDrawableCollector(
            self.root, IsPredicateVerbNode(wordNet))
        return nodeDrawableCollector.collect()