def __init__(self, folder: str, pattern: str):
    """
    Constructor for the disambiguation corpus generator. Creates a TreeBankDrawable from the given
    directory and file pattern and keeps it on the instance.

    PARAMETERS
    ----------
    folder : str
        Directory where the treebank files reside.
    pattern : str
        Pattern of the tree files to be included in the treebank. Use "." for all files.
    """
    treeBank = TreeBankDrawable(folder, pattern)
    self.__treeBank = treeBank
def test_Accuracy(self):
    # Annotate every tree of the first treebank with Lesk, then count how many Turkish leaves carry
    # the same non-None SEMANTICS layer as the leaf at the same position in the second treebank.
    disambiguator = Lesk(self.wordNet, self.fsm)
    annotatedBank = TreeBankDrawable("../../new-trees")
    referenceBank = TreeBankDrawable("../../old-trees")
    matched = 0
    seen = 0
    for treeIndex in range(annotatedBank.size()):
        candidateTree = annotatedBank.get(treeIndex)
        referenceTree = referenceBank.get(treeIndex)
        disambiguator.autoSemantic(candidateTree)
        candidateLeaves = NodeDrawableCollector(
            candidateTree.getRoot(), IsTurkishLeafNode()).collect()
        referenceLeaves = NodeDrawableCollector(
            referenceTree.getRoot(), IsTurkishLeafNode()).collect()
        for leafIndex, candidateLeaf in enumerate(candidateLeaves):
            seen += 1
            # Indexed lookup (not zip) so a shorter reference list still raises IndexError.
            referenceLeaf = referenceLeaves[leafIndex]
            if candidateLeaf.getLayerData(ViewLayerType.SEMANTICS) is None:
                continue
            if candidateLeaf.getLayerData(ViewLayerType.SEMANTICS) == \
                    referenceLeaf.getLayerData(ViewLayerType.SEMANTICS):
                matched += 1
    self.assertEqual(475, seen)
    self.assertEqual(247, matched)
def __init__(self, folder: str, pattern: str, instanceGenerator: InstanceGenerator):
    """
    Constructor for the DataSetGenerator. Creates a TreeBankDrawable from the given directory and file
    pattern, and stores it together with the supplied instance generator.

    PARAMETERS
    ----------
    folder : str
        Directory where the treebank files reside.
    pattern : str
        Pattern of the tree files to be included in the treebank. Use "." for all files.
    instanceGenerator : InstanceGenerator
        The instance generator used to generate the dataset.
    """
    treeBank = TreeBankDrawable(folder, pattern)
    self.__treeBank = treeBank
    self.instanceGenerator = instanceGenerator
class TreeDisambiguationCorpusGenerator:

    __treeBank: TreeBankDrawable

    def __init__(self, folder: str, pattern: str):
        """
        Constructor for the TreeDisambiguationCorpusGenerator. Creates a TreeBankDrawable from the given
        directory and file pattern and keeps it on the instance.

        PARAMETERS
        ----------
        folder : str
            Directory where the treebank files reside.
        pattern : str
            Pattern of the tree files to be included in the treebank. Use "." for all files.
        """
        treeBank = TreeBankDrawable(folder, pattern)
        self.__treeBank = treeBank

    def generate(self) -> DisambiguationCorpus:
        """
        Builds a disambiguation corpus from the treebank. Every parse tree for which
        layerAll(ViewLayerType.INFLECTIONAL_GROUP) holds is turned into an annotated sentence; each
        AnnotatedWord of that sentence is copied into a new sentence as a DisambiguatedWord made of the
        word's name and parse. Trees failing the layer check are skipped.

        RETURNS
        -------
        DisambiguationCorpus
            Created disambiguation corpus.
        """
        result = DisambiguationCorpus()
        treeBank = self.__treeBank
        for treeIndex in range(treeBank.size()):
            tree = treeBank.get(treeIndex)
            if not tree.layerAll(ViewLayerType.INFLECTIONAL_GROUP):
                continue
            annotated = tree.generateAnnotatedSentence()
            disambiguated = AnnotatedSentence()
            for wordIndex in range(annotated.wordCount()):
                word = annotated.getWord(wordIndex)
                # Words that are not AnnotatedWord instances are left out of the output sentence.
                if not isinstance(word, AnnotatedWord):
                    continue
                disambiguated.addWord(
                    DisambiguatedWord(word.getName(), word.getParse()))
            result.addSentence(disambiguated)
        return result
class NERCorpusGenerator:

    __treeBank: TreeBankDrawable

    def __init__(self, folder: str, pattern: str):
        """
        Constructor for the NERCorpusGenerator. Creates a TreeBankDrawable from the given directory and
        file pattern and keeps it on the instance.

        PARAMETERS
        ----------
        folder : str
            Directory where the treebank files reside.
        pattern : str
            Pattern of the tree files to be included in the treebank. Use "." for all files.
        """
        treeBank = TreeBankDrawable(folder, pattern)
        self.__treeBank = treeBank

    def generate(self) -> NERCorpus:
        """
        Builds a NER corpus from the treebank. For every parse tree for which
        layerAll(ViewLayerType.NER) holds, the annotated sentence generated from the tree is added to
        the corpus. Trees failing the layer check are skipped.

        RETURNS
        -------
        NERCorpus
            Created NER corpus.
        """
        result = NERCorpus()
        treeBank = self.__treeBank
        for treeIndex in range(treeBank.size()):
            tree = treeBank.get(treeIndex)
            if not tree.layerAll(ViewLayerType.NER):
                continue
            result.addSentence(tree.generateAnnotatedSentence())
        return result
class DataSetGenerator:

    __treeBank: TreeBankDrawable
    instanceGenerator: InstanceGenerator

    def __init__(self, folder: str, pattern: str, instanceGenerator: InstanceGenerator):
        """
        Constructor for the DataSetGenerator. Creates a TreeBankDrawable from the given directory and
        file pattern, and stores it together with the supplied instance generator.

        PARAMETERS
        ----------
        folder : str
            Directory where the treebank files reside.
        pattern : str
            Pattern of the tree files to be included in the treebank. Use "." for all files.
        instanceGenerator : InstanceGenerator
            The instance generator used to generate the dataset.
        """
        treeBank = TreeBankDrawable(folder, pattern)
        self.__treeBank = treeBank
        self.instanceGenerator = instanceGenerator

    def setInstanceGenerator(self, instanceGenerator: InstanceGenerator):
        """
        Replaces the instance generator used by this dataset generator.

        PARAMETERS
        ----------
        instanceGenerator : InstanceGenerator
            New instance generator.
        """
        self.instanceGenerator = instanceGenerator

    def generateInstanceListFromTree(self, parseTree: ParseTreeDrawable) -> list:
        """
        Produces the instances of a single tree. The tree is converted to an annotated sentence and the
        instance generator is asked for an instance at every word index; None results are dropped.

        PARAMETERS
        ----------
        parseTree : ParseTreeDrawable
            Parse tree for which a set of instances will be created.

        RETURNS
        -------
        list
            A list of instances, in word order.
        """
        sentence = parseTree.generateAnnotatedSentence()
        # Lazy generator: each instance is produced right before it is filtered, word by word.
        candidates = (
            self.instanceGenerator.generateInstanceFromSentence(sentence, wordIndex)
            for wordIndex in range(sentence.wordCount())
        )
        return [instance for instance in candidates if instance is not None]

    def generate(self) -> DataSet:
        """
        Creates a dataset from the treebank by adding the instance list of every parse tree, obtained
        through generateInstanceListFromTree, to a new DataSet.

        RETURNS
        -------
        DataSet
            Created dataset.
        """
        result = DataSet()
        treeBank = self.__treeBank
        for treeIndex in range(treeBank.size()):
            tree = treeBank.get(treeIndex)
            instances = self.generateInstanceListFromTree(tree)
            result.addInstanceList(instances)
        return result
def __init__(self, folder1: str, folder2: str, pattern: str = None):
    """
    Constructor for the parallel treebank. Creates one TreeBankDrawable per folder, using the same
    pattern for both, and then calls removeDifferentTrees.

    PARAMETERS
    ----------
    folder1 : str
        Directory of the "from" treebank.
    folder2 : str
        Directory of the "to" treebank.
    pattern : str
        Pattern passed unchanged to both TreeBankDrawable constructors. Defaults to None.
    """
    source = TreeBankDrawable(folder1, pattern)
    self.__fromTreeBank = source
    target = TreeBankDrawable(folder2, pattern)
    self.__toTreeBank = target
    self.removeDifferentTrees()
class ParallelTreeBankDrawable:

    __fromTreeBank: TreeBankDrawable
    __toTreeBank: TreeBankDrawable

    def __init__(self, folder1: str, folder2: str, pattern: str = None):
        """
        Constructor for the parallel treebank. Creates one TreeBankDrawable per folder, using the same
        pattern for both, and then calls removeDifferentTrees.

        PARAMETERS
        ----------
        folder1 : str
            Directory of the "from" treebank.
        folder2 : str
            Directory of the "to" treebank.
        pattern : str
            Pattern passed unchanged to both TreeBankDrawable constructors. Defaults to None.
        """
        source = TreeBankDrawable(folder1, pattern)
        self.__fromTreeBank = source
        target = TreeBankDrawable(folder2, pattern)
        self.__toTreeBank = target
        self.removeDifferentTrees()

    def removeDifferentTrees(self):
        """
        Walks both treebanks with one cursor each and compares the names of the trees under the cursors.
        The tree with the smaller name is removed from its treebank; on equal names both cursors
        advance. Whatever remains behind either cursor once the other treebank is exhausted is removed
        as well.

        NOTE(review): the pairwise walk presumably relies on both treebanks being ordered by tree
        name - confirm against TreeBankDrawable.
        """
        source = self.__fromTreeBank
        target = self.__toTreeBank
        fromIndex = 0
        toIndex = 0
        while fromIndex < source.size() and toIndex < target.size():
            fromName = source.get(fromIndex).getName()
            toName = target.get(toIndex).getName()
            if fromName < toName:
                source.removeTree(fromIndex)
            elif fromName > toName:
                target.removeTree(toIndex)
            else:
                fromIndex += 1
                toIndex += 1
        # Drain the unmatched tail of each treebank; the index stays fixed while the size shrinks.
        while fromIndex < source.size():
            source.removeTree(fromIndex)
        while toIndex < target.size():
            target.removeTree(toIndex)

    def size(self) -> int:
        """
        RETURNS
        -------
        int
            Number of trees in the "from" treebank.
        """
        source = self.__fromTreeBank
        return source.size()

    def fromTree(self, index: int) -> ParseTreeDrawable:
        """
        PARAMETERS
        ----------
        index : int
            Position of the tree.

        RETURNS
        -------
        ParseTreeDrawable
            Tree at the given position of the "from" treebank.
        """
        source = self.__fromTreeBank
        return source.get(index)

    def toTree(self, index: int) -> ParseTreeDrawable:
        """
        PARAMETERS
        ----------
        index : int
            Position of the tree.

        RETURNS
        -------
        ParseTreeDrawable
            Tree at the given position of the "to" treebank.
        """
        target = self.__toTreeBank
        return target.get(index)

    def fromTreeBank(self) -> TreeBankDrawable:
        """
        RETURNS
        -------
        TreeBankDrawable
            The "from" treebank.
        """
        return self.__fromTreeBank

    def toTreeBank(self) -> TreeBankDrawable:
        """
        RETURNS
        -------
        TreeBankDrawable
            The "to" treebank.
        """
        return self.__toTreeBank