def __init__(self, folder: str, pattern: str):
    """
    Set up the generator by loading the annotated corpus it works on.

    An AnnotatedCorpus is built from the given folder and file pattern and
    stored on the instance.

    PARAMETERS
    ----------
    folder : str
        Directory where the sentence files reside.
    pattern : str
        Pattern of the files to be included in the corpus. Use "." for all files.
    """
    loadedCorpus = AnnotatedCorpus(folder, pattern)
    self.__annotatedCorpus = loadedCorpus
class SentenceDisambiguationCorpusGenerator:
    """
    Builds a DisambiguationCorpus out of the sentences of an AnnotatedCorpus.
    """

    # Corpus loaded in the constructor; read by generate().
    __annotatedCorpus: AnnotatedCorpus

    def __init__(self, folder: str, pattern: str):
        """
        Load the annotated corpus that generate() will later convert.

        PARAMETERS
        ----------
        folder : str
            Directory where the sentence files reside.
        pattern : str
            Pattern of the files to be included in the corpus. Use "." for all files.
        """
        loadedCorpus = AnnotatedCorpus(folder, pattern)
        self.__annotatedCorpus = loadedCorpus

    def generate(self) -> DisambiguationCorpus:
        """
        Convert the loaded annotated corpus into a morphological disambiguation corpus.

        For every sentence of the annotated corpus a new AnnotatedSentence is created and
        filled with one DisambiguatedWord (built from the word's name and parse) per word
        that is an AnnotatedWord; words of any other type are skipped. Each resulting
        sentence is added to the output corpus, even when it ends up with no words.

        RETURNS
        -------
        DisambiguationCorpus
            Created disambiguation corpus.
        """
        source = self.__annotatedCorpus
        result = DisambiguationCorpus()
        for sentenceIndex in range(source.sentenceCount()):
            original = source.getSentence(sentenceIndex)
            converted = AnnotatedSentence()
            # Lazily fetch the words one by one, in index order.
            words = (original.getWord(wordIndex) for wordIndex in range(original.wordCount()))
            for word in words:
                if not isinstance(word, AnnotatedWord):
                    continue
                converted.addWord(DisambiguatedWord(word.getName(), word.getParse()))
            result.addSentence(converted)
        return result
def __init__(self, folder: str, pattern: str, instanceGenerator: InstanceGenerator):
    """
    Set up the data set generator with a sentence corpus and an instance generator.

    An AnnotatedCorpus is built from the given folder and file pattern and stored on the
    instance; the supplied instance generator is stored as-is.

    PARAMETERS
    ----------
    folder : str
        Directory where the corpus files reside.
    pattern : str
        Pattern of the files to be included in the corpus. Use "." for all files.
    instanceGenerator : InstanceGenerator
        The instance generator used to generate the dataset.
    """
    # Load the corpus first so the generator is only stored once loading has succeeded.
    loadedCorpus = AnnotatedCorpus(folder, pattern)
    self.__corpus = loadedCorpus
    self.instanceGenerator = instanceGenerator
def test_Accuracy(self):
    """
    Check the number of words Lesk labels with the same semantic value as the stored corpus.

    Every sentence loaded from "../../new-sentences" is passed to Lesk.autoSemantic, then
    each of its words is compared with the word at the same position in the sentence of
    the same index loaded from "../../old-sentences". A word counts as correct when its
    semantic value is not None and equals the other word's semantic value.
    The test expects 549 words in total, 268 of them correct.
    """
    disambiguator = Lesk(self.wordNet, self.fsm)
    newCorpus = AnnotatedCorpus("../../new-sentences")
    oldCorpus = AnnotatedCorpus("../../old-sentences")
    wordTotal = 0
    matchTotal = 0
    for sentenceIndex in range(newCorpus.sentenceCount()):
        newSentence = newCorpus.getSentence(sentenceIndex)
        disambiguator.autoSemantic(newSentence)
        oldSentence = oldCorpus.getSentence(sentenceIndex)
        for wordIndex in range(newSentence.wordCount()):
            wordTotal += 1
            newWord = newSentence.getWord(wordIndex)
            oldWord = oldSentence.getWord(wordIndex)
            # Words left without a semantic value never count as correct.
            if newWord.getSemantic() is None:
                continue
            if newWord.getSemantic() == oldWord.getSemantic():
                matchTotal += 1
    self.assertEqual(549, wordTotal)
    self.assertEqual(268, matchTotal)