def __init__(self, folder: str, pattern: str):
        """
        Constructor for the DisambiguationCorpusGenerator. Builds an AnnotatedCorpus from the given data directory
        and file pattern and keeps it as the corpus this generator works on.

        PARAMETERS
        ----------
        folder : str
            Directory where the sentence files reside.
        pattern : str
            Pattern of the files to be included in the corpus. Use "." for all files.
        """
        loadedCorpus = AnnotatedCorpus(folder, pattern)
        self.__annotatedCorpus = loadedCorpus
class SentenceDisambiguationCorpusGenerator:
    """
    Generates a DisambiguationCorpus from the sentences of an AnnotatedCorpus.
    """

    __annotatedCorpus: AnnotatedCorpus

    def __init__(self, folder: str, pattern: str):
        """
        Constructor for the SentenceDisambiguationCorpusGenerator. Builds an AnnotatedCorpus from the given data
        directory and file pattern and keeps it as the source corpus for generate().

        PARAMETERS
        ----------
        folder : str
            Directory where the sentence files reside.
        pattern : str
            Pattern of the files to be included in the corpus. Use "." for all files.
        """
        self.__annotatedCorpus = AnnotatedCorpus(folder, pattern)

    def generate(self) -> DisambiguationCorpus:
        """
        Creates a morphological disambiguation corpus from the annotated corpus. Every source sentence yields one
        sentence in the result (possibly empty); every word that is an AnnotatedWord is added to it as a
        DisambiguatedWord built from the word's name and parse. Words of any other type are skipped.

        RETURNS
        -------
        DisambiguationCorpus
            Created disambiguation corpus.
        """
        source = self.__annotatedCorpus
        result = DisambiguationCorpus()
        for sentenceIndex in range(source.sentenceCount()):
            original = source.getSentence(sentenceIndex)
            converted = AnnotatedSentence()
            for wordIndex in range(original.wordCount()):
                word = original.getWord(wordIndex)
                # Only annotated words carry the parse needed below.
                if not isinstance(word, AnnotatedWord):
                    continue
                name = word.getName()
                parse = word.getParse()
                converted.addWord(DisambiguatedWord(name, parse))
            result.addSentence(converted)
        return result
예제 #3
0
    def __init__(self, folder: str, pattern: str,
                 instanceGenerator: InstanceGenerator):
        """
        Constructor for the AnnotatedDataSetGenerator. Builds an AnnotatedCorpus from the given data directory and
        file pattern, and stores the instance generator that is handed in.

        PARAMETERS
        ----------
        folder : str
            Directory where the corpus files reside.
        pattern : str
            Pattern of the files to be included in the corpus. Use "." for all files.
        instanceGenerator : InstanceGenerator
            The instance generator used to generate the dataset.
        """
        sentenceCorpus = AnnotatedCorpus(folder, pattern)
        self.__corpus = sentenceCorpus
        self.instanceGenerator = instanceGenerator
예제 #4
0
 def test_Accuracy(self):
     # Label every sentence of the "new" corpus with Lesk, then count how many
     # words end up with the same non-None semantic value as the word at the
     # same position in the "old" corpus (presumably the reference labels).
     lesk = Lesk(self.wordNet, self.fsm)
     labelledCorpus = AnnotatedCorpus("../../new-sentences")
     referenceCorpus = AnnotatedCorpus("../../old-sentences")
     wordTotal = 0
     matchTotal = 0
     for sentenceIndex in range(labelledCorpus.sentenceCount()):
         labelled = labelledCorpus.getSentence(sentenceIndex)
         lesk.autoSemantic(labelled)
         reference = referenceCorpus.getSentence(sentenceIndex)
         for wordIndex in range(labelled.wordCount()):
             wordTotal += 1
             labelledWord = labelled.getWord(wordIndex)
             referenceWord = reference.getWord(wordIndex)
             # A word that received no semantic value never counts as a match.
             if labelledWord.getSemantic() is None:
                 continue
             if labelledWord.getSemantic() == referenceWord.getSemantic():
                 matchTotal += 1
     # Expected counts are fixed for the corpora on disk.
     self.assertEqual(549, wordTotal)
     self.assertEqual(268, matchTotal)