コード例 #1
0
def simplified_lesk(word: str, sentence: str) -> Synset:
    """
    Simplified Lesk word-sense disambiguation.

    Picks the WordNet sense of *word* whose signature (the lemmatized
    words of its definition and usage examples) has the largest overlap
    with the lemmatized context words of *sentence*.

    :param word: word to disambiguate
    :param sentence: sentence in which *word* appears
    :return: the Synset that maximizes the overlap; ``Synset(None)``
        when *word* has no WordNet senses
    """
    synsets = wordnet.synsets(word)
    if not synsets:
        # Preserve the original fallback value for unknown words instead
        # of relying on a bare except around an IndexError.
        return Synset(None)

    lemmatizer = WordNetLemmatizer()

    best_sense = synsets[0]  # default: WordNet's most frequent sense
    max_overlap = 0
    context = {lemmatizer.lemmatize(token) for token in sentence.split(" ")}

    for sense in synsets:
        signature = {lemmatizer.lemmatize(token)
                     for token in sense.definition().split(" ")}
        for example in sense.examples():
            # NOTE: set.union returns a NEW set; the original code
            # discarded it, so example words never entered the signature.
            signature |= {lemmatizer.lemmatize(token)
                          for token in example.split(" ")}

        overlap = len(signature & context)
        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense

    return best_sense
コード例 #2
0
def extended_context_lesk(word: str, sentence: str) -> Synset:
    """
    Extended Lesk word-sense disambiguation.

    Like the simplified algorithm, but each sense's signature also
    includes the lemmatized definition words of its hypernyms and
    hyponyms, and stopwords are removed before computing the overlap.

    :param word: the word to be disambiguated
    :param sentence: input sentence which contains param 'word'
    :return: best_sense, a WordNet Synset for param 'word';
        ``Synset(None)`` when *word* has no WordNet senses
    """
    synsets = wordnet.synsets(word)
    if not synsets:
        # Preserve the original fallback value for unknown words instead
        # of relying on a bare except around an IndexError.
        return Synset(None)

    stopwords_set = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    best_sense = synsets[0]  # default: WordNet's most frequent sense
    max_overlap = 0
    context = {lemmatizer.lemmatize(token) for token in sentence.split(" ")}

    for sense in synsets:
        signature = {lemmatizer.lemmatize(token)
                     for token in sense.definition().split(" ")}

        # NOTE: set.union / set.difference return NEW sets; the original
        # code discarded their results, so examples were never added and
        # stopwords were never removed. Use in-place operators instead.
        for example in sense.examples():
            signature |= {lemmatizer.lemmatize(token)
                          for token in example.split(" ")}

        for hypernym in sense.hypernyms():
            signature |= {lemmatizer.lemmatize(token)
                          for token in hypernym.definition().split(" ")}

        for hyponym in sense.hyponyms():
            signature |= {lemmatizer.lemmatize(token)
                          for token in hyponym.definition().split(" ")}

        signature -= stopwords_set

        overlap = len(signature & context)
        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense

    return best_sense
コード例 #3
0
ファイル: wordnet.py プロジェクト: Trixter9994/lazero
    def lowest_common_hypernyms(self,
                                synset,
                                other,
                                simulate_root=False,
                                use_min_depth=False):
        '''
        -- NOTE: THIS CODE IS COPIED FROM NLTK3 --
        Get a list of lowest synset(s) that both synsets have as a hypernym.
        When `use_min_depth == False` this means that the synset which
        appears as a hypernym of both `self` and `other` with the lowest
        maximum depth is returned or if there are multiple such synsets at
        the same depth they are all returned

        However, if `use_min_depth == True` then the synset(s) which has/have
        the lowest minimum depth and appear(s) in both paths is/are returned.

        By setting the use_min_depth flag to True, the behavior of NLTK2 can
        be preserved. This was changed in NLTK3 to give more accurate results
        in a small set of cases, generally with synsets concerning people.
        (eg: 'chef.n.01', 'fireman.n.01', etc.)

        This method is an implementation of Ted Pedersen's "Lowest Common
        Subsumer" method from the Perl Wordnet module. It can return either
        "self" or "other" if they are a hypernym of the other.

        :type synset: Synset
        :param synset: first input synset (plays the role of `self` in the
            original NLTK method; here the method lives on a wrapper object)
        :type other: Synset
        :param other: other input synset
        :type simulate_root: bool
        :param simulate_root: The various verb taxonomies do not
            share a single root which disallows this metric from working for
            synsets that are not connected. This flag (False by default)
            creates a fake root that connects all the taxonomies. Set it
            to True to enable this behavior. For the noun taxonomy,
            there is usually a default root except for WordNet version 1.6.
            If you are using wordnet 1.6, a fake root will need to be added
            for nouns as well.
        :type use_min_depth: bool
        :param use_min_depth: This setting mimics older (v2) behavior of NLTK
            wordnet. If True, will use the min_depth function to calculate
            the lowest common hypernyms. This is known to give strange
            results for some synset pairs (eg: 'chef.n.01', 'fireman.n.01')
            but is retained for backwards compatibility
        :return: The synsets that are the lowest common hypernyms of both
            synsets
        '''

        # Stand-in root that can connect otherwise-disjoint taxonomies.
        # It reports no hypernyms of its own so traversal stops there.
        fake_synset = Synset(None)
        fake_synset._name = '*ROOT*'
        fake_synset.hypernyms = lambda: []
        fake_synset.instance_hypernyms = lambda: []

        if simulate_root:
            # Append the fake root as one final hypernym level on both paths.
            self_hypernyms = chain(synset._iter_hypernym_lists(),
                                   [[fake_synset]])
            other_hypernyms = chain(other._iter_hypernym_lists(),
                                    [[fake_synset]])
        else:
            self_hypernyms = synset._iter_hypernym_lists()
            other_hypernyms = other._iter_hypernym_lists()

        # Flatten each per-level list-of-lists into a set of candidate
        # ancestors (each input synset is its own 0-depth hypernym).
        synsets = set(s for synsets in self_hypernyms for s in synsets)
        others = set(s for synsets in other_hypernyms for s in synsets)
        # Local deviation from stock NLTK: when a core taxonomy is
        # configured, restrict candidates to this instance's known concepts.
        # NOTE(review): known_concepts appears to hold synset *names* that
        # wordnet.synset() resolves — confirm against how it is populated.
        if self.core_taxonomy is not None:
            synsets.intersection_update(
                map(lambda syn: wordnet.synset(syn), self.known_concepts))
            others.intersection_update(
                map(lambda syn: wordnet.synset(syn), self.known_concepts))
        # Keep only ancestors common to both inputs.
        synsets.intersection_update(others)

        try:
            if use_min_depth:
                # NLTK2-compatible ranking: deepest candidates by min_depth.
                max_depth = max(s.min_depth() for s in synsets)
                unsorted_lch = [
                    s for s in synsets if s.min_depth() == max_depth
                ]
            else:
                # NLTK3 ranking: deepest candidates by max_depth.
                max_depth = max(s.max_depth() for s in synsets)
                unsorted_lch = [
                    s for s in synsets if s.max_depth() == max_depth
                ]
            return sorted(unsorted_lch)
        except ValueError:
            # max() raised on an empty candidate set: no common hypernym.
            return []