コード例 #1
0
ファイル: sameHeadFilter.py プロジェクト: josubg/CorefGraph
    def filter(self, mention, prev_mentions):
        """ Decide if a mention nested in a same-headed mention is filtered.

        The mention is filtered when a previous mention has the same head
        word and contains the mention span, unless a comma or a conjunction
        next to the mention (checks enabled by ``next_comma``, ``end_comma``
        and ``prev_comma``) hints at an enumeration or apposition.

        :param mention: The mention to test.
        :param prev_mentions: The mentions to compare with; an entry that has
            the same ID as the mention is skipped.
        :return: True or False.
        """
        builder = self.graph_builder

        def is_separator(word):
            # A conjunction (judged by its POS) or a literal comma.
            return pos_tags.conjunction(word[POS]) or word[FORM] == ","

        root = builder.get_root(mention)
        words = builder.get_sentence_words(root)
        root_span = root[SPAN]
        mention_span = mention[SPAN]
        head = builder.get_head_word(mention)
        # Positions of the mention borders relative to the sentence start.
        first_index = mention_span[0] - root_span[0]
        last_index = mention_span[1] - root_span[0]

        for candidate in prev_mentions:
            # Never compare the mention with itself.
            if candidate[ID] == mention[ID]:
                continue
            candidate_head = builder.get_head_word(candidate)
            shares_head = head[ID] == candidate_head[ID]
            # Only a same-headed mention that contains this one matters.
            if not (shares_head and
                    builder.is_inside(mention_span, candidate[SPAN])):
                continue

            # A comma inside the mention text filters it straight away.
            if "," in mention[FORM]:
                return True

            # Separator right after the mention and still inside the
            # candidate: it may be an enumeration, keep the mention.
            if self.next_comma and last_index + 1 < len(words):
                following = words[last_index + 1]
                if is_separator(following) and builder.is_inside(
                        following[SPAN], candidate[SPAN]):
                    self.logger.debug(
                        "NO filtered inside an ENUMERATION/APPOSITION:(%s)",
                        candidate[FORM])
                    continue

            # The last word is looked up whether or not end_comma is set.
            closing = words[last_index]
            # Separator as the last word of the mention.
            if self.end_comma and is_separator(closing):
                self.logger.debug(
                    "NO filtered inside an ENUMERATION/APPOSITION:(%s)",
                    candidate[FORM])
                continue

            # Separator right before the mention.
            # NOTE(review): this guard skips a preceding word at sentence
            # index 0 -- confirm whether ``>= 0`` was intended.
            if self.prev_comma and first_index - 1 > 0:
                preceding = words[first_index - 1]
                if is_separator(preceding):
                    self.logger.debug(
                        "NO filtered inside an ENUMERATION:(%s)",
                        candidate[FORM])
                    continue

            self.logger.debug(
                "Filtered: have same head word %s(%s) prev:%s",
                mention[FORM], mention[ID], candidate[ID])
            return True
        return False
コード例 #2
0
    def _catch_mention(self, mention_candidate):
        """ Check if the mention candidate is part of an enumeration.

        The candidate qualifies only when its POS or constituent tag is
        enumerable, its syntactic parent is a noun phrase and the parent NER
        value is not accepted by ``ner_tags.mention_ner``. It is then
        reported as part of an enumeration when either:

        * a comma or conjunction follows it among its siblings and an
          enumerable word or noun phrase comes after that separator, or
        * a comma or conjunction precedes it among its siblings and an
          enumerable word or noun phrase comes before that separator.

        :param mention_candidate : The mention candidate to test.
        :return: True or False.
        """

        # mention is usable NP|NNP|NML
        mention_pos = mention_candidate.get(POS)
        mention_tag = mention_candidate.get(TAG)
        if not (pos_tags.enumerable_mention_words(mention_pos) or
                constituent_tags.enumerable(mention_tag)):
            return False
        # parent is NP
        mention_candidate_parent = self.graph_builder.get_syntactic_parent(
            mention_candidate)
        if not constituent_tags.noun_phrase(
                mention_candidate_parent.get(TAG)):
            return False
        if ner_tags.mention_ner(mention_candidate_parent.get(NER)):
            return False
        # Search if the next brother is usable
        siblings = self.graph_builder.get_syntactic_sibling(
            mention_candidate)
        position = siblings.index(mention_candidate)
        # Search for a comma or a conjunction between mention and the end.
        # The enumeration starts at ``position + 1`` so that ``index`` is an
        # absolute offset into ``siblings``; with a slice-relative index the
        # inner scan would start near the head of the list and could match
        # the mention itself.
        for index, brother in enumerate(siblings[position + 1:],
                                        position + 1):
            brother_pos = brother.get(POS)
            if pos_tags.conjunction(brother_pos) or brother[FORM] == ",":
                # Check if next to conjunction (or comma) exist a
                # enumerable sibling
                for post_comma_brother in siblings[index + 1:]:
                    post_comma_brother_pos = post_comma_brother.get(POS)
                    post_comma_brother_tag = post_comma_brother.get(TAG)
                    if pos_tags.enumerable_mention_words(
                            post_comma_brother_pos) or \
                            constituent_tags.noun_phrase(
                                post_comma_brother_tag):
                        self.logger.debug(
                            "Mention is inside enumeration(Forward): %s",
                            mention_candidate[FORM])
                        return True
        # Check comma or conjunction before mention and previous sibling is
        # usable. The slice starts at 0, so ``index`` is already absolute.
        for index, brother in enumerate(siblings[:position]):
            brother_pos = brother.get(POS)
            if pos_tags.conjunction(brother_pos) or brother[FORM] == ",":
                for pre_comma_brother in siblings[:index]:
                    pre_comma_brother_pos = pre_comma_brother.get(POS)
                    pre_comma_brother_tag = pre_comma_brother.get(TAG)
                    if pos_tags.enumerable_mention_words(
                            pre_comma_brother_pos) or \
                            constituent_tags.noun_phrase(
                                pre_comma_brother_tag):
                        self.logger.debug(
                            "Mention is inside enumeration(Backward): %s",
                            mention_candidate[FORM])
                        return True
        return False
コード例 #3
0
def is_enumeration(graph_builder, mention):
    """ Tell whether the words of a mention form an enumeration.

    The mention counts as an enumeration when the last conjunction found
    among its words is placed after the last comma. Index 0 doubles as the
    "not found" value, so a conjunction in the first position is ignored.

    :param graph_builder: The object used to fetch the words of the mention.
    :param mention: The mention that can be a enumeration.

    :return: True or false
    """
    comma_at = 0
    conjunction_at = 0
    for position, token in enumerate(graph_builder.get_words(mention)):
        if token[FORM] == ",":
            comma_at = position
        if pos_tags.conjunction(token.get(POS)):
            conjunction_at = position
    # A zero conjunction index means "none seen" (or seen only at index 0).
    return bool(conjunction_at) and conjunction_at > comma_at
コード例 #4
0
def is_enumeration(graph_builder,  constituent):
    """ Tell whether a constituent is an enumeration.

    The sorted syntactic children are scanned for the pattern: a noun
    phrase, later a conjunction, later another noun phrase.

    :param graph_builder: The object used to fetch the sorted children.
    :param constituent: The constituent to check
    :return: True or False
    """
    seen_noun_phrase = False
    seen_conjunction = False
    for node in graph_builder.get_syntactic_children_sorted(constituent):
        if not constituent_tags.noun_phrase(node.get(TAG)):
            # A conjunction only counts once a noun phrase precedes it.
            if pos_tags.conjunction(node.get(POS)) and seen_noun_phrase:
                seen_conjunction = True
            continue
        # Noun phrase found after a valid coordination: pattern complete.
        if seen_conjunction:
            return True
        seen_noun_phrase = True
    return False