Beispiel #1
0
    def get_candidates(self, text_order, candidate_order, mention, index_sent):
        """ Get the candidates of a mention, ordered for the sieve check.

        The pronoun-specific ordering is used only when the mention is a
        pronoun (``self.is_pronoun``) and the element at index 1 of its
        ``"entity"`` entry has length 1 (presumably: the entity still holds
        a single mention -- verify against the entity layout). Every other
        mention is delegated to the parent sieve.

        :param text_order: The list of sentences that contain the list of
            mentions that form the text.
        :param candidate_order: The list of sentences that contain the list
            of mentions that form the text in bts order.
        :param mention: The mention whose candidates we need.
        :param index_sent: The index of the current sentence.

        :return: A list of ordered candidates. In the pronoun case: the
            mentions that precede the mention in its own sentence (ordered
            by ``self.pronoun_order``, reversed for relative pronouns)
            followed by the mentions of the previous sentences, nearest
            sentence first.
        """
        current_sentence = candidate_order[index_sent]
        sentence_ids = [candidate[ID] for candidate in current_sentence]
        # list.index raises ValueError if the mention is not in the sentence.
        mention_index = sentence_ids.index(mention["id"])
        entity_members = current_sentence[mention_index]["entity"][1]

        if not (len(entity_members) == 1 and self.is_pronoun(mention)):
            return super(PronounSieve, self).get_candidates(
                text_order, candidate_order, mention, index_sent)

        self.logger.debug("ORDERING: pronoun order")
        same_sentence = self.pronoun_order(
            current_sentence[:mention_index], mention)

        # Earlier sentences are visited from the nearest to the farthest;
        # inside each sentence the textual order is kept.
        previous = []
        for sentence in reversed(text_order[:index_sent]):
            previous.extend(sentence)

        if pronouns.relative(mention[FORM].lower()):
            self.logger.debug("ORDERING: Relative pronoun order")
            same_sentence.reverse()
        return same_sentence + previous
Beispiel #2
0
def is_relative_pronoun(graph_builder, first_constituent, second_constituent):
    """ Check if two constituents are in a relative pronoun construction.

    Target pattern: ``NP < (NP=m1 $.. (SBAR < (WHNP < WP|WDT=m2)))``.

    The check succeeds when both constituents are in the same sentence, the
    form of the second one is a relative pronoun, the span of the first one
    does not compare greater than the span of the second one, and climbing
    the syntactic parents of the second constituent reaches the syntactic
    parent of the first one.

    :param graph_builder: The graph manager.
    :param first_constituent: The candidate antecedent (m1 in the pattern).
    :param second_constituent: The candidate relative pronoun (m2 in the
        pattern).
    :return: True if the construction is found, False otherwise (also when
        the first constituent has no syntactic parent).
    """
    if not graph_builder.same_sentence(first_constituent, second_constituent):
        return False
    if not pronouns.relative(second_constituent[FORM].lower()):
        return False
    if first_constituent[SPAN] > second_constituent[SPAN]:
        return False

    # NOTE(review): the tag of this node is not checked here, so it is only
    # presumed to be the NP of the pattern.
    enclosing_np = graph_builder.get_syntactic_parent(first_constituent)
    if not enclosing_np:
        # Without a parent there is nothing that can enclose both
        # constituents; bail out instead of failing on enclosing_np[SPAN].
        return False

    upper = graph_builder.get_syntactic_parent(second_constituent)
    while upper and (upper[graph_builder.node_type] !=
                     graph_builder.root_type):
        if graph_builder.is_inside(upper[SPAN], enclosing_np[SPAN]):
            # Still below the enclosing node: keep climbing.
            upper = graph_builder.get_syntactic_parent(upper)
        elif upper[ID] == enclosing_np[ID]:
            # TODO check path element
            return True
        else:
            # Left the enclosing node without meeting it.
            return False

    return False
Beispiel #3
0
    def are_coreferent(self, entity, mention, candidate_entity, candidate):
        """ Decide whether a pronoun is coreferent with a candidate.

        Several language specific vetoes are evaluated first, in order; if
        none applies the decision is delegated to the parent sieve.

        :param entity: The entity that mention is part of.
        :param mention: The selected mention to represent the entity.
        :param candidate_entity: The entity that candidate is part of.
        :param candidate: The candidate that may corefer the entity.

        :return: True or False.
        """
        # NOTE(review): the candidate form is tested as-is, without
        # lowercasing -- confirm that pronouns.relative does not need it.
        vetoed = (
            # Relative pronouns are never accepted as candidates.
            pronouns.relative(candidate[FORM])
            # Neither are candidates flagged as pleonastic.
            or candidate.get(PLEONASTIC, False)
            # Optionally, possessive mentions are not resolved at all.
            or (self.FORBID_POSSESSIVES and self.is_possessive(mention))
            # Optionally, a possessive pronoun is not linked to a
            # non-pronoun candidate that starts with a possessive.
            or (self.RESTRICT_POSSESSIVES_WITH_POSSESSIONS
                and self.is_pronoun(mention)
                and self.is_possessive(mention)
                and self.starts_with_possessive(candidate)
                and not self.is_pronoun(candidate))
            # Optionally, a mention that starts right after the end of the
            # candidate span is rejected.
            or (self.RESTRICT_ADJACENT
                and mention[SPAN][0] == candidate[SPAN][1] + 1)
        )
        if vetoed:
            return False

        parent_sieve = super(SpanishPronounMatch, self)
        return parent_sieve.are_coreferent(
            entity, mention, candidate_entity, candidate)
Beispiel #4
0
    def extract_and_mark(self, mention):
        """ Determine the type of the mention. Also check some mention related
        features.

        Three boolean flags are stored in the mention (RELATIVE_PRONOUN,
        STARTED_BY_INDEFINITE_ARTICLE and STARTED_BY_INDEFINITE_PRONOUN) and
        then its type is set through ``self._set_mention_type`` to, in
        priority order: enumeration, pronoun, proper or nominal.

        :param mention: The mention to be classified. It must contain at
            least one word (the first word is read unconditionally).
        """
        words = self.graph_builder.get_words(mention)
        head = self.graph_builder.get_head_word(mention)
        head_pos = head[POS]
        head_form = head[FORM].lower()
        first_form = words[0][FORM].lower()
        single_word = len(words) == 1

        # Feature flags, always stored as plain booleans.
        mention[RELATIVE_PRONOUN] = bool(
            pronouns.relative(first_form) and single_word)
        mention[STARTED_BY_INDEFINITE_ARTICLE] = bool(
            determiners.indefinite_articles(first_form))
        mention[STARTED_BY_INDEFINITE_PRONOUN] = bool(
            pronouns.indefinite(first_form))

        # Mention type. The NER tag of the head is not consulted here.
        if rules.is_enumeration(self.graph_builder, mention):
            mention_type = ENUMERATION_MENTION
        elif single_word and (pos_tags.pronoun(head_pos)
                              or pronouns.all(head_form)
                              or pronouns.relative(head_form)):
            # One-word mention whose head is a pronoun, by POS tag or by form.
            mention_type = PRONOUN_MENTION
        elif pos_tags.proper_noun(head_pos):
            mention_type = PROPER_MENTION
        else:
            # In any other case the mention is nominal.
            mention_type = NOMINAL_MENTION
        self._set_mention_type(mention, mention_type)
Beispiel #5
0
    def validate(self, mention, entity):
        """ Check if the mention can be processed by this sieve.

        On top of the parent sieve validation (presumably where the
        "only pronouns" restriction lives -- verify), relative pronouns and
        pleonastic mentions are rejected.

        :param mention: The mention to check.
        :param entity: The entity of the mention.
        :return: True if the mention is valid for this sieve, False otherwise.
        """
        if not super(SpanishPronounMatch, self).validate(mention, entity):
            return False

        if pronouns.relative(mention[FORM]):
            return False

        # A mention that was never flagged is treated as not pleonastic
        # instead of failing with a KeyError.
        if mention.get(PLEONASTIC, False):
            return False

        return True
Beispiel #6
0
def is_appositive_construction_child(graph_builder, constituent):
    """ Check if the mention is in an appositive construction.

    "NP=m1 < (NP=m2 $.. (/,/ $.. NP=m3))";
    "NP=m1 < (NP=m2 $.. (/,/ $.. (SBAR < (WHNP < WP|WDT=m3))))";
    "/^NP(?:-TMP|-ADV)?$/=m1 < (NP=m2 $- /^,$/ $-- NP=m3 !$ CC|CONJP)";
    "/^NP(?:-TMP|-ADV)?$/=m1 <
                  (PRN=m2 < (NP < /^NNS?|CD$/ $-- /^-LRB-$/ $+ /^-RRB-$/))";

    The implemented approximation: the parent of the constituent is a noun
    phrase and, among the siblings that follow the constituent, there is a
    comma followed (not necessarily immediately) by either a noun phrase or
    an element whose first word is a relative pronoun.

    :param graph_builder: The graphBuilder
    :param constituent: The mention to check. If it has a "constituent"
        entry, that value is checked instead.
    :return: The parent constituent if the construction is found, False
        otherwise.
    """
    constituent = constituent.get("constituent", constituent)

    # mention is inside a NP
    # TODO Improve the precision
    parent = graph_builder.get_syntactic_parent(constituent)
    if not parent or not constituent_tags.noun_phrase(parent[TAG]):
        return False

    # A single iterator is shared by the three scans below, so each scan
    # resumes where the previous one stopped. Unlike pop(0), this leaves the
    # list handed out by the graph builder untouched.
    siblings = iter(graph_builder.get_syntactic_sibling(constituent) or ())

    # Skip everything up to (and including) the constituent itself.
    for sibling in siblings:
        if sibling == constituent:
            break
    else:
        return False

    # A comma must follow the constituent.
    for sibling in siblings:
        if sibling[FORM] == ",":
            break
    else:
        return False

    # After the comma, look for a noun phrase or a relative pronoun.
    for sibling in siblings:
        if constituent_tags.noun_phrase(sibling.get(TAG)):
            return parent
        if pronouns.relative(graph_builder.get_words(sibling)[0].get("form")):
            return parent

    return False
Beispiel #7
0
    def process_graph(self):
        """ Prepare the graph for output.

        Fills ``self.meta`` with the document type, per-sentence histograms
        ("sentences"), empty feature containers ("features") and, after the
        coreference has been resolved, document-wide counters ("overall").
        In between, every mention is characterized by the feature extractor.
        """
        # Imports are kept at function scope, as they originally were.
        from corefgraph.multisieve.features.constants import MENTION
        from corefgraph.resources.tagset import pos_tags
        from corefgraph.resources.dictionaries import pronouns

        builder = self.graph_builder
        self.meta[builder.doc_type] = builder.get_doc_type()

        def is_pronoun_word(word):
            # A word counts as pronoun by POS tag or by (relative) form.
            return (pos_tags.pronoun(word[POS])
                    or pronouns.all(word[FORM])
                    or pronouns.relative(word[FORM]))

        # Fetch the sentences and their words once; every histogram below is
        # aligned on this sentence order.
        sentences = list(builder.get_all_sentences())
        words_by_sentence = [builder.get_words(sentence)
                             for sentence in sentences]
        self.meta["sentences"] = {
            'words_histogram': [len(words) for words in words_by_sentence],
            'pronouns_histogram': [
                sum(1 for word in words if is_pronoun_word(word))
                for words in words_by_sentence],
            'named_entities_histogram': [
                len(builder.get_sentence_named_entities(sentence))
                for sentence in sentences],
            'mentions_histogram': [
                len(builder.get_sentence_gold_mentions(sentence))
                for sentence in sentences],
        }

        self.meta["features"] = {
            'counters': defaultdict(Counter),
            'mentions': defaultdict(dict)}
        for index, sentence in enumerate(
                self.coreference_processor.mentions_textual_order):
            self.logger.debug("Featuring Sentence %d", index)
            for mention in sentence:
                self.feature_extractor.characterize_mention(mention)
        # Resolve the coreference
        self.logger.debug("Resolve Coreference...")
        self.coreference_processor.resolve_text()

        # Gold mentions are read after the resolution, as before, but only
        # once for the four statistics that use them.
        gold_mentions = list(builder.get_all_gold_mentions())
        self.meta["overall"] = {
            'words': Counter(
                word[POS] for word in builder.get_all_words()),
            'namedEntities': Counter(
                ne[NER] for ne in builder.get_all_named_entities()),
            'constituents': Counter(
                constituent[TAG]
                for constituent in builder.get_all_constituents()),
            'mentions': Counter(
                mention.get(MENTION) for mention in gold_mentions),
            'mentions_size': [
                len(builder.get_words(mention)) for mention in gold_mentions],
            # Mentions without constituent get a depth of -1.
            'mentions_deep': [
                mention.get(CONSTITUENT, {DEEP: -1})[DEEP]
                for mention in gold_mentions],
            # list() keeps this entry a plain list on Python 3 as well,
            # where dict.values() is a view.
            'mentions_per_entity': list(Counter(
                mention[GOLD_ENTITY] for mention in gold_mentions).values()),
        }