def filter(self, mention, prev_mentions):
    """ Check if the mention is inside another mention with the same head.

    A mention that lies inside one of ``prev_mentions`` and shares its head
    word is filtered, unless a comma or conjunction next to it (or ending
    it) suggests that it is a member of an enumeration or apposition. Which
    neighbours are inspected is controlled by ``self.next_comma``,
    ``self.end_comma`` and ``self.prev_comma``.

    :param mention: The mention to test.
    :param prev_mentions: The mentions the tested mention is compared with.

    :return: True if the mention must be filtered, False otherwise.
    """
    def is_separator(word):
        # A comma or a conjunction is taken as an enumeration marker.
        return pos_tags.conjunction(word[POS]) or word[FORM] == ","

    sentence = self.graph_builder.get_root(mention)
    sentence_words = self.graph_builder.get_sentence_words(sentence)
    sentence_span = sentence[SPAN]
    span = mention[SPAN]
    head_word = self.graph_builder.get_head_word(mention)
    # Mention boundaries shifted by the sentence start; they are used below
    # as indexes into sentence_words.
    first = span[0] - sentence_span[0]
    last = span[1] - sentence_span[0]

    for prev_mention in prev_mentions:
        # Do not check the mention against itself
        if prev_mention[ID] == mention[ID]:
            continue
        # Only mentions with the same head that contain this one matter
        prev_head_word = self.graph_builder.get_head_word(prev_mention)
        if head_word[ID] != prev_head_word[ID] or \
                not self.graph_builder.is_inside(span, prev_mention[SPAN]):
            continue

        if "," in mention[FORM]:
            return True

        # If the next word is a comma, it may be in an enumeration
        if self.next_comma and last + 1 < len(sentence_words):
            next_word = sentence_words[last + 1]
            if is_separator(next_word) and self.graph_builder.is_inside(
                    next_word[SPAN], prev_mention[SPAN]):
                self.logger.debug(
                    "NO filtered inside an ENUMERATION/APPOSITION:(%s)",
                    prev_mention[FORM])
                continue

        # If the last word of the mention is a comma, it may be in an
        # enumeration
        last_word = sentence_words[last]
        if self.end_comma and is_separator(last_word):
            self.logger.debug(
                "NO filtered inside an ENUMERATION/APPOSITION:(%s)",
                prev_mention[FORM])
            continue

        # If the previous word is a comma, it may be in an enumeration.
        # The guard only has to prevent index -1 (which would wrap around to
        # the end of the sentence); the word at index 0 is a valid
        # predecessor.
        if self.prev_comma and first > 0:
            prev_word = sentence_words[first - 1]
            if is_separator(prev_word):
                self.logger.debug(
                    "NO filtered inside an ENUMERATION:(%s)",
                    prev_mention[FORM])
                continue

        self.logger.debug(
            "Filtered: have same head word %s(%s) prev:%s",
            mention[FORM], mention[ID], prev_mention[ID])
        return True
    return False
def _catch_mention(self, mention_candidate):
    """ Check if the mention candidate is part of an enumeration.

    The candidate must be an enumerable word or constituent whose syntactic
    parent is a noun phrase that is not itself tagged as a mention NER. It
    is accepted when a comma or conjunction sibling separates it from
    another enumerable sibling, either after it (forward) or before it
    (backward).

    :param mention_candidate: The mention candidate to test.

    :return: True or False.
    """
    def is_separator(node):
        # A comma or a conjunction splits the members of an enumeration.
        return pos_tags.conjunction(node.get(POS)) or node[FORM] == ","

    def is_enumerable_sibling(node):
        return pos_tags.enumerable_mention_words(node.get(POS)) or \
            constituent_tags.noun_phrase(node.get(TAG))

    # mention is usable NP|NNP|NML
    mention_pos = mention_candidate.get(POS)
    mention_tag = mention_candidate.get(TAG)
    if not (pos_tags.enumerable_mention_words(mention_pos) or
            constituent_tags.enumerable(mention_tag)):
        return False
    # parent is NP
    mention_candidate_parent = self.graph_builder.get_syntactic_parent(
        mention_candidate)
    if not constituent_tags.noun_phrase(
            mention_candidate_parent.get(TAG)):
        return False
    if ner_tags.mention_ner(mention_candidate_parent.get(NER)):
        return False

    siblings = self.graph_builder.get_syntactic_sibling(mention_candidate)
    position = siblings.index(mention_candidate)

    # Forward: search for a comma or a conjunction after the mention that is
    # followed by an enumerable sibling. ``start`` keeps ``index`` absolute
    # so the inner slice really begins after the separator and can never
    # reach back to the candidate itself.
    for index, brother in enumerate(
            siblings[position + 1:], start=position + 1):
        if is_separator(brother):
            for post_comma_brother in siblings[index + 1:]:
                if is_enumerable_sibling(post_comma_brother):
                    self.logger.debug(
                        "Mention is inside enumeration(Forward): %s",
                        mention_candidate[FORM])
                    return True

    # Backward: search for a comma or a conjunction before the mention that
    # is preceded by an enumerable sibling. The slice starts at 0, so
    # ``index`` is already absolute here.
    for index, brother in enumerate(siblings[:position]):
        if is_separator(brother):
            for pre_comma_brother in siblings[:index]:
                if is_enumerable_sibling(pre_comma_brother):
                    self.logger.debug(
                        "Mention is inside enumeration(Backward): %s",
                        mention_candidate[FORM])
                    return True
    return False
def is_enumeration(graph_builder, mention):
    """ Check if the mention is an enumeration.

    The mention counts as an enumeration when the last conjunction among its
    words comes after the last comma. Index 0 doubles as "not seen", so a
    conjunction in the very first position does not count.

    :param graph_builder: The graph builder used to fetch the mention words.
    :param mention: The mention that can be an enumeration.

    :return: True or False.
    """
    comma_at = 0
    conjunction_at = 0
    for position, token in enumerate(graph_builder.get_words(mention)):
        if token[FORM] == ",":
            comma_at = position
        if pos_tags.conjunction(token.get(POS)):
            conjunction_at = position
    # comma_at is never negative, so being strictly greater than it also
    # implies that a conjunction was found at a non-zero index.
    return conjunction_at > comma_at
def is_enumeration(graph_builder, constituent):
    """ Check if the constituent is an enumeration.

    Walks the sorted syntactic children looking for the pattern
    noun phrase ... conjunction ... noun phrase.

    :param graph_builder: The graph builder used to fetch the children.
    :param constituent: The constituent to check.

    :return: True or False.
    """
    seen_noun_phrase = False
    seen_conjunction = False
    for node in graph_builder.get_syntactic_children_sorted(constituent):
        if not constituent_tags.noun_phrase(node.get(TAG)):
            # A conjunction only matters once a noun phrase precedes it.
            if pos_tags.conjunction(node.get(POS)) and seen_noun_phrase:
                seen_conjunction = True
            continue
        # A noun phrase after a coordinated noun phrase closes the pattern.
        if seen_conjunction:
            return True
        seen_noun_phrase = True
    return False