def add_ner_tags_dataframe_all_tokens_at_once(df, ner_extractor):
    """
    Tag every token of ``df`` in a single extractor call and store the tags
    in a new column.

    :param df: dataframe with a ``word`` column holding the tokens
    :param ner_extractor: callable that receives the list of unicode tokens;
        its ``__name__`` is used as the name of the column that is written
    :return: the same ``df`` object, with the tag column added in place
    """
    # The extractor is fed unicode strings, one per row of the ``word`` column.
    unicode_tokens = [unicode(token) for token in df.word]

    # Raw extractor output is post-processed by ``strip_tag_characters``
    # before being stored.
    tags = strip_tag_characters(ner_extractor(unicode_tokens))

    # One tag per token is required for the column assignment below.
    assert len(tags) == len(df.word)

    column_name = ner_extractor.__name__
    df.loc[:, column_name] = tags
    return df
def add_ner_tags_dataframe_by_sentences(df, ner_extractor):
    """
    Tag the tokens of ``df`` one sentence at a time and store the tags in a
    new column.

    The first level of the index of ``df`` is used as the sentence key: the
    extractor is called once per distinct first-level label, and the
    per-sentence results are concatenated in row order.

    :param df: dataframe with a multi-level index (first level identifies the
        sentence) and a ``word`` column holding the tokens
    :param ner_extractor: callable that receives the list of unicode tokens of
        one sentence; its ``__name__`` is used as the name of the column that
        is written
    :return: the same ``df`` object, with the tag column added in place
    """
    ner_extractor_name = ner_extractor.__name__
    ner_list = []

    # Iterate over the sentence labels that are actually present, in the order
    # the rows appear. ``df.index.levels[0]`` is sorted and may still contain
    # labels that were filtered out of the frame, which would either raise a
    # KeyError on lookup or attach tags to the wrong rows.
    # NOTE(review): assumes the rows of each sentence are contiguous in ``df``.
    sentence_labels = df.index.get_level_values(0).unique()

    for sent_ind in sentence_labels:
        # The extractor is fed unicode strings.
        tokens = [unicode(token) for token in df.loc[sent_ind].word]

        # Raw extractor output is post-processed by ``strip_tag_characters``
        # before being collected.
        res = strip_tag_characters(ner_extractor(tokens))
        ner_list += res

    # One tag per token is required for the column assignment below.
    assert len(ner_list) == len(df.word)

    df.loc[:, ner_extractor_name] = ner_list
    return df
:param start_index: :param tag_type: :return: """ ranges = [] for key, group in groupby(enumerate(ind_list), lambda (index, item): index - item): group = map(itemgetter(1), group) if len(group) > 1: ranges.append((xrange(group[0]-start_index, group[-1] - start_index+1), tag_type)) else: ranges.append((xrange(group[0] - start_index, group[0] - start_index+1), tag_type)) return ranges full_tags = ["PERSON", "LOCATION", "ORGANIZATION", "MISC"] striped_tags = strip_tag_characters(full_tags) def get_relations(entities, sent, threshold=1): """ :param entities: MITIE xrange entities :param sent: dataframe of sentence :return: dict of {'sentence': sent, 'entities': entities_dict, 'relations': relations} """ # all possible combinations neighboring_entities = [(ent1[0], ent2[0]) for ent1 in entities for ent2 in entities if ent1 != ent2] tokens = map(unicode, sent.word.tolist())