コード例 #1
0
ファイル: agregator_ner_utils.py プロジェクト: bfurlan/IE4MAS
def add_ner_tags_dataframe_all_tokens_at_once(df, ner_extractor):
    """
    Tag every token of the dataframe with a single extractor call.

    The whole ``word`` column is converted to unicode strings and handed to
    ``ner_extractor`` in one go. The result is passed through
    ``strip_tag_characters`` and written to a column named after the
    extractor function.

    :param df: dataframe with a ``word`` column holding the tokens
    :param ner_extractor: callable that receives the list of unicode tokens;
        its ``__name__`` is used as the name of the column written to ``df``
    :return: the same ``df`` object, with the tag column set
    """
    column_name = ner_extractor.__name__

    # the extractor is fed unicode strings, whatever the column holds
    words = [unicode(word) for word in df.word]
    tags = strip_tag_characters(ner_extractor(words))

    # exactly one tag per token is expected before writing the column
    assert len(tags) == len(df.word)

    df.loc[:, column_name] = tags
    return df
コード例 #2
0
ファイル: agregator_ner_utils.py プロジェクト: bfurlan/IE4MAS
def add_ner_tags_dataframe_by_sentences(df, ner_extractor):
    """
    Tag the tokens of the dataframe one sentence at a time.

    Rows are selected per value of the first level of the dataframe's
    MultiIndex (treated as the sentence identifier). For each of them the
    ``word`` values are converted to unicode strings, handed to
    ``ner_extractor`` and the result is passed through
    ``strip_tag_characters``. The per-sentence results are concatenated and
    written to a column named after the extractor function.

    :param df: dataframe with a ``word`` column and a MultiIndex whose first
        level groups the rows into sentences
    :param ner_extractor: callable that receives the list of unicode tokens
        of one sentence; its ``__name__`` is used as the name of the column
        written to ``df``
    :return: the same ``df`` object, with the tag column set
    """
    ner_extractor_name = ner_extractor.__name__

    # Iterate over the sentence ids that actually occur in the rows, in the
    # order they first appear.  ``df.index.levels[0]`` is not suitable here:
    # it lists the *defined* level values, which can be ordered differently
    # from the rows and can still contain ids that were filtered out of the
    # frame (``df.loc`` raises KeyError for those).
    sentence_ids = df.index.get_level_values(0).unique()

    ner_list = []
    for sent_ind in sentence_ids:
        # convert to unicode
        tokens = [unicode(token) for token in df.loc[sent_ind].word]
        # tag ners
        res = strip_tag_characters(ner_extractor(tokens))
        ner_list += res

    # exactly one tag per token is expected before writing the column
    assert len(ner_list) == len(df.word)

    df.loc[:, ner_extractor_name] = ner_list

    return df
コード例 #3
0
ファイル: agregator_rel_utils.py プロジェクト: bfurlan/IE4MAS
    :param start_index:
    :param tag_type:
    :return:
    """
    ranges = []
    for key, group in groupby(enumerate(ind_list), lambda (index, item): index - item):
        group = map(itemgetter(1), group)
        if len(group) > 1:
            ranges.append((xrange(group[0]-start_index, group[-1] - start_index+1), tag_type))
        else:
            ranges.append((xrange(group[0] - start_index, group[0] - start_index+1), tag_type))
    return ranges


# Entity tag labels in their full form.
full_tags = ["PERSON", "LOCATION", "ORGANIZATION", "MISC"]
# The same labels passed through strip_tag_characters (defined outside this
# excerpt); presumably a shortened form of each label -- TODO confirm.
striped_tags = strip_tag_characters(full_tags)




def get_relations(entities, sent, threshold=1):
    """

    :param entities: MITIE xrange entities
    :param sent: dataframe of sentence
    :return: dict of {'sentence': sent, 'entities': entities_dict, 'relations': relations}
    """
    # all possible combinations
    neighboring_entities = [(ent1[0], ent2[0]) for ent1 in entities for ent2 in entities if ent1 != ent2]
    tokens = map(unicode, sent.word.tolist())