Example #1
0
def men_articles_coref_psql(data_source):
    """Run coreference resolution over every mention article from *data_source*.

    Fetches (mention, article_text) pairs from PostgreSQL, then feeds each
    article through the Coreference pipeline, keyed by a hash of its text.

    Args:
        data_source: source identifier forwarded to the DB query and to
            ``Coreference.coref_input_doc``.
    """
    men_art_tuple = Dictionaries.db_conn_obj.fetch_all_men_articles_source(data_source)
    men_ent_dict = dict(men_art_tuple)
    # Use the real corpus size for progress reporting instead of the
    # previously hard-coded 500, which produced wrong percentages for any
    # other result-set size.
    total = len(men_ent_dict)
    coref = Coreference()
    for doc_idx, (men, m_text) in enumerate(men_ent_dict.items(), start=1):
        # total >= 1 here: the loop body only runs when the dict is non-empty.
        print("Processing doc {}, progress {}/{} = {:.2f}".format(
            men, doc_idx, total, doc_idx / total))
        doc_id_hash = Dictionaries.doc_to_hash(m_text)
        coref.coref_input_doc(m_text, men, doc_id_hash, data_source)
Example #2
0
        mention, ent_tok_entropy_dict, redis_key='sm50')
    Dictionaries.redis_db_obj.save_men_ents_tokens_idf_redis(mention,
                                                             ent_tok_idf_dict,
                                                             redis_key='sm50')
    Dictionaries.redis_db_obj.save_men_ents_tokens_tf_redis(mention,
                                                            ent_tok_tf_dict,
                                                            redis_key='sm50')
    return ent_tok_entropy_dict, ent_tok_idf_dict, ent_tok_tf_dict


def cal_fq(tok_idxs_dict):
    """Return the total token frequency across all tokens.

    Args:
        tok_idxs_dict: mapping of token -> sequence of occurrence indexes.

    Returns:
        int: the summed number of occurrence indexes over every token;
        0 for an empty mapping.
    """
    # Keys are irrelevant to the count, so iterate values only
    # and let sum() do the accumulation at C speed.
    return sum(len(idxs) for idxs in tok_idxs_dict.values())


if __name__ == '__main__':
    # Dictionaries.spacy_init = spacy.load('en')
    # Initialize project-wide lookup tables / connections before any
    # processing; the functions above assume this has been called.
    Dictionaries.init_dictionaries()
    # NOTE(review): the lines below are previous manual experiments kept
    # commented out — consider deleting them once no longer needed.
    # ents = ['Japan', 'Beijing']
    # ss = Dictionaries.spacy_init.tokenizer('Beijing is a city')
    # print(ss)
    # ent_article_tok(ents)
    # texts = 'Beijig is a great city, EB, Japanese, Asia, Japanese, Asia, Japanese, Asia, Japanese, Asia, Japanese, Asia, Pacific sdfdf     Japan japnasfd dfa Japan.'
    # ent_tok_dict, men_toks_idx = ent_article_tok(ents), men_toks('Beijing', texts)
    # gen_comm_tokens_web(ent_tok_dict, men_toks_idx)
    # ent_articles_tok_redis()
    # men_articles_tok_redis()
    # ent_articles_tok_redis_single_test()