Exemple #1
0
def show_instances(class_name='', name='train'):
    """Print every sentence whose triples fall into the requested category.

    The selected dataset file is read, run through the ``utils`` pipeline
    (``parse`` -> ``sentence_tokenize`` -> ``find_entity_position`` ->
    ``turn2id``), and each sentence whose triples satisfy the chosen
    predicate is printed, followed by its triples and a separator line.

    Args:
        class_name: Which predicate to filter with. ``'normal'`` selects
            ``utils.is_normal_triple``, ``'single_entity_overlap'`` selects
            ``utils.is_over_lapping``, and any other value (including the
            default ``''``) selects ``utils.is_multi_label``.
        name: Which dataset to read: ``'train'`` (default) uses
            ``Const.origin_all_train_filename`` and ``'dev'`` uses
            ``Const.origin_all_dev_filename``.

    Raises:
        ValueError: If ``name`` is neither ``'train'`` nor ``'dev'``.
    """
    if name == 'train':
        filename = Const.origin_all_train_filename
    elif name == 'dev':
        filename = Const.origin_all_dev_filename
    else:
        # Fail loudly instead of hitting an unbound ``filename`` below.
        raise ValueError("name must be 'train' or 'dev', got %r" % (name,))

    # The context manager closes the file even if reading raises.
    with open(filename, 'r') as f:
        # Lines keep their trailing newline and are additionally joined with
        # a single space, exactly as the parser has always received them.
        html_doc = ' '.join(f.readlines())

    sentences_string, triples_string = utils.parse(html_doc)
    sentences_words = utils.sentence_tokenize(sentences_string)
    position_triples = utils.find_entity_position(sentences_words,
                                                  triples_string)
    sentences_word_id, sentence_triples_id = utils.turn2id(
        sentences_words, position_triples)
    # Return value is discarded; presumably called for a side effect such as
    # reporting statistics -- TODO confirm against utils.triples_type.
    utils.triples_type(sentence_triples_id)

    if class_name == 'normal':
        func = utils.is_normal_triple
    elif class_name == 'single_entity_overlap':
        func = utils.is_over_lapping
    else:
        func = utils.is_multi_label

    # Invert the vocabulary so word ids can be rendered back as words.
    words2id = utils.load_words2id()
    id2words = {v: k for k, v in words2id.items()}
    for sent_words_id, triples_id in zip(sentences_word_id,
                                         sentence_triples_id):
        if func(triples_id, is_relation_first=False):
            # Single-argument parenthesised print behaves the same under
            # Python 2 and Python 3.
            print(' '.join(id2words[x] for x in sent_words_id))
            print(triples_id)
            print('-----------------------------------')
Exemple #2
0
def run_word_vectors():
    """Build the word-vector table for the vocabulary and dump it as JSON.

    Loads the full vector set via ``utils.read_vec_bin()`` and the vocabulary
    via ``utils.load_words2id()``, combines them with
    ``utils.word_vectors()``, and writes the result to
    ``Const.words_id2vector_filename``. Progress messages are printed along
    the way. Takes no arguments and returns ``None``.
    """
    # Single-argument parenthesised print behaves the same under Python 2
    # and Python 3.
    print('reading nyt_vec.bin')
    all_w2vec = utils.read_vec_bin()
    words2id = utils.load_words2id()
    print('prepare w2vec')
    w2vec = utils.word_vectors(words2id, all_w2vec)
    print('dumping')
    # The context manager flushes and closes the output file once the dump
    # finishes (or fails), instead of leaving that to garbage collection.
    with open(Const.words_id2vector_filename, 'w') as out_file:
        json.dump(w2vec, out_file)
Exemple #3
0
def run_word_vectors():
    """Build the word-vector table for the vocabulary and dump it as JSON.

    Loads the full vector set via ``utils.read_vec_bin()`` and the vocabulary
    via ``utils.load_words2id()``, combines them with
    ``utils.word_vectors()``, and writes the result as UTF-8 text to
    ``Const.words_id2vector_filename``. Progress messages are printed along
    the way. Takes no arguments and returns ``None``.
    """
    print('reading nyt_vec.bin')
    all_w2vec = utils.read_vec_bin()
    words2id = utils.load_words2id()
    print('prepare w2vec')
    w2vec = utils.word_vectors(words2id, all_w2vec)
    print('dumping')
    # The context manager flushes and closes the output file once the dump
    # finishes (or fails), instead of leaving that to garbage collection.
    with open(Const.words_id2vector_filename, 'w',
              encoding='utf-8') as out_file:
        json.dump(w2vec, out_file)