import json

# Project-local helpers (utils and Const are defined elsewhere in this
# repository; adjust the import paths to the actual package layout).
import utils


def show_instances(class_name='', name='train'):
    # Select the raw data file for the requested split.
    if name == 'train':
        filename = Const.origin_all_train_filename
    elif name == 'dev':
        filename = Const.origin_all_dev_filename
    else:
        raise ValueError('unknown split: %s' % name)

    with open(filename, 'r') as f:
        content = f.readlines()
    html_doc = ' '.join(content)

    # Parse sentences and their triples, then map both to word ids.
    sentences_string, triples_string = utils.parse(html_doc)
    sentences_words = utils.sentence_tokenize(sentences_string)
    position_triples = utils.find_entity_position(sentences_words, triples_string)
    sentences_word_id, sentence_triples_id = utils.turn2id(
        sentences_words, position_triples)
    utils.triples_type(sentence_triples_id)

    # Pick the predicate matching the requested triple class.
    if class_name == 'normal':
        func = utils.is_normal_triple
    elif class_name == 'single_entity_overlap':
        func = utils.is_over_lapping
    else:
        func = utils.is_multi_label

    words2id = utils.load_words2id()
    id2words = {v: k for k, v in words2id.items()}
    for sent_words_id, triples_id in zip(sentences_word_id, sentence_triples_id):
        if func(triples_id, is_relation_first=False):
            print(' '.join([id2words[x] for x in sent_words_id]))
            print(triples_id)
            print('-----------------------------------')
def run_word_vectors():
    # Build a vector for every word in the vocabulary and dump the mapping to disk.
    print('reading nyt_vec.bin')
    all_w2vec = utils.read_vec_bin()
    words2id = utils.load_words2id()
    print('prepare w2vec')
    w2vec = utils.word_vectors(words2id, all_w2vec)
    print('dumping')
    with open(Const.words_id2vector_filename, 'w', encoding='utf-8') as f:
        json.dump(w2vec, f)
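

# Minimal usage sketch (hypothetical entry point, not part of the original
# file): assumes the Const.* filenames point at the prepared NYT data and that
# word vectors are built before instances are inspected.
if __name__ == '__main__':
    run_word_vectors()
    show_instances(class_name='normal', name='train')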