def create_ntee_from_compoents(dir_path):
    # Paths to the vocabularies, embeddings, and linear-layer parameters that
    # together make up a pretrained NTEE model.
    word_dict_path = dir_path + '/dict.word'
    word_embs_path = dir_path + '/word_embeddings.npy'
    entity_dict_path = dir_path + '/dict.entity'
    entity_embs_path = dir_path + '/entity_embeddings.npy'
    W_path = dir_path + '/W.npy'
    b_path = dir_path + '/b.npy'

    print('load voca and embeddings')
    word_voca, word_embs = utils.load_voca_embs(word_dict_path, word_embs_path)
    entity_voca, entity_embs = utils.load_voca_embs(entity_dict_path, entity_embs_path)

    config = {'word_embeddings': word_embs,
              'entity_embeddings': entity_embs,
              'word_voca': word_voca,
              'entity_voca': entity_voca,
              'emb_dims': word_embs.shape[1]}
    print("word_embs.shape:", word_embs.shape, "entity_embs.shape:", entity_embs.shape)

    # create model
    print('create model')
    model = NTEE(config)

    # Load the pretrained linear layer. nn.Parameter (capital P) is required,
    # and W is stored transposed relative to PyTorch's (out, in) convention.
    W = np.load(W_path)
    b = np.load(b_path)
    model.linear.weight = nn.Parameter(torch.FloatTensor(W).t())
    model.linear.bias = nn.Parameter(torch.FloatTensor(b))
    return model
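# A minimal usage sketch (an addition, not part of the original file). It
# assumes the component files referenced above (dict.word, word_embeddings.npy,
# dict.entity, entity_embeddings.npy, W.npy, b.npy) all live in one directory;
# the directory path below is a placeholder.
if __name__ == "__main__":
    ntee_model = create_ntee_from_compoents('data/ntee_components')
    print('loaded NTEE model, linear weight shape:', ntee_model.linear.weight.shape)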
                    type=int, help="number of LBP loops", default=10)

# args for debugging
parser.add_argument("--print_rel", action='store_true')
parser.add_argument("--print_incorrect", action='store_true')

args = parser.parse_args()


if __name__ == "__main__":
    print('load conll at', datadir)
    conll = D.CoNLLDataset(datadir, person_path, conll_path)

    print('create model')
    word_voca, word_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.word',
        voca_emb_dir + 'word_embeddings.npy')
    print('word voca size', word_voca.size())
    snd_word_voca, snd_word_embeddings = utils.load_voca_embs(
        voca_emb_dir + '/glove/dict.word',
        voca_emb_dir + '/glove/word_embeddings.npy')
    print('snd word voca size', snd_word_voca.size())

    entity_voca, entity_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.entity',
        voca_emb_dir + 'entity_embeddings_0.2_11max.npy')
    _, aet_entity_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.entity',
        voca_emb_dir + 'aligned_atee_entity_vec_100d.npy')
    aet_word_voca, aet_word_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'atee_type_dict_100d.txt',
        voca_emb_dir + 'atee_type_vec_100d.npy')
                    type=int, help="number of LBP loops", default=10)

# args for debugging
parser.add_argument("--print_rel", action='store_true')
parser.add_argument("--print_incorrect", action='store_true')

args = parser.parse_args()


if __name__ == "__main__":
    print('load conll at', datadir)
    conll = D.CoNLLDataset(datadir, person_path, conll_path)

    print('create model')
    word_voca, word_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.word',
        voca_emb_dir + 'word_embeddings.npy')
    print('word voca size', word_voca.size())
    snd_word_voca, snd_word_embeddings = utils.load_voca_embs(
        voca_emb_dir + '/glove/dict.word',
        voca_emb_dir + '/glove/word_embeddings.npy')
    print('snd word voca size', snd_word_voca.size())

    dhl_voca_emb_dir = '/home/hldai/data/el/AIDA/deeped/'
    # entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'dict.entity',
    #                                                       voca_emb_dir + 'entity_embeddings.npy')
    # entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'entity-vocab-aida.txt',
    #                                                       voca_emb_dir + 'entity-vecs-aida.npy')
    # entity_voca, entity_embeddings = utils.load_voca_embs(dhl_voca_emb_dir + 'mrel-dhl-entity-vocab.txt',
    #                                                       dhl_voca_emb_dir + 'entity-vecs-dhl.npy')
    entity_voca, entity_embeddings = utils.load_voca_embs(
        dhl_voca_emb_dir + 'mrel-dhl-entity-vocab.txt',
                    default=1000000)
parser.add_argument("--dev_enr", type=str, help="dev net path", default=None)

# args for debugging
parser.add_argument("--print_rel", action='store_true')
parser.add_argument("--print_incorrect", action='store_true')

args = parser.parse_args()

if (args.semisup or args.multi_instance) and args.n_negs < 1:
    raise Exception("multi instance requires at least 1 negative sample")


if __name__ == "__main__":
    print('create model')
    word_voca, word_embeddings = utils.load_voca_embs(voca_emb_dir + 'dict.word',
                                                      voca_emb_dir + 'word_embeddings.npy')
    print('word voca size', word_voca.size())
    snd_word_voca, snd_word_embeddings = utils.load_voca_embs(voca_emb_dir + '/glove/dict.word',
                                                              voca_emb_dir + '/glove/word_embeddings.npy')
    print('snd word voca size', snd_word_voca.size())
    entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'dict.entity',
                                                          voca_emb_dir + 'entity_embeddings.npy')
    print('entity voca size', entity_voca.size())

    config = {'hid_dims': args.hid_dims,
              'emb_dims': entity_embeddings.shape[1],
              'freeze_embs': True,
              'tok_top_n': args.tok_top_n,
              'margin': args.margin,
              'word_voca': word_voca,
import sys
from nel.vocabulary import Vocabulary
import nel.utils as utils
import numpy as np


if __name__ == "__main__":
    core_voca_path = sys.argv[1]
    word_embs_dir = sys.argv[2]

    print('load core voca from', core_voca_path)
    core_voca = Vocabulary.load(core_voca_path)

    print('load full voca and embs')
    full_voca, full_embs = utils.load_voca_embs(word_embs_dir + '/all_dict.word',
                                                word_embs_dir + '/all_word_embeddings.npy')

    print('select word ids')
    # keep only the rows of the full embedding matrix whose words also occur
    # in the core vocabulary
    selected = []
    for word in core_voca.id2word:
        word_id = full_voca.word2id.get(word, -1)
        if word_id >= 0:
            selected.append(word_id)

    print('save...')
    selected_embs = full_embs[selected, :]
    np.save(word_embs_dir + '/word_embeddings', selected_embs)
    with open(word_embs_dir + '/dict.word', 'w', encoding='utf8') as f:
        for i in selected:
            f.write(full_voca.id2word[i] + '\t1000\n')
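# A hedged usage sketch (an addition, not part of the original script); the
# script name and paths below are placeholders:
#
#   python select_word_embs.py data/core_dict.word data/embs
#
# After it runs, data/embs/dict.word and data/embs/word_embeddings.npy hold only
# the rows of the full embedding matrix whose words occur in the core
# vocabulary, in matching order, so the pair can be reloaded later with
# utils.load_voca_embs.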
                    type=int, help="number of LBP loops", default=10)

# args for debugging
parser.add_argument("--print_rel", action='store_true')
parser.add_argument("--print_incorrect", action='store_true')

args = parser.parse_args()


if __name__ == "__main__":
    print('load conll at', datadir)
    conll = D.CoNLLDataset(datadir, person_path, conll_path)

    print('create model')
    word_voca, word_embeddings = utils.load_voca_embs(
        voca_emb_dir + 'dict.word',
        voca_emb_dir + 'word_embeddings.npy')
    print('word voca size', word_voca.size())
    snd_word_voca, snd_word_embeddings = utils.load_voca_embs(
        voca_emb_dir + '/glove/dict.word',
        voca_emb_dir + '/glove/word_embeddings.npy')
    print('snd word voca size', snd_word_voca.size())

    dhl_voca_emb_dir = '/home/data/hldai/el/AIDA/deeped/'
    deeped_emb_dir = '/home/data/hldai/el/deepedemb/'
    # entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'dict.entity',
    #                                                       voca_emb_dir + 'entity_embeddings.npy')
    # entity_voca, entity_embeddings = utils.load_voca_embs(voca_emb_dir + 'entity-vocab-aida.txt',
    #                                                       voca_emb_dir + 'entity-vecs-aida.npy')
    entity_voca, entity_embeddings = utils.load_voca_embs(
        deeped_emb_dir + 'mrel-aidatac-entity-vocab.txt',
        deeped_emb_dir + 'entity-vecs-aidatac.npy')