def main():
    """Train the SNLI example network on the Torch backend, then run it over dev.

    Hyper-parameters are written onto ``Config`` in place. Batches are
    streamed from the ``snli_example`` pipeline, and loss/accuracy/ETA are
    reported through the hooks subscribed to the batchers. Returns nothing.
    """
    Logger.GLOBAL_LOG_LEVEL = LogLevel.INFO

    # Backend and hyper-parameters (Backends.TENSORFLOW is the alternative).
    Config.backend = Backends.TORCH
    Config.cuda = True
    Config.dropout = 0.1
    Config.hidden_size = 128
    Config.embedding_size = 256
    Config.L2 = 0.00003

    # Flip to True to run preprocess_SNLI before loading the pipeline.
    do_process = False
    if do_process:
        preprocess_SNLI(delete_data=True)

    pipeline_name = 'snli_example'
    p = Pipeline(pipeline_name)
    vocab = p.state['vocab']
    vocab.load_from_disk()

    batch_size = 128
    if Config.backend == Backends.TENSORFLOW:
        from spodernet.backends.tfbackend import TensorFlowConfig
        TensorFlowConfig.init_batch_size(batch_size)

    train_batcher = StreamBatcher(
        pipeline_name, 'snli_train', batch_size,
        randomize=True, loader_threads=8)
    dev_batcher = StreamBatcher(pipeline_name, 'snli_dev', batch_size)
    # Constructed but never iterated in this function.
    test_batcher = StreamBatcher(pipeline_name, 'snli_test', batch_size)

    # Reporting hooks; subscription order is kept as in the original.
    report_every = 100
    train_batcher.subscribe_to_events(
        LossHook('Train', print_every_x_batches=report_every))
    train_batcher.subscribe_to_events(
        AccuracyHook('Train', print_every_x_batches=report_every))
    dev_batcher.subscribe_to_events(
        AccuracyHook('Dev', print_every_x_batches=report_every))
    dev_batcher.subscribe_to_events(
        LossHook('Dev', print_every_x_batches=report_every))
    eta = ETAHook(print_every_x_batches=report_every)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)

    net = Net(vocab.num_embeddings, vocab.num_labels)
    if Config.cuda:
        net.cuda()

    epochs = 10
    opt = torch.optim.Adam(net.parameters(), lr=0.001)

    # ---- training ----
    net.train()
    for _ in range(epochs):
        for batch in train_batcher:
            opt.zero_grad()
            loss, argmax = net(batch)
            loss.backward()
            opt.step()
            # The subscribed hooks read these fields off the batcher state.
            state = train_batcher.state
            state.loss = loss
            state.targets = batch['target']
            state.argmax = argmax

    # ---- dev pass ----
    net.eval()
    for dev_batch in dev_batcher:
        # Looked up but otherwise unused; kept so that a batch lacking either
        # key still fails at this point, exactly as before.
        _target = dev_batch['target']
        _index = dev_batch['index']
        loss, argmax = net(dev_batch)
        state = dev_batcher.state
        state.loss = loss
        state.targets = dev_batch['target']
        state.argmax = argmax
def main():
    """Generate adversarial triples and write an augmented training file.

    Steps, all driven by module-level ``Config`` / ``load`` / ``model_path``:

    1. Build the pipeline, vocabularies, batchers and the selected model.
    2. If ``load`` is set, restore ``model_path``, print the parameter sizes
       and evaluate on test/dev; otherwise call ``model.init()``.
    3. Overwrite the weights with ``embeddings/auto-embeddings.pt``.
    4. For every triple in ``data/<dataset>/test.txt`` compute an attack
       vector from ``model.encoder`` / ``model.encoder_2`` / ``find_best_at``
       and decode it into a (head, relation) pair via ``model.decoder``.
    5. Write the decoded triples followed by the original training triples
       to ``data/new_<dataset>/train.txt``.

    Changes from the previous revision: ported to Python 3 (``print()``
    calls, files opened as UTF-8 text instead of ``line.decode``). The unused
    optimizer and one-hot buffers are gone. The neighbour index that only
    fed a disabled ``find_best_attack`` call was dropped, since both branches
    of its ``if`` assigned the same attack. Blank lines in the triple files
    are skipped, and the output directory is created when missing.

    Raises:
        Exception: if ``Config.model_name`` is not a known model.
    """
    import os  # local import: only needed to create the output directory

    if Config.process:
        preprocess(Config.dataset, delete_data=True)

    input_keys = ['e1', 'rel', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(Config.dataset, keys=input_keys)
    p.load_vocabs()
    vocab = p.state['vocab']

    num_entities = vocab['e1'].num_token
    dict_tokentoid, dict_idtotoken = vocab['e1'].tokendicts()
    dict_reltoid, dict_idtorel = vocab['rel'].tokendicts()
    num_rel = vocab['rel'].num_token

    train_batcher = StreamBatcher(Config.dataset, 'train', Config.batch_size,
                                  randomize=True, keys=input_keys)
    dev_rank_batcher = StreamBatcher(Config.dataset, 'dev_ranking',
                                     Config.batch_size, randomize=False,
                                     loader_threads=4, keys=input_keys,
                                     is_volatile=True)
    test_rank_batcher = StreamBatcher(Config.dataset, 'test_ranking',
                                      Config.batch_size, randomize=False,
                                      loader_threads=4, keys=input_keys,
                                      is_volatile=True)

    if Config.model_name is None:
        model = ConvE(vocab['e1'].num_token, vocab['rel'].num_token)
    elif Config.model_name == 'ConvE':
        model = ConvE(vocab['e1'].num_token, num_rel)
    elif Config.model_name == 'DistMult':
        model = DistMult(vocab['e1'].num_token, vocab['rel'].num_token)
    elif Config.model_name == 'ComplEx':
        model = Complex(vocab['e1'].num_token, vocab['rel'].num_token)
    else:
        log.info('Unknown model: {0}', Config.model_name)
        raise Exception("Unknown model!")

    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary'))

    eta = ETAHook('train', print_every_x_batches=100)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(
        LossHook('train', print_every_x_batches=100))

    if Config.cuda:
        model.cuda()

    if load:
        model_params = torch.load(model_path)
        print(model)
        total_param_size = []
        for key, value in model_params.items():
            count = value.numel()
            total_param_size.append(count)
            print(key, value.size(), count)
        print(np.array(total_param_size).sum())
        model.load_state_dict(model_params)
        model.eval()
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
    else:
        model.init()

    params = [value.numel() for value in model.parameters()]
    print(params)
    print(np.sum(params))

    # Replace whatever was loaded/initialised above with the auto-encoder
    # weights; everything below is inference only.
    model.load_state_dict(torch.load('embeddings/auto-embeddings.pt'))
    model.eval()

    # Training triples are kept verbatim (only stripped) so they can be
    # written back unchanged at the end.
    train_data = []
    train_path = 'data/' + Config.dataset + '/train.txt'
    with open(train_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank/trailing lines
            e1, rel, e2 = line.split('\t')
            train_data.append([e1.strip(), rel.strip(), e2.strip()])
    print(len(train_data))

    # Test triples are lower-cased before the vocabulary lookup.
    attack_list = []
    test_path = 'data/' + Config.dataset + '/test.txt'
    with open(test_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            e1, rel, e2 = (field.strip().lower() for field in line.split('\t'))
            attack_list.append(
                [dict_tokentoid[e1], dict_reltoid[rel], dict_tokentoid[e2]])
    print(len(attack_list))

    str_at = []
    for n_t, (e1_id, rel_id, e2_or) in enumerate(attack_list):
        if n_t % 500 == 0:
            print('Number of processed triple: ', n_t)

        # NOTE(review): inputs are always created on the GPU, even though
        # model.cuda() above is conditional on Config.cuda.
        e1 = torch.cuda.LongTensor([e1_id])
        rel = torch.cuda.LongTensor([rel_id])
        e2 = torch.cuda.LongTensor([e2_or])

        pred = model.encoder(e1, rel)
        E2 = model.encoder_2(e2)
        A, B = find_best_at(-pred, E2)
        attack = -A * pred + B * E2

        # Decode the attack vector into the most likely head and relation.
        E1, R = model.decoder(attack)
        _, predicted_e1 = torch.max(E1, 1)
        _, predicted_R = torch.max(R, 1)
        str_at.append([
            str(dict_idtotoken[predicted_e1.data.cpu().numpy()[0]]),
            str(dict_idtorel[predicted_R.data.cpu().numpy()[0]]),
            str(dict_idtotoken[e2_or]),
        ])

    new_train = str_at + train_data
    print(len(new_train))

    out_dir = 'data/new_' + Config.dataset
    os.makedirs(out_dir, exist_ok=True)
    with open(out_dir + '/train.txt', 'w', encoding='utf-8') as f:
        for item in new_train:
            f.write("%s\n" % "\t".join(map(str, item)))
def main(args, model_path):
    """Train (or resume) a link-prediction model selected by ``args.model``.

    Args:
        args: Parsed options. This function reads ``preprocess``, ``data``,
            ``batch_size``, ``test_batch_size``, ``loader_threads``,
            ``model``, ``log_interval``, ``resume``, ``lr``, ``l2``,
            ``epochs`` and ``label_smoothing``.
        model_path: Checkpoint path, read when ``args.resume`` is set and
            overwritten after every epoch.

    Raises:
        Exception: if ``args.model`` names an unknown model.
    """
    if args.preprocess:
        preprocess(args.data, delete_data=True)

    input_keys = ['e1', 'rel', 'rel_eval', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(args.data, keys=input_keys)
    p.load_vocabs()
    vocab = p.state['vocab']
    num_entities = vocab['e1'].num_token

    train_batcher = StreamBatcher(
        args.data, 'train', args.batch_size,
        randomize=True, keys=input_keys, loader_threads=args.loader_threads)
    # Dev and test ranking batchers share every option but the split name.
    ranking_options = dict(randomize=False,
                           loader_threads=args.loader_threads,
                           keys=input_keys)
    dev_rank_batcher = StreamBatcher(
        args.data, 'dev_ranking', args.test_batch_size, **ranking_options)
    test_rank_batcher = StreamBatcher(
        args.data, 'test_ranking', args.test_batch_size, **ranking_options)

    # Pick the model class first; ConvE is the default when none is given.
    requested = args.model
    if requested is None or requested == 'conve':
        model_cls = ConvE
    elif requested == 'distmult':
        model_cls = DistMult
    elif requested == 'complex':
        model_cls = Complex
    elif requested == 'interacte':
        model_cls = InteractE
    else:
        log.info('Unknown model: {0}', args.model)
        raise Exception("Unknown model!")
    model = model_cls(args, vocab['e1'].num_token, vocab['rel'].num_token)

    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary'))

    eta = ETAHook('train', print_every_x_batches=args.log_interval)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(
        LossHook('train', print_every_x_batches=args.log_interval))

    model.cuda()
    if not args.resume:
        model.init()
    else:
        model_params = torch.load(model_path)
        print(model)
        # Print name/shape/element count per tensor, then the grand total.
        counts = []
        for key, value in model_params.items():
            n_elements = value.numel()
            counts.append(n_elements)
            print(key, value.size(), n_elements)
        print(np.array(counts).sum())
        model.load_state_dict(model_params)
        model.eval()
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')

    param_counts = [tensor.numel() for tensor in model.parameters()]
    print(param_counts)
    print(np.sum(param_counts))

    opt = torch.optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.l2)
    for epoch in range(args.epochs):
        model.train()
        for batch in train_batcher:
            opt.zero_grad()
            e1 = batch['e1']
            rel = batch['rel']
            targets = batch['e2_multi1_binary'].float()
            # label smoothing
            targets = ((1.0 - args.label_smoothing) * targets) \
                + (1.0 / targets.size(1))

            pred = model.forward(e1, rel)
            loss = model.loss(pred, targets)
            loss.backward()
            opt.step()

            train_batcher.state.loss = loss.cpu()

        print('saving to {0}'.format(model_path))
        torch.save(model.state_dict(), model_path)

        model.eval()
        with torch.no_grad():
            # Dev then test, every fifth epoch except the very first.
            if epoch > 0 and epoch % 5 == 0:
                ranking_and_hits(model, dev_rank_batcher, vocab,
                                 'dev_evaluation')
                ranking_and_hits(model, test_rank_batcher, vocab,
                                 'test_evaluation')
def main():
    """Train (or resume) a literal-enriched link-prediction model.

    Reads its settings from ``Config`` and from the free names ``load``,
    ``model_path`` and ``epochs``. Numerical literals are loaded from
    ``data/<dataset>/literals/numerical_literals.npy`` and min-max scaled
    per column before being handed to the model.

    Raises:
        Exception: if ``Config.model_name`` is not one of the known models.
    """
    if Config.process:
        preprocess(Config.dataset, delete_data=True)

    input_keys = ['e1', 'rel', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(Config.dataset, keys=input_keys)
    p.load_vocabs()
    vocab = p.state['vocab']
    num_entities = vocab['e1'].num_token

    train_batcher = StreamBatcher(Config.dataset, 'train', Config.batch_size,
                                  randomize=True, keys=input_keys)
    # Dev and test ranking batchers only differ in the split they read.
    ranking_options = dict(randomize=False, loader_threads=4,
                           keys=input_keys, is_volatile=True)
    dev_rank_batcher = StreamBatcher(
        Config.dataset, 'dev_ranking', Config.batch_size, **ranking_options)
    test_rank_batcher = StreamBatcher(
        Config.dataset, 'test_ranking', Config.batch_size, **ranking_options)

    # Load literals
    numerical_literals = np.load(
        f'data/{Config.dataset}/literals/numerical_literals.npy')

    # Normalize literals (column-wise min-max; epsilon guards constant columns)
    max_lit = np.max(numerical_literals, axis=0)
    min_lit = np.min(numerical_literals, axis=0)
    numerical_literals = \
        (numerical_literals - min_lit) / (max_lit - min_lit + 1e-8)

    # Load literal models: choose the class, then build it once below.
    name = Config.model_name
    if name is None:
        model_cls = DistMultLiteral
    elif name == 'DistMultLiteral_highway':
        model_cls = DistMultLiteral_highway
    elif name == 'DistMultLiteral_gate':
        model_cls = DistMultLiteral_gate
    elif name == 'ComplEx':
        model_cls = ComplexLiteral
    elif name == 'ConvE':
        model_cls = ConvELiteral
    elif name == 'ConvEAlt':
        model_cls = ConvELiteralAlt
    elif name == 'DistMultNN':
        model_cls = DistMultLiteralNN
    elif name == 'DistMultNN2':
        model_cls = DistMultLiteralNN2
    else:
        log.info('Unknown model: {0}', Config.model_name)
        raise Exception("Unknown model!")
    model = model_cls(vocab['e1'].num_token, vocab['rel'].num_token,
                      numerical_literals)

    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary'))

    eta = ETAHook('train', print_every_x_batches=100)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(
        LossHook('train', print_every_x_batches=100))

    if Config.cuda:
        model.cuda()

    if not load:
        model.init()
    else:
        model_params = torch.load(model_path)
        print(model)
        # Print name/shape/element count per tensor, then the grand total.
        counts = []
        for key, value in model_params.items():
            n_elements = value.numel()
            counts.append(n_elements)
            print(key, value.size(), n_elements)
        print(np.array(counts).sum())
        model.load_state_dict(model_params)
        model.eval()
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')

    param_counts = [tensor.numel() for tensor in model.parameters()]
    print(param_counts)
    print(np.sum(param_counts))

    opt = torch.optim.Adam(model.parameters(), lr=Config.learning_rate,
                           weight_decay=Config.L2)
    for epoch in range(epochs):
        model.train()
        for batch in train_batcher:
            opt.zero_grad()
            e1 = batch['e1']
            rel = batch['rel']
            # Label smoothing is intentionally not applied in this variant.
            targets = batch['e2_multi1_binary'].float()

            pred = model.forward(e1, rel)
            loss = model.loss(pred, targets)
            loss.backward()
            opt.step()

            train_batcher.state.loss = loss

        print('saving to {0}'.format(model_path))
        torch.save(model.state_dict(), model_path)

        model.eval()
        # Dev every epoch; test every third epoch, skipping epoch 0.
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
        if epoch > 0 and epoch % 3 == 0:
            ranking_and_hits(model, test_rank_batcher, vocab,
                             'test_evaluation')
def main():
    """Train a link-prediction model with early stopping on dev MRR.

    Reads its settings from ``Config`` and from the free names ``load``,
    ``model_path`` and ``epochs``. Every 15th epoch the dev MRR returned by
    ``ranking_and_hits`` is compared with the best seen so far. Training
    stops once it has failed to improve more than ``max_count`` times in a
    row. A single test evaluation is run at the end.

    Fix in this revision: the final test evaluation now always runs with
    the model in eval mode and inside ``torch.no_grad()``. Previously it ran
    with autograd enabled, and in training mode whenever the epoch loop
    never executed.

    Raises:
        Exception: if ``Config.model_name`` is not a known model.
    """
    if Config.process:
        preprocess(Config.dataset, delete_data=True)

    input_keys = ['e1', 'rel', 'rel_eval', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(Config.dataset, keys=input_keys)
    p.load_vocabs()
    vocab = p.state['vocab']
    num_entities = vocab['e1'].num_token

    train_batcher = StreamBatcher(Config.dataset, 'train', Config.batch_size,
                                  randomize=True, keys=input_keys)
    dev_rank_batcher = StreamBatcher(Config.dataset, 'dev_ranking',
                                     Config.batch_size, randomize=False,
                                     loader_threads=4, keys=input_keys)
    test_rank_batcher = StreamBatcher(Config.dataset, 'test_ranking',
                                      Config.batch_size, randomize=False,
                                      loader_threads=4, keys=input_keys)

    # ConvE is the default when no model name is configured.
    if Config.model_name is None or Config.model_name == 'ConvE':
        model = ConvE(vocab['e1'].num_token, vocab['rel'].num_token)
    elif Config.model_name == 'DistMult':
        model = DistMult(vocab['e1'].num_token, vocab['rel'].num_token)
    elif Config.model_name == 'ComplEx':
        model = Complex(vocab['e1'].num_token, vocab['rel'].num_token)
    else:
        log.info('Unknown model: {0}', Config.model_name)
        raise Exception("Unknown model!")

    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary'))

    eta = ETAHook('train', print_every_x_batches=100)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(
        LossHook('train', print_every_x_batches=100))

    if Config.cuda:
        model.cuda()

    if load:
        model_params = torch.load(model_path)
        print(model)
        total_param_size = []
        for key, value in model_params.items():
            count = value.numel()
            total_param_size.append(count)
            print(key, value.size(), count)
        print(np.array(total_param_size).sum())
        model.load_state_dict(model_params)
        model.eval()
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
    else:
        model.init()

    params = [value.numel() for value in model.parameters()]
    print(params)
    print(np.sum(params))

    # Early-stopping bookkeeping.
    max_mrr = 0      # best dev MRR seen so far
    count = 0        # consecutive dev evaluations without improvement
    max_count = 3    # stop once `count` exceeds this

    opt = torch.optim.Adam(model.parameters(), lr=Config.learning_rate,
                           weight_decay=Config.L2)
    # Epochs are 1-based so the first dev evaluation happens at epoch 15.
    for epoch in range(1, epochs + 1):
        model.train()
        for str2var in train_batcher:
            opt.zero_grad()
            e1 = str2var['e1']
            rel = str2var['rel']
            e2_multi = str2var['e2_multi1_binary'].float()
            # label smoothing
            e2_multi = ((1.0 - Config.label_smoothing_epsilon) * e2_multi) \
                + (1.0 / e2_multi.size(1))

            pred = model.forward(e1, rel)
            loss = model.loss(pred, e2_multi)
            loss.backward()
            opt.step()

            train_batcher.state.loss = loss.cpu()

        # NOTE(review): the checkpoint is overwritten every epoch, so it holds
        # the latest weights, not necessarily the best-on-dev ones.
        print('saving to {0}'.format(model_path))
        torch.save(model.state_dict(), model_path)

        model.eval()
        with torch.no_grad():
            if epoch % 15 == 0:
                mrr = ranking_and_hits(model, dev_rank_batcher, vocab,
                                       'dev_evaluation')
                if mrr <= max_mrr:
                    count += 1
                    if count > max_count:
                        break
                else:
                    count = 0
                    max_mrr = mrr

    # Final test evaluation: make sure dropout/batch-norm are in inference
    # mode and no autograd graph is built, whatever path led here.
    model.eval()
    with torch.no_grad():
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
def main(args, model_path):
    """Train a ConvE model whose head entities are encoded from definitions.

    The function builds the spodernet pipeline and batchers, creates a ConvE
    model plus a ``DefinitionEncoder`` (attached as ``model.encoder``), and
    then, depending on ``args``:

    * ``args.initialize``: loads a state dict from that path, evaluates on
      test/dev, then attaches and initialises the encoder.
    * ``args.resume``: attaches the encoder, loads ``model_path`` and
      evaluates on test/dev using the synset lookup and ``P.get_batch``.
    * otherwise: attaches and initialises the encoder and the model.

    Training then runs for ``args.epochs`` epochs. The checkpoint at
    ``model_path`` is written only when a dev evaluation improves on the best
    dev MRR seen so far. Finally, if ``args.represent`` is set, encoder
    outputs for all synsets known to a fresh ``Preprocessor()`` are written
    with ``np.savez_compressed``.

    Args:
        args: Parsed options; this function reads ``preprocess``, ``data``,
            ``batch_size``, ``test_batch_size``, ``loader_threads``,
            ``log_interval``, ``initialize``, ``resume``, ``lr``, ``l2``,
            ``epochs``, ``label_smoothing`` and ``represent``.
        model_path: Checkpoint path (read on resume, written on improvement).
    """
    if args.preprocess:
        preprocess(args.data, delete_data=True)
    input_keys = ['e1', 'rel', 'rel_eval', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(args.data, keys=input_keys)
    p.load_vocabs()
    # The data is always converted into objects for storage; here the Vocab
    # object from spodernet is used.
    vocab = p.state['vocab']
    # Total number of entities (senses).
    num_entities = vocab['e1'].num_token
    # Create the three batchers (train / dev ranking / test ranking).
    train_batcher = StreamBatcher(args.data,
                                  'train',
                                  args.batch_size,
                                  randomize=True,
                                  keys=input_keys,
                                  loader_threads=args.loader_threads)
    dev_rank_batcher = StreamBatcher(args.data,
                                     'dev_ranking',
                                     args.test_batch_size,
                                     randomize=False,
                                     loader_threads=args.loader_threads,
                                     keys=input_keys)
    test_rank_batcher = StreamBatcher(args.data,
                                      'test_ranking',
                                      args.test_batch_size,
                                      randomize=False,
                                      loader_threads=args.loader_threads,
                                      keys=input_keys)
    model = ConvE(args, vocab['e1'].num_token, vocab['rel'].num_token)
    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1',
                                 'e2_multi1_binary'))
    # Original author's note: this part should be a callback used once
    # training is done. What the code does is subscribe an ETAHook and a
    # LossHook to the train batcher's events.
    eta = ETAHook('train', print_every_x_batches=args.log_interval)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(
        LossHook('train', print_every_x_batches=args.log_interval))
    P = Preprocessor("../external/wordnet-mlj12")
    # Maps an 'e1' vocabulary index to its token; the tokens are passed to
    # P.get_batch below (presumably synset identifiers - TODO confirm).
    tokenidx_to_synset = vocab['e1'].idx2token
    encoder = DefinitionEncoder()
    encoder.cuda()
    model.cuda()
    if args.initialize:
        model_params = torch.load(args.initialize)
        print(model)
        total_param_size = []
        params = [(key, value.size(), value.numel())
                  for key, value in model_params.items()]
        for key, size, count in params:
            total_param_size.append(count)
            print(key, size, count)
        print(np.array(total_param_size).sum())
        model.load_state_dict(model_params)
        model.eval()
        # NOTE(review): unlike the resume branch, these calls do not pass
        # tokenidx_to_synset / P.get_batch and run before model.encoder is
        # assigned - confirm this is intended.
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
        # Assign the definition encoder. Original author's note: no `encoder`
        # attribute was found among the model's attributes.
        model.encoder = encoder
        model.encoder.init()
    elif args.resume:
        # The encoder is attached before loading, so the state dict loaded
        # below goes into a model that already has `encoder`.
        model.encoder = encoder
        model_params = torch.load(model_path)
        print(model)
        total_param_size = []
        params = [(key, value.size(), value.numel())
                  for key, value in model_params.items()]
        for key, size, count in params:
            total_param_size.append(count)
            print(key, size, count)
        print(np.array(total_param_size).sum())
        model.load_state_dict(model_params)
        model.eval()
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation',
                         tokenidx_to_synset, P.get_batch)
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation',
                         tokenidx_to_synset, P.get_batch)
    else:
        model.encoder = encoder
        model.encoder.init()
        model.init()
    # `total_param_size` is reset here but not used afterwards.
    total_param_size = []
    params = [value.numel() for value in model.parameters()]
    print(params)
    print(np.sum(params))
    opt = torch.optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.l2)
    best_dev_mrr = 0
    model.eval()
    # Dev evaluation before any training step of this run.
    dev_mrr = ranking_and_hits(model, dev_rank_batcher, vocab,
                               'dev_evaluation', tokenidx_to_synset,
                               P.get_batch)
    # Prepare for training.
    for epoch in range(args.epochs):
        model.train()
        for i, str2var in enumerate(train_batcher):
            opt.zero_grad()
            e1 = str2var['e1']
            rel = str2var['rel']
            # Look up the token of every head-entity index in the batch.
            e1_tokens = [
                tokenidx_to_synset[idx]
                for idx in e1.detach().cpu().numpy().ravel()
            ]
            batch, lengths = P.get_batch(e1_tokens)
            # e1_emb is the embedding obtained with the BiLSTM (original
            # author's note); it is the first element of the encoder output.
            e1_emb = model.encoder((batch.cuda(), lengths))[0]
            e2_multi = str2var['e2_multi1_binary'].float()
            # label smoothing
            e2_multi = ((1.0 - args.label_smoothing) *
                        e2_multi) + (1.0 / e2_multi.size(1))
            # Feed the encoded heads into the model.
            pred = model.forward(e1_emb, rel, e1_encoded=True)
            loss = model.loss(pred, e2_multi)
            loss.backward()
            opt.step()
            train_batcher.state.loss = loss.cpu()
        #saving on improvement in dev score
        #print('saving to {0}'.format(model_path))
        #torch.save(model.state_dict(), model_path)
        model.eval()
        with torch.no_grad():
            # Dev evaluation every fifth epoch, skipping epoch 0.
            if epoch % 5 == 0 and epoch > 0:
                dev_mrr = ranking_and_hits(model, dev_rank_batcher, vocab,
                                           'dev_evaluation',
                                           tokenidx_to_synset, P.get_batch)
                # Checkpoint only when the dev MRR improves.
                if dev_mrr > best_dev_mrr:
                    print('saving to {} MRR {}->{}'.format(
                        model_path, best_dev_mrr, dev_mrr))
                    best_dev_mrr = dev_mrr
                    torch.save(model.state_dict(), model_path)
            if epoch % 5 == 0:
                if epoch > 0:
                    ranking_and_hits(model, test_rank_batcher, vocab,
                                     'test_evaluation', tokenidx_to_synset,
                                     P.get_batch)
    if args.represent:
        # A fresh Preprocessor with default arguments replaces the one used
        # during training.
        P = Preprocessor()
        synsets = [P.idx_to_synset[idx] for idx in range(len(P.idx_to_synset))]
        embeddings = []
        embeddings_proj = []
        # Encode the synsets in chunks of args.test_batch_size.
        for i in tqdm(range(0, len(synsets), args.test_batch_size)):
            synsets_batch = synsets[i:i + args.test_batch_size]
            with torch.no_grad():
                batch, lengths = P.get_batch(synsets_batch)
                emb_proj, emb = model.encoder((batch.cuda(), lengths))
            embeddings_proj.append(emb_proj.detach().cpu())
            embeddings.append(emb.detach().cpu())
        embeddings = torch.cat(embeddings, 0).numpy()
        embeddings_proj = torch.cat(embeddings_proj, 0).numpy()
        print('embeddings', embeddings.shape, embeddings_proj.shape)
        # Two files are written: args.represent itself, and a sibling with
        # '_projected' inserted before the extension.
        basename, ext = os.path.splitext(args.represent)
        fname = args.represent
        np.savez_compressed(fname, embeddings=embeddings, synsets=synsets)
        fname = basename + '_projected' + ext
        np.savez_compressed(fname, embeddings=embeddings_proj, synsets=synsets)
def main():
    """Train or test a link-prediction model and dump ConvE embeddings.

    Reads its settings from ``Config`` and from free names defined elsewhere
    in the file (``path_root``, ``load``, ``test``, ``testEntGraph``,
    ``computeAllProbs``, ``save``, ``epochs``, ``model_path``, ``model_name``,
    and the ``read_graphs`` arguments).

    * With ``load`` and ``test`` set, the checkpoint is restored and either
      triple probabilities are written to ``Config.probs_file_path`` or a
      ranking evaluation is run, after which the function returns.
    * Otherwise the model is trained for ``epochs`` epochs with periodic
      dev/test evaluation. When ``model_name == "ConvE"``, entity and
      relation embeddings are written to text files after every epoch
      (``*_tmp*`` files) and relation embeddings once more at the end.

    Fixes in this revision:

    * The "Unknown model" message now actually formats the model name.
    * Every output file, including the probabilities file and the final
      relation-embedding file, is opened in a ``with`` block so it is
      closed and flushed.
    * The periodic test evaluation runs under ``torch.no_grad()`` like the
      dev evaluation next to it.
    * The checkpoint directory is created with ``os.makedirs``.

    Raises:
        Exception: if ``Config.model_name`` is not a known model.
    """
    if Config.process:
        preprocess(Config.dataset, delete_data=True)

    train_triples_path = path_root + 'data/{0}/train.txt'.format(
        Config.dataset)
    # dev_triples_path = 'data/{0}/valid.txt'.format(Config.dataset)  # used for development
    test_triples_path = path_root + 'data/{0}/test.txt'.format(Config.dataset)

    input_keys = ['e1', 'rel', 'rel_eval', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(Config.dataset, keys=input_keys)
    p.load_vocabs()
    vocab = p.state['vocab']
    num_entities = vocab['e1'].num_token

    train_batcher = StreamBatcher(Config.dataset, 'train', Config.batch_size,
                                  randomize=True, keys=input_keys)
    dev_rank_batcher = StreamBatcher(Config.dataset, 'dev_ranking',
                                     Config.batch_size, randomize=False,
                                     loader_threads=4, keys=input_keys)
    test_rank_batcher = StreamBatcher(Config.dataset, 'test_ranking',
                                      Config.batch_size, randomize=False,
                                      loader_threads=4, keys=input_keys)

    allRels = get_AllRels(vocab)
    allEntTokens = get_AllEntities(vocab)

    if Config.model_name is None:
        model = ConvE(vocab['e1'].num_token, vocab['rel'].num_token)
    elif Config.model_name == 'ConvE':
        if test and testEntGraph:
            # Only the entailment-graph test needs the graph structures.
            types2E, types2rel2idx, types2rels = read_graphs(
                gpath, f_post_fix, featIdx, isCCG, lower)
            model = ConvE(vocab['e1'].num_token, vocab['rel'].num_token,
                          allEntTokens, allRels, types2E, types2rel2idx,
                          types2rels)
        else:
            model = ConvE(vocab['e1'].num_token, vocab['rel'].num_token,
                          allEntTokens, allRels)
    elif Config.model_name == 'DistMult':
        model = DistMult(vocab['e1'].num_token, vocab['rel'].num_token)
    elif Config.model_name == 'ComplEx':
        model = Complex(vocab['e1'].num_token, vocab['rel'].num_token)
    else:
        print('Unknown model: {0}'.format(Config.model_name))
        raise Exception("Unknown model!")

    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary'))

    eta = ETAHook('train', print_every_x_batches=100)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(
        LossHook('train', print_every_x_batches=100))

    if Config.cuda:
        model.cuda()

    if load:
        model_params = torch.load(model_path)
        print(model)
        total_param_size = []
        for key, value in model_params.items():
            count = value.numel()
            total_param_size.append(count)
            print(key, value.size(), count)
        print(np.array(total_param_size).sum())
        model.load_state_dict(model_params)

        if test:
            model.eval()
            with torch.no_grad():
                if computeAllProbs:
                    with open(Config.probs_file_path, 'w') as fout_probs:
                        compute_probs(model, test_rank_batcher, vocab,
                                      'test_probs', fout_probs,
                                      test_triples_path)
                elif testEntGraph:
                    ranking_and_hits_entGraph(model, test_rank_batcher, vocab,
                                              'test_evaluation',
                                              train_triples_path, 20)
                else:
                    ranking_and_hits(model, test_rank_batcher, vocab,
                                     'test_evaluation', train_triples_path, 20)
            # Test-only run: no training afterwards.
            return
    else:
        model.init()

    params = [value.numel() for value in model.parameters()]
    print(params)
    print(np.sum(params))

    opt = torch.optim.Adam(model.parameters(), lr=Config.learning_rate,
                           weight_decay=Config.L2)
    for epoch in range(epochs):
        model.train()
        for str2var in train_batcher:
            opt.zero_grad()
            e1 = str2var['e1']
            rel = str2var['rel']
            e2_multi = str2var['e2_multi1_binary'].float()
            # label smoothing
            e2_multi = ((1.0 - Config.label_smoothing_epsilon) * e2_multi) \
                + (1.0 / e2_multi.size(1))

            pred = model.forward(e1, rel)
            loss = model.loss(pred, e2_multi)
            loss.backward()
            opt.step()

            train_batcher.state.loss = loss.cpu()

        if save:
            print('saving to {0}'.format(model_path))
            os.makedirs(path_root + 'saved_models', exist_ok=True)
            torch.save(model.state_dict(), model_path)

        model.eval()
        with torch.no_grad():
            if epoch % 5 == 0:
                ranking_and_hits(model, dev_rank_batcher, vocab,
                                 'dev_evaluation', train_triples_path, 20)
            if epoch % 10 == 0 and epoch > 0:
                ranking_and_hits(model, test_rank_batcher, vocab,
                                 'test_evaluation', train_triples_path, 20)

        # Per-epoch snapshot of the entity and relation embeddings.
        # NOTE(review): this tests the free name `model_name`, not
        # Config.model_name - confirm they are meant to be the same.
        if model_name == "ConvE":
            ents_tmp_path = ('ents2emb_tmp_' + Config.model_name + '_'
                             + Config.dataset + '.txt')
            lookup_tensor = torch.tensor(
                list(range(vocab['e1'].num_token)),
                dtype=torch.long).to('cuda')
            emb_e = model.emb_e(lookup_tensor).cpu().detach().numpy()
            with open(ents_tmp_path, 'w') as fout:
                for i in range(vocab['e1'].num_token):
                    fout.write(vocab['e1'].idx2token[i] + '\t'
                               + str(emb_e[i]) + '\n')

            rels_tmp_path = ('rels2emb_' + Config.model_name + '_'
                             + Config.dataset + '_tmp.txt')
            with open(rels_tmp_path, 'w') as fout:
                for i in range(vocab['rel'].num_token):
                    if i in model.relIdx2Embed:
                        fout.write(vocab['rel'].idx2token[i] + '\t'
                                   + str(model.relIdx2Embed[i]) + '\n')

    if model_name == "ConvE":
        # Final relation embeddings; relations without one are reported.
        rels_path = ('rels2emb_' + Config.model_name + '_'
                     + Config.dataset + '.txt')
        with open(rels_path, 'w') as fout:
            for i in range(vocab['rel'].num_token):
                if i in model.relIdx2Embed:
                    fout.write(vocab['rel'].idx2token[i] + '\t'
                               + str(model.relIdx2Embed[i]) + '\n')
                else:
                    print("doesn't have: ", vocab['rel'].idx2token[i])
def main():
    """Train (or resume) a KBLN / LiteralE_KBLN link-prediction model.

    Reads its settings from ``Config`` and from the free names ``load``,
    ``model_path`` and ``epochs``. Numerical literals are loaded from
    ``data/<dataset>/literals/numerical_literals.npy``. The per-literal mean
    ``c`` and variance ``var`` of head-minus-tail literal differences over the
    training triples are passed to the model as RBF parameters.

    Fix in this revision: an unrecognised ``Config.model_name`` now raises
    immediately with a clear message. Previously ``model`` was left unbound
    and the function failed later with an unrelated ``UnboundLocalError``.

    Raises:
        Exception: if ``Config.model_name`` is not ``None``, ``'KBLN'`` or
            ``'LiteralE_KBLN'``.
    """
    if Config.process:
        preprocess(Config.dataset, delete_data=True)

    input_keys = ['e1', 'rel', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(Config.dataset, keys=input_keys)
    p.load_vocabs()
    vocab = p.state['vocab']
    num_entities = vocab['e1'].num_token

    train_batcher = StreamBatcher(Config.dataset, 'train', Config.batch_size,
                                  randomize=True, keys=input_keys)
    dev_rank_batcher = StreamBatcher(Config.dataset, 'dev_ranking',
                                     Config.batch_size, randomize=False,
                                     loader_threads=4, keys=input_keys,
                                     is_volatile=True)
    test_rank_batcher = StreamBatcher(Config.dataset, 'test_ranking',
                                      Config.batch_size, randomize=False,
                                      loader_threads=4, keys=input_keys,
                                      is_volatile=True)

    # Load literals
    numerical_literals = np.load(
        f'data/{Config.dataset}/literals/numerical_literals.npy')

    # Initialize KBLN RBF parameters from head/tail literal differences.
    X_train = np.load(f'data/{Config.dataset}/bin/train.npy')
    h = X_train[:, 0]
    t = X_train[:, 2]
    n = numerical_literals[h, :] - numerical_literals[t, :]
    c = np.mean(n, axis=0).astype('float32')  # size: (n_literals)
    # size: (n_literals); eps avoids the degenerate zero-variance case
    var = np.var(n, axis=0) + 1e-6

    # Get normalized literals (for LiteralE)
    max_lit = np.max(numerical_literals, axis=0)
    min_lit = np.min(numerical_literals, axis=0)
    numerical_literals_normalized = \
        (numerical_literals - min_lit) / (max_lit - min_lit + 1e-8)

    # Load literal models
    if Config.model_name is None or Config.model_name == 'KBLN':
        model = KBLN(vocab['e1'].num_token, vocab['rel'].num_token,
                     numerical_literals, c, var)
        print('Chosen model: KBLN')
    elif Config.model_name == 'LiteralE_KBLN':
        model = LiteralE_KBLN(vocab['e1'].num_token, vocab['rel'].num_token,
                              numerical_literals,
                              numerical_literals_normalized, c, var)
        print('Chosen model: LiteralE_KBLN')
    else:
        # Fail here instead of hitting an unbound `model` further down.
        raise Exception('Unknown model: {0}'.format(Config.model_name))

    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary'))

    eta = ETAHook('train', print_every_x_batches=100)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(
        LossHook('train', print_every_x_batches=100))

    if Config.cuda:
        model.cuda()

    if load:
        model_params = torch.load(model_path)
        print(model)
        total_param_size = []
        for key, value in model_params.items():
            count = value.numel()
            total_param_size.append(count)
            print(key, value.size(), count)
        print(np.array(total_param_size).sum())
        model.load_state_dict(model_params)
        model.eval()
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
    else:
        model.init()

    params = [value.numel() for value in model.parameters()]
    print(params)
    print(np.sum(params))

    opt = torch.optim.Adam(model.parameters(), lr=Config.learning_rate,
                           weight_decay=Config.L2)
    for epoch in range(epochs):
        model.train()
        for str2var in train_batcher:
            opt.zero_grad()
            e1 = str2var['e1']
            rel = str2var['rel']
            e2_multi = str2var['e2_multi1_binary'].float()
            # label smoothing
            e2_multi = ((1.0 - Config.label_smoothing_epsilon) * e2_multi) \
                + (1.0 / e2_multi.size(1))

            pred = model.forward(e1, rel)
            loss = model.loss(pred, e2_multi)
            loss.backward()
            opt.step()

            train_batcher.state.loss = loss

        print('saving to {0}'.format(model_path))
        torch.save(model.state_dict(), model_path)

        model.eval()
        # Dev every epoch; test every third epoch, skipping epoch 0.
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
        if epoch % 3 == 0 and epoch > 0:
            ranking_and_hits(model, test_rank_batcher, vocab,
                             'test_evaluation')
def _ranks_per_day(ranks, lengths):
    """Split the flat ``ranks`` sequence into one NumPy array per day.

    Day ``i`` owns the next ``2 * lengths[i]`` consecutive entries of ``ranks``.
    """
    per_day = []
    start = 0
    for length in lengths:
        stop = start + 2 * length
        per_day.append(np.array(ranks[start:stop]))
        start = stop
    return per_day


def _report_daily_metric(path, label, values):
    """Print one ``label`` value per day and write the raw values to ``path``, one per line."""
    with open(path, 'w') as f:
        for day, value in enumerate(values):
            print("{} (filtered) @ {}th day: {:.6f}".format(label, day, value))
            f.write(str(value) + '\n')


def main():
    """Train a ConvE-family link predictor on ``Config.dataset`` (or load one first).

    The model class is chosen by ``Config.model_name`` (``None`` selects
    ConvE).  When the free name ``load`` is truthy, weights are restored from
    ``model_path``, the test batcher is ranked, and per-day MRR / hits@10 /
    hits@3 / hits@1 are printed and written to ``<dataset>mrr.txt``,
    ``<dataset>h10.txt``, ``<dataset>h3.txt`` and ``<dataset>h1.txt``.
    Otherwise ``model.init()`` is called.

    Training then runs for ``epochs`` epochs with Adam and gradient-norm
    clipping at 1.0, using a loss over sampled positive/negative positions
    per batch row.  The model is saved to ``model_path`` after every epoch and
    ranked on the test batcher at epoch index 50.

    ``load``, ``model_path`` and ``epochs`` are read from the enclosing module.

    Raises:
        Exception: if ``Config.model_name`` names no known model.
    """
    if Config.process:
        preprocess(Config.dataset, delete_data=True)

    input_keys = ['e1', 'rel', 'rel_eval', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(Config.dataset, keys=input_keys)
    p.load_vocabs()
    vocab = p.state['vocab']

    num_entities = vocab['e1'].num_token

    train_batcher = StreamBatcher(Config.dataset, 'train', Config.batch_size,
                                  randomize=True, keys=input_keys)
    dev_rank_batcher = StreamBatcher(Config.dataset, 'dev_ranking', Config.batch_size,
                                     randomize=False, loader_threads=4, keys=input_keys)
    test_rank_batcher = StreamBatcher(Config.dataset, 'test_ranking', Config.batch_size,
                                      randomize=False, loader_threads=4, keys=input_keys)

    if Config.model_name is None or Config.model_name == 'ConvE':
        model = ConvE(vocab['e1'].num_token, vocab['rel'].num_token)
    elif Config.model_name == 'DistMult':
        model = DistMult(vocab['e1'].num_token, vocab['rel'].num_token)
    elif Config.model_name == 'ComplEx':
        model = Complex(vocab['e1'].num_token, vocab['rel'].num_token)
    elif Config.model_name == 'RNNDist':
        model = RNNDist(vocab['e1'].num_token, vocab['rel'].num_token)
    else:
        log.info('Unknown model: {0}', Config.model_name)
        raise Exception("Unknown model!")

    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary'))

    eta = ETAHook('train', print_every_x_batches=100)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(LossHook('train', print_every_x_batches=100))

    # Per-day segment lengths of the test ranking output; each day occupies
    # ``2 * length`` consecutive rank entries (see ``_ranks_per_day``).
    if Config.dataset == 'ICEWS18':
        lengths = [
            1618, 956, 815, 1461, 1634, 1596, 1754, 1494, 800, 979, 1588, 1779,
            1831, 1762, 1566, 812, 820, 1707, 1988, 1845, 1670, 1695, 956, 930,
            1641, 1813, 1759, 1664, 1616, 1021, 998, 1668, 1589, 1720
        ]
    else:
        lengths = [
            1090, 730, 646, 939, 681, 783, 546, 526, 524, 586, 656, 741, 562,
            474, 493, 487, 474, 477, 460, 532, 348, 530, 402, 493, 503, 452,
            668, 512, 406, 467, 524, 563, 524, 418, 441, 487, 515, 475, 478,
            532, 387, 479, 485, 417, 542, 496, 487, 445, 504, 350, 432, 445,
            401, 570, 554, 504, 505, 483, 587, 441, 489, 501, 487, 513, 513,
            524, 655, 545, 599, 702, 734, 519, 603, 579, 537, 635, 437, 422,
            695, 575, 553, 485, 429, 663, 475, 673, 527, 559, 540, 591, 558,
            698, 422, 1145, 969, 1074, 888, 683, 677, 910, 902, 644, 777, 695,
            571, 656, 797, 576, 468, 676, 687, 549, 482, 1007, 778, 567, 813,
            788, 879, 557, 724, 850, 809, 685, 714, 554, 799, 727, 208, 946,
            979, 892, 859, 1092, 1038, 999, 1477, 1126, 1096, 1145, 955, 100,
            1264, 1287, 962, 1031, 1603, 1662, 1179, 1064, 1179, 1105, 1465,
            1176, 1219, 1137, 1112, 791, 829, 2347, 917, 913, 1107, 960, 850,
            1005, 1045, 871, 972, 921, 1019, 984, 1033, 848, 918, 699, 1627,
            1580, 1354, 1119, 1065, 1208, 1037, 1134, 980, 1249, 1031, 908,
            787, 819, 804, 764, 959, 1057, 770, 691, 816, 620, 788, 829, 895,
            1128, 1023, 1038, 1030, 1016, 991, 866, 878, 1013, 977, 914, 976,
            717, 740, 904, 912, 1043, 1117, 930, 1116, 1028, 946, 922, 1151,
            1092, 967, 1189, 1081, 1158, 943, 981, 1212, 1104, 941, 912, 1347,
            1241, 1479, 1188, 1152, 1164, 1167, 1173, 1280, 979, 142, 1458,
            910, 1126, 1053, 1083, 897, 1021, 1075, 881, 1054, 941, 927, 860,
            1081, 876, 1952, 1576, 1560, 1599, 1226, 1083, 964, 1059, 1179,
            982, 1032, 933, 877, 1032, 957, 884, 909, 846, 850, 798, 843, 1183,
            1108, 1185, 797, 915, 952, 1181, 744, 86, 889, 1151, 925, 1119,
            1115, 1036, 772, 1052, 837, 897, 1095, 926, 1034, 1031, 995, 907,
            969, 981, 1135, 915, 1161, 100, 1269, 1244, 1331, 1124, 1074, 1162,
            1159, 1078, 1311, 1210, 1308, 945, 1183, 1580, 1406, 1417, 1173,
            1348, 1274, 1179, 893, 1107, 950, 1028, 1055, 1059, 1244, 1082,
            1179, 1011, 955, 886, 865, 857
        ]

    if Config.cuda:
        model.cuda()

    if load:
        model_params = torch.load(model_path)
        print(model)
        # Print every stored tensor with its shape and element count, then the total.
        param_counts = []
        for key, value in model_params.items():
            count = value.numel()
            param_counts.append(count)
            print(key, value.size(), count)
        print(np.array(param_counts).sum())
        model.load_state_dict(model_params)
        model.eval()

        # Bind the returned ranks: the per-day report below needs them, and the
        # training path already uses ranking_and_hits' return value this way.
        with torch.no_grad():
            ranks = ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
        print(len(ranks))

        # Every metric walks the same per-day slices; the offset advances once
        # per day inside _ranks_per_day.
        daily_ranks = _ranks_per_day(ranks, lengths)
        _report_daily_metric(Config.dataset + 'mrr.txt', 'MRR',
                             [np.mean(1 / rr) for rr in daily_ranks])
        for k in (10, 3, 1):
            _report_daily_metric(Config.dataset + 'h{}.txt'.format(k), 'h{}'.format(k),
                                 [np.mean(rr <= k) for rr in daily_ranks])

        print("length", len(ranks))
        print("length_2", 2 * sum(lengths))
    else:
        model.init()

    # NOTE(review): the collapsed source does not show indentation; everything
    # below is placed at function level, so it also runs after loading -- confirm.
    params = [value.numel() for value in model.parameters()]
    print(params)
    print(np.sum(params))

    opt = torch.optim.Adam(model.parameters(), lr=Config.learning_rate,
                           weight_decay=Config.L2)
    for epoch in range(epochs):
        model.train()
        for str2var in train_batcher:
            opt.zero_grad()
            e1 = str2var['e1']
            rel = str2var['rel']
            e2_multi = str2var['e2_multi1_binary'].float()

            pred = model.forward(e1, rel)

            # Accumulate the loss on whatever device the predictions live on,
            # and iterate over the real batch size instead of a fixed 128.
            device = pred.device
            loss = torch.zeros(1, device=device)
            for j in range(e2_multi.shape[0]):
                # NOTE(review): ``[0]`` keeps only the first nonzero index of
                # row j (and raises if the row has none) -- confirm intended.
                position = torch.nonzero(e2_multi[j])[0].to(device)
                num_pos = len(position)
                label = torch.cat([torch.ones(num_pos), torch.zeros(num_pos)]).to(device)
                # Uniformly sampled column indices serve as negatives.
                neg_position = torch.randint(e2_multi.shape[1], (num_pos,)).long().to(device)
                position = torch.cat([position, neg_position])
                loss += model.loss(pred[j, position], label)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # clip gradients
            opt.step()

            train_batcher.state.loss = loss.cpu()

        print('saving to {0}'.format(model_path))
        torch.save(model.state_dict(), model_path)

        model.eval()
        with torch.no_grad():
            if epoch == 50:
                ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
def main():
    """Train a ConvE / DistMult / ComplEx link predictor on ``Config.dataset`` (or load one first).

    ``Config.model_name`` selects the model class; ``None`` selects ConvE.
    When the free name ``load`` is truthy, weights are restored from
    ``model_path`` and the test and dev batchers are ranked; otherwise
    ``model.init()`` is called.

    Training then runs for ``epochs`` epochs with Adam, using a loss over
    sampled positive/negative positions per batch row.  The model is saved to
    ``model_path`` after every epoch.  The test batcher is ranked when the
    epoch index is a positive multiple of 100, and once more on the last epoch.

    ``load``, ``model_path`` and ``epochs`` are read from the enclosing module.

    Raises:
        Exception: if ``Config.model_name`` names no known model.
    """
    if Config.process:
        preprocess(Config.dataset, delete_data=True)

    input_keys = ['e1', 'rel', 'rel_eval', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(Config.dataset, keys=input_keys)
    p.load_vocabs()
    vocab = p.state['vocab']

    num_entities = vocab['e1'].num_token

    train_batcher = StreamBatcher(Config.dataset, 'train', Config.batch_size,
                                  randomize=True, keys=input_keys)
    dev_rank_batcher = StreamBatcher(Config.dataset, 'dev_ranking', Config.batch_size,
                                     randomize=False, loader_threads=4, keys=input_keys)
    test_rank_batcher = StreamBatcher(Config.dataset, 'test_ranking', Config.batch_size,
                                      randomize=False, loader_threads=4, keys=input_keys)

    # Resolve the default once: the name is also used to build an evaluation
    # label below, where a ``None`` would break the string concatenation.
    model_name = 'ConvE' if Config.model_name is None else Config.model_name

    if model_name == 'ConvE':
        model = ConvE(vocab['e1'].num_token, vocab['rel'].num_token)
    elif model_name == 'DistMult':
        model = DistMult(vocab['e1'].num_token, vocab['rel'].num_token)
    elif model_name == 'ComplEx':
        model = Complex(vocab['e1'].num_token, vocab['rel'].num_token)
    else:
        log.info('Unknown model: {0}', Config.model_name)
        raise Exception("Unknown model!")

    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary'))

    eta = ETAHook('train', print_every_x_batches=100)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(LossHook('train', print_every_x_batches=100))

    if Config.cuda:
        model.cuda()

    if load:
        model_params = torch.load(model_path)
        print(model)
        # Print every stored tensor with its shape and element count, then the total.
        param_counts = []
        for key, value in model_params.items():
            count = value.numel()
            param_counts.append(count)
            print(key, value.size(), count)
        print(np.array(param_counts).sum())
        model.load_state_dict(model_params)
        model.eval()
        # Evaluation needs no autograd graph; the training path below already
        # evaluates under no_grad.
        with torch.no_grad():
            ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation', epochs, True)
            ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation', epochs, False)
    else:
        model.init()

    # NOTE(review): the collapsed source does not show indentation; everything
    # below is placed at function level, so it also runs after loading -- confirm.
    params = [value.numel() for value in model.parameters()]
    print(params)
    print(np.sum(params))

    opt = torch.optim.Adam(model.parameters(), lr=Config.learning_rate,
                           weight_decay=Config.L2)
    for epoch in range(epochs):
        model.train()
        for str2var in tqdm(train_batcher):
            opt.zero_grad()
            e1 = str2var['e1']
            rel = str2var['rel']
            e2_multi = str2var['e2_multi1_binary'].float()

            pred = model.forward(e1, rel)

            # Accumulate the loss on whatever device the predictions live on,
            # and iterate over the real batch size instead of a fixed 128.
            device = pred.device
            loss = torch.zeros(1, device=device)
            for j in range(e2_multi.shape[0]):
                # NOTE(review): ``[0]`` keeps only the first nonzero index of
                # row j (and raises if the row has none) -- confirm intended.
                position = torch.nonzero(e2_multi[j])[0].to(device)
                num_pos = len(position)
                label = torch.cat([torch.ones(num_pos), torch.zeros(num_pos)]).to(device)
                # Uniformly sampled column indices serve as negatives.
                neg_position = torch.randint(e2_multi.shape[1], (num_pos,)).long().to(device)
                position = torch.cat([position, neg_position])
                loss += model.loss(pred[j, position], label)

            loss.backward()
            opt.step()

            train_batcher.state.loss = loss.cpu()

        print('saving to {0}'.format(model_path))
        torch.save(model.state_dict(), model_path)

        model.eval()
        with torch.no_grad():
            if epoch > 0 and epoch % 100 == 0:
                ranking_and_hits(model, test_rank_batcher, vocab,
                                 Config.dataset + "-" + model_name, epoch, False)
            # NOTE(review): placed as a sibling of the periodic check so the
            # last epoch is always evaluated -- confirm against the original layout.
            if epoch + 1 == epochs:
                ranking_and_hits(model, test_rank_batcher, vocab, Config.dataset, epoch, True)
def main():
    """Train an ``MTKGNN_DistMult`` model on ``Config.dataset`` (or load one first).

    The numerical literals are loaded from
    ``data/<dataset>/literals/numerical_literals.npy`` and min-max scaled per
    column before being handed to the model.  When the free name ``load`` is
    truthy, weights are restored from ``model_path`` and the test and dev
    batchers are ranked; otherwise ``model.init()`` is called.

    Training uses two Adam optimizers, one over ``model.rel_params`` and one
    over ``model.attr_params``.  Each batch takes a relational step followed
    by an attribute step; each epoch then takes four extra attribute-specific
    steps.  The model is saved to ``model_path`` after every epoch and ranked
    on dev and test when the epoch index is a positive multiple of 3.

    ``load``, ``model_path`` and ``epochs`` are read from the enclosing module.
    """
    if Config.process:
        preprocess(Config.dataset, delete_data=True)

    input_keys = ['e1', 'rel', 'e2', 'e2_multi1', 'e2_multi2']
    p = Pipeline(Config.dataset, keys=input_keys)
    p.load_vocabs()
    vocab = p.state['vocab']

    num_entities = vocab['e1'].num_token

    train_batcher = StreamBatcher(Config.dataset, 'train', Config.batch_size,
                                  randomize=True, keys=input_keys)
    dev_rank_batcher = StreamBatcher(Config.dataset, 'dev_ranking', Config.batch_size,
                                     randomize=False, loader_threads=4,
                                     keys=input_keys, is_volatile=True)
    test_rank_batcher = StreamBatcher(Config.dataset, 'test_ranking', Config.batch_size,
                                      randomize=False, loader_threads=4,
                                      keys=input_keys, is_volatile=True)

    # Load literals
    numerical_literals = np.load(
        f'data/{Config.dataset}/literals/numerical_literals.npy')

    # Normalize literals: column-wise min-max scaling, eps guards constant columns.
    max_lit = np.max(numerical_literals, axis=0)
    min_lit = np.min(numerical_literals, axis=0)
    numerical_literals = (numerical_literals - min_lit) / (max_lit - min_lit + 1e-8)

    # Load Multitask models
    model = MTKGNN_DistMult(vocab['e1'].num_token, vocab['rel'].num_token,
                            numerical_literals)

    train_batcher.at_batch_prepared_observers.insert(
        1, TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary'))

    eta = ETAHook('train', print_every_x_batches=100)
    train_batcher.subscribe_to_events(eta)
    train_batcher.subscribe_to_start_of_epoch_event(eta)
    train_batcher.subscribe_to_events(LossHook('train', print_every_x_batches=100))

    if Config.cuda:
        model.cuda()

    if load:
        model_params = torch.load(model_path)
        print(model)
        # Print every stored tensor with its shape and element count, then the total.
        param_counts = []
        for key, value in model_params.items():
            count = value.numel()
            param_counts.append(count)
            print(key, value.size(), count)
        print(np.array(param_counts).sum())
        model.load_state_dict(model_params)
        model.eval()
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
    else:
        model.init()

    # NOTE(review): the collapsed source does not show indentation; everything
    # below is placed at function level, so it also runs after loading -- confirm.
    params = [value.numel() for value in model.parameters()]
    print(params)
    print(np.sum(params))

    opt_rel = torch.optim.Adam(model.rel_params, lr=Config.learning_rate,
                               weight_decay=Config.L2)
    opt_attr = torch.optim.Adam(model.attr_params, lr=Config.learning_rate,
                                weight_decay=Config.L2)

    for epoch in range(epochs):
        model.train()
        for str2var in train_batcher:
            opt_rel.zero_grad()
            opt_attr.zero_grad()

            e1 = str2var['e1']
            rel = str2var['rel']
            e2_multi = str2var['e2_multi1_binary'].float()
            # label smoothing
            e2_multi = ((1.0 - Config.label_smoothing_epsilon) * e2_multi) + (1.0 / e2_multi.size(1))

            # Relational step.
            pred = model.forward(e1, rel)
            loss_rel = model.loss_rel(pred, e2_multi)
            loss_rel.backward()
            opt_rel.step()

            # Attribute step.
            # NOTE(review): gradients are not cleared between the two
            # backward passes; if rel_params and attr_params share tensors,
            # loss_rel's gradient also feeds opt_attr.step() -- confirm intended.
            pred_left, target_left = model.forward_attr(e1, 'left')
            pred_right, target_right = model.forward_attr(e1, 'right')
            loss_attr_left = model.loss_attr(pred_left, target_left)
            loss_attr_right = model.loss_attr(pred_right, target_right)
            loss_attr = loss_attr_left + loss_attr_right
            loss_attr.backward()
            opt_attr.step()

            train_batcher.state.loss = loss_rel + loss_attr

        # Attribute Specific Training
        # NOTE(review): placed at epoch level (after the batch loop) -- confirm.
        for _ in range(4):
            # backward() accumulates into .grad, so clear it before every
            # step; otherwise each update would also re-apply the gradients of
            # the previous steps and of the last training batch.
            opt_attr.zero_grad()
            pred_left, pred_right, target = model.forward_AST()
            loss_AST = model.loss_attr(pred_left, target) + model.loss_attr(pred_right, target)
            loss_AST.backward()
            opt_attr.step()

        print('saving to {0}'.format(model_path))
        torch.save(model.state_dict(), model_path)

        model.eval()
        if epoch > 0 and epoch % 3 == 0:
            ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
            ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
def main():
    """Train a ConvE link predictor on ``dataset_name`` (or load one first).

    When the free name ``load`` is truthy, weights are restored from
    ``model_path`` and the test and dev batchers are ranked; otherwise
    ``model.init()`` is called.  Training then runs for ``epochs`` epochs with
    Adam and label-smoothed targets.  The model is saved to ``model_path`` and
    ranked on dev after every epoch, and ranked on test when the epoch index
    is a positive multiple of 3.

    ``do_process``, ``dataset_name``, ``load``, ``model_path`` and ``epochs``
    are read from the enclosing module.
    """
    if do_process:
        preprocess(dataset_name, delete_data=True)

    input_keys = ['e1', 'rel', 'e2', 'e2_multi1', 'e2_multi2']
    pipeline = Pipeline(dataset_name, keys=input_keys)
    pipeline.load_vocabs()
    vocab = pipeline.state['vocab']

    num_entities = vocab['e1'].num_token

    train_batcher = StreamBatcher(
        dataset_name, 'train', Config.batch_size,
        randomize=True, keys=input_keys)
    dev_rank_batcher = StreamBatcher(
        dataset_name, 'dev_ranking', Config.batch_size,
        randomize=False, loader_threads=4, keys=input_keys, is_volatile=True)
    test_rank_batcher = StreamBatcher(
        dataset_name, 'test_ranking', Config.batch_size,
        randomize=False, loader_threads=4, keys=input_keys, is_volatile=True)

    #model = Complex(vocab['e1'].num_token, vocab['rel'].num_token)
    #model = DistMult(vocab['e1'].num_token, vocab['rel'].num_token)
    model = ConvE(vocab['e1'].num_token, vocab['rel'].num_token)

    multi_target = TargetIdx2MultiTarget(num_entities, 'e2_multi1', 'e2_multi1_binary')
    train_batcher.at_batch_prepared_observers.insert(1, multi_target)

    eta_hook = ETAHook('train', print_every_x_batches=100)
    train_batcher.subscribe_to_events(eta_hook)
    train_batcher.subscribe_to_start_of_epoch_event(eta_hook)
    train_batcher.subscribe_to_events(LossHook('train', print_every_x_batches=100))

    if Config.cuda:
        model.cuda()

    if load:
        state = torch.load(model_path)
        print(model)
        print([(name, tensor.size()) for name, tensor in state.items()])
        model.load_state_dict(state)
        model.eval()
        ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
    else:
        model.init()

    # NOTE(review): the collapsed source does not show indentation; everything
    # below is placed at function level, so it also runs after loading -- confirm.
    optimizer = torch.optim.Adam(
        model.parameters(), lr=Config.learning_rate, weight_decay=Config.L2)

    for epoch in range(epochs):
        model.train()
        for batch in train_batcher:
            optimizer.zero_grad()

            heads = batch['e1']
            relations = batch['rel']
            targets = batch['e2_multi1_binary'].float()
            # label smoothing
            keep = 1.0 - Config.label_smoothing_epsilon
            targets = (keep * targets) + (1.0 / targets.size(1))

            scores = model.forward(heads, relations)
            loss = model.loss(scores, targets)
            loss.backward()
            optimizer.step()

            train_batcher.state.loss = loss

        print('saving to {0}'.format(model_path))
        torch.save(model.state_dict(), model_path)

        model.eval()
        ranking_and_hits(model, dev_rank_batcher, vocab, 'dev_evaluation')
        if epoch > 0 and epoch % 3 == 0:
            ranking_and_hits(model, test_rank_batcher, vocab, 'test_evaluation')