                        dest='version', help='version control',
                        type=str, default="default")
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    __C = config.__C
    args = parse_args()

    cfg_file = "cfgs/{}_model.yml".format(args.model)
    with open(cfg_file, 'r') as f:
        yaml_dict = yaml.load(f, Loader=yaml.FullLoader)

    # CLI arguments take precedence over YAML values (unpacked last).
    args_dict = edict({**yaml_dict, **vars(args)})
    config.add_edit(args_dict, __C)
    config.proc(__C)

    print('Hyper Parameters:')
    config.config_print(__C)
    # __C.check_path()

    if __C.model == "bilinear":
        execution = Trainer(__C)
        execution.run(__C.run_mode)
    else:
        exit()
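# A minimal, self-contained illustration of the merge order above: because
# vars(args) is unpacked last, a flag passed on the command line overrides the
# same key from the YAML file (values here are hypothetical):
#
#   from easydict import EasyDict as edict
#   d = edict({**{'version': 'from_yaml', 'lr': 0.1}, **{'version': 'from_cli'}})
#   assert d.version == 'from_cli' and d.lr == 0.1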
_u = PF.embed(u, vocab_size, embedding_size)
_v = PF.embed(v, vocab_size, embedding_size)
_neg = PF.embed(negative_samples, vocab_size, embedding_size)
_neg = F.transpose(_neg, axes=(0, 2, 1))

loss = loss_function(_u, _v, _neg)

nn.get_parameters()["embed/W"].d = I.UniformInitializer(
    [-0.01, 0.01])(shape=(vocab_size, embedding_size))

solver = RiemannianSgd(lr=0.1)
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[u, v, negative_samples], loss=loss, solver=solver)
trainer.run(train_data_iter, None, epochs=max_epoch)

line_points = [
    ['mustang.n.01', 'odd-toed_ungulate.n.01'],
    ['elk.n.01', 'even-toed_ungulate.n.01'],
    ['even-toed_ungulate.n.01', 'ungulate.n.01'],
    ['squirrel.n.01', 'rodent.n.01'],
    ['beagle.n.01', 'dog.n.01'],
    ['dog.n.01', 'canine.n.02'],
    ['liger.n.01', 'carnivore.n.01'],
    ['bison.n.01', 'even-toed_ungulate.n.01'],
    ['collie.n.01', 'dog.n.01'],
    ['odd-toed_ungulate.n.01', 'ungulate.n.01'],
    ['ungulate.n.01', 'mammal.n.01'],
    ['german_shepherd.n.01', 'dog.n.01'],
    ['border_collie.n.01', 'dog.n.01'],
    ['cat.n.01', 'carnivore.n.01'],
    ['antelope.n.01', 'even-toed_ungulate.n.01'],
    ['domestic_cat.n.01', 'cat.n.01'],
]
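# RiemannianSgd and loss_function are defined elsewhere. A rough numpy sketch
# of the update RiemannianSgd is expected to perform (Nickel & Kiela, 2017):
# rescale the Euclidean gradient by the inverse Poincare-ball metric, take a
# step, then project back into the open unit ball. This is an assumption for
# illustration, not the solver's actual implementation.
import numpy as np

def riemannian_sgd_step(theta, grad, lr=0.1, eps=1e-5):
    # Inverse metric of the Poincare ball: (1 - |theta|^2)^2 / 4.
    sq_norm = np.sum(theta ** 2, axis=-1, keepdims=True)
    theta = theta - lr * ((1.0 - sq_norm) ** 2 / 4.0) * grad
    # Project any point that escaped the ball back onto its boundary.
    norm = np.sqrt(np.sum(theta ** 2, axis=-1, keepdims=True))
    return np.where(norm >= 1.0, theta * (1.0 - eps) / norm, theta)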
x, t, accuracy, loss = build_self_attention_model(train=True)

solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t], loss=loss,
                  metrics={'cross entropy': loss, 'accuracy': accuracy},
                  solver=solver)

# Rebuild the graph each epoch: a training graph for one epoch of updates,
# then an inference graph for evaluation on the dev set.
for epoch in range(max_epoch):
    x, t, accuracy, loss = build_self_attention_model(train=True)
    trainer.update_variables(inputs=[x, t], loss=loss,
                             metrics={'cross entropy': loss,
                                      'accuracy': accuracy})
    trainer.run(train_data_iter, None, epochs=1, verbose=1)

    x, t, accuracy, loss = build_self_attention_model(train=False)
    trainer.update_variables(inputs=[x, t], loss=loss,
                             metrics={'cross entropy': loss,
                                      'accuracy': accuracy})
    trainer.evaluate(dev_data_iter, verbose=1)
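# build_self_attention_model is defined elsewhere; the attention block it is
# assumed to contain looks roughly like this (in the style of Lin et al.,
# 2017). Names, sizes, and the parameter scope are assumptions, not the
# original code.
def self_attention(h, da, r):
    # h: (batch, time, hidden) -> m: (batch, r, hidden)
    with nn.parameter_scope('self_attention'):
        hidden = F.tanh(PF.affine(h, da, base_axis=2, name='Ws1'))
        # Softmax over the time axis gives r attention distributions.
        a = F.softmax(PF.affine(hidden, r, base_axis=2, name='Ws2'), axis=1)
    # Weighted sums of the hidden states under each attention distribution.
    return F.batch_matmul(a, h, transpose_a=True)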
_t_neg = PF.embed(t_neg, vocab_size, embedding_size)  # (batch_size, k, embedding_size)

t_score = F.sigmoid(F.reshape(F.batch_matmul(_t, h), shape=(batch_size, 1)))
t_neg_score = F.sigmoid(
    F.reshape(F.batch_matmul(_t_neg, h), shape=(batch_size, k)))

t_loss = F.binary_cross_entropy(t_score, F.constant(1, shape=(batch_size, 1)))
t_neg_loss = F.binary_cross_entropy(t_neg_score,
                                    F.constant(0, shape=(batch_size, k)))
loss = F.mean(F.sum(t_loss, axis=1) + F.sum(t_neg_loss, axis=1))

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t, t_neg], loss=loss, solver=solver)
trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)

# Dump the learned input-side embeddings in word2vec text format.
with open('vectors.txt', 'w') as f:
    f.write('{} {}\n'.format(vocab_size - 1, embedding_size))
    with nn.parameter_scope('W_in'):
        x = nn.Variable((1, 1))
        y = PF.embed(x, vocab_size, embedding_size)
    for word, i in ptb_dataset.w2i.items():
        x.d = np.array([[i]])
        y.forward()
        str_vec = ' '.join(map(str, list(y.d.copy()[0][0])))
        f.write('{} {}\n'.format(word, str_vec))
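# Since vectors.txt follows the word2vec text format, it can be sanity-checked
# with gensim (assuming gensim is installed; 'year' is just an example query):
from gensim.models import KeyedVectors

vectors = KeyedVectors.load_word2vec_format('vectors.txt')
print(vectors.most_similar('year'))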
    'src_file': src_file,
    'tgt_file': tgt_file,
    'train_size': len(x_train),
    'dev_size': len(x_dev),
    'src_vocab_size': src_vocab_size,
    'tgt_vocab_size': tgt_vocab_size,
    'src_unk': str(src_unk_ratio * 100)[:5] + '%',
    'tgt_unk': str(tgt_unk_ratio * 100)[:5] + '%'
}
status.update(hyperparameters)

save_path = os.getcwd() + '/' + save_dir
os.makedirs(save_path, exist_ok=True)
with open(save_path + '/vocabs.pkl', 'wb') as f:
    pickle.dump(vocabs, f)
with open(save_path + '/hyperparameters.pkl', 'wb') as f:
    pickle.dump(hyperparameters, f)
with open(save_path + '/status.txt', 'w') as f:
    f.write('model: %s\n' % save_dir)
    for k, v in status.items():
        f.write('%s: %s\n' % (k, str(v)))

# print statistics and hyperparameters
print('\n---', save_dir, '---')
for k, v in status.items():
    print('%s: %s' % (k, str(v)))
print()

# train
train_iter = Iterator(x_train, t_train, batch_size, max_epoch)
model = AttnBiSeq2Seq(src_vocab_size, tgt_vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer, save_path)
trainer.report_bleu(x_dev, t_dev, vocabs)
trainer.run(train_iter, eval_interval, max_grad)
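# The pickled artifacts can be reloaded later for inference or resuming;
# a sketch mirroring the save code above:
with open(save_path + '/vocabs.pkl', 'rb') as f:
    vocabs = pickle.load(f)
with open(save_path + '/hyperparameters.pkl', 'rb') as f:
    hyperparameters = pickle.load(f)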
# Optimizer
if configuration.learn_beta:
    param_list.append({
        'params': [
            train_criterion.beta, train_criterion.gamma,
            train_criterion.rel_beta, train_criterion.rel_gamma
        ]
    })
if configuration.optimizer == 'adam':
    optimizer = optim.Adam(param_list, lr=configuration.lr, weight_decay=5e-4)

# Data
train_dataloader, valid_dataloader = get_mapnet_train_dataloader(configuration)

# Trainer
print("Setup trainer...")
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  configuration=configuration,
                  train_criterion=train_criterion,
                  val_criterion=train_criterion,
                  result_criterion=AbsoluteCriterion(),
                  train_dataloader=train_dataloader,
                  val_dataloader=valid_dataloader)
trainer.run()
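# train_criterion is constructed elsewhere; its learnable beta/gamma terms
# suggest an uncertainty-weighted pose loss in the style of Kendall & Cipolla
# (2017). A hypothetical sketch of such a criterion (class name and defaults
# are assumptions):
import torch
from torch import nn

class UncertaintyWeightedPoseLoss(nn.Module):
    def __init__(self, beta0=0.0, gamma0=-3.0, learn_beta=True):
        super().__init__()
        # Learnable log-variance weights for translation and rotation terms.
        self.beta = nn.Parameter(torch.tensor(beta0), requires_grad=learn_beta)
        self.gamma = nn.Parameter(torch.tensor(gamma0), requires_grad=learn_beta)

    def forward(self, t_loss, q_loss):
        # Down-weight each term by its learned uncertainty, plus a
        # regularizer that keeps the uncertainties from growing unboundedly.
        return (torch.exp(-self.beta) * t_loss + self.beta
                + torch.exp(-self.gamma) * q_loss + self.gamma)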
def translate_test(index):
    print('source:')
    print(' '.join([i2w_source[i] for i in test_source[index]][::-1]).strip(' pad'))
    print('target:')
    print(''.join([i2w_target[i] for i in test_target[index]]).strip('pad'))
    print('encoder-decoder output:')
    print(''.join([i2w_target[i] for i in predict(test_source[index])]).strip('pad'))


def translate(sentence):
    sentence = list(map(lambda x: w2i_source[x], sentence.split()))
    sentence += [0] * (sentence_length_source - len(sentence))
    sentence.reverse()
    return ''.join([i2w_target[i] for i in predict(np.array([sentence]))])


x, y, loss = build_model()

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, y], loss=loss,
                  metrics=dict(PPL=np.e**loss), solver=solver)
trainer.run(train_data_iter, dev_data_iter, epochs=5, verbose=1)
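# Example usage after training (index 0 is arbitrary; translate() assumes
# every word of the input sentence exists in the source vocabulary):
translate_test(0)
print(translate('i am a student'))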
    mask = F.sign(t)  # do not predict 'pad'.
    entropy = time_distributed_softmax_cross_entropy(
        y, expand_dims(t, axis=-1)) * mask
    count = F.sum(mask, axis=1)
    loss = F.mean(F.div2(F.sum(entropy, axis=1), count))
    return x, t, loss


# Build once so the parameters exist before the solver collects them.
x, t, loss = build_model()

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

x, t, loss = build_model(train=True)
trainer = Trainer(inputs=[x, t], loss=loss, metrics={'PPL': np.e**loss},
                  solver=solver, save_path='char-cnn-lstmlm')

# One-shot alternative: train for all epochs in a single call.
# trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)

# Otherwise, rebuild the graph each epoch: train for one epoch, then evaluate
# with the inference graph on the validation set.
for epoch in range(max_epoch):
    x, t, loss = build_model(train=True)
    trainer.update_variables(inputs=[x, t], loss=loss,
                             metrics={'PPL': np.e**loss})
    trainer.run(train_data_iter, None, epochs=1, verbose=1)

    x, t, loss = build_model(train=False)
    trainer.update_variables(inputs=[x, t], loss=loss,
                             metrics={'PPL': np.e**loss})
    trainer.evaluate(valid_data_iter, verbose=1)

# nn.load_parameters('char-cnn-lstm_best.h5')
# batch_size = 1
# sentence_length = 1
# x, embeddings = build_model(get_embeddings=True)
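# expand_dims and time_distributed_softmax_cross_entropy are used above but
# not defined in this snippet. Minimal sketches consistent with that usage
# (assumptions, not the original helpers):
def expand_dims(x, axis):
    # Insert a singleton dimension at `axis` (negative axes count from the end).
    if axis < 0:
        axis += len(x.shape) + 1
    return F.reshape(x, x.shape[:axis] + (1,) + x.shape[axis:])


def time_distributed_softmax_cross_entropy(y, t):
    # Softmax cross entropy applied independently at every timestep.
    # y: (batch, time, vocab), t: (batch, time, 1) -> (batch, time)
    ce = F.softmax_cross_entropy(y, t, axis=len(y.shape) - 1)
    return F.reshape(ce, ce.shape[:-1])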