def configuration(cls, plm=None, method='lgesql', table_path='data/tables.json',
                  tables='data/tables.bin', db_dir='data/database'):
    cls.plm, cls.method = plm, method
    cls.grammar = ASDLGrammar.from_filepath(GRAMMAR_FILEPATH)
    cls.trans = TransitionSystem.get_class_by_lang('sql')(cls.grammar)
    cls.tables = pickle.load(open(tables, 'rb')) if type(tables) == str else tables
    cls.evaluator = Evaluator(cls.trans, table_path, db_dir)
    if plm is None:
        cls.word2vec = Word2vecUtils()
        cls.tokenizer = lambda x: x
        cls.word_vocab = Vocab(padding=True, unk=True, boundary=True, default=UNK,
                               filepath='./pretrained_models/glove.42b.300d/vocab.txt',
                               specials=SCHEMA_TYPES)  # word vocab for glove.42B.300d
    else:
        cls.tokenizer = AutoTokenizer.from_pretrained(os.path.join('./pretrained_models', plm))
        cls.word_vocab = cls.tokenizer.get_vocab()
    cls.relation_vocab = Vocab(padding=False, unk=False, boundary=False,
                               iterable=RELATIONS, default=None)
    cls.graph_factory = GraphFactory(cls.method, cls.relation_vocab)

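# Usage sketch (illustrative, not from the original source): `configuration` is written to be
# bound on a container class and stores shared state as class attributes. Assuming the enclosing
# class is named `Example` and a local copy of the PLM exists under ./pretrained_models, a setup
# call might look like the following; the class name and the 'bert-base-uncased' identifier are
# assumptions made for illustration only.
#
#     Example.configuration(plm=None, method='lgesql')    # GloVe word vectors + static word vocab
#     Example.configuration(plm='bert-base-uncased')      # tokenizer and vocab taken from the PLM
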
def process_dataset(processor, dataset, tables, output_path=None, skip_large=False, verbose=False):
    from utils.constants import GRAMMAR_FILEPATH
    grammar = ASDLGrammar.from_filepath(GRAMMAR_FILEPATH)
    trans = TransitionSystem.get_class_by_lang('sql')(grammar)
    processed_dataset = []
    for idx, entry in enumerate(dataset):
        if skip_large and len(tables[entry['db_id']]['column_names']) > 100:
            continue
        if verbose:
            print('*************** Processing %d-th sample **************' % (idx))
        entry = process_example(processor, entry, tables[entry['db_id']], trans, verbose=verbose)
        processed_dataset.append(entry)
    print('In total, process %d samples, skip %d extremely large databases.' %
          (len(processed_dataset), len(dataset) - len(processed_dataset)))
    if output_path is not None:
        # serialize preprocessed dataset
        pickle.dump(processed_dataset, open(output_path, 'wb'))
    return processed_dataset

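# Usage sketch (illustrative assumption, not from the original source): with the Spider-style
# tables already preprocessed into a dict keyed by db_id, a full split can be processed and
# serialized in one call. The file paths and the `processor` object below are placeholders.
#
#     import json, pickle
#     tables = pickle.load(open('data/tables.bin', 'rb'))
#     dataset = json.load(open('data/train.json'))
#     train_split = process_dataset(processor, dataset, tables,
#                                   output_path='data/train.bin', verbose=True)
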
def train_decoder(args):
    train_set = Dataset.from_bin_file(args.train_file)
    dev_set = Dataset.from_bin_file(args.dev_file)
    vocab = pickle.load(open(args.vocab, 'rb'))
    grammar = ASDLGrammar.from_text(open(args.asdl_file).read())
    transition_system = TransitionSystem.get_class_by_lang(args.lang)(grammar)

    model = Reconstructor(args, vocab, transition_system)
    model.train()
    if args.cuda:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    def evaluate_ppl():
        model.eval()
        cum_loss = 0.
        cum_tgt_words = 0.
        for batch in dev_set.batch_iter(args.batch_size):
            loss = -model.score(batch).sum()
            cum_loss += loss.data[0]
            cum_tgt_words += sum(len(e.src_sent) + 1 for e in batch)  # add ending </s>

        ppl = np.exp(cum_loss / cum_tgt_words)
        model.train()
        return ppl

    print('begin training decoder, %d training examples, %d dev examples' %
          (len(train_set), len(dev_set)), file=sys.stderr)
    print('vocab: %s' % repr(vocab), file=sys.stderr)

    epoch = train_iter = 0
    report_loss = report_examples = 0.
    history_dev_scores = []
    num_trial = patience = 0
    while True:
        epoch += 1
        epoch_begin = time.time()

        for batch_examples in train_set.batch_iter(batch_size=args.batch_size, shuffle=True):
            batch_examples = [e for e in batch_examples if len(e.tgt_actions) <= args.decode_max_time_step]
            # batch_examples = [e for e in train_set.examples if e.idx in [10192, 10894, 9706, 4659, 5609, 1442, 5849, 10644, 4592, 1875]]

            train_iter += 1
            optimizer.zero_grad()

            loss = -model.score(batch_examples)
            # print(loss.data)
            loss_val = torch.sum(loss).data[0]
            report_loss += loss_val
            report_examples += len(batch_examples)
            loss = torch.mean(loss)

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm(model.parameters(), args.clip_grad)

            optimizer.step()

            if train_iter % args.log_every == 0:
                print('[Iter %d] encoder loss=%.5f' %
                      (train_iter, report_loss / report_examples), file=sys.stderr)
                report_loss = report_examples = 0.
        print('[Epoch %d] epoch elapsed %ds' % (epoch, time.time() - epoch_begin), file=sys.stderr)
        # model_file = args.save_to + '.iter%d.bin' % train_iter
        # print('save model to [%s]' % model_file, file=sys.stderr)
        # model.save(model_file)

        # perform validation
        print('[Epoch %d] begin validation' % epoch, file=sys.stderr)
        eval_start = time.time()
        # evaluate ppl
        ppl = evaluate_ppl()
        print('[Epoch %d] ppl=%.5f took %ds' % (epoch, ppl, time.time() - eval_start), file=sys.stderr)
        dev_acc = -ppl
        is_better = history_dev_scores == [] or dev_acc > max(history_dev_scores)
        history_dev_scores.append(dev_acc)

        if is_better:
            patience = 0
            model_file = args.save_to + '.bin'
            print('save currently the best model ..', file=sys.stderr)
            print('save model to [%s]' % model_file, file=sys.stderr)
            model.save(model_file)
            # also save the optimizers' state
            torch.save(optimizer.state_dict(), args.save_to + '.optim.bin')
        elif patience < args.patience:
            patience += 1
            print('hit patience %d' % patience, file=sys.stderr)

            if patience == args.patience:
                num_trial += 1
                print('hit #%d trial' % num_trial, file=sys.stderr)
                if num_trial == args.max_num_trial:
                    print('early stop!', file=sys.stderr)
                    exit(0)

                # decay lr, and restore from previously best checkpoint
                lr = optimizer.param_groups[0]['lr'] * args.lr_decay
                print('load previously best model and decay learning rate to %f' % lr, file=sys.stderr)

                # load model
                params = torch.load(args.save_to + '.bin', map_location=lambda storage, loc: storage)
                model.load_state_dict(params['state_dict'])
                if args.cuda:
                    model = model.cuda()

                # load optimizers
                if args.reset_optimizer:
                    print('reset optimizer', file=sys.stderr)
                    optimizer = torch.optim.Adam(model.inference_model.parameters(), lr=lr)
                else:
                    print('restore parameters of the optimizers', file=sys.stderr)
                    optimizer.load_state_dict(torch.load(args.save_to + '.optim.bin'))

                # set new lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                # reset patience
                patience = 0

def train(args):
    """Maximum Likelihood Estimation"""
    grammar = ASDLGrammar.from_text(open(args.asdl_file).read())
    transition_system = TransitionSystem.get_class_by_lang(args.lang)(grammar)

    train_set = Dataset.from_bin_file(args.train_file)
    if args.dev_file:
        dev_set = Dataset.from_bin_file(args.dev_file)
    else:
        dev_set = Dataset(examples=[])

    vocab = pickle.load(open(args.vocab, 'rb'))

    if args.lang == 'wikisql':
        # import additional packages for wikisql dataset
        from model.wikisql.dataset import WikiSqlExample, WikiSqlTable, TableColumn

    parser_cls = get_parser_class(args.lang)
    model = parser_cls(args, vocab, transition_system)
    model.train()
    if args.cuda:
        model.cuda()

    optimizer_cls = eval('torch.optim.%s' % args.optimizer)  # FIXME: this is evil!
    optimizer = optimizer_cls(model.parameters(), lr=args.lr)

    if args.uniform_init:
        print('uniformly initialize parameters [-%f, +%f]' %
              (args.uniform_init, args.uniform_init), file=sys.stderr)
        nn_utils.uniform_init(-args.uniform_init, args.uniform_init, model.parameters())
    elif args.glorot_init:
        print('use glorot initialization', file=sys.stderr)
        nn_utils.glorot_init(model.parameters())

    # load pre-trained word embedding (optional)
    if args.glove_embed_path:
        print('load glove embedding from: %s' % args.glove_embed_path, file=sys.stderr)
        glove_embedding = GloveHelper(args.glove_embed_path)
        glove_embedding.load_to(model.src_embed, vocab.source)

    print('begin training, %d training examples, %d dev examples' %
          (len(train_set), len(dev_set)), file=sys.stderr)
    print('vocab: %s' % repr(vocab), file=sys.stderr)

    epoch = train_iter = 0
    report_loss = report_examples = report_sup_att_loss = 0.
    history_dev_scores = []
    num_trial = patience = 0
    while True:
        epoch += 1
        epoch_begin = time.time()

        for batch_examples in train_set.batch_iter(batch_size=args.batch_size, shuffle=True):
            batch_examples = [e for e in batch_examples if len(e.tgt_actions) <= args.decode_max_time_step]
            train_iter += 1
            optimizer.zero_grad()

            ret_val = model.score(batch_examples)
            loss = -ret_val[0]
            # print(loss.data)
            loss_val = torch.sum(loss).data[0]
            report_loss += loss_val
            report_examples += len(batch_examples)
            loss = torch.mean(loss)

            if args.sup_attention:
                att_probs = ret_val[1]
                if att_probs:
                    sup_att_loss = -torch.log(torch.cat(att_probs)).mean()
                    sup_att_loss_val = sup_att_loss.data[0]
                    report_sup_att_loss += sup_att_loss_val

                    loss += sup_att_loss

            loss.backward()

            # clip gradient
            if args.clip_grad > 0.:
                grad_norm = torch.nn.utils.clip_grad_norm(model.parameters(), args.clip_grad)

            optimizer.step()

            if train_iter % args.log_every == 0:
                log_str = '[Iter %d] encoder loss=%.5f' % (train_iter, report_loss / report_examples)
                if args.sup_attention:
                    log_str += ' supervised attention loss=%.5f' % (report_sup_att_loss / report_examples)
                    report_sup_att_loss = 0.

                print(log_str, file=sys.stderr)
                report_loss = report_examples = 0.
        print('[Epoch %d] epoch elapsed %ds' % (epoch, time.time() - epoch_begin), file=sys.stderr)

        if args.save_all_models:
            model_file = args.save_to + '.iter%d.bin' % train_iter
            print('save model to [%s]' % model_file, file=sys.stderr)
            model.save(model_file)

        # perform validation
        if args.dev_file:
            if epoch % args.valid_every_epoch == 0:
                print('[Epoch %d] begin validation' % epoch, file=sys.stderr)
                eval_start = time.time()
                eval_results = evaluation.evaluate(dev_set.examples, model, args,
                                                   verbose=True, eval_top_pred_only=args.eval_top_pred_only)
                dev_acc = eval_results['accuracy']
                print('[Epoch %d] code generation accuracy=%.5f took %ds' %
                      (epoch, dev_acc, time.time() - eval_start), file=sys.stderr)
                is_better = history_dev_scores == [] or dev_acc > max(history_dev_scores)
                history_dev_scores.append(dev_acc)
        else:
            is_better = True

        if epoch > args.lr_decay_after_epoch:
            lr = optimizer.param_groups[0]['lr'] * args.lr_decay
            print('decay learning rate to %f' % lr, file=sys.stderr)

            # set new lr
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        if is_better:
            patience = 0
            model_file = args.save_to + '.bin'
            print('save the current model ..', file=sys.stderr)
            print('save model to [%s]' % model_file, file=sys.stderr)
            model.save(model_file)
            # also save the optimizers' state
            torch.save(optimizer.state_dict(), args.save_to + '.optim.bin')
        elif patience < args.patience and epoch >= args.lr_decay_after_epoch:
            patience += 1
            print('hit patience %d' % patience, file=sys.stderr)

        if epoch == args.max_epoch:
            print('reached max epoch, stop!', file=sys.stderr)
            exit(0)

        if patience >= args.patience and epoch >= args.lr_decay_after_epoch:
            num_trial += 1
            print('hit #%d trial' % num_trial, file=sys.stderr)
            if num_trial == args.max_num_trial:
                print('early stop!', file=sys.stderr)
                exit(0)

            # decay lr, and restore from previously best checkpoint
            lr = optimizer.param_groups[0]['lr'] * args.lr_decay
            print('load previously best model and decay learning rate to %f' % lr, file=sys.stderr)

            # load model
            params = torch.load(args.save_to + '.bin', map_location=lambda storage, loc: storage)
            model.load_state_dict(params['state_dict'])
            if args.cuda:
                model = model.cuda()

            # load optimizers
            if args.reset_optimizer:
                print('reset optimizer', file=sys.stderr)
                optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            else:
                print('restore parameters of the optimizers', file=sys.stderr)
                optimizer.load_state_dict(torch.load(args.save_to + '.optim.bin'))

            # set new lr
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

            # reset patience
            patience = 0

def train_rerank_feature(args):
    train_set = Dataset.from_bin_file(args.train_file)
    dev_set = Dataset.from_bin_file(args.dev_file)
    vocab = pickle.load(open(args.vocab, 'rb'))

    grammar = ASDLGrammar.from_text(open(args.asdl_file).read())
    transition_system = TransitionSystem.get_class_by_lang(args.lang)(grammar)

    train_paraphrase_model = args.mode == 'train_paraphrase_identifier'

    def _get_feat_class():
        if args.mode == 'train_reconstructor':
            return Reconstructor
        elif args.mode == 'train_paraphrase_identifier':
            return ParaphraseIdentificationModel

    def _filter_hyps(_decode_results):
        for i in range(len(_decode_results)):
            valid_hyps = []
            for hyp in _decode_results[i]:
                try:
                    transition_system.tokenize_code(hyp.code)
                    valid_hyps.append(hyp)
                except:
                    pass

            _decode_results[i] = valid_hyps

    model = _get_feat_class()(args, vocab, transition_system)

    if args.glorot_init:
        print('use glorot initialization', file=sys.stderr)
        nn_utils.glorot_init(model.parameters())

    model.train()
    if args.cuda:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # if training the paraphrase model, also load in decoding results
    if train_paraphrase_model:
        print('load training decode results [%s]' % args.train_decode_file, file=sys.stderr)
        train_decode_results = pickle.load(open(args.train_decode_file, 'rb'))
        _filter_hyps(train_decode_results)
        train_decode_results = {e.idx: hyps for e, hyps in zip(train_set, train_decode_results)}

        print('load dev decode results [%s]' % args.dev_decode_file, file=sys.stderr)
        dev_decode_results = pickle.load(open(args.dev_decode_file, 'rb'))
        _filter_hyps(dev_decode_results)
        dev_decode_results = {e.idx: hyps for e, hyps in zip(dev_set, dev_decode_results)}

    def evaluate_ppl():
        model.eval()
        cum_loss = 0.
        cum_tgt_words = 0.
        for batch in dev_set.batch_iter(args.batch_size):
            loss = -model.score(batch).sum()
            cum_loss += loss.data.item()
            cum_tgt_words += sum(len(e.src_sent) + 1 for e in batch)  # add ending </s>

        ppl = np.exp(cum_loss / cum_tgt_words)
        model.train()
        return ppl

    def evaluate_paraphrase_acc():
        model.eval()
        labels = []
        for batch in dev_set.batch_iter(args.batch_size):
            probs = model.score(batch).exp().data.cpu().numpy()
            for p in probs:
                labels.append(p >= 0.5)

            # get negative examples
            batch_decoding_results = [dev_decode_results[e.idx] for e in batch]
            batch_negative_examples = [get_negative_example(e, _hyps, type='best')
                                       for e, _hyps in zip(batch, batch_decoding_results)]
            batch_negative_examples = list(filter(None, batch_negative_examples))
            probs = model.score(batch_negative_examples).exp().data.cpu().numpy()
            for p in probs:
                labels.append(p < 0.5)

        acc = np.average(labels)
        model.train()
        return acc

    def get_negative_example(_example, _hyps, type='sample'):
        incorrect_hyps = [hyp for hyp in _hyps if not hyp.is_correct]
        if incorrect_hyps:
            incorrect_hyp_scores = [hyp.score for hyp in incorrect_hyps]
            if type in ('best', 'sample'):
                if type == 'best':
                    sample_idx = np.argmax(incorrect_hyp_scores)
                    sampled_hyp = incorrect_hyps[sample_idx]
                else:
                    incorrect_hyp_probs = [np.exp(score) for score in incorrect_hyp_scores]
                    incorrect_hyp_probs = np.array(incorrect_hyp_probs) / sum(incorrect_hyp_probs)
                    sampled_hyp = np.random.choice(incorrect_hyps, size=1, p=incorrect_hyp_probs)
                    sampled_hyp = sampled_hyp[0]

                sample = Example(idx='negative-%s' % _example.idx,
                                 src_sent=_example.src_sent,
                                 tgt_code=sampled_hyp.code,
                                 tgt_actions=None,
                                 tgt_ast=None)
                return sample
            elif type == 'all':
                samples = []
                for i, hyp in enumerate(incorrect_hyps):
                    sample = Example(idx='negative-%s-%d' % (_example.idx, i),
                                     src_sent=_example.src_sent,
                                     tgt_code=hyp.code,
                                     tgt_actions=None,
                                     tgt_ast=None)
                    samples.append(sample)

                return samples
        else:
            return None

    print('begin training decoder, %d training examples, %d dev examples' %
          (len(train_set), len(dev_set)), file=sys.stderr)
    print('vocab: %s' % repr(vocab), file=sys.stderr)

    epoch = train_iter = 0
    report_loss = report_examples = 0.
    history_dev_scores = []
    num_trial = patience = 0
    while True:
        epoch += 1
        epoch_begin = time.time()

        for batch_examples in train_set.batch_iter(batch_size=args.batch_size, shuffle=True):
            batch_examples = [e for e in batch_examples if len(e.tgt_actions) <= args.decode_max_time_step]

            if train_paraphrase_model:
                positive_examples_num = len(batch_examples)
                labels = [0] * len(batch_examples)
                negative_samples = []
                batch_decoding_results = [train_decode_results[e.idx] for e in batch_examples]
                # sample negative examples
                for example, hyps in zip(batch_examples, batch_decoding_results):
                    if hyps:
                        negative_sample = get_negative_example(example, hyps, type=args.negative_sample_type)
                        if negative_sample:
                            if isinstance(negative_sample, Example):
                                negative_samples.append(negative_sample)
                                labels.append(1)
                            else:
                                negative_samples.extend(negative_sample)
                                labels.extend([1] * len(negative_sample))

                batch_examples += negative_samples

            train_iter += 1
            optimizer.zero_grad()

            nll = -model(batch_examples)
            if train_paraphrase_model:
                idx_tensor = Variable(torch.LongTensor(labels).unsqueeze(-1), requires_grad=False)
                if args.cuda:
                    idx_tensor = idx_tensor.cuda()
                loss = torch.gather(nll, 1, idx_tensor)
            else:
                loss = nll
            # print(loss.data)
            loss_val = torch.sum(loss).data.item()
            report_loss += loss_val
            report_examples += len(batch_examples)
            loss = torch.mean(loss)

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)

            optimizer.step()

            if train_iter % args.log_every == 0:
                print('[Iter %d] encoder loss=%.5f' %
                      (train_iter, report_loss / report_examples), file=sys.stderr)
                report_loss = report_examples = 0.
        print('[Epoch %d] epoch elapsed %ds' % (epoch, time.time() - epoch_begin), file=sys.stderr)

        # perform validation
        print('[Epoch %d] begin validation' % epoch, file=sys.stderr)
        eval_start = time.time()
        # evaluate dev_score
        dev_acc = evaluate_paraphrase_acc() if train_paraphrase_model else -evaluate_ppl()
        print('[Epoch %d] dev_score=%.5f took %ds' % (epoch, dev_acc, time.time() - eval_start), file=sys.stderr)
        is_better = history_dev_scores == [] or dev_acc > max(history_dev_scores)
        history_dev_scores.append(dev_acc)

        if is_better:
            patience = 0
            model_file = args.save_to + '.bin'
            print('save currently the best model ..', file=sys.stderr)
            print('save model to [%s]' % model_file, file=sys.stderr)
            model.save(model_file)
            # also save the optimizers' state
            torch.save(optimizer.state_dict(), args.save_to + '.optim.bin')
        elif patience < args.patience:
            patience += 1
            print('hit patience %d' % patience, file=sys.stderr)

            if patience == args.patience:
                num_trial += 1
                print('hit #%d trial' % num_trial, file=sys.stderr)
                if num_trial == args.max_num_trial:
                    print('early stop!', file=sys.stderr)
                    exit(0)

                # decay lr, and restore from previously best checkpoint
                lr = optimizer.param_groups[0]['lr'] * args.lr_decay
                print('load previously best model and decay learning rate to %f' % lr, file=sys.stderr)

                # load model
                params = torch.load(args.save_to + '.bin', map_location=lambda storage, loc: storage)
                model.load_state_dict(params['state_dict'])
                if args.cuda:
                    model = model.cuda()

                # load optimizers
                if args.reset_optimizer:
                    print('reset optimizer', file=sys.stderr)
                    optimizer = torch.optim.Adam(model.inference_model.parameters(), lr=lr)
                else:
                    print('restore parameters of the optimizers', file=sys.stderr)
                    optimizer.load_state_dict(torch.load(args.save_to + '.optim.bin'))

                # set new lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                # reset patience
                patience = 0

def train(args):
    grammar = ASDLGrammar.from_text(open(args.asdl_file).read())
    transition_system = TransitionSystem.get_class_by_lang(args.lang)(grammar)
    train_set = Dataset.from_bin_file(args.train_file)
    dev_set = Dataset.from_bin_file(args.dev_file)
    vocab = pkl.load(open(args.vocab, 'rb'))

    model = Parser(args, vocab, transition_system)
    model.train()
    if args.cuda:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    print('begin training, %d training examples, %d dev examples' %
          (len(train_set), len(dev_set)), file=sys.stderr)
    print('vocab: %s' % repr(vocab), file=sys.stderr)

    epoch = train_iter = 0
    report_loss = report_examples = 0.
    history_dev_scores = []
    num_trial = patience = 0
    while True:
        epoch += 1
        epoch_begin = time.time()

        for batch_examples in train_set.batch_iter(batch_size=args.batch_size, shuffle=True):
            batch_examples = [e for e in batch_examples if len(e.tgt_actions) <= args.decode_max_time_step]
            train_iter += 1
            optimizer.zero_grad()

            loss = -model.score(batch_examples)
            # print(loss.data)
            loss_val = torch.sum(loss).data[0]
            report_loss += loss_val
            report_examples += len(batch_examples)
            loss = torch.mean(loss)

            loss.backward()

            # clip gradient
            if args.clip_grad > 0.:
                grad_norm = torch.nn.utils.clip_grad_norm(model.parameters(), args.clip_grad)

            optimizer.step()

            if train_iter % args.log_every == 0:
                print('[Iter %d] encoder loss=%.5f' %
                      (train_iter, report_loss / report_examples), file=sys.stderr)
                report_loss = report_examples = 0.

        print('[Epoch %d] epoch elapsed %ds' % (epoch, time.time() - epoch_begin), file=sys.stderr)
        # model_file = args.save_to + '.iter%d.bin' % train_iter
        # print('save model to [%s]' % model_file, file=sys.stderr)
        # model.save(model_file)

        # perform validation
        print('[Epoch %d] begin validation' % epoch, file=sys.stderr)
        eval_start = time.time()
        eval_results = evaluation.evaluate(dev_set.examples, model, args, verbose=True)
        dev_acc = eval_results['accuracy']
        print('[Epoch %d] code generation accuracy=%.5f took %ds' %
              (epoch, dev_acc, time.time() - eval_start), file=sys.stderr)
        is_better = history_dev_scores == [] or dev_acc > max(history_dev_scores)
        history_dev_scores.append(dev_acc)

        if is_better:
            patience = 0
            model_file = args.save_to + '.bin'
            print('save currently the best model ..', file=sys.stderr)
            print('save model to [%s]' % model_file, file=sys.stderr)
            model.save(model_file)
            # also save the optimizers' state
            torch.save(optimizer.state_dict(), args.save_to + '.optim.bin')
        elif epoch == args.max_epoch:
            print('reached max epoch, stop!', file=sys.stderr)
            exit(0)
        elif patience < args.patience:
            patience += 1
            print('hit patience %d' % patience, file=sys.stderr)

            if patience == args.patience:
                num_trial += 1
                print('hit #%d trial' % num_trial, file=sys.stderr)
                if num_trial == args.max_num_trial:
                    print('early stop!', file=sys.stderr)
                    exit(0)

                # decay lr, and restore from previously best checkpoint
                lr = optimizer.param_groups[0]['lr'] * args.lr_decay
                print('load previously best model and decay learning rate to %f' % lr, file=sys.stderr)

                # load model
                params = torch.load(args.save_to + '.bin', map_location=lambda storage, loc: storage)
                model.load_state_dict(params['state_dict'])
                if args.cuda:
                    model = model.cuda()

                # load optimizers
                if args.reset_optimizer:
                    print('reset optimizer', file=sys.stderr)
                    optimizer = torch.optim.Adam(model.inference_model.parameters(), lr=lr)
                else:
                    print('restore parameters of the optimizers', file=sys.stderr)
                    optimizer.load_state_dict(torch.load(args.save_to + '.optim.bin'))

                # set new lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                # reset patience
                patience = 0

def train_lstm_lm(args):
    all_data = load_code_dir(args.code_dir)
    np.random.shuffle(all_data)
    train_data = all_data[:-1000]
    dev_data = all_data[-1000:]
    print('train data size: %d, dev data size: %d' % (len(train_data), len(dev_data)), file=sys.stderr)

    vocab = VocabEntry.from_corpus([e['tokens'] for e in train_data],
                                   size=args.vocab_size, freq_cutoff=args.freq_cutoff)
    print('vocab size: %d' % len(vocab), file=sys.stderr)

    grammar = ASDLGrammar.from_text(open('asdl/lang/py/py_asdl.txt').read())
    transition_system = TransitionSystem.get_class_by_lang('python')(grammar)

    model = LSTMPrior(args, vocab, transition_system)
    model.train()
    if args.cuda:
        model.cuda()

    optimizer = torch.optim.Adam(model.parameters())

    def evaluate_ppl():
        model.eval()
        cum_loss = 0.
        cum_tgt_words = 0.
        for examples in nn_utils.batch_iter(dev_data, args.batch_size):
            batch_tokens = [e['tokens'] for e in examples]
            batch = nn_utils.to_input_variable(batch_tokens, vocab, cuda=args.cuda, append_boundary_sym=True)
            loss = model.forward(batch).sum()
            cum_loss += loss.data[0]
            cum_tgt_words += sum(len(tokens) + 1 for tokens in batch_tokens)  # add ending </s>

        ppl = np.exp(cum_loss / cum_tgt_words)
        model.train()
        return ppl

    print('begin training decoder, %d training examples, %d dev examples' %
          (len(train_data), len(dev_data)), file=sys.stderr)

    epoch = num_trial = train_iter = patience = 0
    report_loss = report_examples = 0.
    history_dev_scores = []
    while True:
        epoch += 1
        epoch_begin = time.time()

        for examples in nn_utils.batch_iter(train_data, batch_size=args.batch_size, shuffle=True):
            train_iter += 1
            optimizer.zero_grad()

            batch_tokens = [e['tokens'] for e in examples]
            batch = nn_utils.to_input_variable(batch_tokens, vocab, cuda=args.cuda, append_boundary_sym=True)
            loss = model.forward(batch)
            # print(loss.data)
            loss_val = torch.sum(loss).data[0]
            report_loss += loss_val
            report_examples += len(examples)
            loss = torch.mean(loss)

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm(model.parameters(), args.clip_grad)

            optimizer.step()

            if train_iter % args.log_every == 0:
                print('[Iter %d] encoder loss=%.5f' %
                      (train_iter, report_loss / report_examples), file=sys.stderr)
                report_loss = report_examples = 0.
        print('[Epoch %d] epoch elapsed %ds' % (epoch, time.time() - epoch_begin), file=sys.stderr)
        # model_file = args.save_to + '.iter%d.bin' % train_iter
        # print('save model to [%s]' % model_file, file=sys.stderr)
        # model.save(model_file)

        # perform validation
        print('[Epoch %d] begin validation' % epoch, file=sys.stderr)
        eval_start = time.time()
        # evaluate ppl
        ppl = evaluate_ppl()
        print('[Epoch %d] ppl=%.5f took %ds' % (epoch, ppl, time.time() - eval_start), file=sys.stderr)
        dev_acc = -ppl
        is_better = history_dev_scores == [] or dev_acc > max(history_dev_scores)
        history_dev_scores.append(dev_acc)

        if is_better:
            patience = 0
            model_file = args.save_to + '.bin'
            print('save currently the best model ..', file=sys.stderr)
            print('save model to [%s]' % model_file, file=sys.stderr)
            model.save(model_file)
            # also save the optimizers' state
            torch.save(optimizer.state_dict(), args.save_to + '.optim.bin')
        elif patience < args.patience:
            patience += 1
            print('hit patience %d' % patience, file=sys.stderr)

            if patience == args.patience:
                num_trial += 1
                print('hit #%d trial' % num_trial, file=sys.stderr)
                if num_trial == args.max_num_trial:
                    print('early stop!', file=sys.stderr)
                    exit(0)

                # decay lr, and restore from previously best checkpoint
                lr = optimizer.param_groups[0]['lr'] * args.lr_decay
                print('load previously best model and decay learning rate to %f' % lr, file=sys.stderr)

                # load model
                params = torch.load(args.save_to + '.bin', map_location=lambda storage, loc: storage)
                model.load_state_dict(params['state_dict'])
                if args.cuda:
                    model = model.cuda()

                # load optimizers
                if args.reset_optimizer:
                    print('reset optimizer', file=sys.stderr)
                    optimizer = torch.optim.Adam(model.inference_model.parameters(), lr=lr)
                else:
                    print('restore parameters of the optimizers', file=sys.stderr)
                    optimizer.load_state_dict(torch.load(args.save_to + '.optim.bin'))

                # set new lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                # reset patience
                patience = 0