def setup_train(self, model_file_path=None):
    self.model = Model(model_file_path)

    params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
        list(self.model.reduce_state.parameters())
    initial_lr = config.lr_coverage if config.is_coverage else config.lr
    self.optimizer = Adagrad(
        params,
        lr=initial_lr,
        initial_accumulator_value=config.adagrad_init_acc)

    start_iter, start_loss = 0, 0

    if model_file_path is not None:
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        start_iter = state['iter']
        start_loss = state['current_loss']

        if not config.is_coverage:
            self.optimizer.load_state_dict(state['optimizer'])
            if use_cuda:
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

    return start_iter, start_loss
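# setup_train() above expects a checkpoint dict containing the keys 'iter',
# 'current_loss' and 'optimizer'. A minimal sketch of a matching save routine;
# the function name and the model-weight keys are assumptions, only the three
# keys read above are taken from the snippet itself.
import torch

def save_checkpoint(model, optimizer, running_avg_loss, iteration, path):
    state = {
        'iter': iteration,
        'current_loss': running_avg_loss,
        'optimizer': optimizer.state_dict(),
        # Model weights; the exact keys expected by Model(model_file_path) are assumed.
        'encoder_state_dict': model.encoder.state_dict(),
        'decoder_state_dict': model.decoder.state_dict(),
        'reduce_state_dict': model.reduce_state.state_dict(),
    }
    torch.save(state, path)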
def setup_train(self, model_file_path=None):
    self.model = Model(model_file_path, vectors=self.vectors)

    params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
        list(self.model.reduce_state.parameters())
    pytorch_total_params = sum(p.numel() for p in params if p.requires_grad)
    print(f"Parameters count: {pytorch_total_params}")

    initial_lr = config.lr_coverage if config.is_coverage else config.lr
    # self.optimizer = adagrad.Adagrad(params, lr=initial_lr, initial_accumulator_value=config.adagrad_init_acc)
    self.optimizer = Adam(params, lr=initial_lr)

    start_iter, start_training_loss, start_eval_loss = 0, 0, 0

    if model_file_path is not None:
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        start_iter = state['iter']
        start_training_loss = state['current_train_loss']
        start_eval_loss = state['current_eval_loss']

        if not config.is_coverage:
            self.optimizer.load_state_dict(state['optimizer'])
            if use_cuda:
                # Move the loaded optimizer state tensors onto the GPU.
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if isinstance(v, torch.Tensor):
                            state[k] = v.cuda()

    self.checkpoint = Checkpoint(self.model, self.optimizer, self.model_dir,
                                 start_eval_loss if start_eval_loss != 0 else float("inf"))

    return start_iter, start_training_loss, start_eval_loss
def __init__(self, model_file_path, model_type="stem", load_batcher=True):
    self.vocab = Vocab(config.vocab_path, config.vocab_size)
    if load_batcher:
        self.batcher = Batcher(config.decode_data_path, self.vocab, mode='decode',
                               batch_size=config.beam_size, single_pass=True)
        time.sleep(15)

    self.model = Model(model_file_path, is_eval=True)
    self.model_type = model_type
def __init__(self, model_file_path, is_word_level, is_combined, alpha):
    self.vocab = Vocab(config.vocab_path, config.vocab_size)
    # self.batcher = Batcher(config.eval_data_path, self.vocab, mode='eval',
    #                        batch_size=config.batch_size, single_pass=True)
    self.dataset = DailyMailDataset("val", self.vocab)
    # time.sleep(15)
    model_name = os.path.basename(model_file_path)

    self.is_word_level = is_word_level
    self.is_combined = is_combined
    self.alpha = alpha

    eval_dir = os.path.join(config.log_root, 'eval_%s' % (model_name))
    if not os.path.exists(eval_dir):
        os.mkdir(eval_dir)

    self.model = Model(model_file_path, is_eval=True)
def predict(sentence, model_path):
    if not os.path.exists(model_path):
        raise Exception("Need to provide model path")

    model = Model(model_path)
    checkpoint = torch.load(model_path, map_location=lambda storage, location: storage)
    vocab = checkpoint['vocab']

    target_field = Field(sequential=True, init_token=START_DECODING, eos_token=STOP_DECODING,
                         pad_token=PAD_TOKEN, batch_first=True, include_lengths=True,
                         unk_token=UNKNOWN_TOKEN, lower=True)
    source_field = Field(sequential=True, init_token=SENTENCE_START, eos_token=SENTENCE_END,
                         pad_token=PAD_TOKEN, batch_first=True, include_lengths=True,
                         unk_token=UNKNOWN_TOKEN, lower=True)
    source_field.vocab = vocab
    target_field.vocab = vocab

    data = [{'src': sentence, 'tgt': ''}]
    predict_data = Mydataset(data=data,
                             fields=(('source', source_field), ('target', target_field)))

    setattr(args, 'vectors', source_field.vocab.vectors)
    setattr(args, 'vocab_size', len(source_field.vocab.itos))
    # Embedding dimension taken from the pretrained vectors stored in the vocab.
    setattr(args, 'emb_dim', source_field.vocab.vectors.size(1))
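# A minimal usage sketch for predict(); the input sentence and the checkpoint
# path below are hypothetical placeholders, not values taken from the original code.
if __name__ == "__main__":
    summary = predict(
        "police have arrested two men after a robbery in central london .",
        model_path="./Seq2Seq_model_50000")
    print(summary)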
dest="is_word_level", action="store_true") parser.add_argument("--combined", dest="is_combined", action="store_true") parser.set_defaults(is_word_level=False) parser.set_defaults(is_combined=False) args = parser.parse_args() seq2seq_checkpoint_file = "./Seq2Seq_model_50000" pg_losses = [ ] #pickle.load(open("/home/lgpu0231/dumps_model_12_16_11_08/pg_losses_350.p", 'rb')) run_avg_losses = [ ] #pickle.load(open("/home/lgpu0231/dumps_model_12_16_11_08/run_avg_losses_350.p", 'rb')) # Model model = Model(seq2seq_checkpoint_file) # model = Model() # Load data trainer = TrainSeq2Seq(is_word_level=args.is_word_level, is_combined=args.is_combined) # Prepare for training (e.g. optimizer) iter, running_avg_loss = trainer.setup(model, model_file_path=None) # GENERATOR MLE TRAINING - Pretrain print('Starting Generator MLE Training...') #trainer.train_nll(MLE_TRAIN_EPOCHS, iter, running_avg_loss) # ADVERSARIAL TRAINING print('\nStarting PG Training...') trainer.train_pg(PG_TRAIN_EPOCHS, iter, running_avg_loss, pg_losses,
def train():
    target_field = Field(sequential=True, init_token=START_DECODING, eos_token=STOP_DECODING,
                         pad_token=PAD_TOKEN, batch_first=True, include_lengths=True,
                         unk_token=UNKNOWN_TOKEN, lower=True)
    source_field = Field(sequential=True, init_token=SENTENCE_START, eos_token=SENTENCE_END,
                         pad_token=PAD_TOKEN, batch_first=True, include_lengths=True,
                         unk_token=UNKNOWN_TOKEN, lower=True)

    train_path = '../data/incar_alexa/train_public.pickle'
    dev_path = '../data/incar_alexa/dev_public.pickle'
    test_path = '../data/incar_alexa/test_public.pickle'
    path = '../data/cnn_stories_tokenized'
    summary_writer = SummaryWriter(config.summary_path)

    train_src, train_tgt, train_id = load_data(train_path)
    dev_src, dev_tgt, dev_id = load_data(dev_path)
    test_src, test_tgt, test_id = load_data(test_path)

    # train_data = prepare_data_cnn(path)
    # # print(train_data[0])
    # train_src = [dt['src'] for dt in train_data]
    # train_tgt = [dt['tgt'] for dt in train_data]
    # train_id = [dt['id'] for dt in train_data]
    # train_src, test_src, train_tgt, test_tgt = train_test_split(
    #     train_src, train_tgt, test_size=0.15, random_state=123)
    # train_id, test_id = train_test_split(
    #     train_id, test_size=0.15, random_state=123)
    # # print(f"{len(train_src)}, {len(train_tgt)}")
    # train_src, dev_src, train_tgt, dev_tgt = train_test_split(
    #     train_src, train_tgt, test_size=0.15, random_state=123)
    # train_id, dev_id = train_test_split(
    #     train_id, test_size=0.15, random_state=123)

    # print(source_field.preprocess(train_src[0]))
    # exit()

    train_src_preprocessed = [source_field.preprocess(x) for x in train_src]
    dev_src_preprocessed = [source_field.preprocess(x) for x in dev_src]
    test_src_preprocessed = [source_field.preprocess(x) for x in test_src]

    train_tgt_preprocessed = [target_field.preprocess(x) for x in train_tgt]
    dev_tgt_preprocessed = [target_field.preprocess(x) for x in dev_tgt]
    test_tgt_preprocessed = [target_field.preprocess(x) for x in test_tgt]
    # train_src_preprocessed = source_field.apply(lambda x: source_field.preprocess(x))

    vectors = Vectors(
        name='/home/binhna/Downloads/shared_resources/cc.en.300.vec',
        cache='/home/binhna/Downloads/shared_resources/')

    source_field.build_vocab([
        train_src_preprocessed, dev_src_preprocessed,
        train_tgt_preprocessed, dev_tgt_preprocessed
    ], vectors=vectors)
    target_field.build_vocab([
        train_src_preprocessed, dev_src_preprocessed,
        train_tgt_preprocessed, dev_tgt_preprocessed
    ], vectors=vectors)

    train_data = [{
        'src': src,
        'tgt': tgt,
        'id': id
    } for src, tgt, id in zip(train_src, train_tgt, train_id)]
    train_data = Mydataset(data=train_data,
                           fields=(('source', source_field), ('target', target_field)))
    dev_data = [{
        'src': src,
        'tgt': tgt,
        'id': id
    } for src, tgt, id in zip(dev_src, dev_tgt, dev_id)]
    # print(dev_data[0])
    dev_data = Mydataset(data=dev_data,
                         fields=(('source', source_field), ('target', target_field)))
    test_data = [{
        'src': src,
        'tgt': tgt,
        'id': id
    } for src, tgt, id in zip(test_src, test_tgt, test_id)]
    test_data = Mydataset(data=test_data,
                          fields=(('source', source_field), ('target', target_field)))
    # print(train_data[10].source)
    # print(train_data[10].target)
    # print(len(target_field.vocab))

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    train_iter, test_iter, dev_iter = BucketIterator.splits(
        datasets=(train_data, test_data, dev_data),
        batch_sizes=(config.batch_size, config.batch_size, config.batch_size),
        device=device,
        sort_key=lambda x: len(x.source),
        sort_within_batch=True)

    args = ARGS()
    setattr(args, 'vectors', source_field.vocab.vectors)
    setattr(args, 'vocab_size', len(source_field.vocab.itos))
    setattr(args, 'emb_dim', vectors.dim)

    model = Model(args)
    params = list(model.encoder.parameters()) + list(
        model.decoder.parameters()) + list(model.reduce_state.parameters())
    initial_lr = config.lr_coverage if config.is_coverage else config.lr
    optimizer = Adagrad(params,
                        lr=initial_lr,
                        initial_accumulator_value=config.adagrad_init_acc)

    iter, running_avg_loss = 0, 0
    start = time.time()
    for epoch in range(500):
        print(f"Epoch: {epoch+1}")
        for i, batch in tqdm(enumerate(train_iter), total=len(train_iter)):
            # print(batch.source[0].size())
            # exit()
            batch_size = batch.batch_size

            # encoder part
            enc_padding_mask = get_mask(batch.source, device)
            enc_batch = batch.source[0]
            enc_lens = batch.source[1]
            encoder_outputs, encoder_feature, encoder_hidden = model.encoder(
                enc_batch, enc_lens)
            s_t_1 = model.reduce_state(encoder_hidden)
            coverage = Variable(torch.zeros(batch.source[0].size())).to(device)
            c_t_1 = Variable(torch.zeros(
                (batch_size, 2 * config.hidden_dim))).to(device)
            extra_zeros, enc_batch_extend_vocab, max_art_oovs = get_extra_features(
                batch.source[0], source_field.vocab)
            extra_zeros = extra_zeros.to(device)
            enc_batch_extend_vocab = enc_batch_extend_vocab.to(device)

            # decoder part
            dec_batch = batch.target[0][:, :-1]
            # print(dec_batch.size())
            # Gold tokens are the decoder inputs shifted one step ahead.
            target_batch = batch.target[0][:, 1:]
            dec_lens_var = batch.target[1]
            dec_padding_mask = get_mask(batch.target, device)
            max_dec_len = max(dec_lens_var)

            optimizer.zero_grad()
            step_losses = []
            for di in range(min(max_dec_len, config.max_dec_steps) - 1):
                y_t_1 = dec_batch[:, di]  # Teacher forcing
                final_dist, s_t_1, c_t_1, attn_dist, p_gen, next_coverage = model.decoder(
                    y_t_1, s_t_1, encoder_outputs, encoder_feature,
                    enc_padding_mask, c_t_1, extra_zeros,
                    enc_batch_extend_vocab, coverage, di)
                target = target_batch[:, di]
                gold_probs = torch.gather(final_dist, 1, target.unsqueeze(1)).squeeze()
                step_loss = -torch.log(gold_probs + config.eps)
                if config.is_coverage:
                    step_coverage_loss = torch.sum(
                        torch.min(attn_dist, coverage), 1)
                    step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
                    coverage = next_coverage
                step_mask = dec_padding_mask[:, di]
                step_loss = step_loss * step_mask
                step_losses.append(step_loss)

            sum_losses = torch.sum(torch.stack(step_losses, 1), 1)
            batch_avg_loss = sum_losses / dec_lens_var
            loss = torch.mean(batch_avg_loss)

            loss.backward()

            norm = clip_grad_norm_(model.encoder.parameters(), config.max_grad_norm)
            clip_grad_norm_(model.decoder.parameters(), config.max_grad_norm)
            clip_grad_norm_(model.reduce_state.parameters(), config.max_grad_norm)

            optimizer.step()

            running_avg_loss = calc_running_avg_loss(loss.item(), running_avg_loss,
                                                     summary_writer, iter)
            iter += 1
            summary_writer.flush()
            # print_interval = 10
            # if iter % print_interval == 0:
            #     print(f'steps {iter}, batch number: {i} with {time.time() - start} seconds, loss: {loss}')
            #     start = time.time()
            if iter % 300 == 0:
                save_model(model, optimizer, running_avg_loss, iter, config.model_dir)
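# The training loop above relies on a get_mask helper that is not shown in this
# snippet. A minimal sketch of one plausible implementation, assuming it receives
# the (padded_tensor, lengths) tuple produced by Field(include_lengths=True) and
# returns a float mask with 1.0 for real tokens and 0.0 for padding.
import torch

def get_mask(field_batch, device):
    padded, lengths = field_batch
    batch_size, max_len = padded.size()
    # Compare each position index against the sequence length to mark real tokens.
    positions = torch.arange(max_len, device=device).unsqueeze(0).expand(batch_size, max_len)
    mask = (positions < lengths.to(device).unsqueeze(1)).float()
    return mask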
    dev_data = Mydataset(data=dev_data,
                         fields=(('source', source_field), ('target', target_field)))
    test_data = [{
        'src': src,
        'tgt': tgt,
        'id': id
    } for src, tgt, id in zip(test_src, test_tgt, test_id)]
    test_data = Mydataset(data=test_data,
                          fields=(('source', source_field), ('target', target_field)))

    setattr(args, 'vectors', source_field.vocab.vectors)
    setattr(args, 'vocab_size', len(source_field.vocab.itos))
    setattr(args, 'emb_dim', vectors.dim)

    model = Model(args)
    trainer = Trainer(model=model,
                      args=args,
                      train_dataset=train_data,
                      eval_dataset=dev_data,
                      test_dataset=test_data,
                      vocab=source_field.vocab,
                      is_train=True)
    trainer.train()

    # for name in ['train', 'dev', 'test']:
    #     process_incar_data(f'../data/incar_alexa/{name}_public.json')
    # vocabs = read_vocabs('../data/finished_files/vocab')
    # print(len(vocabs))