def run(): USE_CUDA = torch.cuda.is_available() config_path = os.path.join("experiments", FLAGS.config) if not os.path.exists(config_path): raise FileNotFoundError with open(config_path, "r") as f: config = json.load(f) config["gpu"] = torch.cuda.is_available() dataset = ToyDataset(5, 15) eval_dataset = ToyDataset(5, 15, type='eval') BATCHSIZE = 30 train_loader = data.DataLoader(dataset, batch_size=BATCHSIZE, shuffle=False, collate_fn=pad_collate, drop_last=True) eval_loader = data.DataLoader(eval_dataset, batch_size=BATCHSIZE, shuffle=False, collate_fn=pad_collate, drop_last=True) config["batch_size"] = BATCHSIZE # Models model = Seq2Seq(config) if USE_CUDA: model = model.cuda() # Optimizer optimizer = torch.optim.Adam(model.parameters(), lr=config.get("learning_rate", .001)) print("=" * 60) print(model) print("=" * 60) for k, v in sorted(config.items(), key=lambda i: i[0]): print(" (" + k + ") : " + str(v)) print() print("=" * 60) print("\nInitializing weights...") for name, param in model.named_parameters(): if 'bias' in name: torch.nn.init.constant_(param, 0.0) elif 'weight' in name: torch.nn.init.xavier_normal_(param) for epoch in range(FLAGS.epochs): run_state = (epoch, FLAGS.epochs, FLAGS.train_size) # Train needs to return model and optimizer, otherwise the model keeps restarting from zero at every epoch # print("My test: ", model('abcd')) model, optimizer = train(model, optimizer, train_loader, run_state) evaluate(model, eval_loader)
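# The DataLoaders above rely on a `pad_collate` helper that is not defined in
# this snippet. A minimal sketch, assuming each dataset item is a (source,
# target) pair of 1-D LongTensors and that index 0 is the padding id; the real
# collate function may return a different tuple layout:
import torch
from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    srcs, tgts = zip(*batch)
    src_lens = torch.tensor([len(s) for s in srcs])
    src_pad = pad_sequence(srcs, batch_first=True, padding_value=0)
    tgt_pad = pad_sequence(tgts, batch_first=True, padding_value=0)
    return src_pad, src_lens, tgt_pad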
def train(vocabs, batch_gen, train_params, model_params): word2int, int2word = vocabs num_epoch = train_params['num_epoch'] learn_rate = train_params['learn_rate'] clip = train_params['clip'] eval_every = train_params['eval_every'] train_tf_ratio = train_params['train_tf_ratio'] val_tf_ratio = train_params['val_tf_ratio'] net = Seq2Seq(vocabs=vocabs, device=device, **model_params).to(device) net.train() opt = optim.Adam(net.parameters(), lr=learn_rate) weights = calc_class_weights(batch_gen.data_dict, batch_gen.label_dict) criterion = nn.CrossEntropyLoss(weight=weights, ignore_index=word2int['<pad>']) print('Training is starting ...') train_loss_list = [] val_loss_list = [] for epoch in range(num_epoch): running_loss = 0 for idx, (x_cap, y_cap) in enumerate(batch_gen.generate('train')): print('\rtrain:{}/{}'.format(idx, batch_gen.num_iter('train')), flush=True, end='') x_cap, y_cap = x_cap.to(device), y_cap.to(device) opt.zero_grad() output = net(x_cap, y_cap, train_tf_ratio) loss = criterion(output.view(-1, output.size(2)), y_cap.view(-1).long()) loss.backward() nn.utils.clip_grad_norm_(net.parameters(), clip) opt.step() running_loss += loss.item() if (idx+1) % eval_every == 0: print('\n') val_loss = evaluate(net, word2int, batch_gen, weights, val_tf_ratio) print("\nEpoch: {}/{}...".format(epoch + 1, num_epoch), "Step: {}...".format(idx), "Loss: {:.4f}...".format(running_loss / idx), "Val Loss: {:.4f}\n".format(val_loss)) print('\nCreating sample captions') sample(net, vocabs, generator=batch_gen.generate('validation')) print('\n') train_loss_list.append(running_loss / idx) val_loss_list.append(val_loss) loss_file = open('results/losses.pkl', 'wb') model_file = open('results/seq2seq.pkl', 'wb') pickle.dump([train_loss_list, val_loss_list], loss_file) pickle.dump(net, model_file) print('Training finished, saving the model') model_file = open('seq2seq.pkl', 'wb') pickle.dump(net, model_file)
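# `calc_class_weights` is called above but not shown. A minimal inverse-frequency
# sketch, assuming `label_dict` maps each vocabulary id to its count in the
# training captions (the real helper may compute the weights differently):
import torch

def calc_class_weights(data_dict, label_dict):
    counts = torch.tensor([max(label_dict.get(i, 0), 1) for i in range(len(label_dict))],
                          dtype=torch.float)
    # rarer classes receive proportionally larger weights
    return counts.sum() / (len(counts) * counts)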
def evaluate():
    reader = PWKPReader()
    vocab = Vocabulary.from_files(vocab_dir)
    iterator = BasicIterator(batch_size=opt.batch_size)
    iterator.index_with(vocab)
    model = Seq2Seq(emb_size=opt.emb_size,
                    hidden_size=opt.hidden_size,
                    enc_layers=opt.enc_layers,
                    dec_layers=opt.dec_layers,
                    dropout=opt.dropout,
                    bidirectional=opt.bidirectional,
                    beam_size=opt.beam_size,
                    label_smoothing=opt.label_smoothing,
                    vocab=vocab)
    model = model.cuda(opt.gpu)
    model_state = torch.load(opt.restore, map_location=util.device_mapping(-1))
    model.load_state_dict(model_state)
    predictor = Predictor(iterator=iterator,
                          max_decoding_step=opt.max_step,
                          vocab=vocab,
                          reader=reader,
                          data_path=test_path,
                          log_dir=save_dir,
                          map_path=ner_path,
                          cuda_device=opt.gpu)
    predictor.evaluate(model)
def instantiate_model(model_name, vocab_size, embed_dim, hidden_dim, lr,
                      bidirectional_encoder, max_encoder_len, max_decoder_len,
                      eos_token, device=DEVICE, decoder_num_layers=2,
                      dropout_rate=0.5, embedding_weights=None):
    attention = None
    if model_name == SEQ2SEQ:
        model = Seq2Seq(vocab_size, embed_dim, hidden_dim,
                        max_encoder_len, max_decoder_len, eos_token,
                        bidirectional_encoder=bidirectional_encoder,
                        num_decoder_layers=decoder_num_layers,
                        dropout_rate=dropout_rate,
                        embedding_weights=embedding_weights)
        attention = Attention(hidden_dim, embed_dim, max_encoder_len)
    else:
        raise ValueError('wrong value for model_name')
    print('model created')

    model.to(device)
    if attention is not None:
        attention.to(device)
    print('model moved to device: ', device)

    optimizer = Adam(model.parameters(), lr=lr)
    print('optimizer created')

    loss_function = CrossEntropyLoss(ignore_index=0, reduction='mean')
    print('loss function created')

    return model, attention, optimizer, loss_function
def test(test_loader, modelID, showAttn=True):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP, FLIP).cuda()
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).cuda()
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda()

    model_file = 'save_weights/seq2seq-' + str(modelID) + '.model'
    pretrain_dict = torch.load(model_file)
    seq2seq_dict = seq2seq.state_dict()
    # keep only the pretrained parameters that exist in the current model
    pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in seq2seq_dict}
    seq2seq_dict.update(pretrain_dict)
    seq2seq.load_state_dict(seq2seq_dict)  # load
    print('Loading ' + model_file)

    seq2seq.eval()
    total_loss_t = 0
    start_t = time.time()
    with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True)
        for num, (test_index, test_in, test_in_len, test_out,
                  test_domain) in enumerate(test_loader):
            lambd = LAMBD
            test_in, test_out = test_in.cuda(), test_out.cuda()
            test_domain = test_domain.cuda()
            output_t, attn_weights_t, out_domain_t = seq2seq(
                test_in, test_out, test_in_len, lambd,
                teacher_rate=False, train=False)
            batch_count_n = writePredict(modelID, test_index, output_t, 'test')
            test_label = test_out.permute(1, 0)[1:].contiguous().view(-1)
            if LABEL_SMOOTH:
                loss_t = crit(log_softmax(output_t.view(-1, vocab_size)), test_label)
            else:
                loss_t = F.cross_entropy(output_t.view(-1, vocab_size),
                                         test_label,
                                         ignore_index=tokens['PAD_TOKEN'])
            total_loss_t += loss_t.item()  # replaces the deprecated loss_t.data[0]

            if showAttn:
                global_index_t = 0
                for t_idx, t_in in zip(test_index, test_in):
                    visualizeAttn(t_in.detach()[0], test_in_len[0],
                                  [j[global_index_t] for j in attn_weights_t],
                                  modelID, batch_count_n[global_index_t],
                                  'test_' + t_idx.split(',')[0])
                    global_index_t += 1

    total_loss_t /= (num + 1)
    writeLoss(total_loss_t, 'test')
    print('    TEST loss=%.3f, time=%.3f' % (total_loss_t, time.time() - start_t))
def __init__( self, dataloader, params, save_model_every=1, # Every Number of epochs to save after print_every=1000, # Every Number of batches to print after dev_loader=None, #test_loader=None, vocab=None, saver=None, resume_training=False, resume_epoch=None): self.save_model_every = save_model_every self.print_every = print_every self.params = params self.vocab = vocab self.model_name = params[C.MODEL_NAME] self.start_epoch = 0 self.resume_training = resume_training self.lr = None # Data Loaders self.dataloader = dataloader self.dev_loader = dev_loader #self.test_loader = test_loader # Saver and Logger self.saver = saver self.logger = self.saver.logger # Model self.model = Seq2Seq( self.vocab.get_vocab_size(), hsizes(params, self.model_name), params, ) if self.dataloader else None self.logger.log('MODEL : %s' % self.model) self.logger.log('PARAMS: %s' % self.params) # Optimizer and loss metrics if self.resume_training: self.optimizer, self.metrics = self.saver.load_model_and_state( resume_epoch, self.model) self.start_epoch = resume_epoch + 1 else: self.optimizer = None self.metrics = TrainerMetrics(self.logger) self.loss = Loss() if USE_CUDA: if self.model: self.model = self.model.cuda()
def test(test_loader, modelID, showAttn=True): encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP, FLIP).to(device) decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention, TRADEOFF_CONTEXT_EMBED).to(device) seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).to(device) model_file = 'save_weights/seq2seq-' + str(modelID) + '.model' print('Loading ' + model_file) seq2seq.load_state_dict(torch.load(model_file)) #load seq2seq.eval() total_loss_t = 0 start_t = time.time() with torch.no_grad(): for num, (test_index, test_in, test_in_len, test_out) in enumerate(test_loader): #test_in = test_in.unsqueeze(1) test_in, test_out = test_in.to(device), test_out.to(device) if test_in.requires_grad or test_out.requires_grad: print( 'ERROR! test_in, test_out should have requires_grad=False') output_t, attn_weights_t = seq2seq(test_in, test_out, test_in_len, teacher_rate=False, train=False) batch_count_n = writePredict(modelID, test_index, output_t, 'test') test_label = test_out.permute(1, 0)[1:].reshape(-1) #loss_t = F.cross_entropy(output_t.view(-1, vocab_size), # test_label, ignore_index=tokens['PAD_TOKEN']) #loss_t = loss_label_smoothing(output_t.view(-1, vocab_size), test_label) if LABEL_SMOOTH: loss_t = crit(log_softmax(output_t.reshape(-1, vocab_size)), test_label) else: loss_t = F.cross_entropy(output_t.reshape(-1, vocab_size), test_label, ignore_index=tokens['PAD_TOKEN']) total_loss_t += loss_t.item() if showAttn: global_index_t = 0 for t_idx, t_in in zip(test_index, test_in): visualizeAttn(t_in.detach()[0], test_in_len[0], [j[global_index_t] for j in attn_weights_t], modelID, batch_count_n[global_index_t], 'test_' + t_idx.split(',')[0]) global_index_t += 1 total_loss_t /= (num + 1) writeLoss(total_loss_t, 'test') print(' TEST loss=%.3f, time=%.3f' % (total_loss_t, time.time() - start_t))
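# The label-smoothing criterion `crit` used in the two test() functions above is
# not defined in these snippets. A common construction, sketched here as an
# assumption (it consumes log-probabilities and ignores the PAD index), is a
# KL-divergence against a smoothed one-hot distribution:
import torch
import torch.nn as nn

class LabelSmoothingCrit(nn.Module):
    def __init__(self, vocab_size, pad_idx, smoothing=0.1):
        super().__init__()
        self.kl = nn.KLDivLoss(reduction="sum")
        self.vocab_size = vocab_size
        self.pad_idx = pad_idx
        self.smoothing = smoothing

    def forward(self, log_probs, target):
        # log_probs: (N, V) log-softmax outputs; target: (N,) class indices
        true_dist = torch.full_like(log_probs, self.smoothing / (self.vocab_size - 2))
        true_dist.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        true_dist[:, self.pad_idx] = 0
        mask = target != self.pad_idx
        return self.kl(log_probs[mask], true_dist[mask]) / mask.sum().clamp(min=1)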
def train(dataset, params): batches = list( dataset.get_batch(params.batch_size, params.src_max_length, params.tgt_max_length)) n_batches = (dataset.total_pairs - 1) // params.batch_size + 1 model = Seq2Seq(params, dataset.vocab, dataset.SPECIAL_TOKENS) # define the model model = model.to(device) optimizer = optim.Adam(model.parameters(), lr=params.lr) # use ADAM optimizer for epoch_count in range(1, 1 + params.n_epoch): epoch_loss = 0. prog_bar = tqdm(range(1, n_batches + 1), desc='Epoch %d' % epoch_count) # track the progress model.train() for batch_count in prog_bar: optimizer.zero_grad() batch = batches[batch_count - 1] source_tensor, target_tensor = batch source_tensor = source_tensor.to(device) target_tensor = target_tensor.to(device) # calculate output and losses output_tokens, batch_loss = model(source_tensor, target_tensor) # backward propagation batch_loss.backward() optimizer.step() batch_loss_value = batch_loss.item() epoch_loss += batch_loss_value epoch_avg_loss = epoch_loss / batch_count if batch_count % 100 == 0: prog_bar.set_postfix(loss='%g' % epoch_avg_loss) print("\n") print("Example Article:\n") print("{}\n".format(" ".join( [dataset.vocab[i] for i in source_tensor[:, 0]]))) print("Example Summary:\n") print("{}\n".format(" ".join( [dataset.vocab[i] for i in target_tensor[:, 0]]))) print("Output Summmary:\n") print("{}\n".format(" ".join( [dataset.vocab[i] for i in output_tokens[:, 0]]))) # save model filename = "{}.{}.pt".format(params.model_path_prefix, epoch_count) torch.save(model.state_dict(), filename)
def main():
    mkdirs(os.path.join('experiments', 'maskmle', args.model_size))
    train_loader, test_loader, valid_loader, vocabulary_size = create_ptb_loader(
        args.data_dir, args.batch_size, args.seq_len)

    # Instantiate and init the model, and move it to the GPU
    model = Seq2Seq(vocabulary_size, model_config)
    if args.pretrained:
        model.load_pretrained_weights(
            os.path.join('experiments', 'lm', args.model_size, 'model_best.pth.tar'))
    else:
        print('NO PRETRAINED LANGUAGE MODEL!!')
        return
    if CUDA_AVAIL:
        model = model.cuda()

    criterion = torch.nn.NLLLoss()

    # Define optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=model_config.learning_rate)
    lr = model_config.learning_rate
    best_val_loss = np.inf

    for e in tqdm(range(model_config.max_max_epoch), desc='Epoch'):
        model = train(train_loader, model, criterion, optimizer)
        val_loss = eval(valid_loader, model, criterion)
        state = {
            'arch': "RnnLM",
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            save_checkpoint(state, folder=args.model_size, is_best=True)
        else:
            lr /= model_config.lr_decay
            optimizer = torch.optim.SGD(model.parameters(), lr=lr)
            save_checkpoint(state, folder=args.model_size)

        # Test
        test_loss = eval(test_loader, model, criterion)

        # Report
        msg = 'Epoch %d: \tValid loss=%.4f \tTest loss=%.4f \tTest perplexity=%.1f' % (
            e + 1, val_loss, test_loss, np.exp(test_loss))
        tqdm.write(msg)
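# `save_checkpoint` is assumed to write the state dict to disk and keep a copy
# of the best model under the experiments/maskmle layout created above; a
# minimal sketch (filenames are assumptions, except model_best.pth.tar which
# the pretraining path above also uses):
import os
import shutil
import torch

def save_checkpoint(state, folder, is_best=False, filename="checkpoint.pth.tar"):
    path = os.path.join("experiments", "maskmle", folder)
    os.makedirs(path, exist_ok=True)
    ckpt_path = os.path.join(path, filename)
    torch.save(state, ckpt_path)
    if is_best:
        shutil.copyfile(ckpt_path, os.path.join(path, "model_best.pth.tar"))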
def make_gated_dss(config, device):
    INPUT_DIM = src_vocab_length()
    OUTPUT_DIM = trg_vocab_length()
    enc = GatedDSSEncoder(INPUT_DIM, config.hid_dim, config.enc_layers,
                          config.enc_dropout, device)
    dec = GatedDSSDecoder(OUTPUT_DIM, config.hid_dim, config.dec_layers,
                          config.dec_dropout, device)
    return Seq2Seq(enc, dec, device, decode_mask_type="sequence").to(device)
def make_dss_enc_transformer_dec(config, device):
    INPUT_DIM = src_vocab_length()
    OUTPUT_DIM = trg_vocab_length()
    enc = DSSEncoder(INPUT_DIM, config.hid_dim, config.enc_layers,
                     config.enc_dropout, device)
    dec = TransformerDecoder(OUTPUT_DIM, config.hid_dim, config.dec_layers,
                             config.dec_heads, config.dec_pf_dim,
                             config.dec_dropout, device)
    return Seq2Seq(enc, dec, device).to(device)
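# Hypothetical usage of the two factory functions above. The config fields are
# the ones the factories read; the values and the SimpleNamespace container are
# illustrative, not taken from the original project:
from types import SimpleNamespace
import torch

config = SimpleNamespace(hid_dim=256,
                         enc_layers=4, enc_dropout=0.1,
                         dec_layers=4, dec_heads=8,
                         dec_pf_dim=512, dec_dropout=0.1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = make_dss_enc_transformer_dec(config, device)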
def predict():
    outputter = tf.gfile.GFile(FLAGS.output_model_path + "/" + FLAGS.result_filename,
                               mode="w")
    predict_mode = (tf.contrib.learn.ModeKeys.INFER
                    if FLAGS.mode == 'predict' else tf.contrib.learn.ModeKeys.EVAL)
    model = Seq2Seq()
    if predict_mode == tf.contrib.learn.ModeKeys.INFER:
        if FLAGS.use_mstf_ops:
            pred_pipe = InputPipe(FLAGS.input_validation_data_path,
                                  FLAGS.eval_batch_size, 1, FLAGS.test_fields, "", True)
        else:
            pred_pipe = InputPipe(FLAGS.input_validation_data_path,
                                  FLAGS.eval_batch_size, 1, FLAGS.test_fields, "0", True)
        trainer = SingleboxTrainer(model, None, None, None, pred_pipe)
    else:
        if FLAGS.use_mstf_ops:
            pred_pipe = InputPipe(FLAGS.input_validation_data_path,
                                  FLAGS.eval_batch_size, 1, FLAGS.test_fields, "", True)
        else:
            pred_pipe = InputPipe(FLAGS.input_validation_data_path,
                                  FLAGS.eval_batch_size, 1, FLAGS.test_fields, "0", True)
        trainer = SingleboxTrainer(model, None, None, pred_pipe, None)
    scope = tf.get_variable_scope()
    scope.reuse_variables()
    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(FLAGS.input_previous_model_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Load model from ", ckpt.model_checkpoint_path)
        else:
            print("No initial model found.")
        trainer.predict(sess, predict_mode, outputter)
        outputter.close()
def __init__(self, weight_path, have_att=False):
    ENC_EMB_DIM = 256
    DEC_EMB_DIM = 256
    ENC_HID_DIM = 512
    DEC_HID_DIM = 512
    ENC_DROPOUT = 0.5
    DEC_DROPOUT = 0.5
    MAX_LEN = 46

    self.maxlen = MAX_LEN
    self.vocab = Vocab(alphabets)
    INPUT_DIM = len(self.vocab)
    OUTPUT_DIM = len(self.vocab)

    if have_att:
        self.model = Seq2Seq(input_dim=INPUT_DIM, output_dim=OUTPUT_DIM,
                             encoder_embbeded=ENC_EMB_DIM, decoder_embedded=DEC_EMB_DIM,
                             encoder_hidden=ENC_HID_DIM, decoder_hidden=DEC_HID_DIM,
                             encoder_dropout=ENC_DROPOUT, decoder_dropout=DEC_DROPOUT)
    else:
        self.model = Seq2Seq_WithoutAtt(input_dim=INPUT_DIM, output_dim=OUTPUT_DIM,
                                        encoder_embbeded=ENC_EMB_DIM, decoder_embedded=DEC_EMB_DIM,
                                        encoder_hidden=ENC_HID_DIM, decoder_hidden=DEC_HID_DIM,
                                        encoder_dropout=ENC_DROPOUT, decoder_dropout=DEC_DROPOUT)

    self.load_weights(weight_path)

    if torch.cuda.is_available():
        self.device = "cuda"
        self.model.to('cuda')
    else:
        self.device = "cpu"
    print("Device: ", self.device)
    print("Loaded model")
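# `load_weights` (called in the constructor above) is assumed to restore a
# state_dict saved with torch.save; a minimal sketch of such a method, tolerant
# of CPU-only machines:
import torch

def load_weights(self, weight_path):
    state_dict = torch.load(weight_path, map_location="cpu")
    self.model.load_state_dict(state_dict)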
def __init__(self, hidden_size, num_layers, device='cuda', drop_prob=0,
             lstm=True, feature_norm=False, input_size=100, output_size=100,
             bidirectional=True):
    super().__init__()
    # forward the requested device rather than assuming CUDA
    self.seq2seq = Seq2Seq(hidden_size=hidden_size,
                           num_layers=num_layers,
                           device=device,
                           drop_prob=drop_prob,
                           lstm=lstm,
                           feature_norm=feature_norm,
                           input_size=input_size,
                           output_size=output_size,
                           bidirectional=bidirectional)
def main(args): train_dataset = BindingDataset('train', args=args) data_from_train = train_dataset.anony_ques_max_len, train_dataset.anony_query_max_len, train_dataset.anony_ques_vocab, train_dataset.anony_query_vocab args.anony_ques_max_len, args.anony_query_max_len, args.anony_ques_vocab, args.anony_query_vocab = data_from_train print(args.anony_ques_max_len, args.anony_query_max_len, len(args.anony_ques_vocab), args.anony_query_vocab) train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=args.shuffle) # build dev_dataloader args.shuffle = False dev_dataset = BindingDataset('dev', args=args, data_from_train=data_from_train) dev_dataloader = DataLoader(dataset=dev_dataset, batch_size=args.batch_size, shuffle=args.shuffle) # build test_dataloader # test_dataset = BindingDataset('test', args=args, data_from_train=data_from_train) # test_dataloader = DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=args.shuffle) # load word embedding # train encoder = Encoder(len(args.anony_ques_vocab), args.word_dim, args.hidden_size, n_layers=2 * args.num_layers, dropout=args.dropout_p) decoder = Decoder(args.word_dim, args.hidden_size, len(args.anony_query_vocab), n_layers=args.num_layers, dropout=args.dropout_p) model = Seq2Seq(encoder, decoder) train(train_dataloader, dev_dataloader, args, model)
def test(dataset, params): batches = list( dataset.get_batch(params.batch_size, params.src_max_length, params.tgt_max_length)) n_batches = (dataset.total_pairs - 1) // params.batch_size + 1 model = Seq2Seq(params, dataset.vocab, dataset.SPECIAL_TOKENS) model = model.to(device) # load model from saved checkpoint model.load_state_dict(torch.load(params.model_path_prefix + ".25.pt")) model.eval() rouge = Rouge() pred_texts = [] target_texts = [] source_texts = [] loss_total = 0. bleu_total = 0. for batch_count, batch in enumerate(batches): source_tensor, target_tensor = batch # get predicted output with torch.no_grad(): source_tensor = source_tensor.to(device) target_tensor = target_tensor.to(device) output_tokens, batch_loss = model.beam_search( source_tensor, params.beam_size) batch_loss_value = batch_loss.item() loss_total += batch_loss_value pred_text = get_raw_texts(output_tokens, vocab=dataset.vocab, special_tokens=dataset.SPECIAL_TOKENS) pred_texts.extend(pred_text) target_text = get_raw_texts(target_tensor, vocab=dataset.vocab, special_tokens=dataset.SPECIAL_TOKENS) target_texts.extend(target_text) source_text = get_raw_texts(source_tensor, vocab=dataset.vocab, special_tokens=dataset.SPECIAL_TOKENS) source_texts.extend(source_text) # calculate bleu score for i in range(params.batch_size): bleu_total += bleu.sentence_bleu([target_text[i]], pred_text[i]) if batch_count % 100 == 0: print("predicting batch {} / total batch {}".format( batch_count + 1, n_batches)) # calculate rouge score scores = rouge.get_scores(pred_texts, target_texts, avg=True, ignore_empty=True) print("Rouge scores:\n {}\n".format(scores)) bleu_avg = bleu_total / dataset.total_pairs print("Bleu average scores:\n {}\n".format(bleu_avg)) loss_average = loss_total / n_batches print("Negative Log Likelihood:\n {}\n".format(loss_average)) for i in range(5): print("Example: {}\n".format(i + 1)) print("Article: {}\n".format(source_texts[i])) print("True Summary: {}\n".format(target_texts[i])) print("Generated Summary: {}\n".format(pred_texts[i]))
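# `get_raw_texts` is used above but not defined here. A minimal sketch, assuming
# token-id tensors are time-major (seq_len x batch), as the column indexing in
# the training script suggests, and that `special_tokens` maps names such as
# 'PAD'/'SOS'/'EOS' to ids; the real helper may differ:
def get_raw_texts(tokens, vocab, special_tokens):
    skip = set(special_tokens.values())
    texts = []
    for seq in tokens.t().tolist():  # one sequence per batch element
        texts.append(" ".join(vocab[i] for i in seq if i not in skip))
    return texts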
def __init__(self, alphabets_, list_ngram): self.vocab = Vocab(alphabets_) self.synthesizer = SynthesizeData(vocab_path="") self.list_ngrams_train, self.list_ngrams_valid = self.train_test_split( list_ngram, test_size=0.1) print("Loaded data!!!") print("Total training samples: ", len(self.list_ngrams_train)) print("Total valid samples: ", len(self.list_ngrams_valid)) INPUT_DIM = self.vocab.__len__() OUTPUT_DIM = self.vocab.__len__() self.device = DEVICE self.num_iters = NUM_ITERS self.beamsearch = BEAM_SEARCH self.batch_size = BATCH_SIZE self.print_every = PRINT_PER_ITER self.valid_every = VALID_PER_ITER self.checkpoint = CHECKPOINT self.export_weights = EXPORT self.metrics = MAX_SAMPLE_VALID logger = LOG if logger: self.logger = Logger(logger) self.iter = 0 self.model = Seq2Seq(input_dim=INPUT_DIM, output_dim=OUTPUT_DIM, encoder_embbeded=ENC_EMB_DIM, decoder_embedded=DEC_EMB_DIM, encoder_hidden=ENC_HID_DIM, decoder_hidden=DEC_HID_DIM, encoder_dropout=ENC_DROPOUT, decoder_dropout=DEC_DROPOUT) self.optimizer = AdamW(self.model.parameters(), betas=(0.9, 0.98), eps=1e-09) self.scheduler = OneCycleLR(self.optimizer, total_steps=self.num_iters, pct_start=PCT_START, max_lr=MAX_LR) self.criterion = LabelSmoothingLoss(len(self.vocab), padding_idx=self.vocab.pad, smoothing=0.1) self.train_gen = self.data_gen(self.list_ngrams_train, self.synthesizer, self.vocab, is_train=True) self.valid_gen = self.data_gen(self.list_ngrams_valid, self.synthesizer, self.vocab, is_train=False) self.train_losses = [] # to device self.model.to(self.device) self.criterion.to(self.device)
def train(): reader = PWKPReader() train_dataset = reader.read(train_path) valid_dataset = reader.read(dev_path) if os.path.exists(vocab_dir): vocab = Vocabulary.from_files(vocab_dir) else: vocab = Vocabulary.from_instances(instances=train_dataset, max_vocab_size=opt.vocab_size) vocab.save_to_files(vocab_dir) iterator = BucketIterator(batch_size=opt.batch_size, sorting_keys=[("src", "num_tokens"), ("tgt", "num_tokens")]) iterator.index_with(vocab) model = Seq2Seq(emb_size=opt.emb_size, hidden_size=opt.hidden_size, enc_layers=opt.enc_layers, dec_layers=opt.dec_layers, dropout=opt.dropout, bidirectional=opt.bidirectional, beam_size=opt.beam_size, label_smoothing=opt.label_smoothing, vocab=vocab) optimizer = optim.Adam(model.parameters(), lr=opt.lr) #learning_rate_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=1, gamma=opt.lr_decay) val_iterator = BasicIterator(batch_size=opt.batch_size) val_iterator.index_with(vocab) predictor = Predictor(iterator=val_iterator, max_decoding_step=opt.max_step, vocab=vocab, reader=reader, data_path=test_path, log_dir=save_dir, map_path=ner_path, cuda_device=opt.gpu) trainer = Trainer( model=model, optimizer=optimizer, #learning_rate_scheduler=learning_rate_scheduler, learning_rate_decay=opt.lr_decay, ema_decay=opt.ema_decay, predictor=predictor, iterator=iterator, train_dataset=train_dataset, validation_dataset=valid_dataset, validation_metric='+bleu', cuda_device=opt.gpu, num_epochs=opt.epoch, serialization_dir=save_dir, num_serialized_models_to_keep=5, #model_save_interval=60, #summary_interval=500, should_log_parameter_statistics=False, grad_norm=10) trainer.train()
def main(): """Main method to run the models""" args = parse_args() dataset = [] vocab = [] whole_data = [] for x in [ques,query]: dataset.append(VerbalDataset()) #changed - cover_entities dataset[x].load_data_and_fields(cover_entities=True, query_as_input=x) vocab.append(dataset[x].get_vocabs()) whole_data.append(dataset[x].get_data()) src_vocab, trg_vocab = vocab[0] src_vocab_query, trg_vocab_query = vocab[1] train_data_question, valid_data_question, test_data_question = whole_data[0] print("train_data_quer", len(list(train_data_question))) train_data_query, valid_data_query, test_data_query = whole_data[1] save_vocab(trg_vocab) print('--------------------------------') print(f'Model: {args.model}') print(f'Model input: {args.input}') if args.model == RNN_NAME: print(f'Attention: {args.attention}') print(f'Cover entities: {args.cover_entities}') print('--------------------------------') print(f"Training data: {len(train_data_query.examples)}") print(f"Evaluation data: {len(valid_data_query.examples)}") print(f"Testing data: {len(test_data_query.examples)}") print('--------------------------------') print(f'Question example: {train_data_query.examples[0].src}') print(f'Answer example: {train_data_query.examples[0].trg}') print('--------------------------------') print(f"Unique tokens in questions vocabulary: {len(src_vocab_query)}") print(f"Unique tokens in answers vocabulary: {len(trg_vocab_query)}") print('--------------------------------') print(f'Batch: {args.batch_size}') print(f'Epochs: {args.epochs_num}') print('--------------------------------') if args.model == RNN_NAME and args.attention == ATTENTION_1: from models.rnn1 import Encoder, Decoder elif args.model == RNN_NAME and args.attention == ATTENTION_2: from models.rnn2 import Encoder, Decoder elif args.model == CNN_NAME: from models.cnn import Encoder, Decoder elif args.model == TRANSFORMER_NAME: from models.transformer import Encoder, Decoder, NoamOpt # create model encoder = Encoder(src_vocab, DEVICE) encoder_query = Encoder(src_vocab_query, DEVICE) decoder = Decoder(trg_vocab_query, DEVICE) model = Seq2Seq(encoder, encoder_query, decoder, args.model).to(DEVICE) parameters_num = sum(p.numel() for p in model.parameters() if p.requires_grad) print(f'The model has {parameters_num:,} trainable parameters') print('--------------------------------') # create optimizer if model.name == TRANSFORMER_NAME: # initialize model parameters with Glorot / fan_avg for p in model.parameters(): if p.dim() > 1: nn.init.xavier_uniform_(p) optimizer = NoamOpt(torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)) else: optimizer = optim.Adam(model.parameters()) # define criterion criterion = nn.CrossEntropyLoss(ignore_index=trg_vocab.stoi[PAD_TOKEN]) # train data trainer = Trainer(optimizer, criterion, args.batch_size, DEVICE) trainer.train(model, train_data_question, train_data_query, valid_data_question, valid_data_query, num_of_epochs=args.epochs_num) # load model model = Chechpoint.load(model) # generate test iterator valid_iterator, test_iterator = BucketIterator.splits( (valid_data_question, test_data_question), repeat=False, batch_size=args.batch_size, sort_within_batch=True if args.model == RNN_NAME else False, sort_key=lambda x: len(x.src), device=DEVICE) valid_iterator_query, test_iterator_query = BucketIterator.splits( (valid_data_query, test_data_query), repeat=False, batch_size=args.batch_size, sort_within_batch=True if args.model == RNN_NAME else False, sort_key=lambda x: len(x.src), device=DEVICE) # evaluate 
    # evaluate model
    valid_loss = trainer.evaluator.evaluate(model, valid_iterator, valid_iterator_query)
    test_loss = trainer.evaluator.evaluate(model, test_iterator, test_iterator_query)

    # calculate BLEU score for valid and test data
    predictor = Predictor(model, src_vocab, src_vocab_query, trg_vocab, DEVICE)
    valid_scorer = BleuScorer()
    test_scorer = BleuScorer()
    valid_scorer.data_score(valid_data_question.examples, valid_data_query.examples, predictor)
    results, _ = test_scorer.data_score(test_data_question.examples, test_data_query.examples, predictor)
    for k in results[0:10]:
        print("reference ", k['reference'])
        print("hypothesis", k['hypothesis'])

    print(f'| Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f} |')
    print(f'| Val. Data Average BLEU score {valid_scorer.average_score()} |')
    print(f'| Val. Data Average METEOR score {valid_scorer.average_meteor_score()} |')
    print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')
    print(f'| Test Data Average BLEU score {test_scorer.average_score()} |')
    print(f'| Test Data Average METEOR score {test_scorer.average_meteor_score()} |')
print_accuracy_measures(predictions, actuals)


if __name__ == "__main__":
    # Init models
    models = []
    seq2seq = Seq2Seq(name="seq2seq",
                      data_dict=data_dict,
                      batch_size=batch_size,
                      state_size=state_size,
                      input_feature_amount=input_feature_amount,
                      output_feature_amount=output_feature_amount,
                      seq_len_in=seq_len_in,
                      seq_len_out=seq_len_out,
                      plot_time_steps_view=plot_time_steps_view,
                      steps_per_epoch=steps_per_epoch,
                      epochs=epochs,
                      learning_rate=learning_rate,
                      intermediates=intermediates,
                      plot_loss=plot_loss,
                      load_weights_path=load_s2s_weights_path,
                      agg_level=agg_level)
    seq2seq_1dconv = Seq2SeqConv(name="seq2seq_1dconv",
                                 data_dict=data_dict,
                                 batch_size=batch_size,
                                 state_size=state_size,
                                 input_feature_amount=input_feature_amount,
                                 output_feature_amount=output_feature_amount,
def main(train_loader, valid_loader, test_loader): encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP, FLIP).cuda() decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention, TRADEOFF_CONTEXT_EMBED).cuda() seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda() if CurriculumModelID > 0: model_file = 'save_weights/seq2seq-' + str( CurriculumModelID) + '.model' #model_file = 'save_weights/words/seq2seq-' + str(CurriculumModelID) +'.model' print('Loading ' + model_file) seq2seq.load_state_dict(torch.load(model_file)) #load opt = optim.Adam(seq2seq.parameters(), lr=learning_rate) #opt = optim.SGD(seq2seq.parameters(), lr=learning_rate, momentum=0.9) #opt = optim.RMSprop(seq2seq.parameters(), lr=learning_rate, momentum=0.9) #scheduler = optim.lr_scheduler.StepLR(opt, step_size=20, gamma=1) scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=lr_milestone, gamma=lr_gamma) epochs = 5000000 if EARLY_STOP_EPOCH is not None: min_loss = 1e3 min_loss_index = 0 min_loss_count = 0 if CurriculumModelID > 0 and WORD_LEVEL: start_epoch = CurriculumModelID + 1 for i in range(start_epoch): scheduler.step() else: start_epoch = 0 for epoch in range(start_epoch, epochs): scheduler.step() lr = scheduler.get_lr()[0] teacher_rate = teacher_force_func(epoch) if TEACHER_FORCING else False start = time.time() loss = train(train_loader, seq2seq, opt, teacher_rate, epoch) writeLoss(loss, 'train') print('epoch %d/%d, loss=%.3f, lr=%.8f, teacher_rate=%.3f, time=%.3f' % (epoch, epochs, loss, lr, teacher_rate, time.time() - start)) if epoch % MODEL_SAVE_EPOCH == 0: folder_weights = 'save_weights' if not os.path.exists(folder_weights): os.makedirs(folder_weights) torch.save(seq2seq.state_dict(), folder_weights + '/seq2seq-%d.model' % epoch) start_v = time.time() loss_v = valid(valid_loader, seq2seq, epoch) writeLoss(loss_v, 'valid') print(' Valid loss=%.3f, time=%.3f' % (loss_v, time.time() - start_v)) if EARLY_STOP_EPOCH is not None: gt = 'RWTH_partition/RWTH.iam_word_gt_final.valid.thresh' decoded = 'pred_logs/valid_predict_seq.' + str(epoch) + '.log' res_cer = sub.Popen(['./tasas_cer.sh', gt, decoded], stdout=sub.PIPE) res_cer = res_cer.stdout.read().decode('utf8') loss_v = float(res_cer) / 100 if loss_v < min_loss: min_loss = loss_v min_loss_index = epoch min_loss_count = 0 else: min_loss_count += 1 if min_loss_count >= EARLY_STOP_EPOCH: print('Early Stopping at: %d. Best epoch is: %d' % (epoch, min_loss_index)) return min_loss_index
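# `teacher_force_func` (used above and in the domain-adaptation training loop
# below) is not defined in these snippets. A common scheme, sketched here as an
# assumption, decays the teacher-forcing rate exponentially with the epoch
# index; the real schedule may differ:
import math

def teacher_force_func(epoch, start_rate=1.0, decay=0.01):
    return start_rate * math.exp(-decay * epoch)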
raw = torch.load('./dat/processed/raw_states.pt')

for index, vects in d.items():
    # each is N x 300
    input_state, next_state = vects[0], vects[1]
    # raw strings corresponding to embeddings
    raw_input_state, raw_next_state = list(raw.keys())[index], raw[list(raw.keys())[index]]
    print(raw_input_state)
    print(input_state)
    print(raw_next_state)
    print(next_state)
    if index > 1:
        break

model = Seq2Seq(hidden_size=2, num_layers=2)
print(model)

for index, vects in d.items():
    # each is N x 300
    input_state, next_state = vects[0], vects[1]
    # raw strings corresponding to embeddings
    raw_input_state, raw_next_state = list(raw.keys())[index], raw[list(raw.keys())[index]]
    # print(input_state.unsqueeze(0).shape)
    mu = model(input_state.unsqueeze(0)).detach()
    # print(mu.shape)
    # ACTOR FORMAT
    logstd = torch.zeros_like(mu)
def run(): ## Load Config from JSON file dir_path = os.path.dirname(os.path.realpath(__file__)) config_path = os.path.join(dir_path, "experiment", FLAGS.config) if not os.path.exists(config_path): raise FileNotFoundError if not os.path.exists(FLAGS.data_path): raise FileNotFoundError with open(config_path, "r") as f: config = json.load(f) config["gpu"] = torch.cuda.is_available() ## Load Data df = dl.load_raw_text_file(FLAGS.data_path, num_examples=30000) # index language for Input and Output inp_index = LanguageIndex(phrases=df["es"].values) targ_index = LanguageIndex(df["eng"].values) vocab_inp_size = len(inp_index.word2idx) vocab_tar_size = len(targ_index.word2idx) # Convert Sentences into tokenized tensors input_tensor, target_tensor = dl.convert_tensor(df, inp_index, targ_index) # Split to training and test set input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split( input_tensor, target_tensor, test_size=0.2) train_dataset = MyData(input_tensor_train, target_tensor_train) val_dataset = MyData(input_tensor_val, target_tensor_val) # Conver to DataLoader Object train_dataset = data.DataLoader(train_dataset, batch_size=config['batch_size'], drop_last=True, shuffle=True) eval_dataset = data.DataLoader(val_dataset, batch_size=config['batch_size'], drop_last=False, shuffle=True) # Models model = Seq2Seq(config, vocab_inp_size, vocab_tar_size) scorer = create_scorer(config['metrics']) if config['gpu']: model = model.cuda() # Optimizer optimizer = torch.optim.Adam(model.parameters(), lr=config.get("learning_rate", .001)) for name, param in model.named_parameters(): if 'bias' in name: torch.nn.init.constant_(param, 0.0) elif 'weight' in name: torch.nn.init.xavier_normal_(param) print("Weight Initialized") ## Train and Evaluate over epochs all_train_avg_loss = [] all_eval_avg_loss = [] all_eval_avg_acc = [] for epoch in range(FLAGS.epochs): run_state = (epoch, FLAGS.epochs) # Train needs to return model and optimizer, otherwise the model keeps restarting from zero at every epoch model, optimizer, train_avg_loss = train(model, optimizer, train_dataset, run_state, config['debug']) all_train_avg_loss.append(train_avg_loss) # Return Val Set Loss and Accuracy eval_avg_loss, eval_acc = evaluate(model, eval_dataset, targ_index, scorer, config['debug']) all_eval_avg_loss.append(eval_avg_loss) all_eval_avg_acc.append(eval_acc) # Save Model Checkpoint checkpoint_dict = { 'epoch': epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': eval_avg_loss, } checkpoint_path = '{}/epoch_{:0.0f}_val_loss_{:0.3f}.pt'.format( FLAGS.model_checkpoint_dir, epoch, eval_avg_loss) torch.save(checkpoint_dict, checkpoint_path) # Export Model Learning Curve Info df = pd.DataFrame({ 'epoch': range(FLAGS.epochs), 'train_loss': all_train_avg_loss, 'eval_loss': all_eval_avg_loss, 'eval_acc': all_eval_avg_acc }) now = datetime.now() current_time = now.strftime("%Y%m%d%H%M%S") export_path = '{}/{}_{:0.0f}_bz_{}_val_loss_{:0.3f}.csv'.format( FLAGS.metrics_dir, current_time, FLAGS.epochs, config['batch_size'], eval_avg_loss) df.to_csv(export_path, index=False)
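# To resume from one of the checkpoints written above, the saved keys can be
# loaded back into the same model and optimizer objects built in run(). The
# path below is illustrative only:
checkpoint = torch.load("model_checkpoints/epoch_3_val_loss_1.234.pt")
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
start_epoch = checkpoint["epoch"] + 1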
        logger.log('Converting data entries to tensors')
        tensor_builder = TensorBuilder(input_lang, output_lang)
        train_pairs = [tensor_builder.tensorsFromPair(pair) for pair in train_pairs]
        valid_pairs = [tensor_builder.tensorsFromPair(pair) for pair in valid_pairs]
        test_pairs = [tensor_builder.tensorsFromPair(pair) for pair in test_pairs]

        logger.log('Building the model')
        model = Seq2Seq(input_size=input_lang.n_words,
                        output_size=output_lang.n_words,
                        hidden_size=constants.HIDDEN_SIZE,
                        learning_rate=constants.LEARNING_RATE,
                        teacher_forcing_ratio=constants.TEACHER_FORCING_RATIO,
                        device=constants.DEVICE)
        logger.log(str(model))

        logger.log('Initializing evaluators')
        evaluator = Evaluator(valid_pairs, input_lang, output_lang)
        test_set_evaluator = Evaluator(test_pairs, input_lang, output_lang)
    except Exception as e:
        # Log the error message and raise it again to see more info
        logger.log("Error: " + str(e))
        raise e
        successful = False
input_lengths[-2] = Ti - 2
padded_input[-1, -3:, ] = 0
input_lengths[-1] = Ti - 3
encoder = Encoder(D, H, Li, bidirectional=B, rnn_type=R)
# Decoder
VOC, EMB, SOS, EOS, L = 10, 3, 8, 9, 2
H = H * 2 if B else H
padded_target = torch.randint(10, (N, To), dtype=torch.long)  # N x To
padded_target[-1, -3:] = IGNORE_ID
decoder = Decoder(VOC, EMB, SOS, EOS, H, L)
# Seq2Seq
seq2seq = Seq2Seq(encoder, decoder)
loss = seq2seq(padded_input, input_lengths, padded_target)
print(loss)
# print(decoder_outputs)
# print("To+1 =", len(decoder_outputs))
# print("N, V =", decoder_outputs[0].size())

import argparse
beam_size = 5
nbest = 5
defaults = dict(beam_size=beam_size, nbest=nbest, decode_max_len=0)
args = argparse.Namespace(**defaults)
char_list = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
for i in range(3):
    print("\n***** Utt", i + 1)
    Ti = i + 20
def main(all_data_loader_func): encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP, FLIP).cuda() decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention, TRADEOFF_CONTEXT_EMBED).cuda() seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda() if CurriculumModelID > 0: model_file = 'save_weights/seq2seq-' + str( CurriculumModelID) + '.model' print('Loading ' + model_file) pretrain_dict = torch.load(model_file) seq2seq_dict = seq2seq.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in seq2seq_dict } seq2seq_dict.update(pretrain_dict) seq2seq.load_state_dict(seq2seq_dict) #load opt = optim.Adam(seq2seq.parameters(), lr=learning_rate) scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=lr_milestone, gamma=lr_gamma) epochs = 5000 if EARLY_STOP_EPOCH is not None: min_loss = 1e3 min_loss_index = 0 min_loss_count = 0 if CurriculumModelID > 0: start_epoch = CurriculumModelID + 1 for i in range(start_epoch): scheduler.step() else: start_epoch = 0 for epoch in range(start_epoch, epochs): # each epoch, random sample training set to be balanced with unlabeled test set train_loader, valid_loader, test_loader = all_data_loader_func() scheduler.step() lr = scheduler.get_lr()[0] teacher_rate = teacher_force_func(epoch) if TEACHER_FORCING else False start = time.time() lambd = return_lambda(epoch) loss, loss_d = train(train_loader, seq2seq, opt, teacher_rate, epoch, lambd) writeLoss(loss, 'train') writeLoss(loss_d, 'domain_train') print( 'epoch %d/%d, loss=%.3f, domain_loss=%.3f, lr=%.6f, teacher_rate=%.3f, lambda_pau=%.3f, time=%.3f' % (epoch, epochs, loss, loss_d, lr, teacher_rate, lambd, time.time() - start)) if epoch % MODEL_SAVE_EPOCH == 0: folder_weights = 'save_weights' if not os.path.exists(folder_weights): os.makedirs(folder_weights) torch.save(seq2seq.state_dict(), folder_weights + '/seq2seq-%d.model' % epoch) start_v = time.time() loss_v, loss_v_d = valid(valid_loader, seq2seq, epoch) writeLoss(loss_v, 'valid') writeLoss(loss_v_d, 'domain_valid') print(' Valid loss=%.3f, domain_loss=%.3f, time=%.3f' % (loss_v, loss_v_d, time.time() - start_v)) test(test_loader, epoch, False) #~~~~~~ if EARLY_STOP_EPOCH is not None: gt = loadData.GT_TE decoded = 'pred_logs/valid_predict_seq.' + str(epoch) + '.log' res_cer = sub.Popen(['./tasas_cer.sh', gt, decoded], stdout=sub.PIPE) res_cer = res_cer.stdout.read().decode('utf8') loss_v = float(res_cer) / 100 if loss_v < min_loss: min_loss = loss_v min_loss_index = epoch min_loss_count = 0 else: min_loss_count += 1 if min_loss_count >= EARLY_STOP_EPOCH: print('Early Stopping at: %d. Best epoch is: %d' % (epoch, min_loss_index)) return min_loss_index
def run(): USE_CUDA = torch.cuda.is_available() FLAGS.config = 'example_seq2seq.json' config_path = os.path.join("experiments", FLAGS.config) print(FLAGS.config) if not os.path.exists(config_path): raise FileNotFoundError with open(config_path, "r") as f: config = json.load(f) config["gpu"] = torch.cuda.is_available() writer = SummaryWriter('experiments/finally') # dataset = ToyDataset(5, 15) # eval_dataset = ToyDataset(5, 15, type='eval') dataset = Toy_Numbers(10) eval_dataset = Toy_Numbers(10, train=False) BATCHSIZE = 32 train_loader = data.DataLoader(dataset, batch_size=BATCHSIZE, shuffle=False, collate_fn=pad_collate, drop_last=True) eval_loader = data.DataLoader(eval_dataset, batch_size=BATCHSIZE, shuffle=False, collate_fn=pad_collate, drop_last=True) config["batch_size"] = BATCHSIZE # Models model = Seq2Seq(config) model = model.float() # dataiter = iter(train_loader) # sample_input= dataiter.next() # writer.add_graph(model, sample_input) # writer.close() if USE_CUDA: model = model.cuda() # Optimizer optimizer = torch.optim.Adam(model.parameters(), lr=config.get("learning_rate", .001)) print("=" * 60) print(model) print("=" * 60) for k, v in sorted(config.items(), key=lambda i: i[0]): print(" (" + k + ") : " + str(v)) print() print("=" * 60) print("\nInitializing weights...") for name, param in model.named_parameters(): if 'bias' in name: torch.nn.init.constant_(param, 0.0) elif 'weight' in name: torch.nn.init.xavier_normal_(param) for epoch in range(FLAGS.epochs): run_state = (epoch, FLAGS.epochs, FLAGS.train_size) # Train needs to return model and optimizer, otherwise the model keeps restarting from zero at every epoch model, optimizer = train(model, optimizer, train_loader, run_state, writer) # print("losses", l_list) # for i in l_list: # # print(i) # writer.add_scalar('Loss/train',i) evaluate(model, eval_loader, writer)
def train(): with tf.Graph().as_default(), tf.device('/cpu:0'): tf.gfile.Copy(FLAGS.input_previous_model_path + "/" + FLAGS.decoder_vocab_file, FLAGS.output_model_path + "/" + FLAGS.decoder_vocab_file, overwrite=True) global_step = tf.train.get_or_create_global_step() inc_step = tf.assign_add(global_step, 1) #Training setting if FLAGS.use_mstf_ops: train_input_pipe = InputPipe([ FLAGS.input_training_data_path + "/" + i for i in tf.gfile.ListDirectory(FLAGS.input_training_data_path) ], FLAGS.batch_size, FLAGS.num_epochs, 2, "", False) auc_eval_pipe = InputPipe( FLAGS.input_validation_data_path + "/label_data.txt", FLAGS.eval_batch_size, 1, 3, "", False) if FLAGS.auc_evaluation else None bleu_eval_pipe = InputPipe( FLAGS.input_validation_data_path + "/bleu_data.txt", FLAGS.eval_batch_size, 1, 2, "", False) if FLAGS.bleu_evaluation else None else: train_input_pipe = InputPipe([ FLAGS.input_training_data_path + "/" + i for i in tf.gfile.ListDirectory(FLAGS.input_training_data_path) ], FLAGS.batch_size, FLAGS.num_epochs, 2, "0", False) auc_eval_pipe = InputPipe( FLAGS.input_validation_data_path + "/label_data.txt", FLAGS.eval_batch_size, 1, 3, "0", False) if FLAGS.auc_evaluation else None bleu_eval_pipe = InputPipe( FLAGS.input_validation_data_path + "/bleu_data.txt", FLAGS.eval_batch_size, 1, 2, "0", False) if FLAGS.bleu_evaluation else None model = Seq2Seq() trainer = SingleboxTrainer(model, inc_step, train_input_pipe, auc_eval_pipe, bleu_eval_pipe) summary_op = tf.summary.merge_all() config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True saver = tf.train.Saver(max_to_keep=FLAGS.max_model_to_keep, name='model_saver') with tf.Session(config=config) as session: summ_writer = tf.summary.FileWriter(FLAGS.log_dir, session.graph) #Load Pretrain session.run(tf.local_variables_initializer()) session.run(tf.global_variables_initializer()) session.run(tf.tables_initializer()) session.run(train_input_pipe.iterator.initializer) ckpt = tf.train.get_checkpoint_state( FLAGS.input_previous_model_path) if ckpt and ckpt.model_checkpoint_path: saver.restore(session, ckpt.model_checkpoint_path) print("Load Model From ", ckpt.model_checkpoint_path) else: print("No Initial Model Found.") trainer.start_time = time.time() while True: try: _, avg_loss, total_weight, step, summary = session.run( trainer.train_ops() + [summary_op]) #print(step) if step % FLAGS.log_frequency == 1: summ_writer.add_summary(summary, step) trainer.print_log(total_weight, step, avg_loss) if step % FLAGS.checkpoint_frequency == 1: if FLAGS.auc_evaluation: trainer.eval(step, session, 'auc') if FLAGS.bleu_evaluation: trainer.eval(step, session, 'bleu') if trainer.improved(): saver.save(session, FLAGS.output_model_path + "/seq2seq_model", global_step=step) elif trainer.early_stop(): print("\nEarly stop") break except tf.errors.OutOfRangeError: print("End of training.") break if not trainer.early_stop(): saver.save(session, FLAGS.output_model_path + "/" + "seq2seq_model_final", global_step=step)
def main(): # config for training config = Config() print("Normal train config:") pp(config) valid_config = Config() valid_config.dropout = 0 valid_config.batch_size = 20 # config for test test_config = Config() test_config.dropout = 0 test_config.batch_size = 1 with_sentiment = config.with_sentiment ############################################################################### # Load data ############################################################################### # sentiment data path: ../ final_data / poem_with_sentiment.txt # 该path必须命令行显示输入LoadPoem,因为defaultNonehjk # 处理pretrain数据和完整诗歌数据 # api = LoadPoem(args.train_data_dir, args.test_data_dir, args.max_vocab_size) api = LoadPoem(corpus_path=args.train_data_dir, test_path=args.test_data_dir, max_vocab_cnt=config.max_vocab_cnt, with_sentiment=with_sentiment) # 交替训练,准备大数据集 poem_corpus = api.get_tokenized_poem_corpus( type=1 + int(with_sentiment)) # corpus for training and validation test_data = api.get_tokenized_test_corpus() # 测试数据 # 三个list,每个list中的每一个元素都是 [topic, last_sentence, current_sentence] train_poem, valid_poem, test_poem = poem_corpus["train"], poem_corpus[ "valid"], test_data["test"] train_loader = SWDADataLoader("Train", train_poem, config) valid_loader = SWDADataLoader("Valid", valid_poem, config) test_loader = SWDADataLoader("Test", test_poem, config) print("Finish Poem data loading, not pretraining or alignment test") if not args.forward_only: # LOG # log_start_time = str(datetime.now().strftime('%Y%m%d%H%M')) if not os.path.isdir('./output'): os.makedirs('./output') if not os.path.isdir('./output/{}'.format(args.expname)): os.makedirs('./output/{}'.format(args.expname)) if not os.path.isdir('./output/{}/{}'.format(args.expname, log_start_time)): os.makedirs('./output/{}/{}'.format(args.expname, log_start_time)) # save arguments json.dump( vars(args), open( './output/{}/{}/args.json'.format(args.expname, log_start_time), 'w')) logger = logging.getLogger(__name__) logging.basicConfig(level=logging.DEBUG, format="%(message)s") fh = logging.FileHandler("./output/{}/{}/logs.txt".format( args.expname, log_start_time)) # add the handlers to the logger logger.addHandler(fh) logger.info(vars(args)) tb_writer = SummaryWriter("./output/{}/{}/tb_logs".format( args.expname, log_start_time)) if args.visual else None if config.reload_model: model = load_model(config.model_name) else: if args.model == "mCVAE": model = CVAE_GMP(config=config, api=api) elif args.model == 'CVAE': model = CVAE(config=config, api=api) else: model = Seq2Seq(config=config, api=api) if use_cuda: model = model.cuda() # if corpus.word2vec is not None and args.reload_from<0: # print("Loaded word2vec") # model.embedder.weight.data.copy_(torch.from_numpy(corpus.word2vec)) # model.embedder.weight.data[0].fill_(0) ############################################################################### # Start training ############################################################################### # model依然是PoemWAE_GMP保持不变,只不过,用这部分数据强制训练其中一个高斯先验分布 # pretrain = True cur_best_score = { 'min_valid_loss': 100, 'min_global_itr': 0, 'min_epoch': 0, 'min_itr': 0 } train_loader.epoch_init(config.batch_size, shuffle=True) # model = load_model(3, 3) epoch_id = 0 global_t = 0 while epoch_id < config.epochs: while True: # loop through all batches in training data # train一个batch model, finish_train, loss_records, global_t = \ train_process(global_t=global_t, model=model, train_loader=train_loader, config=config, sentiment_data=with_sentiment) if finish_train: test_process(model=model, 
test_loader=test_loader, test_config=test_config, logger=logger) # evaluate_process(model=model, valid_loader=valid_loader, log_start_time=log_start_time, global_t=global_t, epoch=epoch_id, logger=logger, tb_writer=tb_writer, api=api) # save model after each epoch save_model(model=model, epoch=epoch_id, global_t=global_t, log_start_time=log_start_time) logger.info( 'Finish epoch %d, current min valid loss: %.4f \ correspond epoch: %d itr: %d \n\n' % (cur_best_score['min_valid_loss'], cur_best_score['min_global_itr'], cur_best_score['min_epoch'], cur_best_score['min_itr'])) # 初始化下一个unlabeled data epoch的训练 # unlabeled_epoch += 1 epoch_id += 1 train_loader.epoch_init(config.batch_size, shuffle=True) break # elif batch_idx >= start_batch + config.n_batch_every_iter: # print("Finish unlabel epoch %d batch %d to %d" % # (unlabeled_epoch, start_batch, start_batch + config.n_batch_every_iter)) # start_batch += config.n_batch_every_iter # break # 写一下log if global_t % config.log_every == 0: log = 'Epoch id %d: step: %d/%d: ' \ % (epoch_id, global_t % train_loader.num_batch, train_loader.num_batch) for loss_name, loss_value in loss_records: if loss_name == 'avg_lead_loss': continue log = log + loss_name + ':%.4f ' % loss_value if args.visual: tb_writer.add_scalar(loss_name, loss_value, global_t) logger.info(log) # valid if global_t % config.valid_every == 0: # test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger) valid_process( global_t=global_t, model=model, valid_loader=valid_loader, valid_config=valid_config, unlabeled_epoch= epoch_id, # 如果sample_rate_unlabeled不是1,这里要在最后加一个1 tb_writer=tb_writer, logger=logger, cur_best_score=cur_best_score) # if batch_idx % (train_loader.num_batch // 3) == 0: # test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger) if global_t % config.test_every == 0: test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger) # forward_only 测试 else: expname = 'sentInput' time = '202101191105' model = load_model( './output/{}/{}/model_global_t_13596_epoch3.pckl'.format( expname, time)) test_loader.epoch_init(test_config.batch_size, shuffle=False) if not os.path.exists('./output/{}/{}/test/'.format(expname, time)): os.mkdir('./output/{}/{}/test/'.format(expname, time)) output_file = [ open('./output/{}/{}/test/output_0.txt'.format(expname, time), 'w'), open('./output/{}/{}/test/output_1.txt'.format(expname, time), 'w'), open('./output/{}/{}/test/output_2.txt'.format(expname, time), 'w') ] poem_count = 0 predict_results = {0: [], 1: [], 2: []} titles = {0: [], 1: [], 2: []} sentiment_result = {0: [], 1: [], 2: []} # Get all poem predictions while True: model.eval() batch = test_loader.next_batch_test() # test data使用专门的batch poem_count += 1 if poem_count % 10 == 0: print("Predicted {} poems".format(poem_count)) if batch is None: break title_list = batch # batch size是1,一个batch写一首诗 title_tensor = to_tensor(title_list) # test函数将当前batch对应的这首诗decode出来,记住每次decode的输入context是上一次的结果 for i in range(3): sentiment_label = np.zeros(1, dtype=np.int64) sentiment_label[0] = int(i) sentiment_label = to_tensor(sentiment_label) output_poem, output_tokens = model.test( title_tensor, title_list, sentiment_label=sentiment_label) titles[i].append(output_poem.strip().split('\n')[0]) predict_results[i] += (np.array(output_tokens)[:, :7].tolist()) # Predict sentiment use the sort net from collections import defaultdict neg = defaultdict(int) neu = defaultdict(int) pos = defaultdict(int) total = 
defaultdict(int)
        for i in range(3):
            _, neg[i], neu[i], pos[i] = test_sentiment(predict_results[i])
            total[i] = neg[i] + neu[i] + pos[i]
        for i in range(3):
            print("%d%%\t%d%%\t%d%%" % (neg[i] * 100 / total[i],
                                        neu[i] * 100 / total[i],
                                        pos[i] * 100 / total[i]))
        for i in range(3):
            write_predict_result_to_file(titles[i], predict_results[i],
                                         sentiment_result[i], output_file[i])
            output_file[i].close()
        print("Done testing")
def train_net(args): torch.manual_seed(7) np.random.seed(7) checkpoint = args.checkpoint start_epoch = 0 best_loss = float('inf') writer = SummaryWriter() epochs_since_improvement = 0 # Initialize / load checkpoint if checkpoint is None: # model encoder = Encoder(args.einput, args.ehidden, args.elayer, dropout=args.edropout, bidirectional=args.ebidirectional, rnn_type=args.etype) decoder = Decoder(vocab_size, args.dembed, sos_id, eos_id, args.dhidden, args.dlayer, bidirectional_encoder=args.ebidirectional) model = Seq2Seq(encoder, decoder) print(model) model.cuda() optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09) else: checkpoint = torch.load(checkpoint) start_epoch = checkpoint['epoch'] + 1 epochs_since_improvement = checkpoint['epochs_since_improvement'] model = checkpoint['model'] optimizer = checkpoint['optimizer'] logger = get_logger() # Custom dataloaders train_dataset = AiShellDataset(args, 'train') train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=pad_collate, pin_memory=True, shuffle=True, num_workers=num_workers) valid_dataset = AiShellDataset(args, 'dev') valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=args.batch_size, collate_fn=pad_collate, pin_memory=True, shuffle=False, num_workers=num_workers) # Epochs for epoch in range(start_epoch, args.epochs): # Halving learning rate when get small improvement if args.half_lr and epochs_since_improvement > 0: adjust_learning_rate(optimizer, 0.5) # One epoch's training train_loss = train(train_loader=train_loader, model=model, optimizer=optimizer, epoch=epoch, logger=logger) writer.add_scalar('Train_Loss', train_loss, epoch) lr = get_learning_rate(optimizer) print('Learning rate: {}\n'.format(lr)) writer.add_scalar('Learning_Rate', lr, epoch) # One epoch's validation valid_loss = valid(valid_loader=valid_loader, model=model, logger=logger) writer.add_scalar('Valid_Loss', valid_loss, epoch) # Check if there was an improvement is_best = valid_loss < best_loss best_loss = min(valid_loss, best_loss) if not is_best: epochs_since_improvement += 1 print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,)) else: epochs_since_improvement = 0 # Save checkpoint save_checkpoint(epoch, epochs_since_improvement, model, optimizer, best_loss, is_best)
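# `adjust_learning_rate` and `get_learning_rate` are assumed to scale and read
# the optimizer's learning rate in the half-lr schedule above; minimal sketches:
def adjust_learning_rate(optimizer, factor):
    for group in optimizer.param_groups:
        group["lr"] *= factor

def get_learning_rate(optimizer):
    return optimizer.param_groups[0]["lr"]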