def train(train_feats, train_caps, val_feats, val_caps, train_prefix="", val_prefix="",
          epochs=EPOCHS, batch_size=BATCH_SIZE, max_seq_len=MAX_LEN,
          hidden_dim=HIDDEN_DIM, emb_dim=EMB_DIM, enc_seq_len=ENC_SEQ_LEN,
          enc_dim=ENC_DIM, clip_val=CLIP_VAL, teacher_force=TEACHER_FORCE_RAT,
          dropout_p=0.1, attn_activation="relu", epsilon=0.0005,
          weight_decay=WEIGHT_DECAY, lr=LEARNING_RATE, early_stopping=True,
          scheduler="step", attention=None, deep_out=False, checkpoint="",
          out_dir="Pytorch_Exp_Out", decoder=None):
    print("EXPERIMENT START ", time.asctime())

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # 1. Load the data
    train_captions = open(train_caps, mode='r', encoding='utf-8') \
        .read().strip().split('\n')
    train_features = open(train_feats, mode='r').read().strip().split('\n')
    train_features = [os.path.join(train_prefix, z) for z in train_features]
    assert len(train_captions) == len(train_features)
    if val_caps:
        val_captions = open(val_caps, mode='r', encoding='utf-8') \
            .read().strip().split('\n')
        val_features = open(val_feats, mode='r').read().strip().split('\n')
        val_features = [os.path.join(val_prefix, z) for z in val_features]
        assert len(val_captions) == len(val_features)

    # 2. Preprocess the data
    train_captions = normalize_strings(train_captions)
    train_data = list(zip(train_captions, train_features))
    train_data = filter_inputs(train_data)
    print("Total training instances: ", len(train_data))

    if val_caps:
        val_captions = normalize_strings(val_captions)
        val_data = list(zip(val_captions, val_features))
        val_data = filter_inputs(val_data)
        print("Total validation instances: ", len(val_data))

    vocab = Vocab()
    vocab.build_vocab(map(lambda x: x[0], train_data), max_size=10000)
    vocab.save(path=os.path.join(out_dir, 'vocab.txt'))
    print("Vocabulary size: ", vocab.n_words)

    # 3. Initialize the network, optimizer & loss function
    net = Network(hid_dim=hidden_dim, out_dim=vocab.n_words,
                  sos_token=0, eos_token=1, pad_token=2,
                  teacher_forcing_rat=teacher_force, emb_dim=emb_dim,
                  enc_seq_len=enc_seq_len, enc_dim=enc_dim,
                  dropout_p=dropout_p, deep_out=deep_out,
                  decoder=decoder, attention=attention)
    net.to(DEVICE)
    if checkpoint:
        net.load_state_dict(torch.load(checkpoint))

    optimizer = torch.optim.Adam(net.parameters(), lr=lr,
                                 weight_decay=weight_decay)
    loss_function = nn.NLLLoss()
    scheduler = set_scheduler(scheduler, optimizer)

    # 4. Train
    prev_val_l = sys.maxsize
    total_instances = 0
    total_steps = 0
    train_loss_log = []
    train_loss_log_batches = []
    train_penalty_log = []
    val_loss_log = []
    val_loss_log_batches = []
    val_bleu_log = []
    # BLEU is maximized, so start below any real score; initializing with
    # sys.maxsize would trigger early stopping after the first epoch and
    # the checkpoint would never be saved.
    prev_bleu = -1

    train_data = DataLoader(captions=map(lambda x: x[0], train_data),
                            sources=map(lambda x: x[1], train_data),
                            batch_size=batch_size,
                            vocab=vocab,
                            max_seq_len=max_seq_len)
    if val_caps:
        val_data = DataLoader(captions=map(lambda x: x[0], val_data),
                              sources=map(lambda x: x[1], val_data),
                              batch_size=batch_size,
                              vocab=vocab,
                              max_seq_len=max_seq_len,
                              val_multiref=True)

    training_start_time = time.time()

    for e in range(1, epochs + 1):
        print("Epoch ", e)
        tfr = _teacher_force(epochs, e, teacher_force)

        # train one epoch
        train_l, inst, steps, t, l_log, pen = train_epoch(
            model=net,
            loss_function=loss_function,
            optimizer=optimizer,
            data_iter=train_data,
            max_len=max_seq_len,
            clip_val=clip_val,
            epsilon=epsilon,
            teacher_forcing_rat=tfr)
        if scheduler is not None:
            scheduler.step()

        # epoch logs
        print("Training loss:\t", train_l)
        print("Instances:\t", inst)
        print("Steps:\t", steps)
        hours = t // 3600
        mins = (t % 3600) // 60
        secs = (t % 60)
        print("Time:\t{0}:{1}:{2}".format(hours, mins, secs))
        total_instances += inst
        total_steps += steps
        train_loss_log.append(train_l)
        train_loss_log_batches += l_log
        train_penalty_log.append(pen)
        print()

        # evaluate
        if val_caps:
            val_l, l_log, bleu = evaluate(model=net,
                                          loss_function=loss_function,
                                          data_iter=val_data,
                                          max_len=max_seq_len,
                                          epsilon=epsilon)

            # validation logs
            print("Validation loss: ", val_l)
            print("Validation BLEU-4: ", bleu)
            # keep the checkpoint that scores best on validation BLEU
            if bleu > prev_bleu:
                torch.save(net.state_dict(), os.path.join(out_dir, 'net.pt'))
            val_loss_log.append(val_l)
            val_bleu_log.append(bleu)
            val_loss_log_batches += l_log

        # sample model
        print("Sampling training data...")
        print()
        samples = sample(net, train_data, vocab, samples=3, max_len=max_seq_len)
        for t, s in samples:
            print("Target:\t", t)
            print("Predicted:\t", s)
            print()

        # if val_caps:
        #     print("Sampling validation data...")
        #     print()
        #     samples = sample(net, val_data, vocab, samples=3, max_len=max_seq_len)
        #     for t, s in samples:
        #         print("Target:\t", t)
        #         print("Predicted:\t", s)
        #         print()

        if val_caps:
            # If the validation BLEU after this epoch dropped below the
            # previous epoch's score, wrap up training.
            if prev_bleu > bleu and early_stopping:
                print("\nWrapping training after {0} epochs.\n".format(e))
                break
            prev_val_l = val_l
            prev_bleu = bleu

    # Experiment summary logs.
    tot_time = time.time() - training_start_time
    hours = tot_time // 3600
    mins = (tot_time % 3600) // 60
    secs = (tot_time % 60)
    print("Total training time:\t{0}:{1}:{2}".format(hours, mins, secs))
    print("Total training instances:\t", total_instances)
    print("Total training steps:\t", total_steps)
    print()

    _write_loss_log("train_loss_log.txt", out_dir, train_loss_log)
    _write_loss_log("train_loss_log_batches.txt", out_dir, train_loss_log_batches)
    _write_loss_log("train_penalty.txt", out_dir, train_penalty_log)
    if val_caps:
        _write_loss_log("val_loss_log.txt", out_dir, val_loss_log)
        _write_loss_log("val_loss_log_batches.txt", out_dir, val_loss_log_batches)
        _write_loss_log("val_bleu4_log.txt", out_dir, val_bleu_log)

    print("EXPERIMENT END ", time.asctime())
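
# Hypothetical usage sketch (not part of the original source): assumes
# "train_feats.txt" / "train_caps.txt" list one feature-file path and one
# caption per line, with the feature files stored under "features/train"
# and "features/val". The decoder and attention classes are taken from the
# models / attentions modules referenced elsewhere in this repo.
#
# train(train_feats="train_feats.txt",
#       train_caps="train_caps.txt",
#       val_feats="val_feats.txt",
#       val_caps="val_caps.txt",
#       train_prefix="features/train",
#       val_prefix="features/val",
#       decoder=models.AttentionDecoder_4,
#       attention=attentions.ScaledGeneralAttention,
#       out_dir="Pytorch_Exp_Out")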
model_weight_paths = get_model_weight_paths(ckpt_dir, args.num)

# Evaluate each saved checkpoint: load its weights, then estimate the
# spectrum of the loss Hessian for that epoch.
for epoch_index, (epoch_number, weights_path) in enumerate(model_weight_paths):
    logger.info('Starting epoch: {}'.format(epoch_number))
    assert osp.exists(weights_path), \
        'path to weights: {} was not found'.format(weights_path)

    state_dict = torch.load(weights_path,
                            map_location=lambda storage, loc: storage)
    if 'model' in state_dict.keys():
        state_dict = state_dict['model']
    model.load_state_dict(state_dict, strict=True)
    model = model.to(device)
    model = model.eval()
    logger.info('weights loaded from path: {}'.format(weights_path))
    logger.info('for epoch: {}'.format(epoch_number))

    Hess = FullHessian(crit='CrossEntropyLoss',
                       loader=loader,
                       device=device,
                       model=model,
                       num_classes=C,
                       hessian_type='Hessian',
                       init_poly_deg=64,
                       poly_deg=128,
                       spectrum_margin=0.05,
                       poly_points=1024,
def run(test_dir, test_srcs, checkpoint, vocab, out="captions.out.txt",
        batch_size=16, max_seq_len=MAX_LEN, hidden_dim=HIDDEN_DIM,
        emb_dim=EMB_DIM, enc_seq_len=ENC_SEQ_LEN, enc_dim=ENC_DIM,
        attn_activation="relu", deep_out=False, decoder=4, attention=3):
    # map integer ids to the corresponding decoder / attention classes
    if decoder == 1:
        decoder = models.AttentionDecoder_1
    elif decoder == 2:
        decoder = models.AttentionDecoder_2
    elif decoder == 3:
        decoder = models.AttentionDecoder_3
    elif decoder == 4:
        decoder = models.AttentionDecoder_4

    if attention == 1:
        attention = attentions.AdditiveAttention
    elif attention == 2:
        attention = attentions.GeneralAttention
    elif attention == 3:
        attention = attentions.ScaledGeneralAttention

    # load vocabulary
    vocabulary = Vocab()
    vocabulary.load(vocab)

    # load test instances file paths
    srcs = open(test_srcs).read().strip().split('\n')
    srcs = [os.path.join(test_dir, s) for s in srcs]

    # load model
    net = Network(hid_dim=hidden_dim, out_dim=vocabulary.n_words,
                  sos_token=0, eos_token=1, pad_token=2,
                  emb_dim=emb_dim, enc_seq_len=enc_seq_len, enc_dim=enc_dim,
                  deep_out=deep_out, attention=attention, decoder=decoder)
    net.to(DEVICE)
    net.load_state_dict(torch.load(checkpoint))
    net.eval()

    with torch.no_grad():
        # run inference batch by batch
        num_instances = len(srcs)
        i = 0
        captions = []
        while i < num_instances:
            srcs_batch = srcs[i:i + batch_size]
            batch = _load_batch(srcs_batch)
            batch = batch.to(DEVICE)
            tokens, _ = net(batch, targets=None, max_len=max_seq_len)
            tokens = tokens.permute(1, 0, 2).detach()
            _, topi = tokens.topk(1, dim=2)
            topi = topi.squeeze(2)

            # decode token output from the model
            for j in range(len(srcs_batch)):
                c = vocabulary.tensor_to_sentence(topi[j])
                c = ' '.join(c)
                captions.append(c)

            i += len(srcs_batch)

    # write the generated captions, one per line, and close the file properly
    with open(out, mode='w') as out_f:
        for c in captions:
            out_f.write(c + '\n')
    return
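
# Hypothetical usage sketch (not part of the original source): assumes
# "test_srcs.txt" lists one feature-file name per line under "features/test",
# and that training produced "Pytorch_Exp_Out/net.pt" and
# "Pytorch_Exp_Out/vocab.txt" as in train() above. decoder=4 / attention=3
# select AttentionDecoder_4 with ScaledGeneralAttention via the mapping at
# the top of run().
#
# run(test_dir="features/test",
#     test_srcs="test_srcs.txt",
#     checkpoint="Pytorch_Exp_Out/net.pt",
#     vocab="Pytorch_Exp_Out/vocab.txt",
#     out="captions.out.txt",
#     decoder=4,
#     attention=3)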