def train(model_path=None):
    """Train the encoder/decoder captioning model for cfg.train_iter iterations.

    Saves encoder/decoder state dicts every cfg.save_model_iter iterations and
    dumps the running loss / validation-BLEU curves to ./result/.

    Args:
        model_path: optional path to pretrained weights. Currently unused —
            resuming was commented out upstream; kept in the signature for
            backward compatibility.
    """
    dataloader = DataLoader(Augmentation())
    encoder = Encoder()
    dict_len = len(dataloader.data.dictionary)
    decoder = DecoderWithAttention(dict_len)
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.encoder_learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.decoder_learning_rate)

    val_bleu = list()
    losses = list()
    train_iter = 1
    while True:
        batch_image, batch_label = dataloader.get_next_batch()
        batch_image = torch.from_numpy(batch_image).type(torch.FloatTensor)
        batch_label = torch.from_numpy(batch_label).type(torch.LongTensor)
        if cuda:
            batch_image = batch_image.cuda()
            batch_label = batch_label.cuda()

        print('Training')
        output = encoder(batch_image)
        predictions, alphas = decoder(output, batch_label)
        loss = cal_loss(predictions, batch_label, alphas, 1)

        decoder_optimizer.zero_grad()
        encoder_optimizer.zero_grad()
        loss.backward()
        decoder_optimizer.step()
        encoder_optimizer.step()

        # .item() replaces the deprecated `.data` access and avoids
        # computing the CPU transfer twice (print + append).
        loss_value = loss.item()
        print('Iter', train_iter, '| loss:', loss_value,
              '| batch size:', cfg.batch_size,
              '| encoder learning rate:', cfg.encoder_learning_rate,
              '| decoder learning rate:', cfg.decoder_learning_rate)
        losses.append(loss_value)

        if train_iter % cfg.save_model_iter == 0:
            val_bleu.append(val_eval(encoder, decoder, dataloader))
            torch.save(encoder.state_dict(),
                       './models/train/encoder_' + cfg.pre_train_model + '_' +
                       str(train_iter) + '.pkl')
            torch.save(decoder.state_dict(),
                       './models/train/decoder_' + str(train_iter) + '.pkl')
            np.save('./result/train_bleu4.npy', val_bleu)
            np.save('./result/losses.npy', losses)

        if train_iter == cfg.train_iter:
            break
        train_iter += 1
class Model:
    """Inference wrapper around a trained curvature Encoder / DecoderWithAttention pair."""

    def __init__(self, chpt_enc_path, chpt_dec_path, chpt_stat_path):
        hist_len = 10
        dim = hiddenDimension
        self.decodeLength = 20

        self.encoder = Encoder()
        # (encoder_dim, lstm_input_dim, decoder_dim, attention_dim, output_dim)
        self.decoder = DecoderWithAttention(dim, hist_len + 1, dim, dim, 1)
        self.encoder.load_state_dict(torch.load(chpt_enc_path))
        self.decoder.load_state_dict(torch.load(chpt_dec_path))
        self.encoder = self.encoder.to(device)
        self.decoder = self.decoder.to(device)
        self.encoder.eval()
        self.decoder.eval()

        # Normalization statistics captured on the training set.
        with open(chpt_stat_path, 'rb') as f:
            stats = pickle.load(f)
        self.cMean, self.cStd = stats['cMean_tr'], stats['cStd_tr']
        self.vMean, self.vStd = stats['vMean_tr'], stats['vStd_tr']
        self.aMean, self.aStd = stats['aMean_tr'], stats['aStd_tr']
        self.mean = torch.Tensor([self.vMean, self.aMean]).to(device)
        self.std = torch.Tensor([self.vStd, self.aStd]).to(device)

    def predict(self, curvatures, currentSpeed, histSpeeds, currentAccelX,
                histAccelXs):
        """Run one decode pass; returns (denormalized predictions, attention weights) as numpy arrays."""
        def to_dev(values):
            return torch.FloatTensor(values).to(device)

        # Normalize every input with the training-set statistics.
        curvatures = (to_dev(curvatures) - self.cMean) / self.cStd
        currentSpeed = (to_dev([currentSpeed]) - self.vMean) / self.vStd
        histSpeeds = (to_dev(histSpeeds) - self.vMean) / self.vStd
        currentAccelX = (to_dev([currentAccelX]) - self.aMean) / self.aStd
        histAccelXs = (to_dev(histAccelXs) - self.aMean) / self.aStd

        encoded = self.encoder(curvatures.unsqueeze(dim=0).unsqueeze(dim=0))
        predictions, alphas, alphas_target = self.decoder(
            encoded, currentSpeed, histSpeeds.unsqueeze(dim=0),
            currentAccelX, histAccelXs.unsqueeze(dim=0),
            self.decodeLength, self.vMean, self.vStd, self.aMean, self.aStd)

        denormalized = predictions.squeeze() * self.aStd + self.aMean
        return (denormalized.cpu().detach().numpy(),
                alphas.squeeze().cpu().detach().numpy())
def main(data_name):
    """Train a captioning decoder on `data_name` and save the best-BLEU-4 outputs.

    The encoder is built but not optimized here; only decoder parameters
    receive gradients. Results of the best validation epoch are written to
    an .xlsx file named after the dataset and its BLEU-4 score.
    """
    dataset = MyDataSet(data_name=data_name, reset=False)
    vocab_size = dataset.vocab_size
    corpus = dataset.corpus
    id2word = {idx: word for word, idx in corpus.items()}
    train_loader, val_loader = _get_data_loader(dataset, 0.5, batch_size)

    embedding, embed_dim = load_embedding(basic_settings['word2vec'], corpus)
    encoder = Encoder(dataset.feature_dim, output_dim=100)
    decoder = DecoderWithAttention(encoder.get_output_dim(),
                                   decoder_dim=100,
                                   attn_dim=100,
                                   embed_dim=embed_dim,
                                   vocab_size=vocab_size)
    # Only trainable decoder parameters are optimized.
    trainable = [p for p in decoder.parameters() if p.requires_grad]
    decoder_optimizer = torch.optim.Adam(params=trainable, lr=lr)

    encoder = encoder.to(device)
    decoder = decoder.to(device)
    criterion = torch.nn.CrossEntropyLoss().to(device)

    best_bleu4, best_refs, best_hypos = 0, [], []
    for epoch in range(1, epoches + 1):
        # One epoch's training
        train_epoch(train_loader=train_loader,
                    encoder=encoder,
                    decoder=decoder,
                    criterion=criterion,
                    optimizer=decoder_optimizer,
                    epoch=epoch)
        # One epoch's validation
        bleu4_score, refs, hypos = validate(val_loader=val_loader,
                                            encoder=encoder,
                                            decoder=decoder,
                                            criterion=criterion,
                                            word2id=corpus)
        if bleu4_score > best_bleu4:
            best_bleu4, best_refs, best_hypos = bleu4_score, refs, hypos

    name = data_name + '_' + str(best_bleu4) + '.xlsx'
    save_result(name, best_refs, best_hypos, id2word)
def __init__(self, chpt_enc_path, chpt_dec_path, chpt_stat_path):
    """Restore encoder/decoder weights and training-time normalization stats.

    Args:
        chpt_enc_path: path to the encoder state-dict checkpoint.
        chpt_dec_path: path to the decoder state-dict checkpoint.
        chpt_stat_path: pickle file with *_tr mean/std statistics.
    """
    hist_len = 10
    dim = hiddenDimension
    self.decodeLength = 20

    self.encoder = Encoder()
    # (encoder_dim, lstm_input_dim, decoder_dim, attention_dim, output_dim);
    # the LSTM consumes the history window plus the current sample.
    self.decoder = DecoderWithAttention(dim, 1 * (hist_len + 1), dim, dim, 2)
    self.encoder.load_state_dict(torch.load(chpt_enc_path))
    self.decoder.load_state_dict(torch.load(chpt_dec_path))
    self.encoder = self.encoder.to(device)
    self.decoder = self.decoder.to(device)
    self.encoder.eval()
    self.decoder.eval()

    with open(chpt_stat_path, 'rb') as f:
        stats = pickle.load(f)
    # Curvature, velocity, acceleration, lateral, delta-lateral statistics.
    for key in ('c', 'v', 'a', 'l', 'dl'):
        setattr(self, key + 'Mean', stats[key + 'Mean_tr'])
        setattr(self, key + 'Std', stats[key + 'Std_tr'])
def predict(image_name, model_path=None):
    """Caption a single image with beam search and print each hypothesis.

    Args:
        image_name: path to the image file to caption.
        model_path: optional [encoder_state_path, decoder_state_path] pair;
            when given, weights are loaded (CPU-mapped if CUDA is absent).

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    print(len(data.dictionary))
    encoder = Encoder()
    decoder = DecoderWithAttention(len(data.dictionary))
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    if model_path:
        print('Loading the parameters of model.')
        if cuda:
            encoder.load_state_dict(torch.load(model_path[0]))
            decoder.load_state_dict(torch.load(model_path[1]))
        else:
            encoder.load_state_dict(
                torch.load(model_path[0], map_location='cpu'))
            decoder.load_state_dict(
                torch.load(model_path[1], map_location='cpu'))
    encoder.eval()
    decoder.eval()

    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread returns None (no exception) for missing/unreadable files;
        # fail loudly here instead of crashing later in resize().
        raise FileNotFoundError('Cannot read image: ' + image_name)
    # HWC uint8 -> normalized NCHW float tensor.
    image = cv2.resize(image, (224, 224))
    image = image.astype(np.float32) / 255.0
    image = image.transpose([2, 0, 1])
    image = np.expand_dims(image, axis=0)
    image = torch.from_numpy(image).type(torch.FloatTensor)
    if cuda:
        image = image.cuda()

    output = encoder(image)
    sentences, alphas = beam_search(data, decoder, output)
    show(image_name, sentences[0], alphas[0])

    for sentence in sentences:
        prediction = []
        for word in sentence:
            prediction.append(data.dictionary[word])
            if word == 2:  # id 2 terminates the sentence — TODO confirm it is <eos>
                break
        print('The prediction sentence:', ' '.join(prediction))
def main():
    """Full train/validate loop with TensorBoard logging and checkpointing.

    Validation-set normalization reuses training-set statistics; learning
    rates decay after every 8 epochs without training-loss improvement.
    """
    global epochs_since_improvement, best_loss_tr

    encoder = Encoder()
    decoder = DecoderWithAttention(encoder_dim, lstm_input_dim, decoder_dim,
                                   attention_dim, output_dim)
    encoder_optimizer = torch.optim.Adam(
        params=[p for p in encoder.parameters() if p.requires_grad],
        lr=encoder_lr)
    decoder_optimizer = torch.optim.Adam(
        params=[p for p in decoder.parameters() if p.requires_grad],
        lr=decoder_lr)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    trainLoader = torch.utils.data.DataLoader(
        Dataset(driver, circuit_tr, curvatureLength, historyLength,
                predLength),
        batch_size=batch_size, shuffle=True, num_workers=workers,
        pin_memory=True)
    # Reuse training-set statistics to normalize the validation set.
    cMean_tr = trainLoader.dataset.cMean
    cStd_tr = trainLoader.dataset.cStd
    vMean_tr = trainLoader.dataset.vMean
    vStd_tr = trainLoader.dataset.vStd
    aMean_tr = trainLoader.dataset.aMean
    aStd_tr = trainLoader.dataset.aStd
    validLoader = torch.utils.data.DataLoader(
        Dataset(driver, circuit_vl, curvatureLength, historyLength,
                predLength, cMean=cMean_tr, cStd=cStd_tr, vMean=vMean_tr,
                vStd=vStd_tr, aMean=aMean_tr, aStd=aStd_tr),
        batch_size=batch_size, shuffle=True, num_workers=workers,
        pin_memory=True)

    print(f'Training version.{vNumber} (A->V)')
    print(f'Training data ({driver} - {circuit_tr})')
    print(f'Validation data ({driver} - {circuit_vl})')
    print(f'curvature len {curvatureLength}')
    print(f'history len {historyLength}')
    print(f'pred len {predLength}')
    print(f'hiddenDimension {hiddenDimension}')
    print('\nTraining...\n')

    for epoch in tqdm(range(start_epoch, epochs)):
        loss, vMape, vRmse, vCorr, aCorr = train(
            trainLoader=trainLoader, encoder=encoder, decoder=decoder,
            criterion=criterion, encoder_optimizer=encoder_optimizer,
            decoder_optimizer=decoder_optimizer, epoch=epoch)
        for tag, value in (('Loss', loss), ('MAPE', vMape), ('RMSE', vRmse),
                           ('vCorr', vCorr), ('aCorr', aCorr)):
            writer.add_scalars(tag, {'tr': value}, epoch)

        is_best = loss < best_loss_tr
        best_loss_tr = min(loss, best_loss_tr)
        if is_best:
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1
            print(
                '\nEpoch {} Epoch Epochs since last improvement (unit: 100): {}\n'
                .format(epoch, epochs_since_improvement))
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(epoch, encoder_optimizer, 0.8)
            adjust_learning_rate(epoch, decoder_optimizer, 0.8)

        # Validate every 5 epochs; checkpoint every 10.
        if epoch % 5 == 0:
            loss_vl, vMape_vl, vRmse_vl, vCorr_vl, aCorr_vl = validate(
                validLoader=validLoader, encoder=encoder, decoder=decoder,
                criterion=criterion)
            for tag, value in (('Loss', loss_vl), ('MAPE', vMape_vl),
                               ('RMSE', vRmse_vl), ('vCorr', vCorr_vl),
                               ('aCorr', aCorr_vl)):
                writer.add_scalars(tag, {'vl': value}, epoch)
        if epoch % 10 == 0:
            save_checkpoint(chptFolderPath, encoder, decoder, epoch,
                            cMean_tr, cStd_tr, vMean_tr, vStd_tr, aMean_tr,
                            aStd_tr, curvatureLength, historyLength)

    writer.close()
#========================================================================================================= #========================================================================================================= #================================ 2. DEFINING ARCHITECTURE # Read word map print('\nLoading word map', end='...') word_map_file = os.path.join(DATA_FOLDER, 'WORDMAP_' + base_filename + '.json') with open(word_map_file, 'r') as j: word_map = json.load(j) vocab_size = len(word_map) print('done') # Networks print('Loading networks', end='...') decoder = DecoderWithAttention(ATTENTION_DIM, EMBBEDING_DIM, DECODER_DIM, vocab_size, ENCODER_DIM, DROPOUT) encoder = Encoder(output_size=12) print('done') if START_EPOCH != 0: print('Loading last model', end='...') decoder.load_state_dict( torch.load('../models/image_captioning_{}.model'.format(START_EPOCH))) print('done') # Embedding if EMBBEDING_DIM == 200: print('Loading embeddings', end='...') embedding, _ = load_embeddings(embedding_file, DATA_FOLDER) decoder.load_pretrained_embeddings(embedding, fine_tune=True) print('done')
# NOTE(review): the four plt.* calls below are the tail of a plotting
# function whose `def` lies above this chunk; the indentation here is
# reconstructed — confirm the nesting depth against the full file.
    plt.title(word)
    plt.xticks(())
    plt.yticks(())
    plt.show()


if __name__ == '__main__':
    # Earlier invocations kept for reference:
    # predict('./data/RSICD/RSICD_images/00110.jpg', ['./models/train/encoder_mobilenet_60000.pkl', './models/train/decoder_60000.pkl'])
    # predict('./data/RSICD/test/00029.jpg', ['./models/train/encoder_resnet_50000.pkl', './models/train/decoder_50000.pkl'])
    model_path = [
        './models/train/encoder_mobilenet_60000.pkl',
        './models/train/decoder_60000.pkl'
    ]
    encoder = Encoder()
    decoder = DecoderWithAttention(len(data.dictionary))
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    if model_path:
        print('Loading the parameters of model.')
        if cuda:
            encoder.load_state_dict(torch.load(model_path[0]))
            decoder.load_state_dict(torch.load(model_path[1]))
        else:
            # Map GPU-trained weights onto CPU when CUDA is unavailable.
            encoder.load_state_dict(
                torch.load(model_path[0], map_location='cpu'))
            decoder.load_state_dict(
                torch.load(model_path[1], map_location='cpu'))
    encoder.eval()
    decoder.eval()
def main(args):
    """Training and validation entry point for the captioning model.

    Loads the vocabulary, builds the encoder/decoder (or restores them from
    a checkpoint), then alternates train/validate epochs. Stops early after
    20 epochs without BLEU-4 improvement; decays learning rates every 8
    stale epochs. Saves a checkpoint after every epoch.
    """
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, \
        fine_tune_encoder, data_name, word_map

    with open(args.vocab_path, 'rb') as f:
        word_map = pickle.load(f)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, decoder.parameters()),
            lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        # The encoder only gets an optimizer when it is being fine-tuned.
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            # Fine-tuning was switched on after the checkpoint was written.
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(
                params=filter(lambda p: p.requires_grad,
                              encoder.parameters()),
                lr=encoder_lr)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()

    # (Removed an unused `normalize` local — the Compose below constructs
    # its own Normalize with the same ImageNet statistics.)
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    train_loader = get_loader(args.train_image_dir, args.caption_path,
                              word_map, transform, args.batch_size,
                              shuffle=True, num_workers=args.num_workers)
    val_loader = get_loader(args.val_image_dir, args.caption_path, word_map,
                            transform, args.batch_size, shuffle=True,
                            num_workers=args.num_workers)

    for epoch in range(start_epoch, epochs):
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        train(train_loader=train_loader, encoder=encoder, decoder=decoder,
              criterion=criterion, encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer, epoch=epoch)
        recent_bleu4 = validate(val_loader=val_loader, encoder=encoder,
                                decoder=decoder, criterion=criterion)

        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
from model import Encoder, DecoderWithAttention
import tensorflow as tf

if __name__ == '__main__':
    # Smoke test: wire an Encoder and a DecoderWithAttention together on
    # random inputs and dump every intermediate tensor.
    config = {
        'batch_size': 64,
        'embedding_size': 300,
        'vocab_size': 10000,
        'hidden_units': 100,
        'max_length': 25,
        'attention_units': 200,
    }
    batch = config['batch_size']

    encoder = Encoder(config)
    decoder = DecoderWithAttention(config)
    print(encoder)

    # One full-length encoder pass...
    encoder_inputs = tf.random_normal(shape=[batch, config['max_length']])
    print('encoder_inputs', encoder_inputs)
    encoder_outputs, state = encoder(encoder_inputs)
    print(encoder_outputs, state)

    # ...then a single decoder step attending over the encoder outputs.
    decoder_inputs = tf.random_normal(shape=[batch, 1])
    outputs, state = decoder(decoder_inputs, state, encoder_outputs)
    print(outputs, state)