コード例 #1
0
def train(model_path=None):
    """Train the encoder/decoder captioning model.

    Args:
        model_path: optional path to pretrained weights; restoring is
            currently not implemented (see TODO below), the parameter is
            kept for interface compatibility.

    Side effects:
        Saves encoder/decoder state dicts under ./models/train/ and the
        BLEU/loss curves under ./result/ every cfg.save_model_iter steps.
    """
    dataloader = DataLoader(Augmentation())
    encoder = Encoder()
    dict_len = len(dataloader.data.dictionary)
    decoder = DecoderWithAttention(dict_len)

    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    # TODO: restore encoder/decoder weights from model_path once the
    # checkpoint format is settled.
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.encoder_learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.decoder_learning_rate)

    val_bleu = []
    losses = []
    # Bounded loop replaces the original `while True` + manual counter:
    # iterations run 1..cfg.train_iter inclusive, matching checkpoint names.
    for train_iter in range(1, cfg.train_iter + 1):
        batch_image, batch_label = dataloader.get_next_batch()
        batch_image = torch.from_numpy(batch_image).type(torch.FloatTensor)
        batch_label = torch.from_numpy(batch_label).type(torch.LongTensor)
        if cuda:
            batch_image = batch_image.cuda()
            batch_label = batch_label.cuda()

        print('Training')
        output = encoder(batch_image)
        predictions, alphas = decoder(output, batch_label)

        loss = cal_loss(predictions, batch_label, alphas, 1)

        # Zero both optimizers before the shared backward pass.
        decoder_optimizer.zero_grad()
        encoder_optimizer.zero_grad()
        loss.backward()
        decoder_optimizer.step()
        encoder_optimizer.step()

        # .item() extracts the scalar once, replacing the deprecated
        # `.data.numpy()` access (and the duplicated conversion).
        loss_value = loss.item()
        print('Iter', train_iter, '| loss:',
              loss_value, '| batch size:', cfg.batch_size,
              '| encoder learning rate:', cfg.encoder_learning_rate,
              '| decoder learning rate:', cfg.decoder_learning_rate)
        losses.append(loss_value)
        if train_iter % cfg.save_model_iter == 0:
            val_bleu.append(val_eval(encoder, decoder, dataloader))
            torch.save(
                encoder.state_dict(), './models/train/encoder_' +
                cfg.pre_train_model + '_' + str(train_iter) + '.pkl')
            torch.save(decoder.state_dict(),
                       './models/train/decoder_' + str(train_iter) + '.pkl')
            np.save('./result/train_bleu4.npy', val_bleu)
            np.save('./result/losses.npy', losses)
コード例 #2
0
class Model:
    """Inference wrapper around a trained Encoder/DecoderWithAttention pair.

    Restores the network weights and the normalization statistics computed
    on the training set, and exposes predict() for single-sample inference.
    """

    def __init__(self, chpt_enc_path, chpt_dec_path, chpt_stat_path):
        """Load checkpoints onto `device` and read the pickled statistics.

        Args:
            chpt_enc_path: encoder state-dict checkpoint path.
            chpt_dec_path: decoder state-dict checkpoint path.
            chpt_stat_path: pickle holding the training-set mean/std values.
        """
        historyLength = 10

        encoder_dim = hiddenDimension
        lstm_input_dim = historyLength + 1
        decoder_dim = hiddenDimension
        attention_dim = hiddenDimension
        output_dim = 1

        self.decodeLength = 20

        self.encoder = Encoder()
        self.decoder = DecoderWithAttention(encoder_dim, lstm_input_dim,
                                            decoder_dim, attention_dim,
                                            output_dim)

        # map_location lets a checkpoint saved on GPU load on a CPU-only
        # host (and vice versa) instead of failing at deserialization.
        self.encoder.load_state_dict(
            torch.load(chpt_enc_path, map_location=device))
        self.decoder.load_state_dict(
            torch.load(chpt_dec_path, map_location=device))

        self.encoder = self.encoder.to(device)
        self.decoder = self.decoder.to(device)

        self.encoder.eval()
        self.decoder.eval()

        with open(chpt_stat_path, 'rb') as f:
            chpt_stat = pickle.load(f)

        # Training-set statistics: curvature (c), speed (v), acceleration (a).
        self.cMean = chpt_stat['cMean_tr']
        self.cStd = chpt_stat['cStd_tr']

        self.vMean = chpt_stat['vMean_tr']
        self.vStd = chpt_stat['vStd_tr']

        self.aMean = chpt_stat['aMean_tr']
        self.aStd = chpt_stat['aStd_tr']

        self.mean = torch.Tensor([self.vMean, self.aMean]).to(device)
        self.std = torch.Tensor([self.vStd, self.aStd]).to(device)

    def predict(self, curvatures, currentSpeed, histSpeeds, currentAccelX,
                histAccelXs):
        """Run one inference step.

        Returns:
            Tuple of numpy arrays: (predictions de-normalized with the
            acceleration statistics, squeezed attention weights).
        """
        # Pure inference: no_grad skips autograd bookkeeping entirely
        # (eval() alone does not disable gradient tracking).
        with torch.no_grad():
            curvatures = torch.FloatTensor(curvatures).to(device)

            currentSpeed = torch.FloatTensor([currentSpeed]).to(device)
            histSpeeds = torch.FloatTensor(histSpeeds).to(device)

            currentAccelX = torch.FloatTensor([currentAccelX]).to(device)
            histAccelXs = torch.FloatTensor(histAccelXs).to(device)

            # Normalize every input with the training-set statistics.
            curvatures = (curvatures - self.cMean) / self.cStd
            currentSpeed = (currentSpeed - self.vMean) / self.vStd
            histSpeeds = (histSpeeds - self.vMean) / self.vStd
            currentAccelX = (currentAccelX - self.aMean) / self.aStd
            histAccelXs = (histAccelXs - self.aMean) / self.aStd

            curvatures = self.encoder(
                curvatures.unsqueeze(dim=0).unsqueeze(dim=0))
            predictions, alphas, alphas_target = self.decoder(
                curvatures, currentSpeed, histSpeeds.unsqueeze(dim=0),
                currentAccelX, histAccelXs.unsqueeze(dim=0),
                self.decodeLength, self.vMean, self.vStd, self.aMean,
                self.aStd)

            # De-normalize predictions back to acceleration units.
            return ((predictions.squeeze() * self.aStd + self.aMean)
                    .cpu().detach().numpy(),
                    alphas.squeeze().cpu().detach().numpy())
コード例 #3
0
def main(data_name):
    """Train the captioner on *data_name* and export the best-BLEU results."""
    dataset = MyDataSet(data_name=data_name, reset=False)
    vocab_size = dataset.vocab_size
    corpus = dataset.corpus
    # Reverse mapping (token id -> word), needed when dumping results.
    id2word = dict((idx, word) for word, idx in corpus.items())
    train_loader, val_loader = _get_data_loader(dataset, 0.5, batch_size)

    embedding, embed_dim = load_embedding(basic_settings['word2vec'], corpus)

    encoder = Encoder(dataset.feature_dim, output_dim=100)
    decoder = DecoderWithAttention(encoder.get_output_dim(),
                                   decoder_dim=100,
                                   attn_dim=100,
                                   embed_dim=embed_dim,
                                   vocab_size=vocab_size)
    # Only the decoder is optimized; the encoder stays fixed.
    trainable_params = filter(lambda p: p.requires_grad, decoder.parameters())
    decoder_optimizer = torch.optim.Adam(params=trainable_params, lr=lr)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    criterion = torch.nn.CrossEntropyLoss().to(device)

    best_bleu4 = 0
    best_refs = []
    best_hypos = []

    for epoch in range(1, epoches + 1):
        # Train for one epoch, then score on the validation split.
        train_epoch(train_loader=train_loader,
                    encoder=encoder,
                    decoder=decoder,
                    criterion=criterion,
                    optimizer=decoder_optimizer,
                    epoch=epoch)

        bleu4_score, refs, hypos = validate(val_loader=val_loader,
                                            encoder=encoder,
                                            decoder=decoder,
                                            criterion=criterion,
                                            word2id=corpus)
        # Remember only the outputs from the best-scoring epoch.
        if bleu4_score > best_bleu4:
            best_bleu4, best_refs, best_hypos = bleu4_score, refs, hypos

    name = data_name + '_' + str(best_bleu4) + '.xlsx'
    save_result(name, best_refs, best_hypos, id2word)
コード例 #4
0
    def __init__(self, chpt_enc_path, chpt_dec_path, chpt_stat_path):
        """Build the networks, restore their checkpoints, and load the
        normalization statistics pickled at training time.

        Args:
            chpt_enc_path: encoder state-dict checkpoint path.
            chpt_dec_path: decoder state-dict checkpoint path.
            chpt_stat_path: pickle with the training-set mean/std values.
        """
        historyLength = 10

        encoder_dim = hiddenDimension
        lstm_input_dim = historyLength + 1  # history plus the current sample
        decoder_dim = hiddenDimension
        attention_dim = hiddenDimension
        output_dim = 2

        self.decodeLength = 20

        self.encoder = Encoder()
        self.decoder = DecoderWithAttention(encoder_dim, lstm_input_dim,
                                            decoder_dim, attention_dim,
                                            output_dim)

        # map_location keeps the load working when the checkpoint was saved
        # on a different device than the current one.
        self.encoder.load_state_dict(
            torch.load(chpt_enc_path, map_location=device))
        self.decoder.load_state_dict(
            torch.load(chpt_dec_path, map_location=device))

        self.encoder = self.encoder.to(device)
        self.decoder = self.decoder.to(device)

        self.encoder.eval()
        self.decoder.eval()

        with open(chpt_stat_path, 'rb') as f:
            chpt_stat = pickle.load(f)

        # Training-set statistics: curvature (c), speed (v), acceleration (a).
        self.cMean = chpt_stat['cMean_tr']
        self.cStd = chpt_stat['cStd_tr']

        self.vMean = chpt_stat['vMean_tr']
        self.vStd = chpt_stat['vStd_tr']

        self.aMean = chpt_stat['aMean_tr']
        self.aStd = chpt_stat['aStd_tr']

        # l/dl statistics — semantics not visible here; presumably lateral
        # position and its derivative. TODO confirm against the trainer.
        self.lMean = chpt_stat['lMean_tr']
        self.lStd = chpt_stat['lStd_tr']

        self.dlMean = chpt_stat['dlMean_tr']
        self.dlStd = chpt_stat['dlStd_tr']
コード例 #5
0
def predict(image_name, model_path=None):
    """Caption a single image and print every beam-search hypothesis.

    Args:
        image_name: path of the image to caption.
        model_path: optional [encoder_path, decoder_path] pair of state
            dicts; when omitted the freshly constructed weights are used.
    """
    print(len(data.dictionary))
    encoder = Encoder()
    decoder = DecoderWithAttention(len(data.dictionary))
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    if model_path:
        print('Loading the parameters of model.')
        # One map_location handles both the GPU and CPU-only cases,
        # collapsing the duplicated cuda/cpu load branches.
        location = None if cuda else 'cpu'
        encoder.load_state_dict(
            torch.load(model_path[0], map_location=location))
        decoder.load_state_dict(
            torch.load(model_path[1], map_location=location))
    encoder.eval()
    decoder.eval()

    # uint8 HWC image -> normalized float CHW batch of one.
    image = cv2.imread(image_name)
    image = cv2.resize(image, (224, 224))
    image = image.astype(np.float32) / 255.0
    image = image.transpose([2, 0, 1])
    image = np.expand_dims(image, axis=0)
    image = torch.from_numpy(image).type(torch.FloatTensor)
    if cuda:
        image = image.cuda()

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = encoder(image)
        sentences, alphas = beam_search(data, decoder, output)
    show(image_name, sentences[0], alphas[0])

    for sentence in sentences:
        prediction = []
        for word in sentence:
            prediction.append(data.dictionary[word])
            # Token id 2 ends the sentence (presumably <eos>; confirm
            # against the dictionary used at training time).
            if word == 2:
                break
        print('The prediction sentence:', ' '.join(prediction))
コード例 #6
0
ファイル: train.py プロジェクト: 3secondz-lab/asurada
def main():
    """Build the networks and data loaders, then run the full training
    loop with TensorBoard logging, periodic validation, and checkpointing.

    Relies on module-level configuration (driver, circuit_tr, circuit_vl,
    dimensions, `criterion`, `writer`, ...) and mutates the two globals
    below to track progress across epochs.
    """
    global epochs_since_improvement, best_loss_tr

    encoder = Encoder()
    decoder = DecoderWithAttention(encoder_dim, lstm_input_dim, decoder_dim,
                                   attention_dim, output_dim)

    # Only parameters with requires_grad=True are handed to the optimizers.
    encoder_optimizer = torch.optim.Adam(params=filter(
        lambda p: p.requires_grad, encoder.parameters()),
                                         lr=encoder_lr)
    decoder_optimizer = torch.optim.Adam(params=filter(
        lambda p: p.requires_grad, decoder.parameters()),
                                         lr=decoder_lr)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    trainLoader = torch.utils.data.DataLoader(Dataset(driver, circuit_tr,
                                                      curvatureLength,
                                                      historyLength,
                                                      predLength),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=workers,
                                              pin_memory=True)

    # Normalization statistics computed on the training split.
    cMean_tr = trainLoader.dataset.cMean
    cStd_tr = trainLoader.dataset.cStd
    vMean_tr = trainLoader.dataset.vMean
    vStd_tr = trainLoader.dataset.vStd
    aMean_tr = trainLoader.dataset.aMean
    aStd_tr = trainLoader.dataset.aStd

    # The validation set is normalized with the *training* statistics so
    # both splits share the same input scale.
    validLoader = torch.utils.data.DataLoader(Dataset(driver,
                                                      circuit_vl,
                                                      curvatureLength,
                                                      historyLength,
                                                      predLength,
                                                      cMean=cMean_tr,
                                                      cStd=cStd_tr,
                                                      vMean=vMean_tr,
                                                      vStd=vStd_tr,
                                                      aMean=aMean_tr,
                                                      aStd=aStd_tr),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=workers,
                                              pin_memory=True)

    print('Training version.{} (A->V)'.format(vNumber))
    print('Training data ({} - {})'.format(driver, circuit_tr))
    print('Validation data ({} - {})'.format(driver, circuit_vl))
    print('curvature len {}'.format(curvatureLength))
    print('history len {}'.format(historyLength))
    print('pred len {}'.format(predLength))
    print('hiddenDimension {}'.format(hiddenDimension))

    print('\nTraining...\n')

    for epoch in tqdm(range(start_epoch, epochs)):

        loss, vMape, vRmse, vCorr, aCorr = train(
            trainLoader=trainLoader,
            encoder=encoder,
            decoder=decoder,
            criterion=criterion,
            encoder_optimizer=encoder_optimizer,
            decoder_optimizer=decoder_optimizer,
            epoch=epoch)

        # Log training metrics under the 'tr' tag for each scalar group.
        writer.add_scalars('Loss', {'tr': loss}, epoch)
        writer.add_scalars('MAPE', {'tr': vMape}, epoch)
        writer.add_scalars('RMSE', {'tr': vRmse}, epoch)
        writer.add_scalars('vCorr', {'tr': vCorr}, epoch)
        writer.add_scalars('aCorr', {'tr': aCorr}, epoch)

        # Early-stopping bookkeeping on the *training* loss.
        is_best = loss < best_loss_tr
        best_loss_tr = min(loss, best_loss_tr)
        if not is_best:
            epochs_since_improvement += 1
            print(
                '\nEpoch {} Epoch Epochs since last improvement (unit: 100): {}\n'
                .format(epoch, epochs_since_improvement))
        else:
            epochs_since_improvement = 0

        # Decay both learning rates by 0.8 every 8 stagnant epochs.
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(epoch, encoder_optimizer, 0.8)
            adjust_learning_rate(epoch, decoder_optimizer, 0.8)

        # Validate every 5 epochs; log under the 'vl' tag.
        if epoch % 5 == 0:
            loss_vl, vMape_vl, vRmse_vl, vCorr_vl, aCorr_vl = validate(
                validLoader=validLoader,
                encoder=encoder,
                decoder=decoder,
                criterion=criterion)
            writer.add_scalars('Loss', {'vl': loss_vl}, epoch)
            writer.add_scalars('MAPE', {'vl': vMape_vl}, epoch)
            writer.add_scalars('RMSE', {'vl': vRmse_vl}, epoch)
            writer.add_scalars('vCorr', {'vl': vCorr_vl}, epoch)
            writer.add_scalars('aCorr', {'vl': aCorr_vl}, epoch)

        # Checkpoint (with the normalization stats) every 10 epochs.
        if epoch % 10 == 0:
            save_checkpoint(chptFolderPath, encoder, decoder, epoch, cMean_tr,
                            cStd_tr, vMean_tr, vStd_tr, aMean_tr, aStd_tr,
                            curvatureLength, historyLength)
    writer.close()
コード例 #7
0
#=========================================================================================================
#=========================================================================================================
#================================ 2. DEFINING ARCHITECTURE

# Read word map
print('\nLoading word map', end='...')
word_map_file = os.path.join(DATA_FOLDER, 'WORDMAP_' + base_filename + '.json')
with open(word_map_file, 'r') as j:
    word_map = json.load(j)
vocab_size = len(word_map)
print('done')

# Networks
print('Loading networks', end='...')
decoder = DecoderWithAttention(ATTENTION_DIM, EMBBEDING_DIM, DECODER_DIM,
                               vocab_size, ENCODER_DIM, DROPOUT)
encoder = Encoder(output_size=12)
print('done')

# Resume: only the decoder weights are restored from the saved model.
if START_EPOCH != 0:
    print('Loading last model', end='...')
    # NOTE(review): torch.load without map_location assumes the checkpoint's
    # device matches this run — confirm, or pass map_location explicitly.
    decoder.load_state_dict(
        torch.load('../models/image_captioning_{}.model'.format(START_EPOCH)))
    print('done')

# Embedding: pretrained vectors are loaded only for the 200-d configuration.
if EMBBEDING_DIM == 200:
    print('Loading embeddings', end='...')
    embedding, _ = load_embeddings(embedding_file, DATA_FOLDER)
    decoder.load_pretrained_embeddings(embedding, fine_tune=True)
    print('done')
コード例 #8
0
        plt.title(word)
        plt.xticks(())
        plt.yticks(())
    plt.show()


if __name__ == '__main__':
    # Checkpoints to restore; the non-empty list doubles as the
    # "is there anything to load" flag below.
    model_path = [
        './models/train/encoder_mobilenet_60000.pkl',
        './models/train/decoder_60000.pkl'
    ]
    encoder = Encoder()
    decoder = DecoderWithAttention(len(data.dictionary))
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    if model_path:
        print('Loading the parameters of model.')
        # One map_location handles both GPU and CPU-only hosts, collapsing
        # the duplicated cuda/cpu load branches.
        location = None if cuda else 'cpu'
        encoder.load_state_dict(
            torch.load(model_path[0], map_location=location))
        decoder.load_state_dict(
            torch.load(model_path[1], map_location=location))
    encoder.eval()
    decoder.eval()
コード例 #9
0
ファイル: train.py プロジェクト: vichu259/image_captioing
def main(args):
    """
    Training and validation.

    Builds the encoder/decoder (or restores them from `checkpoint`), then
    runs the train/validate loop with BLEU-4-based early stopping and
    per-epoch checkpointing.
    """

    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    with open(args.vocab_path, 'rb') as f:
        word_map = pickle.load(f)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        # The encoder only gets an optimizer when it is being fine-tuned.
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        # Fine-tuning may be enabled on resume even if the checkpoint was
        # trained without it; create the missing optimizer in that case.
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=encoder_lr)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    # ImageNet statistics; the same Normalize instance is reused inside the
    # transform (it was previously defined but never used).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    train_loader = get_loader(args.train_image_dir,
                              args.caption_path,
                              word_map,
                              transform,
                              args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    val_loader = get_loader(args.val_image_dir,
                            args.caption_path,
                            word_map,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    for epoch in range(start_epoch, epochs):
        # Stop entirely after 20 stagnant epochs; decay LRs every 8.
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        # Early-stopping bookkeeping on validation BLEU-4.
        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
コード例 #10
0
from model import Encoder, DecoderWithAttention
import tensorflow as tf

if __name__ == '__main__':
    # Hyper-parameters consumed by both networks.
    config = {
        'batch_size': 64,
        'embedding_size': 300,
        'vocab_size': 10000,
        'hidden_units': 100,
        'max_length': 25,
        'attention_units': 200,
    }
    encoder = Encoder(config)
    decoder = DecoderWithAttention(config)
    print(encoder)

    batch = config['batch_size']
    # Random batch standing in for real encoder input.
    encoder_inputs = tf.random_normal(shape=[batch, config['max_length']])
    print('encoder_inputs', encoder_inputs)

    # Encode, then decode a single step conditioned on the encoder state
    # and its outputs (attention context).
    encoder_outputs, state = encoder(encoder_inputs)
    print(encoder_outputs, state)
    decoder_inputs = tf.random_normal(shape=[batch, 1])
    outputs, state = decoder(decoder_inputs, state, encoder_outputs)
    print(outputs, state)