Example #1
def train(train_loader, val_loader, epochnum, save_path='.', save_freq=None):
    iter_size = len(train_loader)
    net = Encoder()
    net.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(net.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=2e-4)

    for epoch in range(epochnum):
        print('epoch : {}'.format(epoch))
        net.train()
        train_loss = 0
        train_correct = 0
        total = 0
        net.training = True
        for i, data in enumerate(train_loader):
            sys.stdout.write('iter : {} / {}\r'.format(i, iter_size))
            sys.stdout.flush()
            #print('iter: {} / {}'.format(i, iter_size))
            inputs, labels = data
            inputs, labels = Variable(inputs.cuda()), labels.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, Variable(labels))
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]
            pred = (torch.max(outputs.data, 1)[1])
            train_correct += (pred == labels).sum()
            total += labels.size(0)
        sys.stdout.write(' ' * 20 + '\r')
        sys.stdout.flush()

        print('train_loss:{}, train_acc:{:.2%}'.format(train_loss / total,
                                                       train_correct / total))
        val_loss = 0
        val_correct = 0
        total = 0
        net.training = False
        for data in val_loader:
            net.eval()
            inputs, labels = data
            inputs, labels = Variable(inputs).cuda(), labels.cuda()
            outputs = net(inputs)
            pred = torch.max(outputs.data, 1)[1]
            total += labels.size(0)
            loss = criterion(outputs, Variable(labels))
            val_loss += loss.data[0]
            val_correct += (pred == labels).sum()

        print('val_loss:{}, val_acc:{:.2%}'.format(val_loss / total,
                                                   val_correct / total))
        optimizer.param_groups[0]['lr'] *= np.exp(-0.4)
        if save_freq and epoch % save_freq == save_freq - 1:
            net_name = os.path.join(save_path, 'epoch_{}'.format(epoch))
            torch.save(net, net_name)
    torch.save(net, os.path.join(save_path, 'trained_net'))
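Note: this example is written against pre-0.4 PyTorch (`Variable`, `loss.data[0]`). A minimal sketch of the same inner loop in PyTorch 0.4+, where `Variable` is gone and scalar losses are read with `.item()` (a modernized equivalent, not the author's code):

    # Sketch only: assumes net, criterion, optimizer and the counters from train() above.
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.cuda(), labels.cuda()  # no Variable wrapper needed
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()                      # replaces loss.data[0]
        pred = outputs.argmax(dim=1)
        train_correct += (pred == labels).sum().item()
        total += labels.size(0)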
Example #2
def train(model_path=None):
    dataloader = DataLoader(Augmentation())
    encoder = Encoder()
    dict_len = len(dataloader.data.dictionary)
    decoder = DecoderWithAttention(dict_len)

    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    # if model_path:
    #   text_generator.load_state_dict(torch.load(model_path))
    train_iter = 1
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.encoder_learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.decoder_learning_rate)

    val_bleu = list()
    losses = list()
    while True:
        batch_image, batch_label = dataloader.get_next_batch()
        batch_image = torch.from_numpy(batch_image).type(torch.FloatTensor)
        batch_label = torch.from_numpy(batch_label).type(torch.LongTensor)
        if cuda:
            batch_image = batch_image.cuda()
            batch_label = batch_label.cuda()
        # print(batch_image.size())
        # print(batch_label.size())

        print('Training')
        output = encoder(batch_image)
        # print('encoder output:', output.size())
        predictions, alphas = decoder(output, batch_label)

        loss = cal_loss(predictions, batch_label, alphas, 1)

        decoder_optimizer.zero_grad()
        encoder_optimizer.zero_grad()
        loss.backward()
        decoder_optimizer.step()
        encoder_optimizer.step()

        print('Iter', train_iter, '| loss:',
              loss.cpu().data.numpy(), '| batch size:', cfg.batch_size,
              '| encoder learning rate:', cfg.encoder_learning_rate,
              '| decoder learning rate:', cfg.decoder_learning_rate)
        losses.append(loss.cpu().data.numpy())
        if train_iter % cfg.save_model_iter == 0:
            val_bleu.append(val_eval(encoder, decoder, dataloader))
            torch.save(
                encoder.state_dict(), './models/train/encoder_' +
                cfg.pre_train_model + '_' + str(train_iter) + '.pkl')
            torch.save(decoder.state_dict(),
                       './models/train/decoder_' + str(train_iter) + '.pkl')
            np.save('./result/train_bleu4.npy', val_bleu)
            np.save('./result/losses.npy', losses)

        if train_iter == cfg.train_iter:
            break
        train_iter += 1
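`cal_loss` is not defined in this snippet. Given the `alphas` argument and the attention decoder, it is presumably cross-entropy plus the doubly stochastic attention regularization from Show, Attend and Tell; a minimal sketch under that assumption (shapes are guesses; `alpha_c` is the final argument of the call above):

import torch.nn.functional as F

def cal_loss(predictions, targets, alphas, alpha_c):
    # Sketch: predictions assumed (batch, seq_len, vocab), targets (batch, seq_len).
    ce = F.cross_entropy(predictions.reshape(-1, predictions.size(-1)),
                         targets.reshape(-1))
    # Encourage the attention weights over each location to sum to ~1 across time.
    att_reg = alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()
    return ce + att_reg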
Example #3
def instantiate_model(config, tokenizer):
    configure_devices(config)
    model = Model(config)
    optimizer = transformers.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=0)
    metrics = None

    if config.continue_training:
        state_dict = torch.load(config.continue_training, map_location='cpu')
        model.load_state_dict(state_dict['model'])
        if 'optimizer_state_dict' in state_dict:
            optimizer.load_state_dict(state_dict['optimizer_state_dict'])
            for g in optimizer.param_groups:
                g['lr'] = config.learning_rate
        
        try:
            print(f"Loaded model:\nEpochs: {state_dict['epoch']}\nLoss: {state_dict['loss']}\n", 
                  f"Recall: {state_dict['rec']}\nMRR: {state_dict['mrr']}")
        except KeyError:
            pass
        
    if config.use_cuda:
        model = model.cuda()
        optimizer_to(optimizer, config.device)
        model = torch.nn.DataParallel(model, device_ids=config.devices)
    return model, optimizer, metrics
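`optimizer_to` is not part of torch; it is presumably the widely shared helper that moves a loaded optimizer's state tensors onto a device. A minimal sketch:

def optimizer_to(optimizer, device):
    # Move every tensor in the optimizer state to `device`
    # (needed after loading a checkpoint with map_location='cpu').
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)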
Example #4
def display_network(opt):
    cuda = True if torch.cuda.is_available() else False

    # Dimensionality
    input_shape = (opt.channels, opt.img_height, opt.img_width)
    shared_dim = opt.dim * (2**opt.n_downsample)

    # Initialize generator and discriminator
    shared_E = ResidualBlock(in_channels=shared_dim)
    E1 = Encoder(dim=opt.dim,
                 n_downsample=opt.n_downsample,
                 shared_block=shared_E)
    E2 = Encoder(dim=opt.dim,
                 n_downsample=opt.n_downsample,
                 shared_block=shared_E)

    shared_G = ResidualBlock(in_channels=shared_dim)
    G1 = Generator(dim=opt.dim,
                   n_upsample=opt.n_upsample,
                   shared_block=shared_G)
    G2 = Generator(dim=opt.dim,
                   n_upsample=opt.n_upsample,
                   shared_block=shared_G)

    D1 = Discriminator(input_shape)
    D2 = Discriminator(input_shape)

    if cuda:
        E1 = E1.cuda()
        E2 = E2.cuda()
        G1 = G1.cuda()
        G2 = G2.cuda()
        D1 = D1.cuda()
        D2 = D2.cuda()

    summary(E1, (opt.channels, opt.img_height, opt.img_width))
    summary(E2, (opt.channels, opt.img_height, opt.img_width))
    summary(G1, (opt.img_height, opt.dim, opt.dim))
    summary(G2, (opt.img_height, opt.dim, opt.dim))
    summary(D1, (opt.channels, opt.img_height, opt.img_width))
    summary(D2, (opt.channels, opt.img_height, opt.img_width))
Example #5
class PretrainingTrainer:
    def __init__(self):
        self.preprocessor = None
        self.model = None
        self.optimizer = None

    def setup_preprocessed_data(self):
        self.preprocessor = Preprocess()
        self.preprocessor.setup()

    def setup_model(self):
        # Create multilingual vocabulary
        self.model = Encoder()

        if con.CUDA:
            self.model = self.model.cuda()

    def setup_scheduler_optimizer(self):
        lr_rate = 0.001
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=lr_rate,
                                    weight_decay=0)

    def train_model(self):
        train_loader = self.preprocessor.train_loaders
        batch_size = 8

        self.model.train()
        train_loss = 0
        batch_correct = 0
        total_correct = 0
        index = 0
        for hrl_src, lrl_src, hrl_att, lrl_att in train_loader:
            logits = self.model(hrl_src)
            print(logits.shape)
            break
            # self.optimizer.zero_grad()
            # batch_loss.backward()
            # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
            # self.optimizer.step()
            # batch_correct += self.evaluate(masked_outputs=masked_outputs, masked_lm_ids=masked_lm_ids)
            # total_correct += (8 * 20)

    def run_pretraining(self):
        self.setup_preprocessed_data()
        self.setup_model()
        self.setup_scheduler_optimizer()
        self.train_model()
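A hypothetical driver for this class would simply chain the setup steps through run_pretraining:

if __name__ == '__main__':
    trainer = PretrainingTrainer()
    trainer.run_pretraining()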
Example #6
def predict(image_name, model_path=None):
    print(len(data.dictionary))
    encoder = Encoder()
    decoder = DecoderWithAttention(len(data.dictionary))
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    if model_path:
        print('Loading the parameters of model.')
        if cuda:
            encoder.load_state_dict(torch.load(model_path[0]))
            decoder.load_state_dict(torch.load(model_path[1]))
        else:
            encoder.load_state_dict(
                torch.load(model_path[0], map_location='cpu'))
            decoder.load_state_dict(
                torch.load(model_path[1], map_location='cpu'))
    encoder.eval()
    decoder.eval()

    image = cv2.imread(image_name)
    image = cv2.resize(image, (224, 224))
    image = image.astype(np.float32) / 255.0
    image = image.transpose([2, 0, 1])
    image = np.expand_dims(image, axis=0)
    image = torch.from_numpy(image).type(torch.FloatTensor)
    if cuda:
        image = image.cuda()

    output = encoder(image)
    # print('encoder output:', output.size())
    sentences, alphas = beam_search(data, decoder, output)
    # print(sentences)
    show(image_name, sentences[0], alphas[0])

    for sentence in sentences:
        prediction = []
        for word in sentence:
            prediction.append(data.dictionary[word])
            if word == 2:  # id 2 is presumably the end-of-sentence token
                break
        # print(prediction)
        prediction = ' '.join(prediction)
        print('The prediction sentence:', prediction)
Example #7
def instantiate_model(config, tokenizer):
    configure_devices(config)
    model = Model(config)
    optimizer = transformers.AdamW(model.parameters(),
                                   lr=config.learning_rate,
                                   weight_decay=0)
    last_epoch = 0
    epoch_avg_loss = 0
    if config.continue_training:
        state_dict = torch.load(config.continue_training, map_location='cpu')
        model.load_state_dict(state_dict['model'])
        if 'optimizer_state_dict' in state_dict:
            optimizer.load_state_dict(state_dict['optimizer_state_dict'])
        last_epoch = state_dict['epoch']
        # epoch_avg_loss = state_dict['loss']
        # del state_dict # TODO TEST
    if config.use_cuda:
        model = model.cuda()
        optimizer_to(optimizer, config.device)
        model = torch.nn.DataParallel(model, device_ids=config.devices)
    return model, optimizer, last_epoch, epoch_avg_loss
Example #8
def infer(opt):
    cuda = True if torch.cuda.is_available() else False
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # Dimensionality
    shared_dim = opt.dim * (2**opt.n_downsample)

    # Initialize generator and discriminator
    shared_E = ResidualBlock(in_channels=shared_dim)
    shared_G = ResidualBlock(in_channels=shared_dim)

    E1 = Encoder(dim=opt.dim,
                 n_downsample=opt.n_downsample,
                 shared_block=shared_E)
    G2 = Generator(dim=opt.dim,
                   n_upsample=opt.n_upsample,
                   shared_block=shared_G)

    shared_E.load_state_dict(
        torch.load(opt.load_model.replace('*', 'shared_E')))
    shared_G.load_state_dict(
        torch.load(opt.load_model.replace('*', 'shared_G')))
    E1.load_state_dict(torch.load(opt.load_model.replace('*', 'E1')))
    G2.load_state_dict(torch.load(opt.load_model.replace('*', 'G2')))

    if cuda:
        shared_E.cuda()
        shared_G.cuda()
        E1 = E1.cuda()
        G2 = G2.cuda()

    sample = load_img(opt)
    sample = Variable(sample.unsqueeze(0).type(FloatTensor))
    _, Z1 = E1(sample)
    fake_X2 = G2(Z1)

    sample = torch.cat((sample.data, fake_X2.data), -1)
    save_image(sample, "images/infer.png", nrow=1, normalize=True)
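`load_img` is not shown. A minimal sketch, assuming it returns one CHW float tensor ready for the `unsqueeze(0)` above (the attribute name `opt.img_path` and the [-1, 1] normalization are guesses in the UNIT style):

from PIL import Image
import torchvision.transforms as transforms

def load_img(opt):
    # Resize to the network input size and normalize to [-1, 1].
    transform = transforms.Compose([
        transforms.Resize((opt.img_height, opt.img_width)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    return transform(Image.open(opt.img_path).convert('RGB'))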
Example #9
def main(_):
    # Load the configuration file.
    with open(FLAGS.config, 'r') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)

    # Create the checkpoint directory if it does not already exist.
    ckpt_dir = os.path.join(config['data']['ckpt'], config['experiment_name'])
    if not os.path.exists(ckpt_dir):
        os.mkdir(ckpt_dir)

    # Check if a pre-existing configuration file exists and matches the current
    # configuration. Otherwise save a copy of the configuration to the
    # checkpoint directory.
    prev_config_path = os.path.join(ckpt_dir, 'config.yaml')
    if os.path.exists(prev_config_path):
        with open(prev_config_path, 'r') as f:
            prev_config = yaml.load(f, Loader=yaml.SafeLoader)
        assert config == prev_config
    else:
        shutil.copyfile(FLAGS.config, prev_config_path)

    # Load the vocabularies.
    src_vocab = Vocab.load(config['data']['src']['vocab'])
    tgt_vocab = Vocab.load(config['data']['tgt']['vocab'])

    # Load the training and dev datasets.
    train_data = ShakespeareDataset('train', config, src_vocab, tgt_vocab)
    dev_data = ShakespeareDataset('dev', config, src_vocab, tgt_vocab)

    # Build the model.
    src_vocab_size = len(src_vocab)
    tgt_vocab_size = len(tgt_vocab)
    encoder = Encoder(src_vocab_size, config['model']['embedding_dim'])
    decoder = Decoder(tgt_vocab_size, config['model']['embedding_dim'])
    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # Define the loss function + optimizer.
    loss_weights = torch.ones(decoder.tgt_vocab_size)
    loss_weights[0] = 0
    if torch.cuda.is_available():
        loss_weights = loss_weights.cuda()
    criterion = torch.nn.NLLLoss(loss_weights)

    learning_rate = config['training']['learning_rate']
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=learning_rate)

    # Restore saved model (if one exists).
    ckpt_path = os.path.join(ckpt_dir, 'model.pt')
    if os.path.exists(ckpt_path):
        print('Loading checkpoint: %s' % ckpt_path)
        ckpt = torch.load(ckpt_path)
        epoch = ckpt['epoch']
        encoder.load_state_dict(ckpt['encoder'])
        decoder.load_state_dict(ckpt['decoder'])
        encoder_optimizer.load_state_dict(ckpt['encoder_optimizer'])
        decoder_optimizer.load_state_dict(ckpt['decoder_optimizer'])
    else:
        epoch = 0

    train_log_string = '%s :: Epoch %i :: Iter %i / %i :: train loss: %0.4f'
    dev_log_string = '\n%s :: Epoch %i :: dev loss: %0.4f'
    while epoch < config['training']['num_epochs']:

        # Main training loop.
        train_loss = []
        sampler = RandomSampler(train_data)
        for i, train_idx in enumerate(sampler):
            src, tgt = train_data[train_idx]

            # Clear gradients
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            # Feed inputs one by one from src into encoder (in reverse).
            src_length = src.size()[0]
            hidden = None
            for j in reversed(range(src_length)):
                encoder_output, hidden = encoder(src[j], hidden)

            # Feed desired outputs one by one from tgt into decoder
            # and measure loss.
            tgt_length = tgt.size()[0]
            loss = 0
            for j in range(tgt_length - 1):
                decoder_output, hidden = decoder(tgt[j], hidden)
                loss += criterion(decoder_output, tgt[j + 1])

            # Backpropagate the loss and update the model parameters.
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()

            train_loss.append(loss.data.cpu())

            # Every once and a while check on the loss
            if ((i + 1) % 100) == 0:
                print(train_log_string %
                      (datetime.now(), epoch, i + 1, len(train_data),
                       np.mean(train_loss)),
                      end='\r')
                train_loss = []

        # Evaluation loop.
        dev_loss = []
        for src, tgt in dev_data:

            # Feed inputs one by one from src into encoder.
            src_length = src.size()[0]
            hidden = None
            for j in reversed(range(src_length)):
                encoder_output, hidden = encoder(src[j], hidden)

            # Feed desired outputs one by one from tgt into decoder
            # and measure loss.
            tgt_length = tgt.size()[0]
            loss = 0
            for j in range(tgt_length - 1):
                decoder_output, hidden = decoder(tgt[j], hidden)
                loss += criterion(decoder_output, tgt[j + 1])

            dev_loss.append(loss.data.cpu())

        print(dev_log_string % (datetime.now(), epoch, np.mean(dev_loss)))

        state_dict = {
            'epoch': epoch,
            'encoder': encoder.state_dict(),
            'decoder': decoder.state_dict(),
            'encoder_optimizer': encoder_optimizer.state_dict(),
            'decoder_optimizer': decoder_optimizer.state_dict()
        }
        torch.save(state_dict, ckpt_path)

        epoch += 1
Example #10
def train(config, encoder_in=None, decoder_in=None):

    train_data, word2index, tag2index, intent2index = preprocessing(
        config.file_path, config.max_length)

    if train_data is None:
        print("Please check your data or its path")
        return
    if encoder_in is not None:
        encoder = encoder_in
        decoder = decoder_in
    else:
        encoder = Encoder(len(word2index), config.embedding_size,
                          config.hidden_size)
        decoder = Decoder(len(tag2index), len(intent2index),
                          len(tag2index) // 3, config.hidden_size * 2)
        if USE_CUDA:
            encoder = encoder.cuda()
            decoder = decoder.cuda()

        encoder.init_weights()
        decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=config.learning_rate)

    for step in range(config.step_size):
        losses = []
        for i, batch in enumerate(getBatch(config.batch_size, train_data)):
            x, y_1, y_2 = zip(*batch)  # sin, sout, intent
            x = torch.cat(x)
            tag_target = torch.cat(y_1)
            intent_target = torch.cat(y_2)
            x_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0,
                                                    t.data)))).cuda()
                if USE_CUDA else Variable(
                    torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in x
            ]).view(config.batch_size, -1)
            y_1_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0,
                                                    t.data)))).cuda()
                if USE_CUDA else Variable(
                    torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in tag_target
            ]).view(config.batch_size, -1)

            encoder.zero_grad()
            decoder.zero_grad()

            output, hidden_c = encoder(x, x_mask)
            start_decode = Variable(
                torch.LongTensor([
                    [word2index['<SOS>']] * config.batch_size
                ])).cuda().transpose(1, 0) if USE_CUDA else Variable(
                    torch.LongTensor([[word2index['<SOS>']] *
                                      config.batch_size])).transpose(1, 0)

            tag_score, intent_score = decoder(start_decode, hidden_c, output,
                                              x_mask)

            loss_1 = loss_function_1(tag_score, tag_target.view(-1))
            loss_2 = loss_function_2(intent_score, intent_target)

            loss = loss_1 + loss_2
            losses.append(
                loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())
            loss.backward()

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)

            enc_optim.step()
            dec_optim.step()

            if i % 100 == 0:
                print("Step", step, " epoch", i, " : ", np.mean(losses))
                losses = []

        t = Check()
        t.test(encoder, decoder)
        count = t.test_error_count
        rate = t.test_error_rate

        if not os.path.exists(config.model_dir):
            os.makedirs(config.model_dir)

        torch.save(
            decoder,
            os.path.join(config.model_dir,
                         str(count) + '_' + str(rate) + '_' + 'decoder.pkl'))
        torch.save(
            encoder,
            os.path.join(config.model_dir,
                         str(count) + '_' + str(rate) + '_' + 'encoder.pkl'))
    
    # if not os.path.exists(config.model_dir):
    #     os.makedirs(config.model_dir)

    # torch.save(decoder.state_dict(),os.path.join(config.model_dir,'jointnlu-decoder.pkl'))
    # torch.save(encoder.state_dict(),os.path.join(config.model_dir, 'jointnlu-encoder.pkl'))
    # torch.save(decoder,os.path.join(config.model_dir,'jointnlu-decoder.pkl'))
    # torch.save(encoder,os.path.join(config.model_dir, 'jointnlu-encoder.pkl'))
    print("Train Complete!")
Example #11
        plt.yticks(())
    plt.show()


if __name__ == '__main__':
    # predict('./data/RSICD/RSICD_images/00110.jpg', ['./models/train/encoder_mobilenet_60000.pkl', './models/train/decoder_60000.pkl'])
    # predict('./data/RSICD/test/00029.jpg', ['./models/train/encoder_resnet_50000.pkl', './models/train/decoder_50000.pkl'])

    model_path = [
        './models/train/encoder_mobilenet_60000.pkl',
        './models/train/decoder_60000.pkl'
    ]
    encoder = Encoder()
    decoder = DecoderWithAttention(len(data.dictionary))
    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    if model_path:
        print('Loading the parameters of model.')
        if cuda:
            encoder.load_state_dict(torch.load(model_path[0]))
            decoder.load_state_dict(torch.load(model_path[1]))
        else:
            encoder.load_state_dict(
                torch.load(model_path[0], map_location='cpu'))
            decoder.load_state_dict(
                torch.load(model_path[1], map_location='cpu'))
    encoder.eval()
    decoder.eval()
    test_eval(encoder, decoder, data)
Example #12
def train(description_db, entity_db, word_vocab, entity_vocab,
          target_entity_vocab, out_file, embeddings, dim_size, batch_size,
          negative, epoch, optimizer, max_text_len, max_entity_len, pool_size,
          seed, save, **model_params):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    word_matrix = np.random.uniform(low=-0.05,
                                    high=0.05,
                                    size=(word_vocab.size, dim_size))
    word_matrix = np.vstack([np.zeros(dim_size),
                             word_matrix]).astype('float32')

    entity_matrix = np.random.uniform(low=-0.05,
                                      high=0.05,
                                      size=(entity_vocab.size, dim_size))
    entity_matrix = np.vstack([np.zeros(dim_size),
                               entity_matrix]).astype('float32')

    target_entity_matrix = np.random.uniform(low=-0.05,
                                             high=0.05,
                                             size=(target_entity_vocab.size,
                                                   dim_size))
    target_entity_matrix = np.vstack(
        [np.zeros(dim_size), target_entity_matrix]).astype('float32')

    for embedding in embeddings:
        for word in word_vocab:
            vec = embedding.get_word_vector(word)
            if vec is not None:
                word_matrix[word_vocab.get_index(word)] = vec

        for title in entity_vocab:
            vec = embedding.get_entity_vector(title)
            if vec is not None:
                entity_matrix[entity_vocab.get_index(title)] = vec

        for title in target_entity_vocab:
            vec = embedding.get_entity_vector(title)
            if vec is not None:
                target_entity_matrix[target_entity_vocab.get_index(
                    title)] = vec

    entity_negatives = np.arange(1, target_entity_matrix.shape[0])

    model_params.update(dict(dim_size=dim_size))
    model = Encoder(word_embedding=word_matrix,
                    entity_embedding=entity_matrix,
                    target_entity_embedding=target_entity_matrix,
                    word_vocab=word_vocab,
                    entity_vocab=entity_vocab,
                    target_entity_vocab=target_entity_vocab,
                    **model_params)

    del word_matrix
    del entity_matrix
    del target_entity_matrix

    model = model.cuda()

    model.train()
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer_ins = getattr(optim, optimizer)(parameters)

    n_correct = 0
    n_total = 0
    cur_correct = 0
    cur_total = 0
    cur_loss = 0.0

    batch_idx = 0

    joblib.dump(
        dict(model_params=model_params,
             word_vocab=word_vocab.serialize(),
             entity_vocab=entity_vocab.serialize(),
             target_entity_vocab=target_entity_vocab.serialize()),
        out_file + '.pkl')

    if not save or 0 in save:
        state_dict = model.state_dict()
        torch.save(state_dict, out_file + '_epoch0.bin')

    for n_epoch in range(1, epoch + 1):
        logger.info('Epoch: %d', n_epoch)

        for (batch_idx, (args, target)) in enumerate(
                generate_data(description_db, word_vocab, entity_vocab,
                              target_entity_vocab, entity_negatives,
                              batch_size, negative, max_text_len,
                              max_entity_len, pool_size), batch_idx):
            # `async` is a reserved word in Python 3.7+; newer PyTorch spells it non_blocking.
            args = tuple([o.cuda(non_blocking=True) for o in args])
            target = target.cuda()

            optimizer_ins.zero_grad()
            output = model(args)
            loss = F.cross_entropy(output, target)
            loss.backward()

            optimizer_ins.step()

            cur_correct += (torch.max(output, 1)[1].view(
                target.size()).data == target.data).sum()
            cur_total += len(target)
            cur_loss += loss.data
            if batch_idx != 0 and batch_idx % 1000 == 0:
                n_correct += cur_correct
                n_total += cur_total
                logger.info(
                    'Processed %d batches (epoch: %d, loss: %.4f acc: %.4f total acc: %.4f)'
                    % (batch_idx, n_epoch, cur_loss[0] / cur_total, 100. *
                       cur_correct / cur_total, 100. * n_correct / n_total))
                cur_correct = 0
                cur_total = 0
                cur_loss = 0.0
Example #13
    DATA_PATH, train=False, download=True, transform=transforms.ToTensor()),
                                        batch_size=NUM_BATCH,
                                        shuffle=True)


def cuda_tensors(obj):
    for attr in dir(obj):
        value = getattr(obj, attr)
        if isinstance(value, torch.Tensor):
            setattr(obj, attr, value.cuda())


enc = Encoder()
dec = Decoder()
if CUDA:
    enc.cuda()
    dec.cuda()
    cuda_tensors(enc)
    cuda_tensors(dec)

optimizer = torch.optim.Adam(list(enc.parameters()) + list(dec.parameters()),
                             lr=LEARNING_RATE,
                             betas=(BETA1, 0.999))


def elbo(q, p, alpha=0.1):
    if NUM_SAMPLES is None:
        return probtorch.objectives.montecarlo.elbo(q,
                                                    p,
                                                    sample_dim=None,
                                                    batch_dim=0,
Example #14
class Image_Captioning:
    def __init__(self):
        parser = argparse.ArgumentParser(description='Image Captioning')
        parser.add_argument('--root',
                            default='../../../cocodataset/',
                            type=str)
        parser.add_argument('--crop_size', default=224, type=int)
        parser.add_argument('--epochs', default=100, type=int)
        parser.add_argument('--lr', default=1e-4, type=float)
        parser.add_argument('--batch_size', default=128, help='')
        parser.add_argument('--num_workers', default=4, type=int)
        parser.add_argument('--embed_dim', default=256, type=int)
        parser.add_argument('--hidden_size', default=512, type=int)
        parser.add_argument('--num_layers', default=1, type=int)
        parser.add_argument('--model_path', default='./model/', type=str)
        parser.add_argument('--vocab_path', default='./vocab/', type=str)
        parser.add_argument('--save_step', default=1000, type=int)

        self.args = parser.parse_args()
        self.Multi_GPU = False

        # if torch.cuda.device_count() > 1:
        #     print('Multi GPU Activate!')
        #     print('Using GPU :', int(torch.cuda.device_count()))
        #     self.Multi_GPU = True

        os.makedirs(self.args.model_path, exist_ok=True)

        transform = transforms.Compose([
            transforms.RandomCrop(self.args.crop_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
        ])

        with open(self.args.vocab_path + 'vocab.pickle', 'rb') as f:
            data = pickle.load(f)

        self.vocab = data

        self.DataLoader = get_dataloader(root=self.args.root,
                                         transform=transform,
                                         shuffle=True,
                                         batch_size=self.args.batch_size,
                                         num_workers=self.args.num_workers,
                                         vocab=self.vocab)

        self.Encoder = Encoder(embed_dim=self.args.embed_dim)
        self.Decoder = Decoder(embed_dim=self.args.embed_dim,
                               hidden_size=self.args.hidden_size,
                               vocab_size=len(self.vocab),
                               num_layers=self.args.num_layers)
        # print(self.Encoder)
        # print(self.Decoder)

    def train(self):
        if self.Multi_GPU:
            self.Encoder = torch.nn.DataParallel(self.Encoder)
            self.Decoder = torch.nn.DataParallel(self.Decoder)
            parameters = list(self.Encoder.module.fc.parameters()) + list(
                self.Encoder.module.BN.parameters()) + list(
                    self.Decoder.parameters())
        else:
            parameters = list(self.Encoder.fc.parameters()) + list(
                self.Encoder.BN.parameters()) + list(self.Decoder.parameters())

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(parameters, lr=self.args.lr)

        self.Encoder.cuda()
        self.Decoder.cuda()

        self.Encoder.train()
        self.Decoder.train()

        print('-' * 100)
        print('Now Training')
        print('-' * 100)

        for epoch in range(self.args.epochs):
            total_loss = 0
            for batch_idx, (image, captions,
                            lengths) in enumerate(self.DataLoader):
                optimizer.zero_grad()
                image, captions = image.cuda(), captions.cuda()

                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]

                if self.Multi_GPU:
                    img_features = nn.parallel.DataParallel(
                        self.Encoder, image)
                    outputs = nn.parallel.DataParallel(
                        self.Decoder, (img_features, captions, lengths))
                else:
                    img_features = self.Encoder(image)
                    outputs = self.Decoder(img_features, captions, lengths)

                loss = criterion(outputs, targets)
                total_loss += loss.item()

                loss.backward()
                optimizer.step()

                if batch_idx % 30 == 0:
                    print('Epoch : {}, Step : [{}/{}], Step Loss : {:.4f}'.
                          format(epoch, batch_idx, len(self.DataLoader),
                                 loss.item()))

            print('Epoch : [{}/{}], Total loss : {:.4f}'.format(
                epoch, self.args.epochs, total_loss / len(self.DataLoader)))

        print('Now saving the models')
        torch.save(
            self.Encoder.state_dict(),
            self.args.model_path + 'Encoder-{}.ckpt'.format(self.args.epochs))
        torch.save(
            self.Decoder.state_dict(),
            self.args.model_path + 'Decoder-{}.ckpt'.format(self.args.epochs))
Example #15
class Classifier(object):
    def __init__(self, hps, data_loader, valid_data_loader, log_dir='./log/'):
        self.hps = hps
        self.data_loader = data_loader
        self.valid_data_loader = valid_data_loader
        self.model_kept = []
        self.max_keep = 10
        self.build_model()
        self.logger = Logger(log_dir)

    def build_model(self):
        hps = self.hps
        self.SpeakerClassifier = SpeakerClassifier(ns=hps.ns, dp=hps.dp, n_class=hps.n_speakers)
        self.Encoder = Encoder(ns=hps.ns)
        if torch.cuda.is_available():
            self.SpeakerClassifier.cuda()
            self.Encoder.cuda()
        betas = (0.5, 0.9)
        self.opt = optim.Adam(self.SpeakerClassifier.parameters(), lr=self.hps.lr, betas=betas)

    def load_encoder(self, model_path):
        print('load model from {}'.format(model_path))
        with open(model_path, 'rb') as f_in:
            all_model = torch.load(f_in)
            self.Encoder.load_state_dict(all_model['encoder'])

    def save_model(self, model_path, iteration):
        new_model_path = '{}-{}'.format(model_path, iteration)
        torch.save(self.SpeakerClassifier.state_dict(), new_model_path)
        self.model_kept.append(new_model_path)
        if len(self.model_kept) >= self.max_keep:
            os.remove(self.model_kept[0])
            self.model_kept.pop(0)

    def load_model(self, model_path):
        print('load model from {}'.format(model_path))
        self.SpeakerClassifier.load_state_dict(torch.load(model_path))

    def set_eval(self):
        self.SpeakerClassifier.eval()

    def set_train(self):
        self.SpeakerClassifier.train()

    def permute_data(self, data):
        C = to_var(data[0], requires_grad=False)
        X = to_var(data[2]).permute(0, 2, 1)
        return C, X

    def encode_step(self, x):
        enc = self.Encoder(x)
        return enc

    def forward_step(self, enc):
        logits = self.SpeakerClassifier(enc)
        return logits

    def cal_loss(self, logits, y_true):
        # calculate loss 
        criterion = nn.CrossEntropyLoss()
        loss = criterion(logits, y_true)
        return loss

    def valid(self, n_batches=10):
        # input: valid data, output: (loss, acc)
        total_loss, total_acc = 0., 0.
        self.set_eval()
        for i in range(n_batches):
            data = next(self.valid_data_loader)
            y, x = self.permute_data(data)
            enc = self.Encoder(x)
            logits = self.SpeakerClassifier(enc)
            loss = self.cal_loss(logits, y)
            acc = cal_acc(logits, y)
            total_loss += loss.data[0]
            total_acc += acc  
        self.set_train()
        return total_loss / n_batches, total_acc / n_batches

    def train(self, model_path, flag='train'):
        # load hyperparams
        hps = self.hps
        for iteration in range(hps.iters):
            data = next(self.data_loader)
            y, x = self.permute_data(data)
            # encode
            enc = self.encode_step(x)
            # forward to classifier
            logits = self.forward_step(enc)
            # calculate loss
            loss = self.cal_loss(logits, y)
            # optimize
            reset_grad([self.SpeakerClassifier])
            loss.backward()
            grad_clip([self.SpeakerClassifier], self.hps.max_grad_norm)
            self.opt.step()
            # calculate acc
            acc = cal_acc(logits, y)
            # print info
            info = {
                f'{flag}/loss': loss.data[0], 
                f'{flag}/acc': acc,
            }
            slot_value = (iteration + 1, hps.iters) + tuple([value for value in info.values()])
            log = 'iter:[%06d/%06d], loss=%.3f, acc=%.3f'
            print(log % slot_value, end='\r')
            for tag, value in info.items():
                self.logger.scalar_summary(tag, value, iteration)
            if iteration % 1000 == 0 or iteration + 1 == hps.iters:
                valid_loss, valid_acc = self.valid(n_batches=10)
                # print info
                info = {
                    f'{flag}/valid_loss': valid_loss, 
                    f'{flag}/valid_acc': valid_acc,
                }
                slot_value = (iteration + 1, hps.iters) + \
                        tuple([value for value in info.values()])
                log = 'iter:[%06d/%06d], valid_loss=%.3f, valid_acc=%.3f'
                print(log % slot_value)
                for tag, value in info.items():
                    self.logger.scalar_summary(tag, value, iteration)
                self.save_model(model_path, iteration)
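`reset_grad` and `grad_clip` are imported helpers; minimal sketches, assuming they simply zero and clip gradients over a list of modules:

def reset_grad(modules):
    # Clear accumulated gradients of every module in the list.
    for m in modules:
        m.zero_grad()

def grad_clip(modules, max_grad_norm):
    # Clip the gradient norm of each module's parameters in place.
    for m in modules:
        torch.nn.utils.clip_grad_norm_(m.parameters(), max_grad_norm)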
Example #16
class Solver(object):
    def __init__(self, hps, data_loader, log_dir='./log/'):
        self.hps = hps
        self.data_loader = data_loader
        self.model_kept = []
        self.max_keep = 20
        self.build_model()
        self.logger = Logger(log_dir)

    def build_model(self):
        hps = self.hps
        ns = self.hps.ns
        emb_size = self.hps.emb_size
        self.Encoder = Encoder(ns=ns, dp=hps.enc_dp)
        self.Decoder = Decoder(ns=ns, c_a=hps.n_speakers, emb_size=emb_size)
        self.Generator = Decoder(ns=ns, c_a=hps.n_speakers, emb_size=emb_size)
        self.LatentDiscriminator = LatentDiscriminator(ns=ns, dp=hps.dis_dp)
        self.PatchDiscriminator = PatchDiscriminator(ns=ns,
                                                     n_class=hps.n_speakers)
        if torch.cuda.is_available():
            self.Encoder.cuda()
            self.Decoder.cuda()
            self.Generator.cuda()
            self.LatentDiscriminator.cuda()
            self.PatchDiscriminator.cuda()
        betas = (0.5, 0.9)
        params = list(self.Encoder.parameters()) + list(
            self.Decoder.parameters())
        self.ae_opt = optim.Adam(params, lr=self.hps.lr, betas=betas)
        self.gen_opt = optim.Adam(self.Generator.parameters(),
                                  lr=self.hps.lr,
                                  betas=betas)
        self.lat_opt = optim.Adam(self.LatentDiscriminator.parameters(),
                                  lr=self.hps.lr,
                                  betas=betas)
        self.patch_opt = optim.Adam(self.PatchDiscriminator.parameters(),
                                    lr=self.hps.lr,
                                    betas=betas)
        # Decoder-only optimizer; train() steps this (as self.decoder_opt)
        # when the patch loss should update only the Decoder. The original
        # code referenced self.decoder_opt without ever creating it.
        self.decoder_opt = optim.Adam(self.Decoder.parameters(),
                                      lr=self.hps.lr,
                                      betas=betas)

    def save_model(self, model_path, iteration, enc_only=True):
        if not enc_only:
            all_model = {
                'encoder': self.Encoder.state_dict(),
                'decoder': self.Decoder.state_dict(),
                'generator': self.Generator.state_dict(),
                'latent_discriminator': self.LatentDiscriminator.state_dict(),
                'patch_discriminator': self.PatchDiscriminator.state_dict(),
            }
        else:
            all_model = {
                'encoder': self.Encoder.state_dict(),
                'decoder': self.Decoder.state_dict(),
                'generator': self.Generator.state_dict(),
            }
        new_model_path = '{}-{}'.format(model_path, iteration)
        with open(new_model_path, 'wb') as f_out:
            torch.save(all_model, f_out)
        self.model_kept.append(new_model_path)

        if len(self.model_kept) >= self.max_keep:
            os.remove(self.model_kept[0])
            self.model_kept.pop(0)

    def load_model(self, model_path, enc_only=True):
        print('load model from {}'.format(model_path))
        with open(model_path, 'rb') as f_in:
            all_model = torch.load(f_in)
            self.Encoder.load_state_dict(all_model['encoder'])
            self.Decoder.load_state_dict(all_model['decoder'])
            #self.Generator.load_state_dict(all_model['generator'])
            if not enc_only:
                self.LatentDiscriminator.load_state_dict(
                    all_model['latent_discriminator'])
                self.PatchDiscriminator.load_state_dict(
                    all_model['patch_discriminator'])

    def set_eval(self):
        self.Encoder.eval()
        self.Decoder.eval()
        self.Generator.eval()
        #self.LatentDiscriminator.eval()

    def test_step(self, x, c):
        self.set_eval()
        x = to_var(x).permute(0, 2, 1)
        enc = self.Encoder(x)
        x_tilde = self.Decoder(enc, c)
        return x_tilde.data.cpu().numpy()

    def permute_data(self, data):
        C = [to_var(c, requires_grad=False) for c in data[:2]]
        X = [to_var(x).permute(0, 2, 1) for x in data[2:]]
        return C, X

    def sample_c(self, size):
        # NOTE: torch.ones(8) hardcodes 8 speaker classes here.
        c_sample = Variable(torch.multinomial(torch.ones(8),
                                              num_samples=size,
                                              replacement=True),
                            requires_grad=False)
        c_sample = c_sample.cuda() if torch.cuda.is_available() else c_sample
        return c_sample

    def cal_acc(self, logits, y_true):
        _, ind = torch.max(logits, dim=1)
        acc = torch.sum(
            (ind == y_true).type(torch.FloatTensor)) / y_true.size(0)
        return acc

    def encode_step(self, *args):
        enc_list = []
        for x in args:
            enc = self.Encoder(x)
            enc_list.append(enc)
        return tuple(enc_list)

    def decode_step(self, enc, c):
        x_tilde = self.Decoder(enc, c)
        return x_tilde

    def latent_discriminate_step(self,
                                 enc_i_t,
                                 enc_i_tk,
                                 enc_i_prime,
                                 enc_j,
                                 is_dis=True):
        same_pair = torch.cat([enc_i_t, enc_i_tk], dim=1)
        diff_pair = torch.cat([enc_i_prime, enc_j], dim=1)
        if is_dis:
            same_val = self.LatentDiscriminator(same_pair)
            diff_val = self.LatentDiscriminator(diff_pair)
            w_dis = torch.mean(same_val - diff_val)
            gp = calculate_gradients_penalty(self.LatentDiscriminator,
                                             same_pair, diff_pair)
            return w_dis, gp
        else:
            diff_val = self.LatentDiscriminator(diff_pair)
            loss_adv = -torch.mean(diff_val)
            return loss_adv

    def patch_discriminate_step(self, x, x_tilde, cal_gp=True):
        # w-distance
        D_real, real_logits = self.PatchDiscriminator(x, classify=True)
        D_fake, fake_logits = self.PatchDiscriminator(x_tilde, classify=True)
        w_dis = torch.mean(D_real - D_fake)
        if cal_gp:
            gp = calculate_gradients_penalty(self.PatchDiscriminator, x,
                                             x_tilde)
            return w_dis, real_logits, fake_logits, gp
        else:
            return w_dis, real_logits, fake_logits

    # backup
    #def classify():
    #    # aux clssify loss
    #    criterion = nn.NLLLoss()
    #    c_loss = criterion(real_logits, c) + criterion(fake_logits, c_sample)
    #    real_acc = self.cal_acc(real_logits, c)
    #    fake_acc = self.cal_acc(fake_logits, c_sample)

    def train(self, model_path, flag='train'):
        # load hyperparams
        hps = self.hps
        for iteration in range(hps.iters):
            # calculate current alpha
            if iteration + 1 < hps.lat_sched_iters and iteration >= hps.enc_pretrain_iters:
                current_alpha = hps.alpha_enc * (
                    iteration + 1 - hps.enc_pretrain_iters) / (
                        hps.lat_sched_iters - hps.enc_pretrain_iters)
            else:
                current_alpha = 0
            if iteration >= hps.enc_pretrain_iters:
                n_latent_steps = hps.n_latent_steps \
                    if iteration > hps.enc_pretrain_iters else hps.dis_pretrain_iters
                for step in range(n_latent_steps):
                    #===================== Train latent discriminator =====================#
                    data = next(self.data_loader)
                    (c_i, c_j), (x_i_t, x_i_tk, x_i_prime,
                                 x_j) = self.permute_data(data)
                    # encode
                    enc_i_t, enc_i_tk, enc_i_prime, enc_j = self.encode_step(
                        x_i_t, x_i_tk, x_i_prime, x_j)
                    # latent discriminate
                    latent_w_dis, latent_gp = self.latent_discriminate_step(
                        enc_i_t, enc_i_tk, enc_i_prime, enc_j)
                    lat_loss = -hps.alpha_dis * latent_w_dis + hps.lambda_ * latent_gp
                    reset_grad([self.LatentDiscriminator])
                    lat_loss.backward()
                    grad_clip([self.LatentDiscriminator],
                              self.hps.max_grad_norm)
                    self.lat_opt.step()
                    # print info
                    info = {
                        f'{flag}/D_latent_w_dis': latent_w_dis.data[0],
                        f'{flag}/latent_gp': latent_gp.data[0],
                    }
                    slot_value = (step, iteration + 1, hps.iters) + \
                            tuple([value for value in info.values()])
                    log = 'lat_D-%d:[%06d/%06d], w_dis=%.3f, gp=%.2f'
                    print(log % slot_value)
                    for tag, value in info.items():
                        self.logger.scalar_summary(tag, value, iteration)
            # two stage training
            if iteration >= hps.patch_start_iter:
                for step in range(hps.n_patch_steps):
                    #===================== Train patch discriminator =====================#
                    data = next(self.data_loader)
                    (c_i, _), (x_i_t, _, _, _) = self.permute_data(data)
                    # encode
                    enc_i_t, = self.encode_step(x_i_t)
                    c_sample = self.sample_c(x_i_t.size(0))
                    x_tilde = self.decode_step(enc_i_t, c_sample)
                    # Aux classify loss, reconstructed from the commented-out
                    # classify() backup above; the original lines referenced
                    # c_loss/real_acc/fake_acc without defining them.
                    patch_w_dis, real_logits, fake_logits, patch_gp = \
                            self.patch_discriminate_step(x_i_t, x_tilde, cal_gp=True)
                    criterion = nn.NLLLoss()
                    c_loss = criterion(real_logits, c_i) + criterion(fake_logits, c_sample)
                    real_acc = self.cal_acc(real_logits, c_i)
                    fake_acc = self.cal_acc(fake_logits, c_sample)
                    patch_loss = -hps.beta_dis * patch_w_dis + hps.lambda_ * patch_gp + hps.beta_clf * c_loss
                    reset_grad([self.PatchDiscriminator])
                    patch_loss.backward()
                    grad_clip([self.PatchDiscriminator],
                              self.hps.max_grad_norm)
                    self.patch_opt.step()
                    # print info
                    info = {
                        f'{flag}/D_patch_w_dis': patch_w_dis.data[0],
                        f'{flag}/patch_gp': patch_gp.data[0],
                        f'{flag}/c_loss': c_loss.data[0],
                        f'{flag}/real_acc': real_acc,
                        f'{flag}/fake_acc': fake_acc,
                    }
                    slot_value = (step, iteration + 1, hps.iters) + \
                            tuple([value for value in info.values()])
                    log = 'patch_D-%d:[%06d/%06d], w_dis=%.3f, gp=%.2f, c_loss=%.3f, real_acc=%.2f, fake_acc=%.2f'
                    print(log % slot_value)
                    for tag, value in info.items():
                        self.logger.scalar_summary(tag, value, iteration)
            #===================== Train G =====================#
            data = next(self.data_loader)
            (c_i, c_j), (x_i_t, x_i_tk, x_i_prime,
                         x_j) = self.permute_data(data)
            # encode
            enc_i_t, enc_i_tk, enc_i_prime, enc_j = self.encode_step(
                x_i_t, x_i_tk, x_i_prime, x_j)
            # decode
            x_tilde = self.decode_step(enc_i_t, c_i)
            loss_rec = torch.mean(torch.abs(x_tilde - x_i_t))
            # latent discriminate
            loss_adv = self.latent_discriminate_step(enc_i_t,
                                                     enc_i_tk,
                                                     enc_i_prime,
                                                     enc_j,
                                                     is_dis=False)
            ae_loss = loss_rec + current_alpha * loss_adv
            reset_grad([self.Encoder, self.Decoder])
            retain_graph = True if hps.n_patch_steps > 0 else False
            ae_loss.backward(retain_graph=retain_graph)
            grad_clip([self.Encoder, self.Decoder], self.hps.max_grad_norm)
            self.ae_opt.step()
            info = {
                f'{flag}/loss_rec': loss_rec.data[0],
                f'{flag}/loss_adv': loss_adv.data[0],
                f'{flag}/alpha': current_alpha,
            }
            slot_value = (iteration + 1, hps.iters) + tuple(
                [value for value in info.values()])
            log = 'G:[%06d/%06d], loss_rec=%.2f, loss_adv=%.2f, alpha=%.2e'
            print(log % slot_value)
            for tag, value in info.items():
                self.logger.scalar_summary(tag, value, iteration + 1)
            # patch discriminate
            if hps.n_patch_steps > 0 and iteration >= hps.patch_start_iter:
                c_sample = self.sample_c(x_i_t.size(0))
                x_tilde = self.decode_step(enc_i_t, c_sample)
                patch_w_dis, real_logits, fake_logits = \
                        self.patch_discriminate_step(x_i_t, x_tilde, cal_gp=False)
                # Same reconstruction of the aux classify loss as above.
                criterion = nn.NLLLoss()
                c_loss = criterion(real_logits, c_i) + criterion(fake_logits, c_sample)
                real_acc = self.cal_acc(real_logits, c_i)
                fake_acc = self.cal_acc(fake_logits, c_sample)
                patch_loss = hps.beta_dec * patch_w_dis + hps.beta_clf * c_loss
                reset_grad([self.Decoder])
                patch_loss.backward()
                grad_clip([self.Decoder], self.hps.max_grad_norm)
                self.decoder_opt.step()
                info = {
                    f'{flag}/G_patch_w_dis': patch_w_dis.data[0],
                    f'{flag}/c_loss': c_loss.data[0],
                    f'{flag}/real_acc': real_acc,
                    f'{flag}/fake_acc': fake_acc,
                }
                slot_value = (iteration + 1, hps.iters) + tuple(
                    [value for value in info.values()])
                log = 'G:[%06d/%06d]: patch_w_dis=%.2f, c_loss=%.2f, real_acc=%.2f, fake_acc=%.2f'
                print(log % slot_value)
                for tag, value in info.items():
                    self.logger.scalar_summary(tag, value, iteration + 1)
            if iteration % 1000 == 0 or iteration + 1 == hps.iters:
                self.save_model(model_path, iteration)
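`calculate_gradients_penalty` is not shown; its use next to Wasserstein distances matches the WGAN-GP gradient penalty. A minimal sketch under that assumption (written against 0.4+ APIs, unlike the surrounding code):

from torch.autograd import grad

def calculate_gradients_penalty(netD, real, fake):
    # Penalize the critic's gradient norm on random real/fake interpolates.
    alpha = torch.rand(real.size(0), *([1] * (real.dim() - 1)), device=real.device)
    interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    out = netD(interp)
    grads = grad(outputs=out, inputs=interp,
                 grad_outputs=torch.ones_like(out),
                 create_graph=True, retain_graph=True)[0]
    return ((grads.view(grads.size(0), -1).norm(2, dim=1) - 1) ** 2).mean()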
Example #17
def train(config):

    train_data, word2index, tag2index, intent2index = preprocessing(
        config.file_path, config.max_length)

    if train_data is None:
        print("Please check your data or its path")
        return

    encoder = Encoder(len(word2index), config.embedding_size,
                      config.hidden_size)
    decoder = Decoder(len(tag2index), len(intent2index),
                      len(tag2index) // 3, config.hidden_size * 2)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    #print("來到這裏了!1!")
    encoder.init_weights()
    decoder.init_weights()
    #print("來到這裏了!2!")
    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=config.learning_rate)
    #print("來到這裏了!3!")
    for step in range(config.step_size):
        losses = []
        for i, batch in enumerate(getBatch(config.batch_size, train_data)):
            x, y_1, y_2 = zip(*batch)
            x = torch.cat(x)
            tag_target = torch.cat(y_1)
            intent_target = torch.cat(y_2)
            # print("來到這裏了!4!")
            x_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0,
                                                    t.data)))).cuda()
                if USE_CUDA else Variable(
                    torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in x
            ]).view(config.batch_size, -1)
            y_1_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0,
                                                    t.data)))).cuda()
                if USE_CUDA else Variable(
                    torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in tag_target
            ]).view(config.batch_size, -1)
            #   print("來到這裏了!5!")
            encoder.zero_grad()
            decoder.zero_grad()
            #   print("來到這裏了!6!")
            output, hidden_c = encoder(x, x_mask)
            # print("來到這裏了!7!")
            start_decode = Variable(
                torch.LongTensor([
                    [word2index['<SOS>']] * config.batch_size
                ])).cuda().transpose(1, 0) if USE_CUDA else Variable(
                    torch.LongTensor([[word2index['<SOS>']] *
                                      config.batch_size])).transpose(1, 0)
            # print("來到這裏了!8!")

            tag_score, intent_score = decoder(start_decode, hidden_c, output,
                                              x_mask)
            #print("來到這裏了!9!")
            loss_1 = loss_function_1(tag_score, tag_target.view(-1))
            # print("來到這裏了!10!")
            loss_2 = loss_function_2(intent_score, intent_target)
            #print("來到這裏了!11!")
            loss = loss_1 + loss_2
            losses.append(
                loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())
            #print("來到這裏了!12!")
            loss.backward()
            # print("來到這裏了!13!")

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)

            enc_optim.step()
            dec_optim.step()

            if i % 100 == 0:
                with open("result.txt", "a+") as f:
                    #print("Step",step," epoch",i," : ",np.mean(losses))
                    print(f"Step是{step},epoch是{i} :均值为{np.mean(losses)}")
                    f.write(f"Step是{step},epoch是{i} :均值为{np.mean(losses)}")
                    f.write("\n")
                    losses = []

    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
    #print("來到這裏了!5!")
    torch.save(decoder.state_dict(),
               os.path.join(config.model_dir, 'jointnlu-decoder.pkl'))
    torch.save(encoder.state_dict(),
               os.path.join(config.model_dir, 'jointnlu-encoder.pkl'))
    print("Train Complete!")
Example #18
def main(args):
    """
    Training and validation.
    """

    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    with open(args.vocab_path, 'rb') as f:
        word_map = pickle.load(f)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=encoder_lr)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,  # reuse the Normalize defined above
    ])
    train_loader = get_loader(args.train_image_dir,
                              args.caption_path,
                              word_map,
                              transform,
                              args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    val_loader = get_loader(args.val_image_dir,
                            args.caption_path,
                            word_map,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    for epoch in range(start_epoch, epochs):
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
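
For reference, a minimal sketch of the adjust_learning_rate helper called in
the loop above; this is an assumption based on its call signature, since its
definition is not shown here:

def adjust_learning_rate(optimizer, shrink_factor):
    # Scale every parameter group's learning rate by shrink_factor.
    for param_group in optimizer.param_groups:
        param_group['lr'] *= shrink_factor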
Exemple #19
0
def train_dynamics(env, args, writer=None):
    """
    Trains the Dynamics module. Supervised.

    Arguments:
    env: the initialized environment (rllab/gym)
    args: input arguments
    writer: initialized summary writer for tensorboard
    """
    args.action_space = env.action_space

    # Initialize models
    enc = Encoder(env.observation_space.shape[0],
                  args.dim,
                  use_conv=args.use_conv)
    dec = Decoder(env.observation_space.shape[0],
                  args.dim,
                  use_conv=args.use_conv)
    d_module = D_Module(env.action_space.shape[0], args.dim, args.discrete)

    if args.from_checkpoint is not None:
        results_dict = torch.load(args.from_checkpoint)
        enc.load_state_dict(results_dict['enc'])
        dec.load_state_dict(results_dict['dec'])
        d_module.load_state_dict(results_dict['d_module'])

    all_params = chain(enc.parameters(), dec.parameters(),
                       d_module.parameters())

    if args.transfer:
        for p in enc.parameters():
            p.requires_grad = False

        for p in dec.parameters():
            p.requires_grad = False
        all_params = d_module.parameters()

    optimizer = torch.optim.Adam(all_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.gpu:
        enc = enc.cuda()
        dec = dec.cuda()
        d_module = d_module.cuda()

    # Initialize datasets
    val_loader = None
    train_dataset = DynamicsDataset(args.train_set,
                                    args.train_size,
                                    batch=args.train_batch,
                                    rollout=args.rollout)
    val_dataset = DynamicsDataset(args.test_set,
                                  5000,
                                  batch=args.test_batch,
                                  rollout=args.rollout)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers)

    results_dict = {
        'dec_losses': [],
        'forward_losses': [],
        'inverse_losses': [],
        'total_losses': [],
        'enc': None,
        'dec': None,
        'd_module': None,
        'd_init': None,
        'args': args
    }

    total_action_taken = 0
    correct_predicted_a_hat = 0

    # create the mask here for re-weighting
    dec_mask = None
    if args.dec_mask is not None:
        dec_mask = torch.ones(9)
        game_vocab = dict([
            (b, a)
            for a, b in enumerate(sorted(env.game.all_possible_features()))
        ])
        dec_mask[game_vocab['Agent']] = args.dec_mask
        dec_mask[game_vocab['Goal']] = args.dec_mask
        dec_mask = dec_mask.expand(args.batch_size, args.maze_length,
                                   args.maze_length, 9).contiguous().view(-1)
        dec_mask = Variable(dec_mask, requires_grad=False)
        if args.gpu:
            dec_mask = dec_mask.cuda()

    for epoch in range(1, args.num_epochs + 1):
        enc.train()
        dec.train()
        d_module.train()

        if args.framework == "mazebase":
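            # NOTE: d_init (an initial-state module for the "mazebase"
            # framework) is assumed to be defined elsewhere; it is never
            # created in this function.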
            d_init.train()

        # for measuring the accuracy
        train_acc = 0
        current_epoch_actions = 0
        current_epoch_predicted_a_hat = 0

        start = time.time()
        for i, (states, target_actions) in enumerate(train_loader):

            optimizer.zero_grad()

            if args.framework != "mazebase":
                forward_loss, inv_loss, dec_loss, recon_loss, model_loss, _, _ = forward_planning(
                    i, states, target_actions, enc, dec, d_module, args)
            else:
                forward_loss, inv_loss, dec_loss, recon_loss, model_loss, current_epoch_predicted_a_hat, current_epoch_actions = multiple_forward(
                    i, states, target_actions, enc, dec, d_module, args,
                    d_init, dec_mask)

            loss = forward_loss + args.inv_loss_coef * inv_loss + \
                        args.dec_loss_coef * dec_loss

            if i % args.log_interval == 0:
                log(
                    'Epoch [{}/{}]\tIter [{}/{}]\t'.format(
                        epoch, args.num_epochs, i+1, len(
                        train_dataset)//args.batch_size) + \
                    'Time: {:.2f}\t'.format(time.time() - start) + \
                    'Decoder Loss: {:.2f}\t'.format(dec_loss.data[0]) + \
                    'Forward Loss: {:.2f}\t'.format(forward_loss.data[0] ) + \
                    'Inverse Loss: {:.2f}\t'.format(inv_loss.data[0]) + \
                    'Loss: {:.2f}\t'.format(loss.data[0]))

                results_dict['dec_losses'].append(dec_loss.data[0])
                results_dict['forward_losses'].append(forward_loss.data[0])
                results_dict['inverse_losses'].append(inv_loss.data[0])
                results_dict['total_losses'].append(loss.data[0])

                # write the summaries here
                if writer:
                    writer.add_scalar('dynamics/total_loss', loss.data[0],
                                      epoch)
                    writer.add_scalar('dynamics/decoder', dec_loss.data[0],
                                      epoch)
                    writer.add_scalar('dynamics/reconstruction_loss',
                                      recon_loss.data[0], epoch)
                    writer.add_scalar('dynamics/next_state_prediction_loss',
                                      model_loss.data[0], epoch)
                    writer.add_scalar('dynamics/inv_loss', inv_loss.data[0],
                                      epoch)
                    writer.add_scalar('dynamics/forward_loss',
                                      forward_loss.data[0], epoch)

                    writer.add_scalars(
                        'dynamics/all_losses', {
                            "total_loss": loss.data[0],
                            "reconstruction_loss": recon_loss.data[0],
                            "next_state_prediction_loss": model_loss.data[0],
                            "decoder_loss": dec_loss.data[0],
                            "inv_loss": inv_loss.data[0],
                            "forward_loss": forward_loss.data[0],
                        }, epoch)

            loss.backward()

            correct_predicted_a_hat += current_epoch_predicted_a_hat
            total_action_taken += current_epoch_actions

            # does it work at all without grad clipping?
            torch.nn.utils.clip_grad_norm(all_params, args.max_grad_norm)
            optimizer.step()

            # maybe add the generated image to the logs
            # writer.add_image()

        # Run validation
        if val_loader is not None:
            enc.eval()
            dec.eval()
            d_module.eval()
            forward_loss, inv_loss, dec_loss = 0, 0, 0
            for i, (states, target_actions) in enumerate(val_loader):
                f_loss, i_loss, d_loss, _, _, _, _ = forward_planning(
                    i, states, target_actions, enc, dec, d_module, args)
                forward_loss += f_loss
                inv_loss += i_loss
                dec_loss += d_loss
            loss = forward_loss + args.inv_loss_coef * inv_loss + \
                    args.dec_loss_coef * dec_loss
            num_batches = i + 1  # i is the last batch index, so average over i + 1
            if writer:
                writer.add_scalar('val/forward_loss',
                                  forward_loss.data[0] / num_batches, epoch)
                writer.add_scalar('val/inverse_loss',
                                  inv_loss.data[0] / num_batches, epoch)
                writer.add_scalar('val/decoder_loss',
                                  dec_loss.data[0] / num_batches, epoch)
            log(
                '[Validation]\t' + \
                'Decoder Loss: {:.2f}\t'.format(dec_loss.data[0] / num_batches) + \
                'Forward Loss: {:.2f}\t'.format(forward_loss.data[0] / num_batches) + \
                'Inverse Loss: {:.2f}\t'.format(inv_loss.data[0] / num_batches) + \
                'Loss: {:.2f}\t'.format(loss.data[0] / num_batches))
        if epoch % args.checkpoint == 0:
            results_dict['enc'] = enc.state_dict()
            results_dict['dec'] = dec.state_dict()
            results_dict['d_module'] = d_module.state_dict()
            if args.framework == "mazebase":
                results_dict['d_init'] = d_init.state_dict()
            torch.save(
                results_dict,
                os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
            log('Saved model %s' % epoch)

    results_dict['enc'] = enc.state_dict()
    results_dict['dec'] = dec.state_dict()
    results_dict['d_module'] = d_module.state_dict()
    torch.save(results_dict,
               os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
    print(os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
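
A note on the clip_grad_norm calls in these examples: newer PyTorch releases
renamed the function to clip_grad_norm_ to mark the in-place mutation. A
minimal sketch of the current spelling:

import torch

params = [torch.nn.Parameter(torch.randn(3, 3)) for _ in range(2)]
for p in params:
    p.grad = torch.randn(3, 3)
total_norm = torch.nn.utils.clip_grad_norm_(params, max_norm=5.0)
print(total_norm)  # total gradient norm before clipping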
Exemple #20
0
def train():
    opt = parse_args()

    os.makedirs("images/%s" % (opt.dataset), exist_ok=True)
    os.makedirs("checkpoints/%s" % (opt.dataset), exist_ok=True)

    cuda = True if torch.cuda.is_available() else False
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # get dataloader
    train_loader = commic2human_loader(opt, mode='train')
    test_loader = commic2human_loader(opt, mode='test')

    # Dimensionality
    input_shape = (opt.channels, opt.img_height, opt.img_width)
    shared_dim = opt.dim * (2**opt.n_downsample)

    # Initialize generator and discriminator
    shared_E = ResidualBlock(in_channels=shared_dim)
    E1 = Encoder(dim=opt.dim,
                 n_downsample=opt.n_downsample,
                 shared_block=shared_E)
    E2 = Encoder(dim=opt.dim,
                 n_downsample=opt.n_downsample,
                 shared_block=shared_E)

    shared_G = ResidualBlock(in_channels=shared_dim)
    G1 = Generator(dim=opt.dim,
                   n_upsample=opt.n_upsample,
                   shared_block=shared_G)
    G2 = Generator(dim=opt.dim,
                   n_upsample=opt.n_upsample,
                   shared_block=shared_G)

    D1 = Discriminator(input_shape)
    D2 = Discriminator(input_shape)

    # Initialize weights
    E1.apply(weights_init_normal)
    E2.apply(weights_init_normal)
    G1.apply(weights_init_normal)
    G2.apply(weights_init_normal)
    D1.apply(weights_init_normal)
    D2.apply(weights_init_normal)

    # Loss function
    adversarial_loss = torch.nn.MSELoss()
    pixel_loss = torch.nn.L1Loss()

    if cuda:
        E1 = E1.cuda()
        E2 = E2.cuda()
        G1 = G1.cuda()
        G2 = G2.cuda()
        D1 = D1.cuda()
        D2 = D2.cuda()
        adversarial_loss = adversarial_loss.cuda()
        pixel_loss = pixel_loss.cuda()

    # Optimizers
    optimizer_G = torch.optim.Adam(itertools.chain(E1.parameters(),
                                                   E2.parameters(),
                                                   G1.parameters(),
                                                   G2.parameters()),
                                   lr=opt.lr,
                                   betas=(opt.b1, opt.b2))
    optimizer_D1 = torch.optim.Adam(D1.parameters(),
                                    lr=opt.lr,
                                    betas=(opt.b1, opt.b2))
    optimizer_D2 = torch.optim.Adam(D2.parameters(),
                                    lr=opt.lr,
                                    betas=(opt.b1, opt.b2))

    # Learning rate update schedulers
    lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(
        optimizer_G, lr_lambda=LambdaLR(opt.epochs, 0, opt.decay_epoch).step)
    lr_scheduler_D1 = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D1, lr_lambda=LambdaLR(opt.epochs, 0, opt.decay_epoch).step)
    lr_scheduler_D2 = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D2, lr_lambda=LambdaLR(opt.epochs, 0, opt.decay_epoch).step)

    prev_time = time.time()
    for epoch in range(opt.epochs):
        for i, (img_A, img_B) in enumerate(train_loader):

            # Model inputs
            X1 = Variable(img_A.type(FloatTensor))
            X2 = Variable(img_B.type(FloatTensor))

            # Adversarial ground truths
            valid = Variable(FloatTensor(img_A.shape[0],
                                         *D1.output_shape).fill_(1.0),
                             requires_grad=False)
            fake = Variable(FloatTensor(img_A.shape[0],
                                        *D1.output_shape).fill_(0.0),
                            requires_grad=False)

            # -----------------------------
            # Train Encoders and Generators
            # -----------------------------

            optimizer_G.zero_grad()

            # Get shared latent representation
            mu1, Z1 = E1(X1)
            mu2, Z2 = E2(X2)

            # Reconstruct images
            recon_X1 = G1(Z1)
            recon_X2 = G2(Z2)

            # Translate images
            fake_X1 = G1(Z2)
            fake_X2 = G2(Z1)

            # Cycle translation
            mu1_, Z1_ = E1(fake_X1)
            mu2_, Z2_ = E2(fake_X2)
            cycle_X1 = G1(Z2_)
            cycle_X2 = G2(Z1_)

            # Losses for encoder and generator
            id_loss_1 = opt.lambda_id * pixel_loss(recon_X1, X1)
            id_loss_2 = opt.lambda_id * pixel_loss(recon_X2, X2)

            adv_loss_1 = opt.lambda_adv * adversarial_loss(D1(fake_X1), valid)
            adv_loss_2 = opt.lambda_adv * adversarial_loss(D2(fake_X2), valid)

            cyc_loss_1 = opt.lambda_cyc * pixel_loss(cycle_X1, X1)
            cyc_loss_2 = opt.lambda_cyc * pixel_loss(cycle_X2, X2)

            KL_loss_1 = opt.lambda_KL1 * compute_KL(mu1)
            KL_loss_2 = opt.lambda_KL1 * compute_KL(mu2)
            KL_loss_1_ = opt.lambda_KL2 * compute_KL(mu1_)
            KL_loss_2_ = opt.lambda_KL2 * compute_KL(mu2_)

            # total loss for encoder and generator
            G_loss = id_loss_1 + id_loss_2 \
                     + adv_loss_1 + adv_loss_2 \
                     + cyc_loss_1 + cyc_loss_2 + \
                     KL_loss_1 + KL_loss_2 + KL_loss_1_ + KL_loss_2_

            G_loss.backward()
            optimizer_G.step()

            # ----------------------
            # Train Discriminator 1
            # ----------------------

            optimizer_D1.zero_grad()

            D1_loss = adversarial_loss(D1(X1), valid) + adversarial_loss(
                D1(fake_X1.detach()), fake)
            D1_loss.backward()

            optimizer_D1.step()

            # ----------------------
            # Train Discriminator 2
            # ----------------------

            optimizer_D2.zero_grad()

            D2_loss = adversarial_loss(D2(X2), valid) + adversarial_loss(
                D2(fake_X2.detach()), fake)
            D2_loss.backward()

            optimizer_D2.step()

            # ------------------
            # Log Information
            # ------------------

            batches_done = epoch * len(train_loader) + i
            batches_left = opt.epochs * len(train_loader) - batches_done
            time_left = datetime.timedelta(seconds=batches_left *
                                           (time.time() - prev_time))
            prev_time = time.time()

            print(
                "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f] ETA: %s"
                % (epoch, opt.epochs, i, len(train_loader),
                   (D1_loss + D2_loss).item(), G_loss.item(), time_left))

            if batches_done % opt.sample_interval == 0:
                save_sample(opt.dataset, test_loader, batches_done, E1, E2, G1,
                            G2, FloatTensor)

            if batches_done % opt.checkpoint_interval == 0:
                torch.save(E1.state_dict(),
                           "checkpoints/%s/E1_%d.pth" % (opt.dataset, epoch))
                torch.save(E2.state_dict(),
                           "checkpoints/%s/E2_%d.pth" % (opt.dataset, epoch))
                torch.save(G1.state_dict(),
                           "checkpoints/%s/G1_%d.pth" % (opt.dataset, epoch))
                torch.save(G2.state_dict(),
                           "checkpoints/%s/G2_%d.pth" % (opt.dataset, epoch))

        # Update learning rates
        lr_scheduler_G.step()
        lr_scheduler_D1.step()
        lr_scheduler_D2.step()

    torch.save(shared_E.state_dict(),
               "checkpoints/%s/shared_E_done.pth" % opt.dataset)
    torch.save(shared_G.state_dict(),
               "checkpoints/%s/shared_G_done.pth" % opt.dataset)
    torch.save(E1.state_dict(), "checkpoints/%s/E1_done.pth" % opt.dataset)
    torch.save(E2.state_dict(), "checkpoints/%s/E2_done.pth" % opt.dataset)
    torch.save(G1.state_dict(), "checkpoints/%s/G1_done.pth" % opt.dataset)
    torch.save(G2.state_dict(), "checkpoints/%s/G2_done.pth" % opt.dataset)
    print("Training Process has been Done!")
Exemple #21
0
def main():
    epoch = 1000
    batch_size = 64
    hidden_dim = 300
    use_cuda = True

    encoder = Encoder(num_words, hidden_dim)
    if args.attn:
        attn_model = 'dot'
        decoder = LuongAttnDecoderRNN(attn_model, hidden_dim, num_words)
    else:
        decoder = DecoderRhyme(hidden_dim, num_words, num_target_lengths,
                               num_rhymes)

    if args.train:
        weight = torch.ones(num_words)
        weight[word2idx_mapping[PAD_TOKEN]] = 0
        if use_cuda:
            encoder = encoder.cuda()
            decoder = decoder.cuda()
            weight = weight.cuda()
        encoder_optimizer = Adam(encoder.parameters(), lr=0.001)
        decoder_optimizer = Adam(decoder.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss(weight=weight)

        np.random.seed(1124)
        order = np.arange(len(train_data))

        best_loss = 1e10
        best_epoch = 0

        for e in range(epoch):
            #if e - best_epoch > 20: break

            np.random.shuffle(order)
            shuffled_train_data = train_data[order]
            shuffled_x_lengths = input_lengths[order]
            shuffled_y_lengths = target_lengths[order]
            shuffled_y_rhyme = target_rhymes[order]
            train_loss = 0
            valid_loss = 0
            for b in tqdm(range(int(len(order) // batch_size))):
                #print(b, '\r', end='')
                batch_x = torch.LongTensor(
                    shuffled_train_data[b * batch_size:(b + 1) *
                                        batch_size][:, 0].tolist()).t()
                batch_y = torch.LongTensor(
                    shuffled_train_data[b * batch_size:(b + 1) *
                                        batch_size][:, 1].tolist()).t()
                batch_x_lengths = shuffled_x_lengths[b * batch_size:(b + 1) *
                                                     batch_size]
                batch_y_lengths = shuffled_y_lengths[b * batch_size:(b + 1) *
                                                     batch_size]
                batch_y_rhyme = shuffled_y_rhyme[b * batch_size:(b + 1) *
                                                 batch_size]

                if use_cuda:
                    batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

                train_loss += train(batch_x, batch_y, batch_y_lengths,
                                    max(batch_y_lengths), batch_y_rhyme,
                                    encoder, decoder, encoder_optimizer,
                                    decoder_optimizer, criterion, use_cuda,
                                    False)

            train_loss /= (b + 1)  # b is the last batch index, so divide by b + 1
            '''
            for b in range(len(valid_data) // batch_size):
                batch_x = torch.LongTensor(valid_data[b*batch_size: (b+1)*batch_size][:, 0].tolist()).t()
                batch_y = torch.LongTensor(valid_data[b*batch_size: (b+1)*batch_size][:, 1].tolist()).t()
                if use_cuda:
                    batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

                valid_loss += train(batch_x, batch_y, max_seqlen, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, use_cuda, True)
            valid_loss /= b
            '''
            print(
                "epoch {}, train_loss {:.4f}, valid_loss {:.4f}, best_epoch {}, best_loss {:.4f}"
                .format(e, train_loss, valid_loss, best_epoch, best_loss))
            '''
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_epoch = e
                torch.save(encoder.state_dict(), args.encoder_path + '.best')
                torch.save(decoder.state_dict(), args.decoder_path + '.best')
            '''
            torch.save(encoder.state_dict(), args.encoder_path)
            torch.save(decoder.state_dict(), args.decoder_path)
        print(encoder)
        print(decoder)
        print("==============")

    else:
        encoder.load_state_dict(torch.load(
            args.encoder_path))  #, map_location=torch.device('cpu')))
        decoder.load_state_dict(torch.load(
            args.decoder_path))  #, map_location=torch.device('cpu')))
        print(encoder)
        print(decoder)

    predict(encoder, decoder)
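
Zeroing the class weight for PAD_TOKEN above makes the loss ignore padded
targets entirely. A self-contained illustration, with index 0 standing in
for the padding id:

import torch
import torch.nn as nn

weight = torch.ones(5)
weight[0] = 0  # the padding class contributes nothing to the loss
criterion = nn.CrossEntropyLoss(weight=weight)
logits = torch.randn(4, 5)
targets = torch.tensor([0, 2, 3, 0])
print(criterion(logits, targets))  # averaged over the non-padding targets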
Exemple #22
0
def main():

    checkpoint = torch.load(args.model_path)
    encoder = Encoder()
    generator = G()
    encoder.load_state_dict(checkpoint['encoder_state_dict'])
    generator.load_state_dict(checkpoint['generator_state_dict'])
    encoder.cuda()
    generator.cuda()

    FS = 16000
    SPEAKERS = list()
    with open(args.speaker_list) as fp:
        SPEAKERS = [l.strip() for l in fp.readlines()]

    normalizer = Tanhize(
        xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)),
        xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)),
    )

    total_sp_speaker = []
    total_speaker = []

    total_features = read_whole_features(args.file_pattern.format(args.src))
    for features in total_features:

        x = normalizer.forward_process(features['sp'])
        x = nh_to_nchw(x)
        y_s = features['speaker']
        #print('????',SPEAKERS.index(args.trg))

        #y_t_id = tf.placeholder(dtype=tf.int64, shape=[1,])
        #y_t = y_t_id * torch.ones(shape=[x.shape[0],], dtype=torch.int64)
        #print(y_t)
        x = Variable(torch.FloatTensor(x).cuda(), requires_grad=False)

        y_t = torch.ones((x.shape[0])).view(-1, 1) * (SPEAKERS.index(args.trg))

        z, _ = encoder(x)
        x_t, _ = generator(z, y_t)  # NOTE: the API yields NHWC format
        x_t = torch.squeeze(x_t)
        #print('x_t.shape',x_t.shape)
        x_t = normalizer.backward_process(x_t)
        #print('backward_process.finish')

        x_s, _ = generator(z, y_s)
        x_s = torch.squeeze(x_s)
        x_s = normalizer.backward_process(x_s)

        f0_s = features['f0']
        #print(f0_s.shape)
        f0_t = convert_f0(f0_s, args.src, args.trg)

        #output_dir = get_default_output(args.output_dir)
        output_dir = args.output_dir
        features['sp'] = x_t.cpu().data.numpy()
        features['f0'] = f0_t
        #print('=-=-=-=-=-=')
        y = pw2wav(features)

        oFilename = make_output_wav_name(output_dir, features['filename'])
        print(f'\r Processing {oFilename}', end=' ')

        if not os.path.exists(os.path.dirname(oFilename)):
            # exist_ok guards against the race where another process
            # creates the directory between the check and the makedirs
            os.makedirs(os.path.dirname(oFilename), exist_ok=True)

        sf.write(oFilename, y, FS)
        #print('2: ',features['sp'].shape)
        #print('3: ',features['f0'].shape)

    print('\n==finish==')
Exemple #23
0
class Trainer:
    def __init__(self, driving, target, time_step, split, lr):
        self.dataset = DataSet(driving, target, time_step, split)

        f = open('dataset_obj.txt', 'wb')
        pickle.dump(self.dataset, f)
        f.close()

        print('Saved dataset object to dataset_obj.txt')

        # f = open('dataset_obj.txt','rb')
        # self.dataset = pickle.load(f)
        # f.close()

        self.encoder = Encoder(input_size=self.dataset.get_num_features(),
                               hidden_size=ENCODER_HIDDEN_SIZE,
                               T=time_step)
        self.decoder = Decoder(encoder_hidden_size=ENCODER_HIDDEN_SIZE,
                               decoder_hidden_size=DECODER_HIDDEN_SIZE,
                               T=time_step)
        if torch.cuda.is_available():
            # print('tocuda')
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
        self.encoder_optim = optim.Adam(self.encoder.parameters(), lr)
        self.decoder_optim = optim.Adam(self.decoder.parameters(), lr)
        self.loss_func = nn.CrossEntropyLoss()
        self.train_size, self.validation_size, self.test_size = self.dataset.get_size(
        )
        self.best_dev_acc = 0.0

    def get_accuracy(self, truth, pred):
        assert len(truth) == len(pred)
        right = (truth == pred).sum()
        return right / len(truth)

    def train_minibatch(self, num_epochs, batch_size, interval):
        train_acc_list = []
        dev_acc_list = []
        train_loss_list = []
        dev_loss_list = []
        x_train, y_train, y_seq_train = self.dataset.get_train_set()
        # print(x_train.shape)
        for epoch in range(num_epochs):
            print('Start epoch {}'.format(epoch))
            i = 0
            loss_sum = 0
            pred_res_total = []
            while i < self.train_size:
                self.encoder_optim.zero_grad()
                self.decoder_optim.zero_grad()
                batch_end = i + batch_size
                if (batch_end >= self.train_size):
                    batch_end = self.train_size
                var_x = self.to_variable(x_train[i:batch_end])
                # var_y = self.to_variable(y_train[i: batch_end])
                var_y = Variable(
                    torch.from_numpy(y_train[i:batch_end]).long()).cuda()
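                # NOTE: this unconditional .cuda() assumes a GPU, unlike
                # to_variable() below, which checks availability first.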
                var_y_seq = self.to_variable(y_seq_train[i:batch_end])
                if var_x.dim() == 2:
                    var_x = var_x.unsqueeze(2)
                code = self.encoder(var_x)

                y_res = self.decoder(code, var_y_seq)

                loss = self.loss_func(y_res, var_y)
                if i == 0:
                    print("y_res:", y_res)
                    print("var_y:", var_y)
                loss.backward()
                self.encoder_optim.step()
                self.decoder_optim.step()
                loss_sum += loss.item()

                # update the i
                i = batch_end

                pred_y = y_res.data.cpu()

                # print('see what the pred and truth')
                # print('y_res:',y_res.shape,' : ',y_res)
                # print('var_y:',var_y.shape,' : ',var_y)

                pred_y = torch.max(F.softmax(pred_y, dim=1), 1)[1]
                #
                # print('pred_y:',pred_y)
                # print('var_y',var_y)

                pred_res_total.extend(pred_y)

                # if i%50 == 0:
                #     print('         finish {0:.2f}/100'.format(i/self.train_size))

            acc = self.get_accuracy(y_train, np.array(pred_res_total))
            print(
                'epoch [%d] finished, the average loss is %.2f, accuracy is %.1f'
                % (epoch, loss_sum, acc * 100))

            dev_acc, dev_loss = self.test(batch_size)
            print('dev_acc is %.2f' % (dev_acc * 100))

            train_acc_list.append(acc)
            dev_acc_list.append(dev_acc)
            train_loss_list.append(loss_sum)
            dev_loss_list.append(dev_loss)

            if dev_acc > self.best_dev_acc:

                torch.save(
                    self.encoder.state_dict(),
                    r'D:\Projects\stock_predict\models\encoder_best.model')
                torch.save(
                    self.decoder.state_dict(),
                    r'D:\Projects\stock_predict\models\decoder_best.model')
                self.best_dev_acc = dev_acc

                test_acc, test_loss = self.test(batch_size, True)
                print('test_accuracy: %.1f' % (test_acc * 100))

        return train_acc_list, dev_acc_list, train_loss_list, dev_loss_list

    def test(self, batch_size, is_test=False):
        if not is_test:
            x, y, y_seq = self.dataset.get_validation_set()
        else:
            x, y, y_seq = self.dataset.get_test_set()
        i = 0
        res = []
        length = len(y)
        loss_sum = 0
        while i < length:

            batch_end = i + batch_size
            if batch_end >= length:
                batch_end = length
            var_x = self.to_variable(x[i:batch_end])
            var_y = Variable(torch.from_numpy(y[i:batch_end]).long()).cuda()
            # var_y = self.to_variable(y_test[i: batch_end])
            var_y_seq = self.to_variable(y_seq[i:batch_end])
            if var_x.dim() == 2:
                var_x = var_x.unsqueeze(2)

            # to encoder get encoder output
            code = self.encoder(var_x)

            # to decoder get classification
            y_res = self.decoder(code, var_y_seq)

            loss = self.loss_func(y_res, var_y)
            loss_sum += loss.item()

            pred_y = y_res.data.cpu()
            pred_y = torch.max(pred_y, 1)[1]
            res.extend(pred_y)
            i = batch_end

        res = np.array(res)

        return self.get_accuracy(y, res), loss_sum

    def load_model(self, encoder_path, decoder_path):
        self.encoder.load_state_dict(
            torch.load(encoder_path,
                       map_location=lambda storage, loc: storage))
        self.decoder.load_state_dict(
            torch.load(decoder_path,
                       map_location=lambda storage, loc: storage))

    def to_variable(self, x):
        if torch.cuda.is_available():
            # print("var to cuda")
            return Variable(torch.from_numpy(x).float()).cuda()
        else:
            return Variable(torch.from_numpy(x).float())

    def draw_plot(self, train_list, dev_list, acc=True):
        plt.plot(np.array(train_list))
        plt.plot(np.array(dev_list))
        if acc:
            plt.title('model acc')
            plt.ylabel('accuracy')

        else:
            plt.title('model loss')
            plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()
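
Hypothetical usage of the Trainer class above; the constructor arguments are
assumptions based on its signature, not values from the original project:

trainer = Trainer(driving='driving.csv', target='target.csv',
                  time_step=10, split=0.8, lr=1e-3)
train_acc, dev_acc, train_loss, dev_loss = trainer.train_minibatch(
    num_epochs=50, batch_size=64, interval=10)
trainer.draw_plot(train_acc, dev_acc, acc=True)
trainer.draw_plot(train_loss, dev_loss, acc=False)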
Exemple #24
0
])

with open("../data/vocab.pkl", 'rb') as f:
    vocab = pickle.load(f)
dataloader = get_loader("../data/resized/",
                        "../data/annotations/captions_train2014.json",
                        vocab,
                        trans,
                        128,
                        shuffle=True)

encoder = Encoder(256)
decoder = Decoder(256, 512, len(vocab), 1)

if torch.cuda.is_available():
    encoder.cuda()
    decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.parameters()) + list(
        encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=0.001)

    total_step = len(dataloader)
    for epoch in range(5):
        for i, (images, captions, lengths) in enumerate(dataloader):
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]
            decoder.zero_grad()
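
The pack_padded_sequence call above flattens the captions down to the valid
timesteps, which is what a packed decoder output is scored against. A tiny
demonstration:

import torch
from torch.nn.utils.rnn import pack_padded_sequence

captions = torch.tensor([[1, 2, 3, 0],
                         [4, 5, 0, 0]])
lengths = [3, 2]
targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
print(targets)  # tensor([1, 4, 2, 5, 3]): padding dropped, time-major order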
Exemple #25
0
        exit(0)
    else:
        fp_data = sys.argv[1]
        fp_ind = sys.argv[2]
        fp_ans = sys.argv[3]

fp_model_fe = 'model6.fe.pt'

state_dict = torch.load(fp_model_fe)

model_enc = Encoder()
model_enc_dict = model_enc.state_dict()
model_enc_dict.update({k: v for k, v in state_dict.items() \
                            if k in model_enc_dict})
model_enc.load_state_dict(model_enc_dict)
model_enc.cuda()

test_loader = load_data(fp_data)
features = predict(model_enc, test_loader)

ind = (pd.read_csv(fp_ind, delimiter=',').values)[:, 1:]

pred = []
for i in range(ind.shape[0]):
    if np.linalg.norm(features[ind[i][0]] - features[ind[i][1]]) > 10:
        pred.append(0)
    else:
        pred.append(1)

df_pred = pd.DataFrame()
df_pred['ID'] = np.arange(len(pred))
Exemple #26
0
class sample:
    def __init__(self):
        parser = argparse.ArgumentParser(description='Image Captioning')
        parser.add_argument('--root',
                            default='../../../cocodataset/',
                            type=str)
        parser.add_argument(
            '--sample_image',
            default='../../../cocodataset/val2017/000000435205.jpg',
            type=str)
        parser.add_argument('--epochs', default=100, type=int)
        parser.add_argument('--lr', default=1e-4, type=float)
        parser.add_argument('--batch_size', default=128, help='')
        parser.add_argument('--num_workers', default=4, type=int)
        parser.add_argument('--embed_dim', default=256, type=int)
        parser.add_argument('--hidden_size', default=512, type=int)
        parser.add_argument('--num_layers', default=1, type=int)
        parser.add_argument('--encoder_path',
                            default='./model/Encoder-100.ckpt',
                            type=str)
        parser.add_argument('--decoder_path',
                            default='./model/Decoder-100.ckpt',
                            type=str)
        parser.add_argument('--vocab_path', default='./vocab/', type=str)

        self.args = parser.parse_args()

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, ), (0.5, )),
            transforms.Resize((224, 224))
        ])

        with open(self.args.vocab_path + 'vocab.pickle', 'rb') as f:
            data = pickle.load(f)

        self.vocab = data

        self.DataLoader = get_dataloader(root=self.args.root,
                                         transform=self.transform,
                                         shuffle=True,
                                         batch_size=self.args.batch_size,
                                         num_workers=self.args.num_workers,
                                         vocab=self.vocab)

        self.Encoder = Encoder(embed_dim=self.args.embed_dim)
        self.Decoder = Decoder(embed_dim=self.args.embed_dim,
                               hidden_size=self.args.hidden_size,
                               vocab_size=len(self.vocab),
                               num_layers=self.args.num_layers)

    def load_image(self, image_path):
        image = Image.open(image_path).convert('RGB')
        image = self.transform(image).unsqueeze(0)

        return image

    def main(self):
        self.Encoder.load_state_dict(torch.load(self.args.encoder_path))
        self.Decoder.load_state_dict(torch.load(self.args.decoder_path))

        self.Encoder = self.Encoder.cuda().eval()
        self.Decoder = self.Decoder.cuda().eval()

        sample_image = self.load_image(self.args.sample_image).cuda()
        output = self.Encoder(sample_image)
        output = self.Decoder.sample(output)[0].cpu().numpy()
        sample_caption = []

        for idx in output:
            word = self.vocab.idx2word[idx]
            sample_caption.append(word)
            if word == '<end>':
                break

        sentence = ' '.join(sample_caption)
        print(sentence)
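
A hypothetical entry point for the sample class above (not part of the
original snippet):

if __name__ == '__main__':
    sample().main()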
Exemple #27
0
# network
ImplicitFun = ImplicitFun()  
Encoder = Encoder() 
InverseImplicitFun = InverseImplicitFun()
if cate_name == 'helicopter':
    all_model = torch.load('../models/plane.pth')
else:
    all_model = torch.load('../models/' + cate_name + '.pth')
ImplicitFun.load_state_dict(all_model['ImplicitFun_state_dict'])
Encoder.load_state_dict(all_model['Encoder_state_dict'])
InverseImplicitFun.load_state_dict(all_model['InverseImplicitFun_state_dict'])
print(InverseImplicitFun)

# gpu or cpu 
ImplicitFun = ImplicitFun.cuda()
Encoder = Encoder.cuda()
InverseImplicitFun = InverseImplicitFun.cuda()

# ----------------------------------------------------------------------------------------------------------------------------------------------- #
# ----------------------------------------------------------------------------------------------------------------------------------------------- #
if __name__ == '__main__':

    thres = np.arange(0,0.26,0.01)

    dis_list = np.array([])
    for it, data in enumerate(train_loader):

        print("Paired sample: [%d/%d]"%(it, len(train_loader.dataset)))
        shape, land_a, land_b, name_a, name_b = data
        shape = Variable(shape.squeeze(0).cuda())
Exemple #28
0
def train(config):

    data_loader = DataLoader(config.file_path, config.max_length,
                             config.batch_size)
    train_data, word2index, tag2index, intent2index = data_loader.load_train()

    if train_data is None:
        print("Please check your data or its path")
        return

    encoder = Encoder(len(word2index), config.embedding_size,
                      config.hidden_size)
    decoder = Decoder(len(tag2index), len(intent2index),
                      config.hidden_size * 2)
    if USE_CUDA:
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    encoder.init_weights()
    decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=config.learning_rate)

    for step in range(config.step_size):
        losses = []
        for i, batch in enumerate(data_loader.get_batch(train_data)):
            x, embedding_x, y_1, y_2 = zip(*batch)
            x = torch.cat(x)
            embedding_x = torch.cat(embedding_x)
            tag_target = torch.cat(y_1)
            intent_target = torch.cat(y_2)
            x_mask = torch.cat([
                Variable(torch.ByteTensor(tuple(map(lambda s: s == 0,
                                                    t.data)))).cuda()
                if USE_CUDA else Variable(
                    torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                for t in x
            ]).view(len(batch), -1)

            encoder.zero_grad()
            decoder.zero_grad()

            output, hidden_c = encoder(x, embedding_x, x_mask)
            start_decode = Variable(
                torch.LongTensor(
                    [[word2index['<SOS>']] * len(batch)])).cuda().transpose(
                        1, 0) if USE_CUDA else Variable(
                            torch.LongTensor([[word2index['<SOS>']] *
                                              len(batch)])).transpose(1, 0)

            tag_score, intent_score = decoder(start_decode, hidden_c, output,
                                              x_mask)

            loss_1 = loss_function_1(tag_score, tag_target.view(-1))
            loss_2 = loss_function_2(intent_score, intent_target)

            loss = loss_1 + loss_2
            losses.append(
                loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())
            loss.backward()

            torch.nn.utils.clip_grad_norm(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm(decoder.parameters(), 5.0)

            enc_optim.step()
            dec_optim.step()

            if i % 100 == 0:
                print("Step", step, " : ", np.mean(losses))
                losses = []

    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)

    torch.save(encoder, os.path.join(config.model_dir, 'jointnlu-encoder.pt'))
    torch.save(decoder, os.path.join(config.model_dir, 'jointnlu-decoder.pt'))
    print("Training Complete!")
Exemple #29
0
def main():

    parser = argparse.ArgumentParser(
        description='Estimate average error and std for each MNIST dataset')
    parser.add_argument('--model-name',
                        type=str,
                        required=True,
                        help='filepath of model to use')
    parser.add_argument('--output-name',
                        type=str,
                        required=True,
                        help='name of output files')
    parser.add_argument('--batch-size',
                        type=int,
                        default=200,
                        metavar='N',
                        help='batch-size for evaluation')

    args = parser.parse_args()

    #Load model
    path = '/home/ubuntu/Saved_Models/'
    filename = os.path.join(path, args.model_name, 'checkpoint.pt')

    use_cuda = torch.cuda.is_available()
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    device = torch.device("cuda" if use_cuda else "cpu")

    model = Encoder(device)
    model.load_state_dict(torch.load(filename))
    model = model.cuda()

    data_root_file = '/home/ubuntu/mnist-interpretable-tranformations/data'
    data_loaders = {
        digit: DataLoader(MNISTDadataset(data_root_file, digit),
                          batch_size=args.batch_size,
                          shuffle=False,
                          **kwargs)
        for digit in range(0, 10)
    }

    step = 5  #degrees step
    mean_error = pd.DataFrame()
    mean_abs_error = pd.DataFrame()
    error_std = pd.DataFrame()

    for digit, data_loader in data_loaders.items():
        sys.stdout.write('Processing digit {} \n'.format(digit))
        sys.stdout.flush()
        results = get_metrics(model, data_loader, device, step)
        mean_error[digit] = pd.Series(results[0])
        mean_abs_error[digit] = pd.Series(results[1])
        error_std[digit] = pd.Series(results[2])

    mean_error.index = mean_error.index * step
    mean_abs_error.index = mean_abs_error.index * step
    error_std.index = error_std.index * step

    mean_error.to_csv(args.output_name + '_mean_error.csv')
    mean_abs_error.to_csv(args.output_name + '_mean_abs_error.csv')
    error_std.to_csv(args.output_name + '_error_std.csv')

    ## Plotting just the absolute error
    with plt.style.context('ggplot'):
        mean_abs_error.plot(figsize=(9, 8))
        plt.xlabel('Degrees')
        plt.ylabel('Average error in degrees')
        plt.legend(loc="upper left",
                   bbox_to_anchor=[0, 1],
                   ncol=2,
                   shadow=True,
                   title="Digits",
                   fancybox=True)

        plt.tick_params(colors='gray', direction='out')
        plt.savefig(args.output_name + '_abs_mean_curves.png')
        plt.close()

    ## Plotting absolute error and std
    with plt.style.context('ggplot'):
        fig = plt.figure(figsize=(9, 8))
        ax = fig.add_subplot(111)
        x = mean_abs_error.index
        for digit in mean_abs_error.columns:
            mean = mean_abs_error[digit]
            std = error_std[digit]
            line, = ax.plot(x, mean)
            ax.fill_between(x,
                            mean - std,
                            mean + std,
                            alpha=0.2,
                            facecolor=line.get_color(),
                            edgecolor=line.get_color())

        ax.set_xlabel('Degrees')
        ax.set_ylabel('Average error in degrees')
        ax.legend(loc="upper left",
                  bbox_to_anchor=[0, 1],
                  ncol=2,
                  shadow=True,
                  title="Digits",
                  fancybox=True)
        ax.tick_params(colors='gray', direction='out')
        fig.savefig(args.output_name + '_mean_&_std_curves.png')
        fig.clf()
Exemple #30
0
def main(_):
    # Load the configuration file.
    with open(FLAGS.config, 'r') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)

    # Load the vocabularies.
    src_vocab = Vocab.load(config['data']['src']['vocab'])
    tgt_vocab = Vocab.load(config['data']['tgt']['vocab'])

    # Load the training and dev datasets.
    test_data = ShakespeareDataset('test', config, src_vocab, tgt_vocab)

    # Restore the model.
    src_vocab_size = len(src_vocab)
    tgt_vocab_size = len(tgt_vocab)

    encoder = Encoder(src_vocab_size, config['model']['embedding_dim'],
                      config['model']['bidirection'],
                      config['model']['dropout'], config['model']['layer'],
                      config['model']['mode'])
    decoder = Decoder(tgt_vocab_size, config['model']['embedding_dim'],
                      config['model']['bidirection'],
                      config['model']['dropout'], config['model']['layer'],
                      config['model']['mode'])

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    ckpt_path = os.path.join(config['data']['ckpt'], config['experiment_name'],
                             'model.pt')
    if os.path.exists(ckpt_path):
        print('Loading checkpoint: %s' % ckpt_path)
        ckpt = torch.load(ckpt_path)
        encoder.load_state_dict(ckpt['encoder'])
        decoder.load_state_dict(ckpt['decoder'])
    else:
        print('Unable to find checkpoint. Terminating.')
        sys.exit(1)
    encoder.eval()
    decoder.eval()

    # Initialize translator.
    greedy_translator = GreedyTranslator(encoder, decoder, tgt_vocab)

    # Qualitative evaluation - print translations for first couple sentences in
    # test corpus.
    for i in range(10):
        src, tgt = test_data[i]
        translation = greedy_translator(src)
        src_sentence = [src_vocab.id2word(id) for id in src.data.cpu().numpy()]
        tgt_sentence = [tgt_vocab.id2word(id) for id in tgt.data.cpu().numpy()]
        translated_sentence = [tgt_vocab.id2word(id) for id in translation]
        print('---')
        print('Source: %s' % ' '.join(src_sentence))
        print('Ground truth: %s' % ' '.join(tgt_sentence))
        print('Model output: %s' % ' '.join(translated_sentence))
    print('---')

    # Quantitative evaluation - compute corpus level BLEU scores.
    hypotheses = []
    references = []
    for src, tgt in test_data:
        translation = greedy_translator(src)
        tgt_sentence = [tgt_vocab.id2word(id) for id in tgt.data.cpu().numpy()]
        translated_sentence = [tgt_vocab.id2word(id) for id in translation]
        # Remove start and end of sentence tokens.
        tgt_sentence = tgt_sentence[1:-1]
        translated_sentence = translated_sentence[1:-1]
        hypotheses.append(translated_sentence)
        references.append([tgt_sentence])
    print("Corpus BLEU score: %0.4f" % corpus_bleu(references, hypotheses))