Code example #1
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] #+ (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ
        )

    model = SentenceVAE(
        vocab_size=datasets['train'].vocab_size,
        sos_idx=datasets['train'].sos_idx,
        eos_idx=datasets['train'].eos_idx,
        pad_idx=datasets['train'].pad_idx,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
        )

    if torch.cuda.is_available():
        model = model.cuda()

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join('./',args.logdir, expierment_name(args,ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join('./',args.save_model_path,'VAE', ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1/(1+np.exp(-k*(step-x0))))
        elif anneal_function == 'linear':
            return min(1, step/x0)

    NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx)
    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))
        
        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)
        NLL_w_avg = NLL_loss/torch.sum(length).float()

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight, NLL_w_avg
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(
                dataset=datasets[split],
                batch_size=args.batch_size,
                shuffle=split=='train',
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available()
            )

            tracker = defaultdict(tensor)
 
            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight, NLL_w_avg = loss_fn(logp, batch['target'],
                    batch['length'], mean, logv, args.anneal_function, step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss)/batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1


                # bookkeeping: torch.cat fails on a 0-dim loss tensor, so wrap the
                # scalar in a 1-element tensor on the same device before appending
                loss_data = loss.data.view(1)
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss_data))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration+1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, NLL-word-Loss %9.4f"
                        %(split.upper(), iteration, len(data_loader)-1, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight,NLL_w_avg))
                
                #split = 'invalid' #JUST TO DEBUG!!!
                
                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()}
                if not os.path.exists(os.path.join('./dumps', ts)):
                    os.makedirs('dumps/'+ts)
                with open(os.path.join('./dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
                    json.dump(dump,dump_file)

            # save checkpoint
            if split == 'train' and epoch % 10 == 0:
                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s"%checkpoint_path)
Code example #2
File: train.py  Project: kaletap/Bert-VAE
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    RANDOM_SEED = 42

    dataset = load_dataset("yelp_polarity", split="train")
    TRAIN_SIZE = len(dataset) - 2_000
    VALID_SIZE = 1_000
    TEST_SIZE = 1_000

    train_test_split = dataset.train_test_split(train_size=TRAIN_SIZE,
                                                seed=RANDOM_SEED)
    train_dataset = train_test_split["train"]
    test_val_dataset = train_test_split["test"].train_test_split(
        train_size=VALID_SIZE, test_size=TEST_SIZE, seed=RANDOM_SEED)
    val_dataset, test_dataset = test_val_dataset["train"], test_val_dataset[
        "test"]

    tokenizer = AutoTokenizer.from_pretrained(args.model_name, use_fast=True)
    datasets = OrderedDict()
    datasets['train'] = TextDataset(train_dataset, tokenizer,
                                    args.max_sequence_length,
                                    not args.disable_sent_tokenize)
    datasets['valid'] = TextDataset(val_dataset, tokenizer,
                                    args.max_sequence_length,
                                    not args.disable_sent_tokenize)
    if args.test:
        datasets['test'] = TextDataset(test_dataset, tokenizer,
                                       args.max_sequence_length,
                                       not args.disable_sent_tokenize)

    print(
        f"Loading {args.model_name} model. Setting {args.trainable_layers} trainable layers."
    )
    encoder = AutoModel.from_pretrained(args.model_name, return_dict=True)
    if not args.train_embeddings:
        for p in encoder.embeddings.parameters():
            p.requires_grad = False
    encoder_layers = encoder.encoder.layer
    if args.trainable_layers > len(encoder_layers):
        warnings.warn(
            f"You are asking to train {args.trainable_layers} layers, but this model has only {len(encoder_layers)}"
        )
    for layer in range(len(encoder_layers) - args.trainable_layers):
        for p in encoder_layers[layer].parameters():
            p.requires_grad = False
    params = dict(vocab_size=datasets['train'].vocab_size,
                  embedding_size=args.embedding_size,
                  rnn_type=args.rnn_type,
                  hidden_size=args.hidden_size,
                  word_dropout=args.word_dropout,
                  embedding_dropout=args.embedding_dropout,
                  latent_size=args.latent_size,
                  num_layers=args.num_layers,
                  bidirectional=args.bidirectional,
                  max_sequence_length=args.max_sequence_length)
    model = SentenceVAE(encoder=encoder, tokenizer=tokenizer, **params)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)
    with open(os.path.join(save_model_path, 'train_args.json'), 'w') as f:
        json.dump(vars(args), f, indent=4)

    def kl_anneal_function(anneal_function, step, k, x0):
        if step <= x0:
            return args.initial_kl_weight
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0 - 2500))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx,
                           reduction='sum')

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    params = [{
        'params': model.encoder.parameters(),
        'lr': args.encoder_learning_rate
    }, {
        'params': [
            *model.decoder_rnn.parameters(), *model.hidden2mean.parameters(),
            *model.hidden2logv.parameters(), *model.latent2hidden.parameters(),
            *model.outputs2vocab.parameters()
        ]
    }]
    optimizer = torch.optim.Adam(params,
                                 lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=(split == 'train'),
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available(),
                                     collate_fn=DataCollator(tokenizer))

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'],
                                            batch['attention_mask'],
                                            batch['length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                       batch['length'], mean,
                                                       logv,
                                                       args.anneal_function,
                                                       step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeeping
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.data.view(1, -1)), dim=0)

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), NLL_loss.item() / batch_size,
                           KL_loss.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].tolist(), tokenizer=tokenizer)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs, tracker['ELBO'].mean()))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences, the encoded latent space and generated sequences
            if split == 'valid':
                samples, _ = model.inference(z=tracker['z'])
                generated_sents = idx2word(samples.tolist(), tokenizer)
                sents = [{
                    'original': target,
                    'generated': generated
                } for target, generated in zip(tracker['target_sents'],
                                               generated_sents)]
                dump = {'sentences': sents, 'z': tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file, indent=3)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
Code example #3
File: train.py  Project: abcdefgs0324/Sentence-VAE
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    log_file = open("res.txt", "a")
    log_file.write(expierment_name(args, ts))
    log_file.write("\n")
    graph_file = open("elbo-graph.txt", "a")
    graph_file.write(expierment_name(args, ts))
    graph_file.write("\n")

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)
        elif anneal_function == "softplus":
            return min(1, np.log(1 + np.exp(k * step)))
        elif anneal_function == "no":
            return 1

    NLL = torch.nn.NLLLoss(size_average=False,
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    val_lowest_elbo = 5000
    val_accu_epoch = 0
    val_min_epoch = 0
    split_elbo = {"train": [], "valid": []}
    if args.test:
        split_elbo["test"] = []
    split_loss = {"train": [], "valid": []}
    if args.test:
        split_loss["test"] = []

    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                       batch['length'], mean,
                                                       logv,
                                                       args.anneal_function,
                                                       step, args.k, args.x0)

                if split != 'train':
                    KL_weight = 1.0

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeeping
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.data[0],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.data[0], NLL_loss.data[0] / batch_size,
                           KL_loss.data[0] / batch_size, KL_weight))
                    split_loss[split].append([
                        loss.data[0], NLL_loss.data[0] / batch_size,
                        KL_loss.data[0] / batch_size
                    ])

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))
            split_elbo[split].append([torch.mean(tracker["ELBO"])])

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

            if split == 'valid':
                if torch.mean(tracker['ELBO']) < val_lowest_elbo:
                    val_lowest_elbo = torch.mean(tracker['ELBO'])
                    val_accu_epoch = 0
                    val_min_epoch = epoch
                else:
                    val_accu_epoch += 1
                    if val_accu_epoch >= 3:
                        if not args.test:
                            exp_str = ""
                            exp_str += "train_ELBO={}\n".format(
                                split_elbo["train"][val_min_epoch])
                            exp_str += "valid_ELBO={}\n".format(
                                split_elbo["valid"][val_min_epoch])
                            exp_str += "==========\n"
                            log_file.write(exp_str)
                            log_file.close()
                            print(exp_str)
                            graph_file.write("ELBO\n")
                            line = ""
                            for s in splits:
                                for i in split_loss[s]:
                                    line += "{},".format(i[0])
                                line += "\n"
                            graph_file.write(line)
                            graph_file.write("NLL\n")
                            line = ""
                            for s in splits:
                                for i in split_loss[s]:
                                    line += "{},".format(i[1])
                                line += "\n"
                            graph_file.write(line)
                            graph_file.write("KL\n")
                            line = ""
                            for s in splits:
                                for i in split_loss[s]:
                                    line += "{},".format(i[2])
                                line += "\n"
                            graph_file.write(line)
                            graph_file.close()
                            exit()
            elif split == 'test' and val_accu_epoch >= 3:
                exp_str = ""
                exp_str += "train_ELBO={}\n".format(
                    split_elbo["train"][val_min_epoch])
                exp_str += "valid_ELBO={}\n".format(
                    split_elbo["valid"][val_min_epoch])
                exp_str += "test_ELBO={}\n".format(
                    split_elbo["test"][val_min_epoch])
                exp_str += "==========\n"
                log_file.write(exp_str)
                log_file.close()
                print(exp_str)
                graph_file.write("ELBO\n")
                line = ""
                for s in splits:
                    for i in split_loss[s]:
                        line += "{},".format(i[0])
                    line += "\n"
                for s in splits:
                    for i in split_elbo[s]:
                        line += "{},".format(i[0])
                    line += "\n"
                graph_file.write(line)
                graph_file.write("NLL\n")
                line = ""
                for s in splits:
                    for i in split_loss[s]:
                        line += "{},".format(i[1])
                    line += "\n"
                graph_file.write(line)
                graph_file.write("KL\n")
                line = ""
                for s in splits:
                    for i in split_loss[s]:
                        line += "{},".format(i[2])
                    line += "\n"
                graph_file.write(line)
                graph_file.close()
                exit()

        if epoch == args.epochs - 1:
            exp_str = ""
            exp_str += "train_ELBO={}\n".format(
                split_elbo["train"][val_min_epoch])
            exp_str += "valid_ELBO={}\n".format(
                split_elbo["valid"][val_min_epoch])
            if args.test:
                exp_str += "test_ELBO={}\n".format(
                    split_elbo["test"][val_min_epoch])
            exp_str += "==========\n"
            log_file.write(exp_str)
            log_file.close()
            print(exp_str)
            graph_file.write("ELBO\n")
            line = ""
            for s in splits:
                for i in split_loss[s]:
                    line += "{},".format(i[0])
                line += "\n"
            graph_file.write(line)
            graph_file.write("NLL\n")
            line = ""
            for s in splits:
                for i in split_loss[s]:
                    line += "{},".format(i[1])
                line += "\n"
            graph_file.write(line)
            graph_file.write("KL\n")
            line = ""
            for s in splits:
                for i in split_loss[s]:
                    line += "{},".format(i[2])
                line += "\n"
            graph_file.write(line)
            graph_file.close()
            exit()
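
This variant folds an early-stopping rule into the loop: training ends once the validation ELBO has not improved for three consecutive epochs, and the best epoch is remembered for the final report. A compact standalone sketch of that rule (hypothetical helper class, not project code):

class EarlyStopping:
    """Stop when the monitored value has not improved for `patience` epochs."""

    def __init__(self, patience=3):
        self.patience = patience
        self.best = float('inf')
        self.best_epoch = None
        self.stale = 0

    def step(self, value, epoch):
        if value < self.best:
            self.best, self.best_epoch, self.stale = value, epoch, 0
        else:
            self.stale += 1
        return self.stale >= self.patience   # True -> stop training


stopper = EarlyStopping(patience=3)
for epoch, val_elbo in enumerate([5.0, 4.2, 4.3, 4.25, 4.4]):
    if stopper.step(val_elbo, epoch):
        print("stopping at epoch", epoch, "best was epoch", stopper.best_epoch)
        break
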
Code example #4
def main(args):
    #print('start')
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid']

    if args.tensorboard_logging:
        print('Tensorboard logging on')

    w_datasets, y_datasets = load_e2e(args.create_data,
                                      args.max_sequence_length, args.min_occ)
    print('datasets loaded')
    print((y_datasets[splits[0]].shape[1]))
    label_sequence_len = y_datasets[splits[0]].shape[1]

    print('lsl')
    print(y_datasets['train'].shape)
    model = SentenceJMVAE(vocab_size=w_datasets['train'].vocab_size,
                          sos_idx=w_datasets['train'].sos_idx,
                          eos_idx=w_datasets['train'].eos_idx,
                          pad_idx=w_datasets['train'].pad_idx,
                          max_sequence_length=args.max_sequence_length,
                          embedding_size=args.embedding_size,
                          rnn_type=args.rnn_type,
                          hidden_size=args.hidden_size,
                          word_dropout=args.word_dropout,
                          latent_size=args.latent_size,
                          num_layers=args.num_layers,
                          label_sequence_len=label_sequence_len,
                          bidirectional=args.bidirectional)
    print('model created')
    if torch.cuda.is_available():
        model = model.cuda()

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join('./', args.logdir, 'JMVAE', expierment_name(args,
                                                                     ts)))
        writer.add_text("model_jmvae", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join('./', args.save_model_path, 'JMVAE', ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(size_average=False,
                           ignore_index=w_datasets['train'].pad_idx)
    BCE = torch.nn.BCELoss(size_average=False)

    def loss_fn_plus(logp, logp2, target, target2, length, mean, logv, mean_w,
                     logv_w, mean_y, logv_y, anneal_function, step, k, x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))
        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)
        NLL_w_avg = NLL_loss / torch.sum(length).float()
        #Cross entropy loss
        BCE_loss = BCE(logp2, target2)
        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())

        KL_loss_w = [
            0.5 * ((sigma0.exp() / sigma1.exp()).sum() + torch.sum(
                ((mu1 - mu0)**2) * (1 / torch.exp(sigma1))) -
                   (mu0.size(0)) + sigma1.sum() - sigma0.sum())
            for mu0, sigma0, mu1, sigma1 in zip(mean, logv, mean_w, logv_w)
        ]
        KL_loss_w = sum(KL_loss_w)  #/len(KL_loss_w)

        KL_loss_y = [
            0.5 * ((sigma0.exp() / sigma1.exp()).sum() + torch.sum(
                ((mu1 - mu0)**2) * (1 / torch.exp(sigma1))) -
                   (mu0.size(0)) + sigma1.sum() - sigma0.sum())
            for mu0, sigma0, mu1, sigma1 in zip(mean, logv, mean_y, logv_y)
        ]
        KL_loss_y = sum(KL_loss_y)  #/len(KL_loss_y)

        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, BCE_loss, KL_loss, KL_loss_w, KL_loss_y, KL_weight, NLL_w_avg

    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    print('starting training')
    for epoch in range(args.epochs):
        for split in splits:
            print('split: ', split, '\tepoch: ', epoch)
            #print(split)
            #print((w_datasets[split][0]))
            #print(w_datasets['train'])

            data_loader = DataLoader(
                dataset=w_datasets[split],  #y_datasets[split],
                batch_size=args.batch_size,
                shuffle=split == 'train',
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available())
            #print('Out dataloader received')
            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):
                #print('new batch')
                #print('batch')
                batch_size = batch['input'].size(0)
                #print(iteration,batch['labels'])
                batch['labels'] = batch['labels'].float()
                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)
                #print('labels preprocessed')
                # Forward pass
                logp, logp2, mean, logv, z, mean_w, logv_w, mean_y, logv_y = model(
                    batch['input'], batch['labels'], batch['length'])
                #print('forward pass done')
                # loss calculation
                NLL_loss, BCE_loss, KL_loss, KL_loss_w, KL_loss_y, KL_weight, NLL_w_avg = loss_fn_plus(
                    logp, logp2, batch['target'], batch['labels'],
                    batch['length'], mean, logv, mean_w, logv_w, mean_y,
                    logv_y, args.anneal_function, step, args.k, args.x0)
                #!!!!
                # MAYBE ADD WEIGHTS TO KL_W AND KL_Y BASED ON THEIR DIMENSIONALITY
                #!!!
                loss = (NLL_loss + args.bce_weight * BCE_loss + KL_weight *
                        (KL_loss + args.alpha *
                         (KL_loss_w + KL_loss_y))) / batch_size
                #print('loss calculated')

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                #print('backprop done')
                # bookkeeping: torch.cat fails on a 0-dim loss tensor, so wrap the
                # scalar in a 1-element tensor on the same device before appending
                loss_data = loss.data.view(1)
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss_data))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.data[0],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/BCE Loss" % split.upper(),
                                      BCE_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss-w" % split.upper(),
                                      KL_loss_w.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss-y" % split.upper(),
                                      KL_loss_y.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, BCE-Loss %9.4f, KL-Loss-joint %9.4f, KL-Loss-w %9.4f, KL-Loss-y %9.4f, KL-Weight %6.3f, NLL-word-Loss %9.4f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), NLL_loss.item() / batch_size,
                           BCE_loss.item() / batch_size, KL_loss.item() /
                           batch_size, KL_loss_w.item() / batch_size,
                           KL_loss_y.item() / batch_size, KL_weight,
                           NLL_w_avg.item()))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=w_datasets['train'].get_i2w(),
                        pad_idx=w_datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('./dumps', ts)):
                    os.makedirs('./dumps/' + ts)
                with open(
                        os.path.join('./dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w+') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train' and epoch % 10 == 0:
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
Code example #5
def main(args):

    #create dir name
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    ts = ts.replace(':', '-')

    #prepare dataset
    splits = ['train', 'valid'] + (['test'] if args.test else [])

    #create dataset object
    datasets = OrderedDict()

    # create test and train split in data, also preprocess
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    #get training params
    params = dict(vocab_size=datasets['train'].vocab_size,
                  sos_idx=datasets['train'].sos_idx,
                  eos_idx=datasets['train'].eos_idx,
                  pad_idx=datasets['train'].pad_idx,
                  unk_idx=datasets['train'].unk_idx,
                  max_sequence_length=args.max_sequence_length,
                  embedding_size=args.embedding_size,
                  rnn_type=args.rnn_type,
                  hidden_size=args.hidden_size,
                  word_dropout=args.word_dropout,
                  embedding_dropout=args.embedding_dropout,
                  latent_size=args.latent_size,
                  num_layers=args.num_layers,
                  bidirectional=args.bidirectional)

    #init model object
    model = SentenceVAE(**params)

    if torch.cuda.is_available():
        model = model.cuda()

    #logging
    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    # make dir
    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    #write params to json and save
    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)

    #defines the annealing weight applied to the KL term at each training step
    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    #defining NLL loss to measure accuracy of the decoding
    NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx,
                           reduction='sum')

    #this function is used to compute the 2 loss terms and KL loss weight
    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())

        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor

    step = 0

    for epoch in range(args.epochs):

        #do train and then test
        for split in splits:

            #create dataloader
            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            #tracker used to track the loss
            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            #start batch wise training/testing
            for iteration, batch in enumerate(data_loader):

                #get batch size
                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                       batch['length'], mean,
                                                       logv,
                                                       args.anneal_function,
                                                       step, args.k, args.x0)

                # final loss calculation
                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()  #flush grads
                    loss.backward()  #run bp
                    optimizer.step()  #run gd
                    step += 1

                # bookkeeping
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.data.view(1, -1)), dim=0)

                #logging of losses
                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                #
                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), NLL_loss.item() / batch_size,
                           KL_loss.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs, tracker['ELBO'].mean()))

            #more logging
            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
Code example #6
def main(args):

    ################ config your params here ########################
    # ortho = False
    # attention = False
    # hspace_classifier = False
    # diversity = False # do not try this yet, need to fix bugs
    
    # create dir name
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    ts = ts.replace(':', '-')
    ts = ts+'-'+args.dataset

    
    if(args.ortho):
        ts = ts+'-ortho'
    if(args.hspace):
        ts = ts+'-hspace'
    if(args.attention):
        ts = ts+'-self-attn'

   

    if(args.dataset == "multitask"):
        print("Running multitask dataset!")
        vae_model = SentenceVaeMultiTask
        dataset = SnliYelp
    if(args.dataset == "snli"):
        print("Running SNLI!")
        vae_model = SentenceVaeSnli
        dataset = SNLI
    if(args.dataset == "yelp"):
        print("Running Yelp!")
        vae_model = SentenceVaeYelp
        dataset = Yelpd

     # prepare dataset
    splits = ['train', 'test']

    # create dataset object
    datasets = OrderedDict()
    

    # create test and train split in data, also preprocess
    for split in splits:
        print("creating dataset for: {}".format(split))
        datasets[split] = dataset(
            split=split,
            create_data=args.create_data,
            min_occ=args.min_occ
        )

    i2w = datasets['train'].get_i2w()
    w2i = datasets['train'].get_w2i()

    # get training params
    params = dict(
        vocab_size=datasets['train'].vocab_size,
        sos_idx=datasets['train'].sos_idx,
        eos_idx=datasets['train'].eos_idx,
        pad_idx=datasets['train'].pad_idx,
        unk_idx=datasets['train'].unk_idx,
        max_sequence_length=datasets['train'].max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        ortho=args.ortho,
        attention=args.attention,
        hspace_classifier=args.hspace,
        diversity=args.diversity
    )

    # init model object
    model = vae_model(**params)

    if torch.cuda.is_available():
        model = model.cuda()

    # logging
    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    # make dir
    save_model_path = os.path.join(datasets["train"].save_model_path, ts)
    os.makedirs(save_model_path)

    # write params to json and save
    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)

    # defining function that returns disentangling weight used for KL loss at each input step

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1/(1+np.exp(-k*(step-x0))))
        elif anneal_function == 'linear':
            return min(1, step/x0)

    # defining NLL loss to measure accuracy of the decoding
    NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx, reduction='sum')

    loss_fn_2 = F.cross_entropy

    # this functiom is used to compute the 2 loss terms and KL loss weight
    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):

        # cut-off unnecessary padding from target, and flatten
       
        target = target[:, :datasets["train"].max_sequence_length].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood        
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0

    
    overall_losses = defaultdict(dict)

    loss_at_epoch = {
        'nll_loss': 0.0,
        'kl_loss': 0.0,
        'style_loss': 0.0,
        'content_loss': 0.0,
        'diversity_loss': 0.0,
        'hspace_loss': 0.0,
        'nll_loss_test': 0.0,
        'kl_loss_test': 0.0,
        'style_loss_test': 0.0,
        'content_loss_test': 0.0,
        'diversity_loss_test': 0.0,
        'hspace_loss_test': 0.0
    }

    for epoch in range(args.epochs):

        # do train and then test
        for split in splits:

            # create dataloader
            data_loader = DataLoader(
                dataset=datasets[split],
                batch_size=args.batch_size,
                shuffle=split == 'train',
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available()
            )

            # tracker used to track the loss
            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            # start batch wise training/testing
            for iteration, batch in enumerate(data_loader):

                # get batch size
                batch_size = batch['input'].size(0)

               
                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # try sample
                # print(idx2word(batch['target'][0:1], i2w=i2w, pad_idx=w2i['<pad>']))
                # print(batch['label'][0])
                # continue
                # print("neg: {}, pos: {}".format(style_preds[0:1,0], style_preds[0:1,1]))

                # Forward pass
                logp, final_mean, final_logv, final_z, style_preds, content_preds, hspace_preds, diversity_loss = model(batch['input'], batch['length'], batch['label'], batch['bow'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'], batch['length'], final_mean, final_logv, args.anneal_function, step, args.k, args.x0)
        
                style_loss = nn.MSELoss()(style_preds, batch['label'].type(torch.FloatTensor).cuda()) #classification loss
                content_loss = nn.MSELoss()(content_preds, batch['bow'].type(torch.FloatTensor).cuda()) #classification loss

                if(hspace_preds is None):
                    hspace_classifier_loss = 0
                else:
                    hspace_classifier_loss = nn.MSELoss()(hspace_preds, batch['label'].type(torch.FloatTensor).cuda()) 

                # final loss calculation
                loss = (NLL_loss + KL_weight * KL_loss) / batch_size + 1000 * style_loss + 1000*content_loss
                # loss = (NLL_loss + KL_weight * KL_loss) / batch_size 

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()  # flush grads
                    
                    if args.diversity:
                        loss.backward(retain_graph=True)  # run bp
                        diversity_loss.backward()
                    else:
                        loss.backward()  # run bp

                    optimizer.step()  # run gd
                    step += 1

                


                # bookkeeping
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data.view(1, -1)), dim=0)

                # logging of losses
                if args.tensorboard_logging:
                    writer.add_scalar(
                        "%s/ELBO" % split.upper(), loss.item(), epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size,
                                      epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size,
                                      epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
                                      epoch*len(data_loader) + iteration)
                                      

                # periodic console logging
                if iteration % args.print_every == 0 or iteration+1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, Style-Loss %9.4f, Content-Loss %9.4f, Hspace-Loss %9.4f, Diversity-Loss %9.4f"
                          % (split.upper(), iteration, len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size,
                             KL_loss.item()/batch_size, KL_weight, style_loss, content_loss, hspace_classifier_loss, diversity_loss))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], final_z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs, tracker['ELBO'].mean()))
            
             

            # more logging
            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/'+ts)
                with open(os.path.join('dumps/'+ts+'/valid_E%i.json' % epoch), 'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(
                    save_model_path, "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
            
            # update losses log
            if split == "train":
                loss_at_epoch['nll_loss'] = float(NLL_loss / args.batch_size)
                loss_at_epoch['kl_loss'] = float(KL_loss)
                loss_at_epoch['style_loss'] = float(style_loss)
                loss_at_epoch['content_loss'] = float(content_loss)
                loss_at_epoch['diversity_loss'] = float(diversity_loss)
                loss_at_epoch['hspace_loss'] = float(hspace_classifier_loss)
            else:
                loss_at_epoch['nll_loss_test'] = float(NLL_loss / args.batch_size)
                loss_at_epoch['kl_loss_test'] = float(KL_loss)
                loss_at_epoch['style_loss_test'] = float(style_loss)
                loss_at_epoch['content_loss_test'] = float(content_loss)
                loss_at_epoch['diversity_loss_test'] = float(diversity_loss)
                loss_at_epoch['hspace_loss_test'] = float(hspace_classifier_loss)

        # record this epoch's losses once per epoch, as a copy so later epochs do not overwrite earlier entries
        overall_losses[epoch] = dict(loss_at_epoch)
        
    # write losses to json
    with open(os.path.join(save_model_path, 'losses.json'), 'w') as f:
        json.dump(overall_losses, f, indent=4)
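
Every script on this page calls a small to_var helper that is not reproduced here. A minimal sketch, assuming it only handles GPU placement of tensors, could look like this:

import torch

def to_var(x):
    # assumption: to_var simply moves a tensor to the GPU when one is available
    if torch.cuda.is_available():
        x = x.cuda()
    return x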
Code Example #7
def main(args):

    # Load the vocab
    with open(args.data_dir+'/ptb.vocab.json', 'r') as file:
        vocab = json.load(file)

    w2i, i2w = vocab['w2i'], vocab['i2w']

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    # Initialize semantic loss
    sl = Semantic_Loss()

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ
        )

    params = dict(
        vocab_size=datasets['train'].vocab_size,
        sos_idx=datasets['train'].sos_idx,
        eos_idx=datasets['train'].eos_idx,
        pad_idx=datasets['train'].pad_idx,
        unk_idx=datasets['train'].unk_idx,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )
    model = SentenceVAE(**params)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1/(1+np.exp(-k*(step-x0))))
        elif anneal_function == 'linear':
            return min(1, step/x0)

    def perplexity_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, (step/x0))

    NLL = torch.nn.NLLLoss(ignore_index=datasets['train'].pad_idx, reduction='sum')
    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0, \
        batch_perplexity, perplexity_anneal_function):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).item()].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        # Perplexity
        perp_loss = batch_perplexity
        perp_weight = perplexity_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight


    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        # Keep track of epoch loss
        epoch_loss = []

        for split in splits:

            data_loader = DataLoader(
                dataset=datasets[split],
                batch_size=args.batch_size,
                shuffle=split=='train',
                num_workers=cpu_count(),
                pin_memory=torch.cuda.is_available()
            )

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            batch_t_start = None

            for iteration, batch in enumerate(data_loader):

                if batch_t_start:
                    batch_run_time = time.time() - batch_t_start
                    # print("Batch run time: " + str(batch_run_time))
                batch_t_start = time.time()


                batch_size = batch['input_sequence'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Get the original sentences in this batch
                batch_sentences = idx2word(batch['input_sequence'], i2w=i2w, pad_idx=w2i['<pad>'])
                # Remove the first tag
                batch_sentences = [x.replace("<sos>", "") for x in batch_sentences]

                # Forward pass
                (logp, mean, logv, z), states = model(**batch)


                # Choose some random pairs of samples within the batch
                #  to get latent representations for
                batch_index_pairs = list(itertools.combinations(np.arange(batch_size), 2))
                random.shuffle(batch_index_pairs)
                batch_index_pairs = batch_index_pairs[:args.perplexity_samples_per_batch]

                batch_perplexity = []

                # Only start using the perplexity loss after epoch 10
                start_perplexity = epoch > 10

                # If we should have perplexity loss
                if start_perplexity and args.perplexity_loss:
                    # For each pair, get the intermediate representations in the latent space
                    for index_pair in batch_index_pairs:

                        with torch.no_grad():
                            z1_hidden = states['z'][index_pair[0]].cpu()
                            z2_hidden = states['z'][index_pair[1]].cpu()

                        z_hidden = to_var(torch.from_numpy(interpolate(start=z1_hidden, end=z2_hidden, steps=1)).float())

                        if args.rnn_type == "lstm":

                            with torch.no_grad():
                                z1_cell_state = states['z_cell_state'].cpu().squeeze()[index_pair[0]]
                                z2_cell_state = states['z_cell_state'].cpu().squeeze()[index_pair[1]]

                            z_cell_states = \
                                to_var(torch.from_numpy(interpolate(start=z1_cell_state, end=z2_cell_state, steps=1)).float())

                            samples, _ = model.inference(z=z_hidden, z_cell_state=z_cell_states)
                        else:
                            samples, _ = model.inference(z=z_hidden, z_cell_state=None)

                        # Check interpolated sentences
                        interpolated_sentences = idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>'])
                        # For each sentence, get the perplexity and show it
                        perplexities = []
                        for sentence in interpolated_sentences:
                            perplexities.append(sl.get_perplexity(sentence))
                        avg_sample_perplexity = sum(perplexities) / len(perplexities)
                        batch_perplexity.append(avg_sample_perplexity)
                    # Calculate batch perplexity
                    avg_batch_perplexity = sum(batch_perplexity) / len(batch_perplexity)

                    # loss calculation
                    NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(logp, batch['target'],
                        batch['length'], mean, logv, args.anneal_function, step, \
                            args.k, args.x0, avg_batch_perplexity, perplexity_anneal_function)

                    loss = ((NLL_loss + KL_weight * KL_loss) / batch_size) + (perp_loss * perp_weight)

                else:  # perplexity loss not active yet (or disabled), train on the plain ELBO
                    # loss calculation
                    NLL_loss, KL_loss, KL_weight, perp_loss, perp_weight = loss_fn(logp, batch['target'],
                        batch['length'], mean, logv, args.anneal_function, step, \
                            args.k, args.x0, 0, perplexity_anneal_function)

                    loss = (NLL_loss + KL_weight * KL_loss) / batch_size


                # Turn model back into train, since inference changed to eval
                if split == 'train':
                    model.train()
                else:
                    model.eval()

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                    # Add loss
                    epoch_loss.append(loss.item())

                # bookkeepeing
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.data.view(1, -1)), dim=0)

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(), epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(), NLL_loss.item() / batch_size,
                                      epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(), KL_loss.item() / batch_size,
                                      epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
                                      epoch*len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration+1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f, Perp-loss %9.4f, Perp-weight %6.3f"
                          % (split.upper(), iteration, len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size,
                          KL_loss.item()/batch_size, KL_weight, perp_loss, perp_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(),
                                                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs, tracker['ELBO'].mean()))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/'+ts)
                with open(os.path.join('dumps/'+ts+'/valid_E%i.json' % epoch), 'w') as dump_file:
                    json.dump(dump,dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
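
Code Example #7 above uses an interpolate helper that is not shown on this page. A plausible sketch, assuming plain linear interpolation between two latent vectors; the caller wraps the result in torch.from_numpy(...).float():

import numpy as np

def interpolate(start, end, steps):
    # returns an array of shape (steps + 2, latent_size): the two endpoints
    # plus "steps" evenly spaced points between them
    interpolation = np.zeros((start.shape[0], steps + 2))
    for dim, (s, e) in enumerate(zip(start, end)):
        interpolation[dim] = np.linspace(s, e, steps + 2)
    return interpolation.T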
Code Example #8
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.localtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ,
                              use_bert=False)  # NOTE: original line was garbled ("args. False"); assuming BERT features are disabled here

    model = SentenceVAE(alphabet_size=datasets['train'].alphabet_size,
                        vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)
    print("Saving model to directory: " + save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    def word_weight_function(step, k, x0):
        return float(1 / (1 + np.exp(-k * (step - x0))))

    NLL = torch.nn.NLLLoss(reduction='sum',
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(def_logp, word_logp, def_target, def_length, word_target,
                word_length, mean, logv):

        # cut-off unnecessary padding from target definition, and flatten
        def_target = def_target[:, :torch.max(def_length).item()].contiguous().view(-1)
        def_logp = def_logp.view(-1, def_logp.size(2))

        # Negative Log Likelihood
        def_NLL_loss = NLL(def_logp, def_target)

        # cut off padding for words
        word_target = word_target[:, :torch.max(word_length).item()].contiguous().view(-1)
        word_logp = word_logp.view(-1, word_logp.size(2))

        # Word NLL
        word_NLL_loss = NLL(word_logp, word_target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())

        return def_NLL_loss, word_NLL_loss, KL_loss

    def get_weights(anneal_function, step, k, x0):
        # for logistic function, k = growth rate
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)
        word_weight = word_weight_function(step, k, x0)

        return {'def': 1, 'word': word_weight, 'kl': KL_weight}

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model = model.train()
            else:
                model = model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                [def_logp,
                 word_logp], mean, logv, z = model(batch['input'],
                                                   batch['length'],
                                                   batch['word_length'])

                # loss calculation
                def_NLL_loss, word_NLL_loss, KL_loss = loss_fn(
                    def_logp, word_logp, batch['target'], batch['length'],
                    batch['word'], batch['word_length'], mean, logv)
                weights = get_weights(args.anneal_function, step, args.k,
                                      args.x0)

                loss = (weights['def'] * def_NLL_loss + weights['word'] *
                        word_NLL_loss + weights['kl'] * KL_loss) / batch_size

                mean_logv = torch.mean(logv)

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.detach().unsqueeze(0)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Def NLL Loss" % split.upper(),
                                      def_NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Word NLL Loss" % split.upper(),
                                      word_NLL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      weights['kl'],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Word Weight" % split.upper(),
                                      weights['word'],
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, Def NLL-Loss %9.4f, Word NLL-Loss %9.4f  Word-Weight %6.3f, KL-Loss %9.4f, KL-Weight %6.3f KL-VAL %9.4f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), def_NLL_loss.item() / batch_size,
                           word_NLL_loss.item() / batch_size, weights['word'],
                           KL_loss.item() / batch_size, weights['kl'],
                           mean_logv))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'],
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
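
For reference, the KL term that every loss_fn above computes, -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp()), is the closed-form KL divergence between the diagonal-Gaussian approximate posterior and a standard normal prior:

KL( N(\mu, \operatorname{diag}(\sigma^2)) \,\|\, N(0, I) )
    = -\tfrac{1}{2} \sum_j \left( 1 + \log \sigma_j^2 - \mu_j^2 - \sigma_j^2 \right),
    \qquad \log \sigma_j^2 = \texttt{logv}_j,\; \mu_j = \texttt{mean}_j .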
Code Example #9
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        # datasets[split] = BGoogle(
        #     data_dir=args.data_dir,
        #     split=split,
        #     create_data=args.create_data,
        #     batch_size=args.batch_size ,
        #     max_sequence_length=args.max_sequence_length,
        #     min_occ=args.min_occ
        # )

        datasets[split] = Amazon(data_dir=args.data_dir,
                                 split=split,
                                 create_data=args.create_data,
                                 batch_size=args.batch_size,
                                 max_sequence_length=args.max_sequence_length,
                                 min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    tokenizer = TweetTokenizer(preserve_case=False)
    vocab_file = "amazon.vocab.json"
    with open(os.path.join(args.data_dir, vocab_file), 'r') as file:
        vocab = json.load(file)
        w2i, i2w = vocab['w2i'], vocab['i2w']

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    # save_model_path = os.path.join(args.save_model_path, ts)
    save_model_path = args.save_model_path

    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(size_average=False,
                           ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    save_mode = True
    last_ELBO = 1e32

    for epoch in range(args.epochs):
        print("+" * 20)

        # f_test_example(model, tokenizer, w2i, i2w)
        for split in splits:

            # data_loader = DataLoader(
            #     dataset=datasets[split],
            #     batch_size=args.batch_size,
            #     shuffle=split=='train',
            #     num_workers=cpu_count(),
            #     pin_memory=torch.cuda.is_available()
            # )
            batch_size = args.batch_size
            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            # for iteration, batch in enumerate(data_loader):
            iteration = 0
            iteration_total = datasets[split].batch_num
            print("batch_num", iteration_total)
            for input_batch_tensor, target_batch_tensor, length_batch_tensor in datasets[
                    split]:

                if torch.is_tensor(input_batch_tensor):
                    input_batch_tensor = to_var(input_batch_tensor)

                if torch.is_tensor(target_batch_tensor):
                    target_batch_tensor = to_var(target_batch_tensor)

                if torch.is_tensor(length_batch_tensor):
                    length_batch_tensor = to_var(length_batch_tensor)

                # batch_size = batch['input'].size(0)

                # for k, v in batch.items():
                #     if torch.is_tensor(v):
                #         batch[k] = to_var(v)

                # Forward pass
                # logp, mean, logv, z = model(batch['input'], batch['length'])
                logp, mean, logv, z = model(input_batch_tensor,
                                            length_batch_tensor)

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(
                    logp, target_batch_tensor, length_batch_tensor, mean, logv,
                    args.anneal_function, step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                iteration += 1
                # bookkeepeing
                # print("elbo", tracker['ELBO'])
                # print("loss", loss)
                if iteration == 0:
                    tracker['ELBO'] = loss.data
                    tracker['ELBO'] = tracker['ELBO'].view(1)
                else:
                    tracker['ELBO'] = torch.cat(
                        (tracker['ELBO'], loss.view(1)))

                if args.tensorboard_logging:
                    # print(loss.data)
                    writer.add_scalar("%s/ELBO" % split.upper(),
                                      loss.data.item(),
                                      epoch * iteration_total + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.data.item() / batch_size,
                                      epoch * iteration_total + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.data.item() / batch_size,
                                      epoch * iteration_total + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * iteration_total + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == iteration_total:
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, iteration_total - 1,
                           loss.data.item(), NLL_loss.data.item() / batch_size,
                           KL_loss.data.item() / batch_size, KL_weight))

                # if split == 'valid':
                # if 'target_sents' not in tracker:
                #     tracker['target_sents'] = list()
                # tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)

                # # print("z", tracker['z'], z)
                # tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)
                # break

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))

            cur_ELBO = torch.mean(tracker['ELBO'])
            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), cur_ELBO,
                                  epoch)

            if split == "valid":
                if cur_ELBO < last_ELBO:
                    save_mode = True
                else:
                    save_mode = False
                last_ELBO = cur_ELBO

            # save a dump of all sentences and the encoded latent space
            # if split == 'valid':
            #     dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()}
            #     if not os.path.exists(os.path.join('dumps', ts)):
            #         os.makedirs('dumps/'+ts)
            #     with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
            #         json.dump(dump,dump_file)

            # save checkpoint
            if split == 'train':
                # checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
                checkpoint_path = os.path.join(save_model_path, "best.pytorch")
                if save_mode:
                    torch.save(model.state_dict(), checkpoint_path)
                    print("Model saved at %s" % checkpoint_path)
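
The KL annealing schedules defined in these scripts can be inspected in isolation. A small standalone sketch (the values k=0.0025 and x0=2500 are illustrative assumptions, not taken from any example above):

import numpy as np

def kl_anneal_function(anneal_function, step, k, x0):
    # same definition as in the training scripts above
    if anneal_function == 'logistic':
        return float(1 / (1 + np.exp(-k * (step - x0))))
    elif anneal_function == 'linear':
        return min(1, step / x0)

for step in (0, 1000, 2500, 5000):
    print(step,
          round(kl_anneal_function('logistic', step, 0.0025, 2500), 4),
          round(kl_anneal_function('linear', step, 0.0025, 2500), 4))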
Code Example #10
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    ptb = PTB(vocab_file=args.vocab_file,
              train_file=args.train_file,
              train_with_vocab=False,
              create_data=args.create_data,
              max_sequence_length=args.max_sequence_length,
              min_occ=args.min_occ)
    datasets = PTBDataset(ptb)
    print('done preprocessing data')

    model = SentenceVAE(vocab_size=datasets.vocab_size,
                        sos_idx=datasets.sos_idx,
                        eos_idx=datasets.eos_idx,
                        pad_idx=datasets.pad_idx,
                        unk_idx=datasets.unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    model.ptb = ptb

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets.pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):

        # cut-off unnecessary padding from target, and flatten
        #target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
        target = target[:, :torch.max(length).data].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    model.train()
    split = 'train'
    for epoch in range(args.epochs):
        data_loader = DataLoader(dataset=datasets,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=cpu_count(),
                                 pin_memory=torch.cuda.is_available())

        tracker = defaultdict(list)
        for iteration, batch in enumerate(data_loader):

            batch_size = batch['input'].size(0)

            for k, v in batch.items():
                if torch.is_tensor(v):
                    batch[k] = to_var(v)

            # Forward pass
            #logp, mean, logv, z = model(batch['input'], batch['length'])
            logp, mean, logv, z, encoder_last = model(batch['input'],
                                                      batch['length'])

            # loss calculation
            NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                   batch['length'], mean, logv,
                                                   args.anneal_function, step,
                                                   args.k, args.x0)

            loss = (NLL_loss + KL_weight * KL_loss) / batch_size

            # backward + optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            step += 1

            # bookkeepeing
            tracker['ELBO'].append(loss.data.cpu().numpy().tolist())

            if args.tensorboard_logging:
                writer.add_scalar("%s/ELBO" % split.upper(), loss.data,
                                  epoch * len(data_loader) + iteration)
                writer.add_scalar("%s/NLL Loss" % split.upper(),
                                  NLL_loss.data / batch_size,
                                  epoch * len(data_loader) + iteration)
                writer.add_scalar("%s/KL Loss" % split.upper(),
                                  KL_loss.data / batch_size,
                                  epoch * len(data_loader) + iteration)
                writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
                                  epoch * len(data_loader) + iteration)

            if iteration % args.print_every == 0 or iteration + 1 == len(
                    data_loader):
                print(
                    "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                    % (split.upper(), iteration, len(data_loader) - 1,
                       loss.data, NLL_loss.data / batch_size,
                       KL_loss.data / batch_size, KL_weight))

        if split == 'valid':
            if 'target_sents' not in tracker:
                tracker['target_sents'] = list()
            tracker['target_sents'] += idx2word(batch['target'].data,
                                                i2w=datasets.get_i2w(),
                                                pad_idx=datasets.pad_idx)
            tracker['z'].append(z.data)

        print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
              (split.upper(), epoch, args.epochs, np.mean(tracker['ELBO'])))

        if args.tensorboard_logging:
            writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                              np.mean(tracker['ELBO']), epoch)
        '''
        # save a dump of all sentences and the encoded latent space
        if split == 'valid':
            dump = {'target_sents':tracker['target_sents'], 'z':tracker['z']}
            if not os.path.exists(os.path.join('dumps', ts)):
                os.makedirs('dumps/'+ts)
            with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
                json.dump(dump,dump_file)
        '''

        # save checkpoint
        if split == 'train':
            checkpoint_path = os.path.join(save_model_path,
                                           "E%i.pytorch" % (epoch))
            torch.save(model.state_dict(), checkpoint_path)
            joblib.dump(model.cpu(), checkpoint_path)
            print("Model saved at %s" % checkpoint_path)

        if torch.cuda.is_available():
            model.cuda()
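
Several of the examples decode index tensors back to text with idx2word, which is also not defined on this page. A minimal sketch, assuming the i2w mapping is keyed by string indices (as when loaded from the vocab json) and that decoding simply stops at the pad token:

def idx2word(idx, i2w, pad_idx):
    # idx: iterable of index sequences (e.g. a LongTensor of shape batch x time)
    sent_str = ["" for _ in range(len(idx))]
    for i, sent in enumerate(idx):
        for word_id in sent:
            if int(word_id) == pad_idx:
                break
            sent_str[i] += i2w[str(int(word_id))] + " "
        sent_str[i] = sent_str[i].strip()
    return sent_str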
Code Example #11
File: train.py  Project: yfzhoucs/Sentence-VAE
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    print("Loading Vocab", args.vocab_path)
    vocab = WordVocab.load_vocab(args.vocab_path)
    print("Vocab Size: ", len(vocab))

    print("Loading Train Dataset", args.train_dataset)
    train_dataset = BERTDataset(args.train_dataset, vocab, seq_len=args.max_sequence_length,
                                corpus_lines=args.corpus_lines, on_memory=args.on_memory)

    print("Loading Test Dataset", args.test_dataset)
    test_dataset = BERTDataset(args.test_dataset, vocab, seq_len=args.max_sequence_length, on_memory=args.on_memory) \
        if args.test_dataset is not None else None

    print("Creating Dataloader")
    train_data_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    test_data_loader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=args.num_workers) \
        if test_dataset is not None else None

    splits = ['train', 'test']
    data_loaders = {
        'train': train_data_loader,
        'test': test_data_loader
    }

    model = SentenceVAE(
        vocab_size=len(vocab),
        sos_idx=vocab.sos_index,
        eos_idx=vocab.eos_index,
        pad_idx=vocab.pad_index,
        unk_idx=vocab.unk_index,
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
        )

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args,ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path)
    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1/(1+np.exp(-k*(step-x0))))
        elif anneal_function == 'linear':
            return min(1, step/x0)

    NLL = torch.nn.NLLLoss(size_average=False, ignore_index=vocab.pad_index)
    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):

        # note: unlike the other examples, target is used as-is here (no padding cut-off or flatten)
        
        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = data_loaders[split]

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            correct = 0
            close = 0
            total = 0
            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['raw_length'])

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                    batch['raw_length'], mean, logv, args.anneal_function, step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss)/batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                correct += logp.argmax(dim=1).eq(batch['target']).sum().item()
                close += torch.mul(logp.argmax(dim=1).ge(batch["target"]-10), logp.argmax(dim=1).le(batch["target"]+10)).sum().item()
                total += batch['target'].nelement()


                # bookkeepeing
                tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.view(1,)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO"%split.upper(), loss.data[0], epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss"%split.upper(), NLL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss"%split.upper(), KL_loss.data[0]/batch_size, epoch*len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight"%split.upper(), KL_weight, epoch*len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration+1 == len(data_loader):
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        %(split.upper(), iteration, len(data_loader)-1, loss.item(), NLL_loss.item()/batch_size, KL_loss.item()/batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(batch['raw'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f, acc %f, clo %f"%(split.upper(), epoch, args.epochs, torch.mean(tracker['ELBO']), correct/total, close/total))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO"%split.upper(), torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {'target_sents':tracker['target_sents'], 'z':tracker['z'].tolist()}
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/'+ts)
                with open(os.path.join('dumps/'+ts+'/valid_E%i.json'%epoch), 'w') as dump_file:
                    json.dump(dump,dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s"%checkpoint_path)
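
The TensorBoard setup in every script builds its run directory with expierment_name(args, ts) (the misspelling is in the original identifier). A hypothetical sketch, assuming it merely concatenates a few salient hyperparameters with the timestamp; the exact fields are an assumption, not taken from any of the projects above:

def expierment_name(args, ts):
    # hypothetical field selection for the run-directory name
    return "BS=%i_LR=%s_HS=%i_LS=%i_TS=%s" % (
        args.batch_size, args.learning_rate, args.hidden_size,
        args.latent_size, ts)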
Code Example #12
def main(args):

    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid']

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PoetryDataset(
            data_dir=args.data_dir,
            split=split,
            create_data=args.create_data,
            max_sequence_length=args.max_sequence_length,
            min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional,
                        condition_size=7)

    if torch.cuda.is_available():
        model = model.cuda()

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(size_average=False,
                           ignore_index=datasets['train'].pad_idx)

    def calculate_bleu_scores(original, decoded):
        reference = original.split(' ')
        hypothesis = decoded.split(' ')
        return nltk.translate.bleu_score.sentence_bleu([reference], hypothesis)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k,
                x0):
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length)].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        total_BLEU_score = 0

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=0,
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):
                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(
                    batch['input'],
                    batch['length'],
                    condition=batch['category'].float())
                # logp, mean, logv, z = model(batch['input'], batch['length'], condition=None)

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, batch['target'],
                                                       batch['length'], mean,
                                                       logv,
                                                       args.anneal_function,
                                                       step, args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeepeing
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.data.unsqueeze(0)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.data[0],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.data.item(), NLL_loss.data.item() / batch_size,
                           KL_loss.data.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)
                    # Calculate BLEU score
                    decoded = torch.argmax(logp, dim=-1)

                    for i in range(decoded.shape[0]):
                        decoded_poem = idx2word(
                            [decoded[i]],
                            i2w=datasets['train'].get_i2w(),
                            pad_idx=datasets['train'].pad_idx)[0]
                        original_poem = idx2word(
                            [batch['target'].data[i]],
                            i2w=datasets['train'].get_i2w(),
                            pad_idx=datasets['train'].pad_idx)[0]
                        total_BLEU_score += calculate_bleu_scores(
                            original_poem, decoded_poem)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))
            if split == 'valid':
                print("Average BLEU {}".format(total_BLEU_score /
                                               decoded.shape[0]))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                if not os.path.exists(os.path.join('dumps', ts)):
                    os.makedirs('dumps/' + ts)
                with open(
                        os.path.join('dumps/' + ts +
                                     '/valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)
Code Example #13
0
File: train.py Project: Yufeng98/Sentence-VAE
def main(args):
    ts = time.strftime('%Y-%b-%d-%H-%M-%S', time.gmtime())

    # Load dataset
    splits = ['train', 'valid']
    datasets = OrderedDict()
    for split in splits:
        datasets[split] = Data(split, args.num_region, args.batch_size,
                               args.site, args.subject, args.seq_len,
                               args.embedding_size, args.cut_start, args.lines)

    # load model
    model = LSTM_VAE(
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,  # gru
        hidden_size=args.hidden_size,  # 256
        word_dropout=args.word_dropout,  # 0
        embedding_dropout=args.embedding_dropout,  # 0.5
        latent_size=args.latent_size,  # 8
        num_layers=args.num_layers,  # 1
        bidirectional=args.bidirectional  # false
    )

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)
    """
    SentenceVAE(
      (embedding_dropout): Dropout(p=0.5)
      (encoder_rnn): GRU(32, 256, batch_first=True)
      (decoder_rnn): GRU(32, 256, batch_first=True)
      (hidden2mean): Linear(in_features=256, out_features=8, bias=True)
      (hidden2logv): Linear(in_features=256, out_features=8, bias=True)
      (latent2hidden): Linear(in_features=16, out_features=256, bias=True)
    )
    """
    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    # NLL = torch.nn.NLLLoss(size_average=False)
    mse_loss = torch.nn.MSELoss()
    cos_loss = torch.nn.CosineSimilarity(dim=-1)

    def loss_fn(output, target, mean, logvar):
        mse = mse_loss(output, target)
        cos = torch.mean(1 - cos_loss(output, target))
        KL_loss = -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp())
        return cos, mse, KL_loss

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0
    learning_rate = args.learning_rate
    for epoch in range(1, args.epochs + 1):
        if epoch > args.decay_epoch:
            learning_rate = learning_rate * args.learning_rate_decay
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = args.batch_size
                batch = batch.type(torch.float32)
                length = [args.seq_len for _ in range(args.batch_size)]
                if torch.is_tensor(batch):
                    batch = to_var(batch)
                target = batch.clone()

                # Forward pass
                output, mean, logvar, z = model(batch, length)

                # loss calculation
                cos, mse, KL_loss = loss_fn(output, target, mean, logvar)
                # print(cos.item(), mse.item(), KL_loss.item())
                loss = (cos + mse + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # book keeping
                tracker['ELBO'] = torch.cat(
                    (tracker['ELBO'], loss.detach().reshape(1)))

                if args.tensorboard_logging:
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.item(),
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Cos Loss" % split.upper(),
                                      cos.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/MSE Loss" % split.upper(),
                                      mse.item() / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.item() / batch_size,
                                      epoch * len(data_loader) + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "%s Batch %04d/%i, Loss %9.4f, Cos-Loss %9.4f, MSE-Loss %9.4f, KL-Loss %9.4f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), cos.item() / batch_size, mse.item() /
                           batch_size, KL_loss.item() / batch_size))

                # if split == 'valid':
                #     if 'target_sents' not in tracker:
                #         tracker['target_sents'] = list()
                #     tracker['target_sents'] += idx2word(batch['target'].detach(), i2w=datasets['train'].get_i2w(),
                #                                         pad_idx=datasets['train'].pad_idx)
                #     tracker['z'] = torch.cat((tracker['z'], z.detach()), dim=0)

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" %
                  (split.upper(), epoch, args.epochs,
                   torch.mean(tracker['ELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(),
                                  torch.mean(tracker['ELBO']), epoch)

            # # save a dump of all sentences and the encoded latent space
            # if split == 'valid':
            #     dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
            #     if not os.path.exists(os.path.join('dumps', ts)):
            #         os.makedirs('dumps/' + ts)
            #     with open(os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file:
            #         json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

            # save target & output for last validation batch
            if (epoch == args.epochs) and (split == "valid"):
                save = {
                    'target': target.cpu().detach().numpy().tolist(),
                    'output': output.cpu().detach().numpy().tolist()
                }
                with io.open('./{}_save.json'.format(args.site),
                             'wb') as data_file:
                    data = json.dumps(save, ensure_ascii=False)
                    data_file.write(data.encode('utf8', 'replace'))

    # save latent space
    latent = []
    for split in splits:

        data_loader = DataLoader(dataset=datasets[split],
                                 batch_size=args.batch_size,
                                 num_workers=cpu_count(),
                                 pin_memory=torch.cuda.is_available())

        model.eval()

        for iteration, batch in enumerate(data_loader):

            batch = batch.type(torch.float32)
            length = [args.seq_len for _ in range(args.batch_size)]
            if torch.is_tensor(batch):
                batch = to_var(batch)

            # Forward pass
            output, mean, logv, z = model(batch, length)

            # save latent space for both training and validation batch
            latent.append(z.cpu().detach().numpy().tolist())
    latent = np.array(latent).reshape(args.subject, args.num_region,
                                      args.seq_len, args.latent_size)
    print(np.shape(latent))
    with io.open('./{}_latent.json'.format(args.site), 'wb') as data_file:
        data = json.dumps(latent.tolist(), ensure_ascii=False)
        data_file.write(data.encode('utf8', 'replace'))
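
Note: both scripts above move tensors to the GPU through a to_var utility that
lives elsewhere in their repositories. A plausible minimal version is sketched
below; this is an assumption about what the helper does, not its verbatim
source.

# Assumed to_var helper: push a tensor to the GPU when CUDA is available,
# otherwise return it unchanged.
import torch

def to_var(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return x
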
Code Example #14
0
def main(args):

    # create dir name
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())
    ts = ts.replace(':', '-')
    ts = ts + '-' + args.dataset

    if (args.attention):
        ts = ts + '-self-attn'

    ts = ts + "-" + str(args.epochs)

    if (args.dataset == "yelp"):
        print("Running Yelp!")

        dataset = Yelp

    # prepare dataset
    splits = ['train', 'test']

    # create dataset object
    datasets = OrderedDict()

    # create test and train split in data, also preprocess
    for split in splits:
        print("creating dataset for: {}".format(split))
        datasets[split] = dataset(split=split,
                                  create_data=args.create_data,
                                  min_occ=args.min_occ)

    i2w = datasets['train'].get_i2w()
    w2i = datasets['train'].get_w2i()

    # print(type(int(datasets['train'].yelp_max_sequence_length)))

    max_sequence_length = datasets['train'].max_sequence_length

    # get training params
    params = dict(
        vocab_size=datasets['train'].vocab_size,
        sos_idx=datasets['train'].sos_idx,
        eos_idx=datasets['train'].eos_idx,
        pad_idx=datasets['train'].pad_idx,
        unk_idx=datasets['train'].unk_idx,
        max_sequence_length=max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        attention=args.attention,
        dataset=args.dataset,
    )

    # init model object
    model = BinaryClassifier(**params)

    if torch.cuda.is_available():
        model = model.cuda()

    # logging
    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    # make dir
    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    # write params to json and save
    with open(os.path.join(save_model_path, 'model_params.json'), 'w') as f:
        json.dump(params, f, indent=4)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    tensor = torch.cuda.FloatTensor if torch.cuda.is_available(
    ) else torch.Tensor
    step = 0

    overall_losses = defaultdict(dict)

    for epoch in range(args.epochs):

        # do train and then test
        for split in splits:

            # create dataloader
            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            # tracker used to track the loss
            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            # start batch wise training/testing
            for iteration, batch in enumerate(data_loader):

                # get batch size
                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                preds = model(batch['input'], batch['length'])

                # classification loss calculation
                loss = nn.BCELoss()(preds,
                                    batch['label'].float().to(preds.device))

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()  # flush grads
                    loss.backward()
                    optimizer.step()
                    step += 1

                # calculate accuracy
                preds = torch.argmax(preds, dim=1)
                ground_truth = torch.argmax(batch['label'], dim=1)

                acc = (preds == ground_truth).float().mean()

                # try sample to verify style classifier is working
                # print(idx2word(batch['target'][0:1], i2w=i2w, pad_idx=w2i['<pad>']))
                # print(batch['label'][0])
                # print("neg: {}, pos: {}".format(style_preds[0:1,0], style_preds[0:1,1]))

                if iteration % args.print_every == 0 or iteration + 1 == len(
                        data_loader):
                    print(
                        "-----------------------------------------------------------------------"
                    )
                    print("%s Batch %04d/%i, Loss %9.4f, Acc %9.4f" %
                          (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), acc))

            # save checkpoint
            if split == 'train':
                # store a fresh dict per epoch; reusing one shared dict object
                # would overwrite every earlier entry with the latest values
                overall_losses[len(overall_losses)] = {
                    'loss': float(loss),
                    'acc': float(acc)
                }
                checkpoint_path = os.path.join(args.save_model_path,
                                               "E%i.pytorch" % epoch)
                torch.save(model.state_dict(), checkpoint_path)
                print("Model saved at %s" % checkpoint_path)

    # write losses to json
    with open(os.path.join(args.save_model_path, 'losses.json'), 'w') as f:
        json.dump(overall_losses, f, indent=4)
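
Note: the classifier script above writes its constructor arguments to
model_params.json and one state dict per epoch, which makes reloading a trained
checkpoint straightforward. The sketch below is an assumption, not part of the
original listing; it reuses the script's save_model_path, args and
BinaryClassifier names and picks the epoch-0 checkpoint as an example.

# Reload sketch (assumption): rebuild the classifier from the saved params
# and restore the weights of one checkpoint on CPU.
import json
import os
import torch

with open(os.path.join(save_model_path, 'model_params.json')) as f:
    params = json.load(f)

checkpoint_path = os.path.join(args.save_model_path, 'E0.pytorch')  # as saved above
clf = BinaryClassifier(**params)
clf.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
clf.eval()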