Example #1
0
    def load_model_and_dataset(checkpt_filename):
        """Restore a trained VAE and its data loader from a checkpoint.

        The checkpoint file must contain the training ``args`` namespace
        and the model ``state_dict``. Returns ``(vae, loader)`` with the
        model already switched to eval mode on CPU.
        """
        checkpoint = torch.load(checkpt_filename)
        saved_args = checkpoint['args']

        # Older checkpoints predate the `conv` option; default it off.
        if not hasattr(saved_args, 'conv'):
            saved_args.conv = False

        from model import VAE, setup_data_loaders

        # Rebuild the model with Normal prior/posterior distributions.
        vae = VAE(z_dim=saved_args.latent_dim,
                  use_cuda=False,
                  prior_dist=dist.Normal(),
                  q_dist=dist.Normal(),
                  conv=saved_args.conv)
        vae.load_state_dict(checkpoint['state_dict'], strict=False)
        vae.eval()

        # Data loader built from the same saved arguments.
        return vae, setup_data_loaders(saved_args, use_cuda=False)
Example #2
0
def main(args):
    """Train a VAE on MNIST and save per-epoch samples plus a loss curve.

    Decoded samples and the loss plot are written to ``./vae_results/``.

    Fixes: the original honoured ``args.cuda`` for the DataLoader kwargs
    but used ``torch.cuda.is_available()`` for model/sample placement, so
    a CPU-only run could not be forced; it also used the deprecated
    ``Variable`` wrapper and sampled without ``torch.no_grad()``.
    """
    # Ensure the output folder exists (race-free, creates parents too).
    os.makedirs('./vae_results/', exist_ok=True)

    # Honour the cuda flag, but never select an unavailable device.
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Load data
    torch.manual_seed(args.seed)
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.ToTensor()),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    # Model and optimizer on the selected device.
    model = VAE().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Train, then decode a batch of 64 random latents after every epoch.
    loss_list = []
    for epoch in range(1, args.epochs + 1):
        model.train()
        loss_list.append(train(epoch, model, train_loader, optimizer))
        model.eval()
        with torch.no_grad():  # sampling needs no autograd graph
            # NOTE(review): latent size 20 is assumed to match VAE's z_dim.
            sample = torch.randn(64, 20, device=device)
            sample = model.decode(sample).cpu()
        save_image(sample.view(64, 1, 28, 28),
                   'vae_results/sample_' + str(epoch) + '.png')
    plt.plot(range(len(loss_list)), loss_list, '-o')
    plt.savefig('vae_results/vae_loss_curve.png')
def load_model(path):
    """Load a serialized VAE checkpoint from ``path`` and return it in eval mode.

    The file must contain a dict with the constructor kwargs under
    ``"model"`` and the trained weights under ``"model_state_dict"``.
    """
    checkpoint = torch.load(path)

    vae = VAE(**checkpoint["model"])
    vae.load_state_dict(checkpoint["model_state_dict"])
    vae.eval()

    return vae
Example #4
0
def main() -> None:
    """Reconstruct every sentence in the input file through the VAE.

    Each batch is encoded to its posterior mean (no sampling), the mean
    is decoded with beam search, and the best hypothesis per sentence is
    written to ``args.output_file``, one per line.
    """
    tokenizer = Tokenizer(args.vocab_file)
    vocabulary_size = len(tokenizer)
    dataset = SentenceDataset(args.input_file, tokenizer=tokenizer.encode)
    loader = DataLoader(dataset,
                        args.batch_size,
                        shuffle=False,
                        collate_fn=dataset.collate_fn,
                        drop_last=False)

    searcher = BeamSearch(tokenizer.eos_index, beam_size=args.search_width)

    # Dropout disabled for deterministic inference.
    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=0.,
        word_dropout=0.,
        dropped_index=tokenizer.unk_index,
    ).to(device)
    model.load_state_dict(torch.load(args.checkpoint_file,
                                     map_location=device))
    model.eval()

    print('Generating sentence...')
    all_hypotheses = []
    with torch.no_grad():
        for s in tqdm(loader):
            s = s.to(device)
            # Per-sample lengths, counting non-padding tokens.
            length = torch.sum(s != tokenizer.pad_index, dim=-1)
            bsz = s.shape[0]

            mean, logvar = model.encode(s, length)
            # Use the posterior mean directly (sampling deliberately skipped)
            # so reconstruction is deterministic.
            # z = model.reparameterize(mean, logvar)
            z = mean

            # Project latent code to the decoder's initial hidden state.
            # Assumes fc_hidden emits num_layers * dim_hidden features per
            # sample — TODO confirm against the VAE definition.
            hidden = model.fc_hidden(z)
            hidden = hidden.view(bsz, -1,
                                 model.dim_hidden).transpose(0,
                                                             1).contiguous()

            # Every beam starts at the BOS token.
            start_predictions = torch.zeros(bsz, device=device).fill_(
                tokenizer.bos_index).long()
            start_state = {'hidden': hidden.permute(1, 0, 2)}
            predictions, log_probabilities = searcher.search(
                start_predictions, start_state, model.step)

            for preds in predictions:
                # Keep only the top beam and filter out EOS tokens.
                tokens = preds[0]
                tokens = tokens[tokens != tokenizer.eos_index].tolist()
                all_hypotheses.append(tokenizer.decode(tokens))
    print('Done')

    with open(args.output_file, 'w') as f:
        f.write('\n'.join(all_hypotheses))
Example #5
0
def main(ARGS, device):
    """Prepare the IMDB train/val datasets, build the VAE and train it.

    After every epoch a few sentences are sampled via ``model.inference()``
    and printed, and the model weights are saved under ``trained_models/``.

    Fix: the original called ``os.listdir("trained_models")`` without
    ensuring the directory exists, crashing with FileNotFoundError on a
    fresh workspace.
    """
    data_splits = ['train', 'val']
    datasets = {
        split: IMDB(ARGS.data_dir, split, ARGS.max_sequence_length,
                    ARGS.min_word_occ, ARGS.create_data)
        for split in data_splits
    }
    pretrained_embeddings = datasets['train'].get_pretrained_embeddings(
        ARGS.embed_dim).to(device)
    model = VAE(
        datasets['train'].vocab_size,
        ARGS.batch_size,
        device,
        pretrained_embeddings=pretrained_embeddings,
        trainset=datasets['train'],
        max_sequence_length=ARGS.max_sequence_length,
        lstm_dim=ARGS.lstm_dim,
        z_dim=ARGS.z_dim,
        embed_dim=ARGS.embed_dim,
        n_lstm_layers=ARGS.n_lstm_layers,
        kl_anneal_type=ARGS.kl_anneal_type,
        kl_anneal_x0=ARGS.kl_anneal_x0,
        kl_anneal_k=ARGS.kl_anneal_k,
        kl_fbits_lambda=ARGS.kl_fbits_lambda,
        word_keep_rate=ARGS.word_keep_rate,
    )
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters())

    print('Starting training process...')

    # Create the checkpoint directory if needed; listdir would otherwise
    # raise FileNotFoundError on a fresh workspace.
    os.makedirs("trained_models", exist_ok=True)
    # NOTE(review): the file index is fixed before the loop, so every epoch
    # overwrites the same checkpoint file — confirm this is intended.
    amount_of_files = len(os.listdir("trained_models"))
    for epoch in range(ARGS.epochs):
        elbos = run_epoch(model, datasets, device, optimizer)
        train_elbo, val_elbo = elbos
        print(
            f"[Epoch {epoch} train elbo: {train_elbo}, val_elbo: {val_elbo}]")

        # Perform inference on the trained model
        with torch.no_grad():
            model.eval()
            samples = model.inference()
            print(*idx2word(samples,
                            i2w=datasets['train'].i2w,
                            pad_idx=datasets['train'].pad_idx),
                  sep='\n')

        model.save(f"trained_models/{amount_of_files + 1}.model")
Example #6
0
def main() -> None:
    """Interpolate between two sentences in the VAE latent space.

    Encodes both input sentences, linearly blends their latent codes at
    nine intermediate mixing ratios and beam-decodes each blend.
    """
    tokenizer = Tokenizer(args.vocab_file)
    vocabulary_size = len(tokenizer)

    searcher = BeamSearch(tokenizer.eos_index, beam_size=args.search_width)

    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=0.,
        word_dropout=0.,
        dropped_index=tokenizer.unk_index,
    ).to(device)
    model.load_state_dict(torch.load(args.checkpoint_file,
                                     map_location=device))
    model.eval()

    sentence1 = input('Please input sentence1: ')
    sentence2 = input('Please input sentence2: ')

    def encode_sentence(text):
        # Wrap with BOS/EOS and encode to the latent mean.
        ids = ([tokenizer.bos_index] + tokenizer.encode(text) +
               [tokenizer.eos_index])
        latent, _ = model.encode(
            torch.tensor([ids]).to(device),
            torch.tensor([len(ids)]).to(device))
        return latent

    z1 = encode_sentence(sentence1)
    z2 = encode_sentence(sentence2)

    print("\nGenerate intermediate sentences")
    print("      %s" % sentence1)
    for step in range(1, 10):
        # Linear blend: weight shifts from z1 toward z2.
        blended = (1 - 0.1 * step) * z1 + 0.1 * step * z2
        hidden = model.fc_hidden(blended)
        hidden = (hidden
                  .view(1, -1, model.dim_hidden)
                  .transpose(0, 1)
                  .contiguous())

        start_predictions = torch.zeros(1, device=device).fill_(
            tokenizer.bos_index).long()
        start_state = {'hidden': hidden.permute(1, 0, 2)}
        predictions, log_probabilities = searcher.search(
            start_predictions, start_state, model.step)

        # Top beam, with EOS tokens filtered out.
        best = predictions[0, 0]
        best = best[best != tokenizer.eos_index].tolist()
        print("[%d:%d] %s" % (10 - step, step, tokenizer.decode(best)))
    print("      %s" % sentence2)
Example #7
0
class Generator(object):
    """Decodes latent vectors into sentences with a pretrained sentence VAE."""

    def __init__(self):
        # Only the vocabulary is needed here; the data iterators are discarded.
        _, _, self.vocab = get_iterators(opt)

        self.vae = VAE(opt)
        # Initialise the embedding table from the vocabulary's pretrained vectors.
        self.vae.embedding.weight.data.copy_(self.vocab.vectors)

        self.vae = get_cuda(self.vae)
        checkpoint = T.load('data/saved_models/vae_model.121.pyt')
        self.vae.load_state_dict(checkpoint['vae_dict'])
        self.vae.eval()
        del checkpoint

    def generate(self, encodings):
        """Decode one sentence per latent code in ``encodings``.

        Each row is decoded token-by-token with temperature sampling until
        the end token is produced or a length cap is hit. Returns the list
        of generated sentences with special tokens stripped.
        """
        sentences = []
        for z in encodings.numpy():
            z = get_cuda(T.from_numpy(z)).view((1, -1))
            # Fresh zero LSTM state (h, c) for every sentence.
            h_0 = get_cuda(T.zeros(opt.n_layers_G, 1, opt.n_hidden_G))
            c_0 = get_cuda(T.zeros(opt.n_layers_G, 1, opt.n_hidden_G))
            G_hidden = (h_0, c_0)
            # Seed the decoder with the start-of-sentence token.
            G_inp = T.LongTensor(1, 1).fill_(self.vocab.stoi[opt.start_token])
            G_inp = get_cuda(G_inp)
            sentence = opt.start_token + " "
            num_words = 0
            while G_inp[0][0].item() != self.vocab.stoi[opt.end_token]:
                with T.autograd.no_grad():
                    logit, G_hidden, _ = self.vae(None, G_inp, z, G_hidden)
                # Temperature-scaled softmax, then sample the next token.
                probs = F.softmax(logit[0] / TEMPERATURE, dim=1)
                G_inp = T.multinomial(probs, 1)
                sentence += (self.vocab.itos[G_inp[0][0].item()] + " ")
                num_words += 1
                if num_words > 64:
                    break
            # Strip special tokens from the surface form.
            sentence = sentence.replace('<unk>',
                                        '').replace('<sos>', '').replace(
                                            '<eos>', '').replace('<pad>', '')
            sentences.append(sentence)

        return sentences
Example #8
0
def main(args):
    """Load a YAML config and run MIDI separation through a VAE.

    A timestamped sub-directory of the configured ``separate.save_path``
    is created for the outputs.

    Fix: the original used ``os.mkdir``, which raises if the configured
    parent directory does not yet exist; ``os.makedirs`` creates the full
    path and is race-free with ``exist_ok=True``. The redundant
    ``conf = None`` pre-assignment was removed.
    """
    with open(args.config, 'r') as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)
        conf = config['separate']
        model_params = config['model']
        preprocess_params = config['preprocessor']
    date_time = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime())
    conf['save_path'] = os.path.join(conf['save_path'], date_time)

    os.makedirs(conf['save_path'], exist_ok=True)

    separator = MidiSeparator(
        conf['songs_path'], conf['save_path'], conf['save_reconstructed'],
        model_params['roll_dim'], model_params['time_step'],
        preprocess_params['low_crop'], preprocess_params['high_crop'],
        preprocess_params['note_num'], preprocess_params['longest'])
    # NOTE(review): no checkpoint is loaded, so the VAE runs with freshly
    # initialised weights — confirm this is intended.
    model = VAE(model_params['roll_dim'], model_params['hidden_dim'],
                model_params['infor_dim'], model_params['time_step'], 12)

    model.eval()
    separator.import_midi_from_folder(model)
Example #9
0
def main() -> None:
    """Sample random latent vectors and beam-decode them into sentences.

    Draws ``args.sample_size`` latents from a standard normal, decodes
    each with beam search and prints the best hypothesis per sample.
    """
    tokenizer = Tokenizer(args.vocab_file)
    vocabulary_size = len(tokenizer)

    searcher = BeamSearch(tokenizer.eos_index, beam_size=args.search_width)

    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=0.,
        word_dropout=0.,
        dropped_index=tokenizer.unk_index,
    ).to(device)
    model.load_state_dict(torch.load(args.checkpoint_file,
                                     map_location=device))
    model.eval()

    # Standard-normal latents, one per requested sample.
    batch = args.sample_size
    latent = torch.randn(batch, args.dim_latent, device=device)
    decoder_hidden = (model.fc_hidden(latent)
                      .view(batch, -1, model.dim_hidden)
                      .transpose(0, 1)
                      .contiguous())

    # Every beam starts at the BOS token.
    start_predictions = (torch.zeros(batch, device=device)
                         .fill_(tokenizer.bos_index)
                         .long())
    start_state = {'hidden': decoder_hidden.permute(1, 0, 2)}
    predictions, log_probabilities = searcher.search(start_predictions,
                                                     start_state, model.step)

    for pred in predictions:
        # Top beam only, with EOS tokens filtered out.
        best = pred[0][pred[0] != tokenizer.eos_index]
        print(tokenizer.decode(best.tolist()))
Example #10
0
class TrainingModel(object):
    def __init__(self, args, config):
        """Build the ranker + VAE training harness.

        Seeds all RNGs, constructs the data generator, the Ranker and the
        VAE (which shares the ranker's embedding tables), both optimizers,
        and optionally resumes models and optimizers from a checkpoint.
        """
        # Expose every config entry as an attribute (self.seed, self.patience, ...).
        self.__dict__.update(config)
        self.config = config
        random.seed(self.seed)
        torch.manual_seed(self.seed)
        np.random.seed(self.seed)

        if use_cuda:
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)
            torch.cuda.set_device(args.gpu)

        #torch.backends.cudnn.benchmark = False
        #torch.backends.cudnn.deterministic = True

        self.message = args.m
        self.data_generator = DataGenerator(self.config)
        self.vocab_size = self.data_generator.vocab_size
        self.ent_size = self.data_generator.ent_size

        self.model_name = 'IERM'

        # Checkpoint path derives from the run message when one is given.
        if args.m != "":
            self.saveModeladdr = './trainModel/checkpoint_%s_%s.pkl' % (
                self.model_name, args.m)
        else:
            self.saveModeladdr = './trainModel/' + args.save

        # The VAE reuses the ranker's word/entity embedding tables.
        self.model = Ranker(self.vocab_size, self.ent_size, self.config)
        self.VAE_model = VAE(self.vocab_size, self.ent_size,
                             self.model.word_emb, self.model.ent_emb,
                             self.config)

        if use_cuda:
            self.model.cuda()
            self.VAE_model.cuda()

        # A dedicated learning rate is used while pretraining is enabled.
        vae_lr = self.config[
            'pretrain_lr'] if config['pretrain_step'] > 0 else config['vae_lr']
        self.vae_optimizer = getOptimizer(config['vae_optim'],
                                          self.VAE_model.parameters(),
                                          lr=vae_lr,
                                          betas=(0.99, 0.99))
        self.ranker_optimizer = getOptimizer(
            config['ranker_optim'],
            self.model.parameters(),
            lr=config['ranker_lr'],
            weight_decay=config['weight_decay'])

        # Parameter counts, kept for the (commented) debug print below.
        vae_model_size = sum(p.numel() for p in self.VAE_model.parameters())
        ranker_size = sum(p.numel() for p in self.model.parameters())
        #print 'Model size: ', vae_model_size, ranker_size
        #exit(-1)
        if args.resume and os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            #print checkpoint.keys()
            self.model.load_state_dict(checkpoint['rank_state_dict'])
            self.VAE_model.load_state_dict(checkpoint['vae_state_dict'])
            self.vae_optimizer.load_state_dict(checkpoint['vae_optimizer'])
            self.ranker_optimizer.load_state_dict(checkpoint['rank_optimizer'])
        else:
            print("Creating a new model")

        self.timings = defaultdict(list)  #record the loss iterations
        self.evaluator = rank_eval()
        self.epoch = 0
        self.step = 0

        # KL weight is fixed at 1; kl_anneal_function exists but its use
        # in pretraining is commented out.
        self.kl_weight = 1

        if args.visual:
            self.config['visual'] = True
            self.writer = SummaryWriter('runs/' + args.m)
        else:
            self.config['visual'] = False
        self.reconstr_loss = nn.MSELoss()

    def add_values(self, iter, value_dict):
        """Log every metric in value_dict to TensorBoard at this iteration."""
        for key, value in value_dict.items():
            self.writer.add_scalar(key, value, iter)

    def adjust_learning_rate(self, optimizer, lr, decay_rate=.5):
        """Set every param group's learning rate to lr * decay_rate."""
        new_lr = lr * decay_rate
        for group in optimizer.param_groups:
            group['lr'] = new_lr

    def kl_anneal_function(self, anneal_function, step, k=0.0025, x0=2500):
        """Return the KL annealing weight in [0, 1] for a training step.

        anneal_function: 'logistic' (sigmoid centred at x0 with slope k)
        or 'linear' (ramp from 0 to 1 over the first x0 steps).

        Fixes: under Python 2 (this file's dialect) `step / x0` was integer
        division, so the 'linear' schedule returned 0 until step >= x0
        instead of ramping; an unknown anneal_function silently returned
        None — now it raises.
        """
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            # float() forces true division on Python 2 as well.
            return min(1.0, float(step) / x0)
        raise ValueError('unknown anneal_function: %r' % (anneal_function,))

    def vae_loss(self, input_qw, reconstr_w, input_qe, reconstr_e, prior_mean,
                 prior_var, posterior_mean, posterior_var, posterior_log_var):
        """Negative ELBO for the topic VAE: reconstruction + weighted KL.

        input_qw / input_qe are query word / entity id sequences converted
        to bag-of-words targets. reconstr_w / reconstr_e appear to be
        log-probabilities over the vocabularies (the code takes
        torch.exp of them) — TODO confirm against the VAE decoder.
        Returns (loss_sum, KL_sum, RL_w_sum, RL_e_sum), each summed over
        the batch.
        """
        # Reconstruction term (skipped for words when reconstructing entities only)
        if self.config['reconstruct'] != 'entity':
            input_qw_bow = to_bow(input_qw, self.vocab_size)
            input_qw_bow = Tensor2Varible(torch.tensor(input_qw_bow).float())
            #reconstr_w = torch.log_softmax(reconstr_w + 1e-10,dim=1)
            #RL_w = -torch.sum(input_qw_bow * reconstr_w , dim=1)
            #RL_w = self.reconstr_loss(reconstr_w,input_qw_bow)
            # Bernoulli-style cross-entropy; log(1 - exp(x)) is -inf at
            # x == 0, so reconstr_w is presumed strictly negative.
            RL_w = -torch.sum(
                input_qw_bow * reconstr_w +
                (1 - input_qw_bow) * torch.log(1 - torch.exp(reconstr_w)),
                dim=1)
        else:
            RL_w = Tensor2Varible(torch.tensor([0]).float())
        if self.config['reconstruct'] != 'word':
            input_qe_bow = to_bow(input_qe, self.ent_size)
            input_qe_bow = Tensor2Varible(torch.tensor(input_qe_bow).float())
            #RL_e = -torch.sum(input_qe_bow * reconstr_e, dim=1)
            #RL_e = self.reconstr_loss(reconstr_e,input_qe_bow)
            RL_e = -torch.sum(
                input_qe_bow * reconstr_e +
                (1 - input_qe_bow) * torch.log(1 - torch.exp(reconstr_e)),
                dim=1)
        else:
            RL_e = Tensor2Varible(torch.tensor([0]).float())

        # KL term: closed form between diagonal Gaussians
        # var division term
        var_division = torch.sum(posterior_var / prior_var, dim=1)
        # diff means term
        diff_means = prior_mean - posterior_mean
        diff_term = torch.sum((diff_means * diff_means) / prior_var, dim=1)
        # logvar det division term
        # NOTE(review): prior_var.log().sum() reduces over ALL dims while
        # the posterior term reduces over dim=1 — this is consistent only
        # if the prior is shared across the batch; confirm shapes.
        logvar_det_division = \
            prior_var.log().sum() - posterior_log_var.sum(dim=1)
        # combine terms
        KL = 0.5 * (var_division + diff_term - self.model.intent_num +
                    logvar_det_division)

        loss = self.kl_weight * KL + RL_w + RL_e
        #loss = 0.001 * KL + RL_w + RL_e

        return loss.sum(), KL.sum(), RL_w.sum(), RL_e.sum()

    def pretraining(self):
        """Pretrain the VAE alone for config['pretrain_step'] steps.

        Logs averaged losses every pretrain_freq steps, saves a checkpoint
        at the end, and restores the regular VAE learning rate afterwards.
        No-op when pretrain_step <= 0.
        """
        if self.pretrain_step <= 0:
            return

        train_start_time = time.time()
        data_reader = self.data_generator.pretrain_reader(self.pretrain_bs)
        total_loss = 0.
        total_KL_loss = 0.
        total_RLw_loss = 0.
        total_RLe_loss = 0.
        for step in xrange(self.pretrain_step):
            input_qw, input_qe = next(data_reader)
            #self.kl_weight = self.kl_anneal_function('logistic', step)
            topic_e, vae_loss, kl_loss, rl_w_loss, rl_e_loss = self.train_VAE(
                input_qw, input_qe)
            # train_VAE only computes the loss; backward + optimizer step
            # are performed here.
            vae_loss.backward()
            torch.nn.utils.clip_grad_value_(
                self.VAE_model.parameters(),
                self.clip_grad)  # clip_grad_norm(, )

            self.vae_optimizer.step()

            # Detach for logging/accumulation.
            vae_loss = vae_loss.data

            #print ('VAE loss: %.3f\tKL: %.3f\tRL_w:%.3f\tRL_e:%.3f' % (vae_loss, kl_loss, rl_w_loss, rl_e_loss))

            if torch.isnan(vae_loss):
                print("Got NaN cost .. skipping")
                # NOTE(review): exit(-1) aborts the whole process, so the
                # `continue` below is unreachable dead code.
                exit(-1)
                continue

            #if self.config['visual']:
            #    self.add_values(step, {'vae_loss': vae_loss, 'kl_loss': kl_loss, 'rl_w_loss': rl_w_loss,
            #                          'rl_e_loss': rl_e_loss, 'kl_weight': self.kl_weight})

            total_loss += vae_loss
            total_KL_loss += kl_loss
            total_RLw_loss += rl_w_loss
            total_RLe_loss += rl_e_loss

            # Periodic averaged logging; accumulators reset afterwards.
            if step != 0 and step % self.pretrain_freq == 0:
                total_loss /= self.pretrain_freq
                total_KL_loss /= self.pretrain_freq
                total_RLw_loss /= self.pretrain_freq
                total_RLe_loss /= self.pretrain_freq
                print('Step: %d\t Elapsed:%.2f' %
                      (step, time.time() - train_start_time))
                print(
                    'Pretrain VAE loss: %.3f\tKL: %.3f\tRL_w:%.3f\tRL_e:%.3f' %
                    (total_loss, total_KL_loss, total_RLw_loss,
                     total_RLe_loss))
                if self.config['visual']:
                    self.add_values(
                        step, {
                            'vae_loss': total_loss,
                            'kl_loss': total_KL_loss,
                            'rl_w_loss': total_RLw_loss,
                            'rl_e_loss': total_RLe_loss,
                            'kl_weight': self.kl_weight
                        })
                total_loss = 0.
                total_KL_loss = 0.
                total_RLw_loss = 0.
                total_RLe_loss = 0.
                print '=============================================='
                #self.generate_beta_phi_3(show_topic_limit=5)

        self.save_checkpoint(message=self.message + '-pretraining')
        print('Pretraining end')
        #recovering the learning rate
        self.adjust_learning_rate(self.vae_optimizer, self.config['vae_lr'], 1)

    def trainIters(self, ):
        """Main joint training loop for the ranker and the VAE.

        Trains on pairwise samples for self.steps steps, validates every
        eval_freq steps (checkpointing on improved ndcg@10), logs averaged
        losses every train_freq steps, and stops early when patience is
        exhausted.
        """
        self.step = 0
        train_start_time = time.time()
        patience = self.patience

        best_ndcg10 = 0.0
        last_ndcg10 = 0.0

        data_reader = self.data_generator.pair_reader(self.batch_size)
        total_loss = 0.0
        total_rank_loss = 0.
        total_vae_loss = 0.
        total_KL_loss = 0.
        total_RLw_loss = 0.
        total_RLe_loss = 0.

        for step in xrange(self.steps):
            # Pairwise sample: query + positive and negative documents.
            out = next(data_reader)
            input_qw, input_qe, input_dw_pos, input_de_pos, input_dw_neg, input_de_neg = out
            rank_loss, vae_total_loss, KL_loss, RL_w_loss, RL_e_loss \
                = self.train(input_qw,input_qe,input_dw_pos,input_de_pos,input_dw_neg,input_de_neg)

            # NaN batches are skipped without counting toward self.step.
            cur_total_loss = rank_loss + vae_total_loss
            if torch.isnan(cur_total_loss):
                print("Got NaN cost .. skipping")
                continue
            self.step += 1
            total_loss += cur_total_loss
            total_rank_loss += rank_loss
            total_vae_loss += vae_total_loss
            total_KL_loss += KL_loss
            total_RLw_loss += RL_w_loss
            total_RLe_loss += RL_e_loss

            # Periodic validation; eval_freq == -1 disables it.
            if self.eval_freq != -1 and self.step % self.eval_freq == 0:
                with torch.no_grad():
                    valid_performance = self.test(
                        valid_or_test='valid',
                        source=self.config['click_model'])
                    current_ndcg10 = valid_performance['ndcg@10']

                    if current_ndcg10 > best_ndcg10:
                        print 'Got better result, save to %s' % self.saveModeladdr
                        best_ndcg10 = current_ndcg10
                        patience = self.patience
                        self.save_checkpoint(message=self.message)

                        #self.generate_beta_phi_3(show_topic_limit=5)
                    elif current_ndcg10 <= last_ndcg10 * self.cost_threshold:
                        # No meaningful improvement over the last check.
                        patience -= 1
                    last_ndcg10 = current_ndcg10

            # Periodic averaged logging; accumulators reset afterwards.
            if self.step % self.train_freq == 0:
                total_loss /= self.train_freq
                total_rank_loss /= self.train_freq
                total_vae_loss /= self.train_freq
                total_KL_loss /= self.train_freq
                total_RLw_loss /= self.train_freq
                total_RLe_loss /= self.train_freq

                self.timings['train'].append(total_loss)
                print('Step: %d\t Elapsed:%.2f' %
                      (step, time.time() - train_start_time))
                print(
                    'Train total loss: %.3f\tRank loss: %.3f\tVAE loss: %.3f' %
                    (total_loss, total_rank_loss, total_vae_loss))
                print('KL loss: %.3f\tRL W: %.3f\tRL E: %.3f' %
                      (total_KL_loss, total_RLw_loss, total_RLe_loss))
                print('Patience left: %d' % patience)

                if self.config['visual']:
                    self.add_values(
                        step, {
                            'Train vae_loss': total_loss,
                            'Train kl_loss': total_KL_loss,
                            'Train rl_w_loss': total_RLw_loss,
                            'Train rl_e_loss': total_RLe_loss,
                            'Train Rank loss': total_rank_loss
                        })

                total_loss = 0
                total_rank_loss = 0.
                total_vae_loss = 0.
                total_KL_loss = 0.
                total_RLw_loss = 0.
                total_RLe_loss = 0.

            # Early stopping once validation stops improving.
            if patience < 0:
                print 'patience runs out...'
                break

        print 'Patience___: ', patience
        print("All done, exiting...")

    def test(self, valid_or_test, source):
        """Evaluate the ranker on the validation, test, or NTCIR sets.

        valid_or_test: 'valid', 'ntcir13', 'ntcir14', or anything else for
        the regular test set. Returns a dict of metric name -> mean value;
        for non-validation runs, per-document predictions are also written
        under ./results/.
        """
        predicted = []
        results = defaultdict(list)

        # Pick the data source; NTCIR sets force human labels.
        if valid_or_test == 'valid':
            is_test = False
            data_addr = self.valid_rank_addr
            data_source = self.data_generator.pointwise_reader_evaluation(
                data_addr, is_test=is_test, label_type=source)
        elif valid_or_test == 'ntcir13' or valid_or_test == 'ntcir14':
            is_test = True
            data_source = self.data_generator.pointwise_ntcir_generator(
                valid_or_test)
            source = 'HUMAN'
        else:
            is_test = True
            data_addr = self.test_rank_addr
            data_source = self.data_generator.pointwise_reader_evaluation(
                data_addr, is_test=is_test, label_type=source)
        # NOTE(review): time.clock() was removed in Python 3.8; this is
        # Python 2 code (see the print statements below).
        start = time.clock()
        count = 0
        for out in data_source:
            (qid, dids, input_qw, input_qe, input_dw, input_de, gt_rels) = out
            # Binarise graded labels per the label source.
            gt_rels = map(lambda t: score2cutoff(source, t), gt_rels)
            rels_predicted = self.predict(input_qw, input_qe, input_dw,
                                          input_de).view(-1).cpu().numpy()

            result = self.evaluator.eval(gt_rels, rels_predicted)
            for did, gt, pred in zip(dids, gt_rels, rels_predicted):
                predicted.append((qid, did, pred, gt))

            for k, v in result.items():
                results[k].append(v)
            count += 1
        elapsed = (time.clock() - start)
        print('Elapsed:%.3f\tAvg:%.3f' % (elapsed, elapsed / count))
        # Average each metric over all queries.
        performances = {}

        for k, v in results.items():
            performances[k] = np.mean(v)

        print '------Source: %s\tPerformance-------:' % source
        print 'Validating...' if valid_or_test == 'valid' else 'Testing'
        print 'Message: %s' % self.message
        print 'Source: %s' % source
        print performances

        # Dump per-document predictions for test runs.
        if valid_or_test != 'valid':
            path = './results/' + self.message + '_' + valid_or_test + '_' + source
            if not os.path.exists(path):
                os.makedirs(path)
            # NOTE(review): out_file is never closed explicitly.
            out_file = open('%s/%s.predicted.txt' % (path, self.model_name),
                            'w')
            for qid, did, pred, gt in predicted:
                print >> out_file, '\t'.join([qid, did, str(pred), str(gt)])

        return performances

    def get_text(self, input, map_fun):
        """Map ids to strings until the first 0 (padding) and space-join them."""
        pieces = []
        for token in input:
            if token == 0:
                break
            pieces.append(map_fun(token))
        return ' '.join(pieces)

    def generate_beta_phi_3(self, topK=10, show_topic_limit=-1):
        """Decode the VAE's topic distributions into readable word/entity lists.

        topK: how many items per topic to request from the model.
        show_topic_limit: cap on the number of topics (-1 = all
        config['intent_num'] topics). Writes both lists to
        ./topic/<args.m>/topic-words.txt and returns (topics, topics_ents).
        """
        # beta/phi: per-topic top word / entity index tensors from the VAE.
        beta, phi = self.VAE_model.infer_topic_dis(topK)
        topics = defaultdict(list)
        topics_ents = defaultdict(list)
        show_topic_num = self.config[
            'intent_num'] if show_topic_limit == -1 else show_topic_limit

        for i in range(show_topic_num):
            idxs = beta[i]
            eidxs = phi[i]
            component_words = [
                self.data_generator.id2word[idx] for idx in idxs.cpu().numpy()
            ]
            # Entity ids go through the new->old remapping before lookup.
            component_ents = [
                self.data_generator.id2ent[self.data_generator.new2old[idx]]
                for idx in eidxs.cpu().numpy()
            ]
            topics[i] = component_words
            topics_ents[i] = component_ents

        print '--------Topic-Word-------'
        prefix = ('./topic/%s/' % args.m)
        if not os.path.exists(prefix):
            os.makedirs(prefix)
        outfile = open(prefix + 'topic-words.txt', 'w')
        for k in topics:
            print >> outfile, (str(k) + ' : ' + ' '.join(topics[k]))
            print >> outfile, (str(k) + ' : ' + ' '.join(topics_ents[k]))
        return topics, topics_ents

    def run_test_topic(self, out_file_name, topK, topicNum):
        """For each test query, write its top-3 inferred topics to a file.

        out_file_name: destination path for the report.
        topK: items per topic passed to generate_beta_phi_3.
        topicNum: NOTE(review) — accepted but never used in this body.
        """
        topics_words, topics_ents = self.generate_beta_phi_3(topK)
        data_addr = self.test_rank_addr
        data_source = self.data_generator.pointwise_reader_evaluation(
            data_addr, is_test=True, label_type=self.config['click_model'])
        out_file = open(out_file_name, 'w')
        with torch.no_grad():
            self.VAE_model.eval()
            self.model.eval()
            for i, out in enumerate(data_source):
                (qid, dids, input_qw, input_qe, input_dw, input_de,
                 gt_rels) = out
                # theta: the query's topic mixture from the VAE encoder.
                theta = self.VAE_model.get_theta(input_qw, input_qe)
                input_qw = input_qw[0]
                input_qe = input_qe[0]

                input_w = self.get_text(
                    input_qw, lambda w: self.data_generator.id2word[w])
                input_e = self.get_text(
                    input_qe, lambda e: self.data_generator.id2ent[
                        self.data_generator.new2old[e]])

                # Three highest-probability topics for this query.
                theta = theta[0].data.cpu().numpy()
                top_indices = np.argsort(theta)[::-1][:3]

                #print '========================='
                print >> out_file, 'Query: ', input_w
                print >> out_file, 'Entity: ', input_e
                for j, k in enumerate(top_indices):
                    ws = topics_words[k]
                    es = topics_ents[k]
                    print >> out_file, '%d Word Topic %d: %s' % (j, k,
                                                                 ' '.join(ws))
                    print >> out_file, '%d Entity Topic %d: %s' % (
                        j, k, ' '.join(es))

    def generate_topic_word_ent(self, out_file, topK=10):
        """Write each test query next to its VAE topic-word/entity reconstruction.

        out_file: destination path (NOTE(review): the parameter is rebound
        to the open file handle below, shadowing the path).
        topK: how many reconstructed words/entities to request per query.
        """
        print 'Visualizing ...'
        data_addr = self.test_rank_addr
        data_source = self.data_generator.pointwise_reader_evaluation(
            data_addr, is_test=True, label_type=self.config['click_model'])
        out_file = open(out_file, 'w')
        with torch.no_grad():
            self.VAE_model.eval()
            self.model.eval()
            for i, out in enumerate(data_source):
                (input_qw, input_qe, input_dw, input_de, gt_rels) = out
                # Top reconstructed word/entity indices for this query.
                _, word_indices, ent_indices = self.VAE_model.get_topic_words(
                    input_qw, input_qe, topK=topK)
                word_indices = word_indices[0].data.cpu().numpy()
                ent_indices = ent_indices[0].data.cpu().numpy()

                #print 'ent_indices: ', ent_indices
                #print 'word_indices: ', word_indices
                input_qw = input_qw[0]
                input_qe = input_qe[0]

                input_w = self.get_text(
                    input_qw, lambda w: self.data_generator.id2word[w])
                input_e = self.get_text(
                    input_qe, lambda e: self.data_generator.id2ent[
                        self.data_generator.new2old[e]])
                reconstuct_w = self.get_text(
                    word_indices, lambda w: self.data_generator.id2word[w])
                reconstuct_e = self.get_text(
                    ent_indices, lambda e: self.data_generator.id2ent[
                        self.data_generator.new2old[e]])

                print >> out_file, ('%d: Word: %s\tRecons: %s' %
                                    (i + 1, input_w, reconstuct_w))
                print >> out_file, ('%d: Ent: %s\tRecons: %s' %
                                    (i + 1, input_e, reconstuct_e))

    def train_VAE(self, input_qw, input_qe):
        """Run one forward pass of the topic VAE and compute its loss.

        No backward pass or optimizer step happens here: the caller
        (`train`) backpropagates through the combined ranking + VAE
        objective and steps both optimizers itself.
        """
        self.VAE_model.train()
        self.VAE_model.zero_grad()
        self.vae_optimizer.zero_grad()

        outputs = self.VAE_model(input_qw, input_qe)
        (topic_embeddings, logPw, logPe, prior_mean, prior_variance,
         poster_mu, poster_sigma, poster_log_sigma) = outputs

        vae_total_loss, KL, RL_w, RL_e = self.vae_loss(
            input_qw, logPw, input_qe, logPe, prior_mean, prior_variance,
            poster_mu, poster_sigma, poster_log_sigma)

        # KL / reconstruction terms are returned as .data: diagnostics only,
        # detached from the graph.
        return topic_embeddings, vae_total_loss, KL.data, RL_w.data, RL_e.data

    def train(self, input_qw, input_qe, input_dw_pos, input_de_pos,
              input_dw_neg, input_de_neg):
        """One pairwise training step.

        Objective = hinge ranking loss + intent_lambda * VAE loss +
        orthogonality penalty, backpropagated through both the ranker and
        the VAE in a single backward pass.
        """
        # Training mode enables dropout in the ranker.
        self.model.train()
        self.model.zero_grad()
        self.ranker_optimizer.zero_grad()

        (topic_embeddings, vae_total_loss, KL_loss,
         RL_w_loss, RL_e_loss) = self.train_VAE(input_qw, input_qe)

        pos_score, pos_orth = self.model(input_qw, input_qe, input_dw_pos,
                                         input_de_pos, topic_embeddings)
        neg_score, neg_orth = self.model(input_qw, input_qe, input_dw_neg,
                                         input_de_neg, topic_embeddings)

        # Pairwise hinge loss with margin 1: positive doc should outscore
        # the negative doc by at least 1.
        rank_loss = torch.sum(torch.clamp(1.0 - pos_score + neg_score, min=0))
        orth_loss = (pos_orth + neg_orth) / 2
        total_loss = (rank_loss +
                      self.config['intent_lambda'] * vae_total_loss +
                      orth_loss)
        total_loss.backward()

        # Value-clip gradients of both networks to curb exploding gradients.
        torch.nn.utils.clip_grad_value_(self.VAE_model.parameters(),
                                        self.clip_grad)
        torch.nn.utils.clip_grad_value_(self.model.parameters(),
                                        self.clip_grad)

        self.ranker_optimizer.step()
        self.vae_optimizer.step()

        return rank_loss.data, vae_total_loss.data, KL_loss, RL_w_loss, RL_e_loss

    def predict(self, input_qw, input_qe, input_dw, input_de):
        # Score a (query, document) pair with dropout disabled and no
        # gradient tracking; returns the ranker's predicted relevance.
        with torch.no_grad():
            self.VAE_model.eval()
            self.model.eval()
            # NOTE(review): train_VAE unpacks this same call into an 8-tuple
            # and forwards only its first element (topic_embeddings) to
            # self.model, while here the entire return value is forwarded.
            # That is consistent only if VAE_model.forward returns just the
            # topic embeddings in eval mode — confirm; otherwise this should
            # be `self.VAE_model(input_qw, input_qe)[0]`.
            topic_embeddings = self.VAE_model(input_qw, input_qe)
            rels_predicted, _ = self.model(input_qw, input_qe, input_dw,
                                           input_de, topic_embeddings)

        return rels_predicted

    def save_checkpoint(self, message):
        """Serialize both models and both optimizers to self.saveModeladdr.

        `message` is accepted for interface compatibility but is not used.
        """
        save_path = os.path.join(self.saveModeladdr)
        state = {
            'vae_state_dict': self.VAE_model.state_dict(),
            'rank_state_dict': self.model.state_dict(),
            'vae_optimizer': self.vae_optimizer.state_dict(),
            'rank_optimizer': self.ranker_optimizer.state_dict(),
        }
        torch.save(state, save_path)

    def get_embeddings(self):
        # Export the learned word / entity / topic embedding matrices as
        # numpy arrays pickled to a fixed path for offline topic analysis.
        word_embeddings = self.model.word_emb.weight.detach().cpu().numpy()
        ent_embeddings = self.model.ent_emb.weight.detach().cpu().numpy()
        topic_embeddings = self.model.topic_embedding.detach().cpu().numpy()

        print 'Topic size: ', topic_embeddings.shape[0]
        # NOTE(review): Python-2 cPickle with a text-mode 'w' file handle;
        # under Python 3 this would need `pickle` and binary mode 'wb'.
        # The file handle is never explicitly closed.
        cPickle.dump((word_embeddings, ent_embeddings, topic_embeddings),
                     open('./topic_analysis/w_e_t_embedding.pkl', 'w'))
        print 'saved'
        return
Example #11
0
def main():
    """Train a sentence VAE with a KL-annealed ELBO.

    Reads all hyper-parameters from the module-level `args` namespace,
    logs train/valid losses each epoch and overwrites the checkpoint file
    after every epoch.
    """
    # Log to both the console (plain) and args.log_file (timestamped).
    logger = logging.getLogger(__name__)
    handler1 = logging.StreamHandler()
    handler1.setLevel(logging.INFO)
    handler2 = logging.FileHandler(filename=args.log_file, mode='w')
    handler2.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)8s %(message)s"))
    handler2.setLevel(logging.INFO)
    logger.setLevel(logging.INFO)
    logger.addHandler(handler1)
    logger.addHandler(handler2)

    tokenizer = Tokenizer(args.vocab_file)
    train_dataset = SentenceDataset(args.train_file, tokenizer.encode)
    valid_dataset = SentenceDataset(args.valid_file, tokenizer.encode)
    # drop_last discards the final partial batch in both loaders.
    train_loader = DataLoader(train_dataset,
                              args.batch_size,
                              shuffle=True,
                              collate_fn=train_dataset.collate_fn,
                              drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              args.batch_size,
                              shuffle=False,
                              collate_fn=valid_dataset.collate_fn,
                              drop_last=True)

    model = VAE(
        num_embeddings=len(tokenizer),
        dim_embedding=args.dim_embedding,
        dim_hidden=args.dim_hidden,
        dim_latent=args.dim_latent,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional,
        dropout=args.dropout,
        word_dropout=args.word_dropout,
        dropped_index=tokenizer.unk_index,
    ).to(device)

    # KL-weight schedule parameterized by (x0, k); stepped once per batch.
    annealer = KLAnnealer(x0=args.x0, k=args.k)

    criterion = LmCrossEntropyLoss(tokenizer.pad_index, reduction='batchmean')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.98),
                                 eps=1e-09)

    logger.info('Start training')
    for epoch in range(args.num_epochs):
        train_loss, train_ce_loss, train_kl_loss, valid_loss, valid_ce_loss, valid_kl_loss = 0., 0., 0., 0., 0., 0.
        pbar = tqdm(train_loader)
        pbar.set_description("[Epoch %d/%d]" % (epoch, args.num_epochs))

        # Train
        model.train()
        for itr, s in enumerate(pbar):
            beta = annealer()  # current KL weight

            s = s.to(device)
            # Sequence lengths = number of non-pad tokens per row.
            length = torch.sum(s != tokenizer.pad_index, dim=-1)
            output, mean, logvar, z = model(s, length)
            # Next-token objective: outputs up to t predict tokens t+1.
            ce_loss = criterion(output[:, :-1, :], s[:, 1:])
            # Analytic KL( N(mean, exp(logvar)) || N(0, I) ), summed over the
            # latent dim and averaged over the batch.
            kl_loss = -0.5 * torch.mean(
                torch.sum(1 + logvar - mean.pow(2) - logvar.exp(), dim=-1))
            loss = ce_loss + beta * kl_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            annealer.step()  # advance the anneal schedule once per batch

            train_loss += loss.item()
            train_ce_loss += ce_loss.item()
            train_kl_loss += kl_loss.item()
            if itr % args.print_every == 0:
                pbar.set_postfix(loss=train_loss / (itr + 1), beta=beta)
        train_loss /= len(train_loader)
        train_ce_loss /= len(train_loader)
        train_kl_loss /= len(train_loader)

        # Valid
        model.eval()
        with torch.no_grad():
            for s in valid_loader:
                # The annealer is not stepped here, so validation uses the
                # beta reached at the end of this training epoch.
                beta = annealer()

                s = s.to(device)
                length = torch.sum(s != tokenizer.pad_index, dim=-1)
                output, mean, logvar, z = model(s, length)
                ce_loss = criterion(output[:, :-1, :], s[:, 1:])
                kl_loss = -0.5 * torch.mean(
                    torch.sum(1 + logvar - mean.pow(2) - logvar.exp(), dim=-1))
                loss = ce_loss + beta * kl_loss

                valid_loss += loss.item()
                valid_ce_loss += ce_loss.item()
                valid_kl_loss += kl_loss.item()
            valid_loss /= len(valid_loader)
            valid_ce_loss /= len(valid_loader)
            valid_kl_loss /= len(valid_loader)

        logger.info(
            '[Epoch %d/%d] Training loss: %.2f, CE loss: %.2f, KL loss: %.2f, Validation loss: %.2f, CE loss: %.2f, KL loss: %.2f'
            % (
                epoch,
                args.num_epochs,
                train_loss,
                train_ce_loss,
                train_kl_loss,
                valid_loss,
                valid_ce_loss,
                valid_kl_loss,
            ))

        # Overwrites the same checkpoint file each epoch (keeps latest only).
        torch.save(model.state_dict(), args.checkpoint_file)
Example #12
0
class ReconstructionBERTTrainer:
    """
    Trains a VAE built on top of a pretrained BERT encoder.

    The model produces a reconstructed vector, a graph vector, negative-
    sample vectors, a topic distribution and a stage-prediction vector.
    The training objective combines a hinge reconstruction loss, an
    orthogonality penalty on the reconstruction weights, a stage
    cross-entropy (weak supervision) and, late in training, an entropy
    term; which terms are active depends on the epoch (see `iteration`).
    """
    def __init__(self,
                 bert: BERT,
                 vocab_size: int,
                 markdown_vocab_size,
                 markdown_emb_size,
                 train_dataloader: DataLoader,
                 test_dataloader: DataLoader,
                 lr: float = 1e-4,
                 betas=(0.9, 0.999),
                 weight_decay: float = 0.01,
                 warmup_steps=10000,
                 with_cuda: bool = True,
                 cuda_devices=None,
                 log_freq: int = 10,
                 pad_index=0,
                 loss_lambda=1,
                 model_path=None,
                 n_topics=50,
                 weak_supervise=False,
                 context=False,
                 markdown=False,
                 hinge_loss_start_point=20,
                 entropy_start_point=30):
        """
        :param bert: BERT model which you want to train
        :param vocab_size: total word vocab size
        :param train_dataloader: train dataset data loader
        :param test_dataloader: test dataset data loader [can be None]
        :param lr: learning rate of optimizer
        :param betas: Adam optimizer betas (NOTE: unused — SGD is used below)
        :param weight_decay: Adam weight decay (NOTE: unused — SGD is used)
        :param with_cuda: training with cuda
        :param log_freq: logging frequency of the batch iteration
        :param loss_lambda: weight of the orthogonality penalty
        :param model_path: optional ".epN" checkpoint to resume from
        :param n_topics: size of the topic distribution
        :param hinge_loss_start_point: epoch at which the hinge loss (and a
            lower learning rate) kicks in; before it only stage CE is used
        :param entropy_start_point: epoch at which the entropy term is added
        :param context: use information from neighbor cells
        """

        # Setup cuda device for BERT training, argument -c, --cuda should be true
        self.loss_lambda = loss_lambda
        self.n_topics = n_topics
        self.weak_supervise = weak_supervise
        self.context = context
        self.markdown = markdown
        self.hinge_loss_start_point = hinge_loss_start_point
        self.entropy_start_point = entropy_start_point

        cuda_condition = torch.cuda.is_available() and with_cuda

        self.device = torch.device("cuda:0" if cuda_condition else "cpu")

        # This BERT model will be saved every epoch
        self.bert = bert
        # Wrap the BERT encoder in the reconstruction VAE.
        self.model = VAE(bert,
                         vocab_size,
                         markdown_vocab_size,
                         markdown_emb_size,
                         n_topics=n_topics,
                         weak_supervise=weak_supervise,
                         context=context,
                         markdown=markdown).to(self.device)
        if model_path:
            self.model.load_state_dict(
                torch.load(model_path)["model_state_dict"])
            # Recover the epoch number from the ".epN" suffix written by
            # `save` (split('.')[-1] == "epN", [2:] == "N").
            last_epoch = int(model_path.split('.')[-1][2:])
            self.last_epoch = last_epoch

        else:
            self.last_epoch = None
            # raise NotImplementedError
            # pdb.set_trace()
            # Distributed GPU training if CUDA can detect more than 1 GPU
        if with_cuda and torch.cuda.device_count() > 1:
            # pdb.set_trace()
            print("Using %d GPUS for BERT" % torch.cuda.device_count())
            self.model = nn.DataParallel(self.model, device_ids=cuda_devices)
        # pdb.set_trace()
        # Setting the train and test data loader
        self.train_data = train_dataloader
        self.test_data = test_dataloader

        self.pad_index = pad_index
        # Setting the Adam optimizer with hyper-param
        # self.optim = Adam(self.model.parameters(), lr=lr,
        #                   betas=betas, weight_decay=weight_decay)
        # self.optim_schedule = ScheduledOptim(
        #     self.optim, self.bert.hidden, n_warmup_steps=warmup_steps)
        self.optim = SGD(self.model.parameters(), lr=lr, momentum=0.9)
        # When resuming past the hinge-loss phase, start directly at the
        # reduced learning rate that `iteration` would have switched to.
        if self.last_epoch and self.last_epoch >= self.hinge_loss_start_point:
            self.optim = SGD(self.model.parameters(), lr=0.00002, momentum=0.9)

        # Using Negative Log Likelihood Loss function for predicting the masked_token
        # self.criterion = nn.NLLLoss(ignore_index=self.pad_index)
        self.best_loss = None
        self.updated = False
        self.log_freq = log_freq
        self.cross_entropy = nn.CrossEntropyLoss(ignore_index=0)

        print("Total Parameters:",
              sum([p.nelement() for p in self.model.parameters()]))

    def train(self, epoch):
        # One pass over the training data with gradients enabled.
        self.model.train()
        # self.optim.zero_grad()
        return self.iteration(epoch, self.train_data)

    def test(self, epoch):
        # One gradient-free pass over the held-out data; returns avg loss.
        self.model.eval()
        with torch.no_grad():
            loss = self.iteration(epoch, self.test_data, train=False)
        return loss

    def api(self, data_loader=None):
        """Run inference over `data_loader` (defaults to the test data).

        Returns, per example: the predicted phase (argmax of topic_dist),
        the predicted stage (argmax of stage_vec) and the raw stage vectors.
        Also pushes each batch's topic distribution back into the dataset.
        """
        self.model.eval()
        # str_code = "train" if train else "test"
        if not data_loader:
            data_loader = self.test_data

        # Setting the tqdm progress bar
        data_iter = tqdm.tqdm(
            enumerate(data_loader),
            # desc="EP_%s:%d" % (str_code, epoch),
            total=len(data_loader),
            bar_format="{l_bar}{r_bar}")

        # NOTE(review): avg_loss / total_correct / total_element are never
        # updated or used in this method.
        avg_loss = 0.0
        total_correct = 0
        total_element = 0

        # for (i, data), (ni, ndata) in data_iter, neg_data_iter:
        phases = []
        stages = []
        stage_vecs = []
        with torch.no_grad():
            for i, item in data_iter:
                data = item[0]
                ndata = item[1]
                data = {
                    key: value.to(self.device)
                    for key, value in data.items()
                }
                ndata = {
                    key: value.to(self.device)
                    for key, value in ndata.items()
                }

                # NOTE(review): the two dict comprehensions below repeat the
                # device transfer just done above — redundant but harmless.
                # 0. batch_data will be sent into the device(GPU or cpu)
                data = {
                    key: value.to(self.device)
                    for key, value in data.items()
                }
                ndata = {
                    key: value.to(self.device)
                    for key, value in ndata.items()
                }
                # pdb.set_trace()
                # 1. forward the next_sentence_prediction and masked_lm model
                # pdb.set_trace()
                reconstructed_vec, graph_vec, origin_neg, topic_dist, stage_vec = self.model.forward(
                    data["bert_input"],
                    ndata["bert_input"],
                    data["segment_label"],
                    ndata["segment_label"],
                    data["adj_mat"],
                    ndata["adj_mat"],
                    train=False,
                    context_topic_dist=data["context_topic_vec"],
                    markdown_label=data["markdown_label"],
                    markdown_len=data["markdown_len"],
                    neg_markdown_label=ndata["markdown_label"],
                    neg_markdown_len=ndata["markdown_len"])
                data_loader.dataset.update_topic_dist(topic_dist, data["id"])

                # Argmax over the topic / stage distributions per example.
                phases += torch.max(topic_dist, 1)[-1].tolist()
                # print(torch.max(stage_vec, 1)[-1].tolist())
                stages += torch.max(stage_vec, 1)[-1].tolist()
                stage_vecs += stage_vec.tolist()
                # pdb.set_trace()
        return phases, stages, stage_vecs

    def iteration(self, epoch, data_loader, train=True):
        """
        loop over the data_loader for training or testing
        if on train status, backward operation is activated
        and also auto save the model every epoch

        Loss schedule (by epoch):
          * epoch < hinge_loss_start_point: stage cross-entropy only
          * epoch < entropy_start_point: + hinge loss + orthogonality penalty
          * otherwise: + entropy term as well

        :param epoch: current epoch index
        :param data_loader: torch.utils.data.DataLoader for iteration
        :param train: boolean value of is train or test
        :return: average loss over the epoch
        """
        str_code = "train" if train else "test"

        # Setting the tqdm progress bar
        data_iter = tqdm.tqdm(enumerate(data_loader),
                              desc="EP_%s:%d" % (str_code, epoch),
                              total=len(data_loader),
                              bar_format="{l_bar}{r_bar}")

        avg_loss = 0.0
        total_correct = 0

        # def calculate_iter(data):

        for i, item in data_iter:
            # item = (positive batch, negative batch)
            data = item[0]
            ndata = item[1]

            data = {key: value.to(self.device) for key, value in data.items()}
            ndata = {
                key: value.to(self.device)
                for key, value in ndata.items()
            }

            # 1. forward the next_sentence_prediction and masked_lm model

            reconstructed_vec, graph_vec, origin_neg, topic_dist, stage_vec = self.model.forward(
                data["bert_input"],
                ndata["bert_input"],
                data["segment_label"],
                ndata["segment_label"],
                data["adj_mat"],
                ndata["adj_mat"],
                train=train,
                context_topic_dist=data["context_topic_vec"],
                markdown_label=data["markdown_label"],
                markdown_len=data["markdown_len"],
                neg_markdown_label=ndata["markdown_label"],
                neg_markdown_len=ndata["markdown_len"])
            # pdb.set_trace()
            if self.context:
                data_loader.dataset.update_topic_dist(topic_dist, data["id"])
            bs, hid_size = reconstructed_vec.shape
            # NOTE(review): `duplicate` is computed but never used.
            nbs, hid_size = origin_neg.shape
            duplicate = int(nbs / bs)
            # pdb.set_trace()
            # if str_code == 'test':
            #     pdb.set_trace()
            hinge_loss = my_loss(reconstructed_vec, graph_vec, origin_neg)
            # Orthogonality penalty: ||W^T W - I||; NOTE(review) the .cuda()
            # call assumes a GPU even though self.device may be CPU.
            weight_loss = torch.norm(
                torch.mm(self.model.reconstruction.weight.T,
                         self.model.reconstruction.weight) -
                torch.eye(self.n_topics).cuda())
            loss = self.loss_lambda * weight_loss + hinge_loss
            # if self.weak_supervise:

            c_entropy = self.cross_entropy(stage_vec, data['stage'])

            # Entropy of the predicted stage distribution (summed).
            entropy = -1 * (F.softmax(stage_vec, dim=1) *
                            F.log_softmax(stage_vec, dim=1)).sum()

            # NOTE(review): this accumulation is dead code — `loss` is
            # unconditionally reassigned by the if/elif/else just below.
            loss += 2 * c_entropy  # + 0.001 * entropy
            if epoch < self.hinge_loss_start_point:
                loss = c_entropy
            # else:
            elif epoch < self.entropy_start_point:
                loss = c_entropy + self.loss_lambda * weight_loss + hinge_loss
            else:
                loss = c_entropy + entropy + self.loss_lambda * weight_loss + hinge_loss

            # Entering the hinge-loss phase: rebuild the optimizer at a lower
            # learning rate (this also resets SGD momentum buffers).
            if epoch == self.hinge_loss_start_point:
                self.optim = SGD(self.model.parameters(),
                                 lr=0.00002,
                                 momentum=0.9)

            # 3. backward and optimization only in train

            if train:
                self.optim.zero_grad()
                loss.backward()
                # self.optim.step_and_update_lr()
                self.optim.step()

            avg_loss += loss.item()

            post_fix = {
                "epoch": epoch,
                "iter": i,
                "avg_loss": avg_loss / (i + 1),
                # "avg_acc": total_correct / total_element * 100,
                "loss": loss.item(),
                "cross_entropy": c_entropy.item(),
                "entropy": entropy.item(),
                "hinge_loss": hinge_loss.item()
            }

            if i % self.log_freq == 0:
                data_iter.write(str(post_fix))

        print("EP%d_%s, avg_loss=" % (epoch, str_code),
              avg_loss / len(data_iter))
        return avg_loss / len(data_iter)

    def save(self, epoch, file_path="output/bert_trained.model"):
        """
        Saving the current BERT model on file_path

        :param epoch: current epoch number
        :param file_path: model output path which gonna be file_path+"ep%d" % epoch
        :return: final_output_path
        """
        output_path = file_path + ".ep%d" % epoch
        # if self.updated:
        #     return output_path
        # torch.save(self.bert.cpu(), output_path)
        # Only the model weights and epoch are persisted (no optimizer state),
        # matching what __init__ expects when resuming from model_path.
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': self.model.state_dict()
                # 'optimizer_state_dict': optimizer.state_dict(),
                # 'loss': loss,
                # ...
            },
            output_path)
        # self.bert.to(self.device)
        print("EP:%d Model Saved on:" % epoch, output_path)
        # self.updated = True
        return output_path
Example #13
0
# When True, sample the latent via the reparameterization trick during
# evaluation; when False, decode the posterior mean deterministically.
reparam = False

for epoch in range(n_epochs):
    # Per-epoch accumulators (verbose_* presumably track an intra-epoch
    # logging window — confirm against the training section below).
    epoch_batch = 0
    verbose_loss = 0
    verbose_penalty = 0
    verbose_batch = 0
    epoch_train_elbo = 0
    epoch_val_elbo = 0

    # Evaluate and snapshot the model at each epoch (even before training)
    recs = []
    mus = []
    log_vars = []
    with torch.no_grad():
        model.eval()
        frame_idx = 0

        for test_data in test_loader:
            test_data = test_data.to(device)
            # test_data -= mean[None, ...]
            rec, penalty = model(test_data)
            # Collect posterior parameters for later analysis.
            mu, log_var = model.encoder(test_data)
            mus.append(mu.clone().detach().cpu().numpy())
            log_vars.append(log_var.clone().detach().cpu().numpy())
            if reparam:
                latent = reparameterize(mu, log_var)
            else:
                latent = mu
            # NOTE(review): `rec` from the full model call above is discarded
            # and recomputed here from the chosen latent.
            rec = model.decoder(latent).cpu().numpy()
            recs.append(rec)
Example #14
0
def main(args):
    """Combine the pitch of one piece with the rhythm of another.

    Loads a trained VAE from the YAML config, reads the pitch and rhythm
    feature files named there, decodes them into a piano-roll and writes the
    result as a MIDI file under the configured save path.
    """
    with open(args.config, 'r') as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)
        conf = config['combine']
        model_params = config['model']
        preprocess_params = config['preprocessor']
    # Dead code removed: the original built a timestamped path with
    # os.path.join(conf['save_path'], date_time) and immediately overwrote
    # it — only conf['save_path'] was ever used.
    path = conf['save_path']

    model = VAE(model_params['roll_dim'], model_params['hidden_dim'],
                model_params['infor_dim'], model_params['time_step'], 12)

    model.load_state_dict(torch.load(conf['model_path']))
    if torch.cuda.is_available():
        print('Using: ',
              torch.cuda.get_device_name(torch.cuda.current_device()))
        model.cuda()
    else:
        print('CPU mode')
    model.eval()

    pitch_path = conf['p_path'] + ".txt"
    rhythm_path = conf['r_path'] + ".txt"
    # Output name combines the two source piece names (third-from-last
    # path component of each feature file).
    name1 = pitch_path.split("/")[-3]
    name2 = rhythm_path.split("/")[-3]
    name = name1 + "+" + name2 + ".mid"

    pitch = np.loadtxt(pitch_path)
    print(pitch)
    rhythm = np.loadtxt(rhythm_path)
    print(rhythm)

    print("Importing " + name1 + " pitch and " + name2 + " rhythm")

    pitch = torch.from_numpy(pitch).float()
    rhythm = torch.from_numpy(rhythm).float()
    recon = model.decoder(pitch, rhythm)

    recon = torch.squeeze(recon, 0)
    recon = mf._sampling(recon)
    recon = np.array(recon.cpu().detach().numpy())
    # Trim the roll to the number of active steps in the rhythm track.
    length = torch.sum(rhythm).int()
    recon = recon[:length]
    # Restore the pitch range that preprocessing cropped away.
    recon = mf.modify_pianoroll_dimentions(recon,
                                           preprocess_params['low_crop'],
                                           preprocess_params['high_crop'],
                                           "add")

    mf.numpy_to_midi(recon, 120, path, name,
                     preprocess_params['smallest_note'])

    print("combine succeed")
Example #15
0
        # (fragment: tail of the per-batch training loop of an outer epoch
        # loop whose header is above this excerpt)
        # print('ok2')
        loss.backward() #retain_graph=True
        optimizer.step()


        # Accumulate per-batch scalars for the epoch averages below.
        recon_loss_ += recon_loss.item() #+ kl_loss.item()
        kl_loss_ += kl_loss.item()
        loss_+=loss.item()
        # num_iter += real.size(0)
    # print('recon', recon_loss.item(), 'kl', kl_loss.item(), 'loss', loss.item())
    # Record this epoch's average losses.
    RE_LOSS.append(recon_loss_ / len_loader)
    KL_LOSS.append(kl_loss_ / len_loader)
    LOSS.append(loss_ / len_loader)

    # Decode a fixed noise batch for a visual progress snapshot.
    with torch.no_grad():
        vae.eval()
        ff = vae.dec(fixed_noise).detach().cpu()
    img_list.append(vutils.make_grid(ff, padding=2, normalize=True))

    print('EPOCH %d : recon_loss : %.4f , kl_loss = %.4f , loss = %.4f ' % (epoch, recon_loss_ / len_loader, kl_loss_ / len_loader, loss_ / len_loader))
    ## save fig
    plt.axis('off')
    plt.imshow(np.transpose(img_list[-1], (1, 2, 0)))
    plt.savefig('glips.png', format='png')
    plt.close()

    ### save plot
    # NOTE(review): these curves are plotted every epoch onto the same
    # figure without plt.clf(); labels/lines will accumulate.
    x = [i for i in range(epoch + 1)]
    plt.plot(x, RE_LOSS, label='RE_LOSS')
    plt.plot(x, KL_LOSS, label='KL_LOSS')
    plt.plot(x, LOSS, label='LOSS')
Example #16
0
class Trainer(object):
    """Train a VAE on MNIST, evaluating and sampling every epoch.

    Fixes applied relative to the original:
      * `loss.data[0]` -> `loss.item()` (IndexError on 0-dim tensors since
        PyTorch 0.4),
      * deprecated `Variable` wrappers and the removed `volatile=True` flag
        replaced with plain tensors / `torch.no_grad()`,
      * `'{:0f}'` progress format corrected to `'{:.0f}'`.
    """

    def __init__(self, args):
        self.args = args

        torch.manual_seed(self.args.seed)
        if self.args.cuda:
            torch.cuda.manual_seed(self.args.seed)

        # pin_memory only helps when batches are moved to the GPU.
        kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST('./data',
                           train=True,
                           download=True,
                           transform=transforms.ToTensor()),
            batch_size=self.args.batch_size,
            shuffle=True,
            **kwargs)
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('./data',
                           train=False,
                           transform=transforms.ToTensor()),
            batch_size=self.args.batch_size,
            shuffle=True,
            **kwargs)
        self.train_loader = train_loader
        self.test_loader = test_loader

        self.model = VAE()
        if self.args.cuda:
            self.model.cuda()

        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)

    def loss_function(self, recon_x, x, mu, logvar):
        """Reconstruction BCE plus KL divergence.

        The KL term is divided by batch_size * 784 to match the per-pixel
        averaging of `binary_cross_entropy`.
        """
        BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784))
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        KLD /= self.args.batch_size * 784
        return BCE + KLD

    def train_one_epoch(self, epoch):
        """Run one optimization pass over the training set."""
        train_loader = self.train_loader
        args = self.args

        self.model.train()
        train_loss = 0
        for batch_idx, (data, _) in enumerate(train_loader):
            if args.cuda:
                data = data.cuda()
            self.optimizer.zero_grad()
            recon_batch, mu, logvar = self.model(data)
            loss = self.loss_function(recon_batch, data, mu, logvar)
            loss.backward()
            train_loss += loss.item()
            self.optimizer.step()
            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader),
                    loss.item() / len(data)))
        print('=====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(train_loader.dataset)))

    def test(self, epoch):
        """Evaluate on the test set and save one reconstruction grid."""
        test_loader = self.test_loader
        args = self.args

        self.model.eval()
        test_loss = 0
        # no_grad replaces the removed `volatile=True` evaluation mode.
        with torch.no_grad():
            for i, (data, _) in enumerate(test_loader):
                if args.cuda:
                    data = data.cuda()
                recon_batch, mu, logvar = self.model(data)
                test_loss += self.loss_function(recon_batch, data, mu,
                                                logvar).item()
                if i == 0:
                    # Side-by-side grid: originals above reconstructions.
                    n = min(data.size(0), 8)
                    comparison = torch.cat([
                        data[:n],
                        recon_batch.view(args.batch_size, 1, 28, 28)[:n]
                    ])
                    fname = 'results/reconstruction_' + str(epoch) + '.png'
                    save_image(comparison.data.cpu(), fname, nrow=n)

        test_loss /= len(test_loader.dataset)
        print('=====> Test set loss: {:.4f}'.format(test_loss))

    def train(self):
        """Full loop: train, evaluate, and decode a fixed-size sample batch
        from the prior every epoch."""
        args = self.args
        for epoch in range(1, args.epochs + 1):
            self.train_one_epoch(epoch)
            self.test(epoch)
            with torch.no_grad():
                sample = torch.randn(64, 20)
                if args.cuda:
                    sample = sample.cuda()
                sample = self.model.decode(sample).cpu()
            save_image(sample.data.view(64, 1, 28, 28),
                       './results/sample_' + str(epoch) + '.png')
Example #17
0
class AudioToBodyDynamics(object):
    """
    Defines a wrapper class for training and evaluating a model.
    Inputs:
           args    (argparse object):      model settings
           generator (tuple DataLoader):   a tuple of at least one DataLoader
    """
    def __init__(self, args, generator, freestyle=False):
        """Build the architecture named by ``args.model_name``, create its
        optimizer and, in freestyle (inference) mode, restore a checkpoint.

        Args:
            args: parsed command-line settings (model_name, device, lr, ...).
            generator: tuple of DataLoaders; generator[0] is the training
                set, generator[1] (when present) the validation set.
            freestyle: when True, skip training and run with saved weights.
        """
        super(AudioToBodyDynamics, self).__init__()
        self.device = args.device
        self.log_frequency = args.log_frequency

        self.is_freestyle_mode = freestyle

        self.generator = generator
        self.model_name = args.model_name
        self.ident = args.ident

        # input/output feature sizes are dictated by the dataset
        input_dim, output_dim = generator[0].dataset.getDimsPerBatch()

        model_options = {
            'seq_len': args.seq_len,
            'device': args.device,
            'dropout': args.dp,
            'batch_size': args.batch_size,
            'hidden_dim': args.hidden_size,
            'input_dim': input_dim,
            'output_dim': output_dim,
            'trainable_init': args.trainable_init
        }

        # Instantiate the selected architecture. Imports are kept local so
        # only the chosen model class is loaded from the model module.
        if args.model_name == "AudioToJointsThree":
            from model import AudioToJointsThree
            self.model = AudioToJointsThree(model_options).cuda(args.device)
        elif args.model_name == 'AudioToJointsNonlinear':
            from model import AudioToJointsNonlinear
            self.model = AudioToJointsNonlinear(model_options).cuda(
                args.device)
        elif args.model_name == "AudioToJoints":
            from model import AudioToJoints
            self.model = AudioToJoints(model_options).cuda(args.device)
        elif args.model_name == 'JointsToJoints':
            from model import JointsToJoints
            self.model = JointsToJoints(model_options).cuda(
                args.device).double()
        elif args.model_name == 'LSTMToDense':
            from model import LSTMToDense
            self.model = LSTMToDense(model_options).cuda(args.device).double()
        elif args.model_name == 'AudioToJointsSeq2Seq':
            from model import AudioToJointsSeq2Seq
            self.model = AudioToJointsSeq2Seq(model_options).cuda(
                args.device).double()
        elif args.model_name == 'MDNRNN':
            from model import MDNRNN
            self.model = MDNRNN(model_options).cuda(args.device).double()
        elif args.model_name == 'VAE':
            from model import VAE
            self.model = VAE(model_options).cuda(args.device).double()

        # construct the optimizer
        self.optim = optim.Adam(self.model.parameters(), lr=args.lr)

        # Load checkpoint model
        if self.is_freestyle_mode:
            # NOTE(review): `model_dir` is not defined in this method or on
            # self — this relies on a module-level global; confirm it exists
            # wherever freestyle mode is used.
            path = f"{model_dir}{args.model_name}_{str(args.ident)}.pth"
            print(path)
            self.loadModelCheckpoint(path)

    # general loss function
    def buildLoss(self, predictions, targets):
        """Mean over the batch of squared error summed along the last axis."""
        square_diff = (predictions - targets)**2
        out = torch.sum(square_diff, -1, keepdim=True)
        return torch.mean(out)

    def mdn_loss(self, y, pi, mu, sigma):
        """Mixture-density negative log-likelihood of ``y`` under Gaussians
        with mixing weights ``pi``, means ``mu`` and std-devs ``sigma``."""
        m = torch.distributions.Normal(loc=mu, scale=sigma)
        # per-component likelihood, mixed by pi along the component axis
        loss = torch.exp(m.log_prob(y))
        loss = torch.sum(loss * pi, dim=2)
        loss = -torch.log(loss)
        return torch.mean(loss)

    # Loss function from https://github.com/pytorch/examples/blob/master/vae/main.py;
    # the KLD term follows Appendix B of Kingma & Welling,
    # "Auto-Encoding Variational Bayes" (https://arxiv.org/abs/1312.6114).
    def vae_loss(self, targets, recon_targets, mu, logvar):
        """Summed reconstruction BCE plus KL divergence to the unit Gaussian."""
        BCE = nn.functional.binary_cross_entropy(recon_targets,
                                                 targets,
                                                 reduction='sum')
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return BCE + KLD

    def saveModel(self, state_info, path):
        """Serialize a checkpoint dict (model/optimizer state, losses)."""
        torch.save(state_info, path)

    def loadModelCheckpoint(self, path):
        """Restore model and optimizer state from a checkpoint file."""
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optim.load_state_dict(checkpoint['optim_state_dict'])

    def runNetwork(self, inputs, targets):
        """
        Train on one given mfcc pose pair
        Args:
             inputs (array): [batch, seq_len, mfcc_features * 3]
             targets (array): [batch, seq_len, 19 * 2 poses]
        Returns:
             predictions, truth, loss
        """
        def to_numpy(x):
            # import from gpu device to cpu, convert to numpy
            return x.cpu().data.numpy()

        inputs = Variable(torch.DoubleTensor(inputs.double()).to(self.device))

        # reshape targets into (batch * seq_len, input features)
        targets = Variable(torch.DoubleTensor(targets).to(self.device))

        # forward pass: seq2seq consumes the targets too (teacher forcing);
        # the VAE also returns its posterior parameters
        if self.model_name == 'AudioToJointsSeq2Seq':
            predictions = self.model.forward(inputs, targets)
        elif self.model_name == 'VAE':
            predictions, mu, logvar = self.model.forward(inputs)
        else:
            predictions = self.model.forward(inputs)

        # choose the loss matching the architecture; L1 is the default
        criterion = nn.L1Loss()
        if self.model_name == 'AudioToJointsSeq2Seq':
            loss = criterion(predictions.to(self.device),
                             targets.to(self.device).float())
        elif self.model_name == 'MDNRNN':
            # predictions = (pi, mu, sigma), (h, c)
            loss = self.mdn_loss(targets, predictions[0][0], predictions[0][1],
                                 predictions[0][2])
        elif self.model_name == 'VAE':
            loss = self.vae_loss(targets, predictions, mu, logvar)
        else:
            loss = criterion(predictions, targets)
        return (to_numpy(predictions), to_numpy(targets)), loss

    def runEpoch(self):
        """Run one epoch: train + validate (training mode), or predict only
        (freestyle mode).

        Returns:
            (train_losses, val_losses, predictions, targets)
        """
        train_losses = []  # per-batch training losses
        val_losses = []
        predictions, targets = [], []

        if not self.is_freestyle_mode:  # train
            # for each data point
            for mfccs, poses in self.generator[0]:
                self.model.train()  # pass train flag to model

                pred_targs, train_loss = self.runNetwork(mfccs, poses)
                self.optim.zero_grad()
                train_loss.backward()
                self.optim.step()
                train_loss = train_loss.data.tolist()
                train_losses.append(train_loss)

            # validation loss
            for mfccs, poses in self.generator[1]:
                self.model.eval()
                pred_targs, val_loss = self.runNetwork(mfccs, poses)

                val_loss = val_loss.data.tolist()
                val_losses.append(val_loss)
                # flatten (batch, seq) into one axis of 19x2 joint poses
                pred = pred_targs[0].reshape(
                    int(pred_targs[0].shape[0] * pred_targs[0].shape[1]), 19,
                    2)
                predictions.append(pred)
                targets.append(pred_targs[1])

        # test or predict / play w/ model
        if self.is_freestyle_mode:
            for mfccs, poses in self.generator[0]:
                self.model.eval()
                # mfccs = mfccs.float()
                pred_targs, val_loss = self.runNetwork(mfccs, poses)
                val_loss = val_loss.data.tolist()
                val_losses.append(val_loss)
                pred = pred_targs[0].reshape(
                    int(pred_targs[0].shape[0] * pred_targs[0].shape[1]), 19,
                    2)
                predictions.append(pred)
                targets.append(pred_targs[1])

        return train_losses, val_losses, predictions, targets

    def trainModel(self, max_epochs, logfldr, model_dir):
        """Train for ``max_epochs`` epochs, logging and checkpointing along
        the way; finally plot loss curves and save a last checkpoint.

        Returns:
            (best_train_loss, best_val_loss)
        """
        log.debug("Training model")
        epoch_losses = []
        batch_losses = []
        val_losses = []
        i, best_loss, iters_without_improvement = 0, float('inf'), 0
        best_train_loss, best_val_loss = float('inf'), float('inf')

        if logfldr:
            if logfldr[-1] != '/':
                logfldr += '/'
        # NOTE(review): when logfldr is falsy it is still formatted into the
        # path (e.g. 'Noneepoch_...'); confirm a valid folder is always given.
        filename = f'{logfldr}epoch_of_model_{str(self.ident)}.txt'
        # NOTE(review): built once before the loop. The loss lists and the
        # state_dict tensors stay live (mutated/updated in place), but
        # 'epoch' is frozen at its pre-loop value in every save.
        state_info = {
            'epoch': i,
            'epoch_losses': epoch_losses,
            'batch_losses': batch_losses,
            'validation_losses': val_losses,
            'model_state_dict': self.model.state_dict(),
            'optim_state_dict': self.optim.state_dict(),
        }

        for i in range(max_epochs):
            # NOTE(review): int(i / 10) == 0 holds only for epochs 0-9, so
            # logging/checkpointing stops after epoch 9 until the final save;
            # `i % 10 == 0` may have been intended — confirm.
            if int(i / 10) == 0:
                if i == 0:
                    with open(filename, 'w') as f:
                        f.write(f"Epoch: {i} started\n")
                else:
                    with open(filename, 'a+') as f:
                        f.write(f"Epoch: {i} started\n")
                # save the model
                if model_dir:
                    if model_dir[-1] != '/':
                        model_dir += '/'
                path = f"{model_dir}{self.model_name}_{str(self.ident)}.pth"
                self.saveModel(state_info, path)

            # train_info, val_info, predictions, targets
            iter_train, iter_val, predictions, targets = self.runEpoch()

            iter_mean = np.mean(iter_train)
            iter_val_mean = np.mean(iter_val)

            epoch_losses.append(iter_mean)
            batch_losses.extend(iter_train)
            val_losses.append(iter_val_mean)

            log.info("Epoch {} / {}".format(i, max_epochs))
            log.info(f"Training Loss : {iter_mean}")
            log.info(f"Validation Loss : {iter_val_mean}")

            best_train_loss = iter_mean if iter_mean < best_train_loss else best_train_loss
            best_val_loss = iter_val_mean if iter_val_mean < best_val_loss else best_val_loss

        # Visualize VAE latent space
        if self.model_name == 'VAE':
            self.vae_plot()

        self.plotResults(logfldr, epoch_losses, batch_losses, val_losses)
        path = f"{model_dir}{self.model_name}_{str(self.ident)}.pth"
        self.saveModel(state_info, path)
        return best_train_loss, best_val_loss

    # plot random subset of poses in VAE latent space
    def vae_plot(self):
        """Encode poses, sample their 2-D latent codes, cluster them with
        k-medoids, scatter-plot the clusters and render the medoid poses."""
        z_list = torch.Tensor(1, 2)
        poses = []
        # NOTE(review): self.generator is a tuple of DataLoaders, so this
        # iterates the loaders themselves; self.generator[0] looks intended
        # — confirm before relying on this path.
        for batch_input, batch_output in self.generator:
            for inp in batch_input:
                poses.append(inp)
            mu, logvar = self.model.encode(batch_input)
            z = self.model.reparameterize(mu, logvar)
            z2 = z[:, -1, :]
            z_list = torch.cat((z_list.double(), z2.double()), 0)

        # sample latent points; row 0 of z_list is the uninitialized seed,
        # so indices start at 1
        indices = np.random.randint(low=1, high=z_list.shape[0], size=1000)
        coords = np.array([z_list[ind, :].detach().numpy() for ind in indices])

        # k-medoids clustering for coloring
        kmedoids = KMedoids(n_clusters=5).fit(coords)
        y_kmedoids = kmedoids.predict(coords)
        plt.scatter(coords[:, 0], coords[:, 1], c=y_kmedoids, cmap='viridis')
        plt.show()

        # recover the original pose behind each medoid center (medoid
        # centers are actual data points, so an exact match exists)
        recons = []
        for center in kmedoids.cluster_centers_:
            c = np.array(center)
            for i in range(len(coords)):
                if np.array_equal(c, coords[i]):
                    recons.append(poses[indices[i] -
                                        1].detach().numpy().reshape(19, 2))

        self.draw_poses(np.array(recons))

    # Takes in np array of poses that are each 19x2 arrays
    def draw_poses(self, poses):
        """Render each pose to images/<count>.jpg, translated so the first
        pose's joint 8 lands at canvas position (750, 800)."""
        count = 0
        shift_by = np.array([750, 800]) - poses[0][8]
        poses += shift_by
        for pose in poses:
            person_id = str(0) + ", " + str([0])
            canvas = draw_pose_figure(person_id, pose)
            file_name = "images/" + f"{count:05}.jpg"
            cv2.imwrite(file_name, canvas)
            count += 1

    def plotResults(self, logfldr, epoch_losses, batch_losses, val_losses):
        """Plot epoch/batch/validation loss curves as stacked subplots and
        save the figure under logfldr."""
        losses = [epoch_losses, batch_losses, val_losses]
        names = [["Epoch loss"], ["Batch loss"], ["Val loss"]]
        _, ax = plt.subplots(nrows=len(losses), ncols=1)
        for index, pair in enumerate(zip(losses, names)):
            data = [pair[0][j] for j in range(len(pair[0]))]
            ax[index].plot(data, label=pair[1])
            ax[index].legend()
        if logfldr:
            if logfldr[-1] != '/':
                logfldr += '/'
        save_filename = os.path.join(
            logfldr, f"{self.model_name}_{str(self.ident)}_results.png")
        plt.savefig(save_filename)
        plt.close()
class Experiment():
    """MNIST VAE training harness: wires up loaders, model, loss and
    optimizer, and exposes train/test/save/run entry points."""

    def __init__(self, args):
        self.args = args

        # MNIST loaders: shuffled train split, deterministic test split
        common = dict(download=True, transform=transforms.ToTensor())
        self.train_loader = DataLoader(
            datasets.MNIST('./data', train=True, **common),
            batch_size=args.batch_size, num_workers=args.n_workers, shuffle=True
        )
        self.test_loader = DataLoader(
            datasets.MNIST('./data', train=False, **common),
            batch_size=args.batch_size, num_workers=args.n_workers, shuffle=False
        )
        self.model = VAE().to(args.device)
        self.loss = VAE.loss
        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)

    def train(self, epoch):
        """One optimization pass over the training set, with periodic
        per-batch logging every args.log_freq steps."""
        self.model.train()
        running_loss = 0.
        seen = 0
        for step, (batch, _) in enumerate(self.train_loader, start=1):
            seen += len(batch)
            batch = batch.to(self.args.device)
            self.optimizer.zero_grad()
            recon, mu, logvar = self.model(batch)
            batch_loss = self.loss(batch, recon, mu, logvar)
            batch_loss.backward()
            running_loss += batch_loss.item()  # * len(batch)
            self.optimizer.step()

            if step % self.args.log_freq == 0:
                print('Epoch {} Train [{}/{}]:  LossAvg {:.4f}'.format(
                    epoch, seen, len(self.train_loader.dataset), batch_loss.item() / len(batch)))
        print('Epoch {} Train: LossAvg {:.4f}'.format(
            epoch, running_loss / len(self.train_loader.dataset)))

    def test(self, epoch):
        """Evaluate on the test set, checkpoint the model, and return an
        input/reconstruction comparison grid built from the first batch."""
        self.model.eval()
        total = 0.
        with torch.no_grad():
            for step, (batch, _) in enumerate(self.test_loader, start=1):
                batch = batch.to(self.args.device)
                recon, mu, logvar = self.model(batch)
                total += self.loss(batch, recon, mu, logvar).item()  # * len(batch)

                if step == 1:
                    # first batch: stack originals above reconstructions
                    n = min(batch.size(0), 8)
                    comparison = torch.cat(
                        [batch[:n], recon.view(self.args.batch_size, 1, 28, 28)[:n]])

        print('Epoch {} Test: LossAvg {:.4f}'.format(
            epoch, total / len(self.test_loader.dataset)))
        self.save(epoch)
        return comparison.cpu()

    def save(self, epoch):
        """Checkpoint the model weights for this epoch."""
        torch.save(self.model.state_dict(), './results/checkpoint_{}.pt'.format(epoch))

    def run(self):
        """Full schedule: train, test and visualize each epoch."""
        for epoch in range(1, 1 + self.args.epochs):
            self.train(epoch)
            comparison = self.test(epoch)
            n = 8
            save_image(comparison, './results/comparison_{}.png'.format(epoch), nrow=n)
            grid = make_grid(comparison, nrow=n)
            show(grid.cpu())
Example #19
0
# Side-by-side visual check of the VAE: left column shows test inputs,
# right column their reconstructions, rendered as 28x28 grayscale images.
# Assumes `x` and `x_reconstr` were produced earlier — TODO confirm shapes.
plt.figure(figsize=(6, 10))
rows, cols = 5, 2
for i in range(rows):
    # left cell: original test digit
    plt.subplot(rows, cols, 2 * i + 1)
    plt.imshow(x[i].reshape(28, 28), vmin=0, vmax=1, cmap="Greys_r")
    plt.title("test input")
    plt.colorbar()
    # right cell: its reconstruction
    plt.subplot(rows, cols, 2 * i + 2)
    plt.imshow(x_reconstr[i].reshape(28, 28), vmin=0, vmax=1, cmap="Greys_r")
    plt.title("reconstruct")
    plt.colorbar()
    plt.tight_layout()
plt.show()
""" check generation
"""
vae_model.eval()
noise = torch.randn(batch_size, net_arch["n_z"], device=device)
images = vae_model.generate(noise).cpu().detach()
torch.save(images, gen_images_path)
print(f"{gen_images_path} saved")
""" check latent space, in order to do that, we need to train another VAE model with n_z=2
"""
net_arch["n_z"] = 2

vae_model_2d = VAE(net_arch, lr, batch_size, device)
print(vae_model_2d)

if not os.path.exists(model_2d_save_path):
    vae_model_2d.train()
    n_epoch = 50
    for epoch in range(n_epoch):
Example #20
0
class Runner(object):
    """Training/evaluation driver for a VAE model.

    Owns the model, optimizer, LR scheduler, BCE criterion, device
    placement (including multi-GPU DataParallel) and a TensorBoard writer.
    """
    def __init__(self,
                 hparams,
                 train_size: int,
                 class_weight: Optional[Tensor] = None):
        # model, criterion
        self.model = VAE()

        # optimizer and scheduler
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=hparams.learning_rate,
                                          eps=hparams.eps,
                                          weight_decay=hparams.weight_decay)
        # LR is reduced when the validation loss plateaus (see self.step)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, **hparams.scheduler)
        # per-element BCE; reduced manually per sample in run()
        self.bce = nn.BCEWithLogitsLoss(reduction='none')
        # self.kld = nn.KLDivLoss(reduction='sum')
        # device
        device_for_summary = self.__init_device(hparams.device,
                                                hparams.out_device)

        # summary
        self.writer = SummaryWriter(logdir=hparams.logdir)
        # TODO: fill in ~~DUMMY~~INPUT~~SIZE~~
        # NOTE(review): (40, 11) is a placeholder dummy input size for the
        # model summary — confirm it matches the real input shape.
        path_summary = Path(self.writer.logdir, 'summary.txt')
        if not path_summary.exists():
            print_to_file(path_summary, summary, (self.model, (40, 11)),
                          dict(device=device_for_summary))

        # save hyperparameters
        path_hparam = Path(self.writer.logdir, 'hparams.txt')
        if not path_hparam.exists():
            print_to_file(path_hparam, hparams.print_params)

    def __init_device(self, device, out_device):
        """Resolve device settings and place model/criterion accordingly;
        wraps the model in DataParallel when several CUDA ids are given.

        Returns 'cpu' or 'cuda' — a string for the model-summary call.
        """
        if device == 'cpu':
            self.in_device = torch.device('cpu')
            self.out_device = torch.device('cpu')
            self.str_device = 'cpu'
            return 'cpu'

        # device type: List[int]
        # normalize `device` to a list of CUDA indices; a string like
        # 'cuda:1' contributes its last character as the index
        if type(device) == int:
            device = [device]
        elif type(device) == str:
            device = [int(device[-1])]
        else:  # sequence of devices
            if type(device[0]) != int:
                device = [int(d[-1]) for d in device]

        self.in_device = torch.device(f'cuda:{device[0]}')

        if len(device) > 1:
            if type(out_device) == int:
                self.out_device = torch.device(f'cuda:{out_device}')
            else:
                self.out_device = torch.device(out_device)
            self.str_device = ', '.join([f'cuda:{d}' for d in device])

            self.model = nn.DataParallel(self.model,
                                         device_ids=device,
                                         output_device=self.out_device)

        else:
            self.out_device = self.in_device
            self.str_device = str(self.in_device)

        self.model.cuda(self.in_device)
        self.bce.cuda(self.out_device)  # criterion lives where outputs gather

        torch.cuda.set_device(self.in_device)

        return 'cuda'

    # Running model for train, test and validation.
    def run(self, dataloader, mode: str, epoch: int):
        """One pass over ``dataloader`` in the given mode ('train',
        'valid' or 'test'); returns the average loss and, for test mode,
        (labels, thresholded predictions, raw BCE scores).
        """
        self.model.train() if mode == 'train' else self.model.eval()
        if mode == 'test':
            # restore the checkpoint for this epoch before evaluating.
            # NOTE(review): this loads f'{epoch}.pt' while step() saves
            # f'VAE_{epoch}.pt' — confirm the filenames agree.
            state_dict = torch.load(Path(self.writer.logdir, f'{epoch}.pt'),
                                    map_location='cpu')
            if isinstance(self.model, nn.DataParallel):
                self.model.module.load_state_dict(state_dict)
            else:
                self.model.load_state_dict(state_dict)
            path_test_result = Path(self.writer.logdir, f'test_{epoch}')
            os.makedirs(path_test_result, exist_ok=True)
        else:
            path_test_result = None

        avg_loss = 0.
        y = []          # true labels (test mode only)
        y_est = []      # predicted labels from thresholded BCE
        pred_prob = []  # raw per-sample BCE scores

        pbar = tqdm(dataloader,
                    desc=f'{mode} {epoch:3d}',
                    postfix='-',
                    dynamic_ncols=True)

        for i_batch, batch in enumerate(pbar):
            # data
            x = batch['batch_x']
            x = x.to(self.in_device)  # B, F, T

            # forward
            reconstruct_x, mu, logvar = self.model(x)

            # loss: per-sample mean reconstruction BCE, shape (B,)
            BCE = self.bce(reconstruct_x, x.view(-1, 440)).mean(dim=1)  # (B,)
            if mode != 'test':
                # ELBO-style objective: reconstruction + KL divergence
                loss = torch.mean(
                    BCE - 0.5 *
                    torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1))
            else:
                loss = 0.

            if mode == 'train':
                # backward
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                loss = loss.item()

            elif mode == 'valid':
                loss = loss.item()

            else:
                # test: threshold reconstruction error as the prediction
                y += batch['batch_y']
                y_est += (BCE < 0.5).int().tolist()
                pred_prob += BCE.tolist()

            pbar.set_postfix_str('')

            avg_loss += loss

        # NOTE(review): sums per-batch means but divides by the number of
        # samples — confirm this scaling is intended.
        avg_loss = avg_loss / len(dataloader.dataset)

        y = np.array(y)
        y_est = np.array(y_est)
        pred_prob = np.array(pred_prob, dtype=np.float32)

        return avg_loss, (y, y_est, pred_prob)

    def step(self, valid_loss: float, epoch: int):
        """Advance the LR scheduler on the validation loss, log the current
        learning rate, and checkpoint every 5th epoch.

        :param valid_loss: loss fed to ReduceLROnPlateau
        :param epoch: current epoch number
        :return: test epoch or 0
        """
        # self.scheduler.step()
        self.scheduler.step(valid_loss)

        # print learning rate
        for param_group in self.optimizer.param_groups:
            self.writer.add_scalar('learning rate', param_group['lr'], epoch)

        if epoch % 5 == 0:
            # NOTE(review): references the global `hparams` (not stored on
            # self), and the trailing comma wraps the state dict in a
            # 1-tuple — confirm the loading side expects both.
            torch.save((self.model.module.state_dict() if isinstance(
                self.model, nn.DataParallel) else self.model.state_dict(), ),
                       Path(hparams.logdir) / f'VAE_{epoch}.pt')
        return 0
                print('    bits : {:.2f}\n'.format(losses['bits'][-1]))

                print('     max : {:.6f}'.format(torch.max(weight_vec[-1])))
                print(' max idx : {:.0f}'.format(torch.argmax(weight_vec[-1])))
                print('     min : {:.6f}\n'.format(torch.min(weight_vec[-1])))

                # print('sparsity : {}'.format(sum(1 for x in weight_vec[-1] if x > 0.2)) / weight_vec)  # sparsity of weight vector
                print('*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*')

            # print('Epoch {}: train loss: {}'.format(epoch, loss.item()))

        # losses['weights'].append(weight_vec)

        # ================ validation ================
        with torch.no_grad():
            model.eval()  # Put model in evaluation mode

            for batch_idx, local_batch in enumerate(val_loader):
                # Transfer to GPU
                local_batch = local_batch.to(device, dtype=torch.float32)

                # Model computations
                x_hat, mu, logvar, q_z, _ = model(local_batch)
                loss, _, _, _, _, _ = criterion(
                    create_stft(local_batch).cpu(), x_hat.cpu(), q_z, 48, 1,
                    model.global_step, 0.2, device, weight_vec, balance,
                    loss_weight(epoch))

            print('loading validation loss...\n')
            print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
            print('Validation Loss: {}'.format(loss.item()))
Example #22
0
def main():
    """Train a (beta-TC)VAE on the selected dataset, checkpoint it
    periodically, and finish with an ELBO-decomposition report.

    Returns the trained VAE.
    """
    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-d',
                        '--dataset',
                        default='shapes',
                        type=str,
                        help='dataset name',
                        choices=['shapes', 'faces'])
    parser.add_argument('-dist',
                        default='normal',
                        type=str,
                        choices=['normal', 'laplace', 'flow'])
    parser.add_argument('-n',
                        '--num-epochs',
                        default=1,
                        type=int,
                        help='number of training epochs')
    parser.add_argument('-b',
                        '--batch-size',
                        default=2048,
                        type=int,
                        help='batch size')
    parser.add_argument('-l',
                        '--learning-rate',
                        default=1e-3,
                        type=float,
                        help='learning rate')
    parser.add_argument('-z',
                        '--latent-dim',
                        default=10,
                        type=int,
                        help='size of latent dimension')
    parser.add_argument('--beta',
                        default=5,
                        type=float,
                        help='ELBO penalty term')
    parser.add_argument('--tcvae', action='store_true')
    parser.add_argument('--exclude-mutinfo', action='store_true')
    parser.add_argument('--beta-anneal', action='store_true')
    parser.add_argument('--lambda-anneal', action='store_true')
    parser.add_argument('--mss',
                        action='store_true',
                        help='use the improved minibatch estimator')
    parser.add_argument('--conv', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--save', default='shapes')
    parser.add_argument('--log_freq',
                        default=200,
                        type=int,
                        help='num iterations per log')
    args = parser.parse_args()

    # torch.cuda.set_device(args.gpu)

    # data loader
    train_loader = setup_data_loaders(args, use_cuda=False)

    # setup the VAE
    prior_dist = dist.Normal()
    q_dist = dist.Normal()

    vae = VAE(z_dim=args.latent_dim,
              use_cuda=False,
              prior_dist=prior_dist,
              q_dist=q_dist,
              include_mutinfo=not args.exclude_mutinfo,
              tcvae=args.tcvae,
              mss=args.mss)

    # setup the optimizer
    optimizer = optim.Adam(vae.parameters(), lr=args.learning_rate)

    train_elbo = []

    # training loop
    dataset_size = len(train_loader.dataset)
    num_iterations = len(train_loader) * args.num_epochs
    iteration = 0
    # initialize loss accumulator
    elbo_running_mean = utils.running_avg_meter()
    while iteration < num_iterations:
        for i, x in enumerate(train_loader):
            iteration += 1
            batch_time = time.time()
            vae.train()
            # anneal beta/lambda per the --beta-anneal/--lambda-anneal flags
            anneal_kl(args, vae, iteration)
            optimizer.zero_grad()
            x = Variable(x)

            # the ELBO objective is maximized, so negate before backward
            obj, elbo = vae.elbo(x, dataset_size)
            # if utils.isnan(obj).any():
            #     raise ValueError('NaN spotted in objective.')
            obj.mean().mul(-1).backward()
            elbo_running_mean.update(elbo.mean())
            optimizer.step()

            # report training diagnostics
            if iteration % args.log_freq == 0:
                train_elbo.append(elbo_running_mean.avg)
                print(
                    '[iteration %03d] time: %.2f \tbeta %.2f \tlambda %.2f training ELBO: %.4f (%.4f)'
                    % (iteration, time.time() - batch_time, vae.beta, vae.lamb,
                       elbo_running_mean.val, elbo_running_mean.avg))

                vae.eval()

                # periodic checkpoint alongside the log line
                utils.save_checkpoint(
                    {
                        'state_dict': vae.state_dict(),
                        'args': args
                    }, args.save, 0)
                # eval('plot_vs_gt_' + args.dataset)(vae, train_loader.dataset,
                #     os.path.join(args.save, 'gt_vs_latent_{:05d}.png'.format(iteration)))

    # Report statistics after training
    vae.eval()
    utils.save_checkpoint({
        'state_dict': vae.state_dict(),
        'args': args
    }, args.save, 0)
    dataset_loader = DataLoader(train_loader.dataset,
                                batch_size=1000,
                                num_workers=1,
                                shuffle=False)
    # decompose the trained ELBO into its information-theoretic terms
    logpx, dependence, information, dimwise_kl, analytical_cond_kl, marginal_entropies, joint_entropy = \
        elbo_decomposition(vae, dataset_loader)
    torch.save(
        {
            'logpx': logpx,
            'dependence': dependence,
            'information': information,
            'dimwise_kl': dimwise_kl,
            'analytical_cond_kl': analytical_cond_kl,
            'marginal_entropies': marginal_entropies,
            'joint_entropy': joint_entropy
        }, os.path.join(args.save, 'elbo_decomposition.pth'))
    # eval('plot_vs_gt_' + args.dataset)(vae, dataset_loader.dataset, os.path.join(args.save, 'gt_vs_latent.png'))
    return vae