def eval_elbo_l(model, guide, num_pars, vec_pars, svi_arg_l, param_state_l):
    # return [elbo(`model`, `guide`, _,
    #              ELBO(num_particles=`num_pars`, vectorize_particles=`vec_pars`))
    #         when params are set to param_state,
    #         for param_state in `param_state_l`]

    svi = SVI(model,
              guide,
              pyro.optim.Adam({}),
              loss=Trace_ELBO(num_particles=num_pars,
                              vectorize_particles=vec_pars))
    elbo_l = []
    cnt, cnt_prog = 0, max(1, int(len(param_state_l) / 20))

    for param_state in param_state_l:
        # set params
        pyro.get_param_store().set_state(param_state)

        # compute elbo
        loss = svi.evaluate_loss(*svi_arg_l)
        elbo_l.append(-loss)

        # print
        cnt += 1
        if cnt % cnt_prog == 0: print('.', end='')

    return elbo_l
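A hypothetical usage sketch (not part of the original snippet): train a toy Beta-Bernoulli model for a few steps, snapshot the param store after each step, and then re-evaluate the ELBO at every snapshot with eval_elbo_l. All names below are illustrative.

import copy

import torch
import pyro
import pyro.distributions as dist
from torch.distributions import constraints
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam


def toy_model(data):
    p = pyro.sample('p', dist.Beta(1., 1.))
    with pyro.plate('data', len(data)):
        pyro.sample('obs', dist.Bernoulli(p), obs=data)


def toy_guide(data):
    alpha = pyro.param('alpha', torch.tensor(1.), constraint=constraints.positive)
    beta = pyro.param('beta', torch.tensor(1.), constraint=constraints.positive)
    pyro.sample('p', dist.Beta(alpha, beta))


pyro.clear_param_store()
data = torch.tensor([1., 1., 0., 1.])
svi = SVI(toy_model, toy_guide, Adam({'lr': 0.05}), loss=Trace_ELBO())

param_state_l = []
for _ in range(20):
    svi.step(data)
    # deep-copy each snapshot; the optimizer mutates the stored tensors in place
    param_state_l.append(copy.deepcopy(pyro.get_param_store().get_state()))

elbo_l = eval_elbo_l(toy_model, toy_guide, num_pars=10, vec_pars=False,
                     svi_arg_l=[data], param_state_l=param_state_l)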
Example #2
    def _valid_epoch(self, epoch):
        """
        Validate after training an epoch

        :param epoch: Integer, current training epoch.
        :return: A log that contains information about validation
        """
        elbo = TraceGraph_ELBO(vectorize_particles=False, num_particles=4)
        svi = SVI(self.model.model, self.model.guide, self.optimizer, loss=elbo)
        imps = ImportanceSampler(self.model.model, self.model.guide,
                                 num_samples=4)

        self.model.eval()
        self.valid_metrics.reset()
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(self.valid_data_loader):
                data, target = data.to(self.device), target.to(self.device)
                loss = svi.evaluate_loss(observations=data) / data.shape[0]
                imps.sample(observations=data)
                log_likelihood = imps.get_log_likelihood().item() / data.shape[0]
                log_marginal = imps.get_log_normalizer().item() / data.shape[0]

                self.writer.set_step((epoch - 1) * len(self.valid_data_loader) + batch_idx, 'valid')
                self.valid_metrics.update('loss', loss)
                self.valid_metrics.update('log_likelihood', log_likelihood)
                self.valid_metrics.update('log_marginal', log_marginal)

                for met in self.metric_ftns:
                    metric_val = met(self.model.model, self.model.guide, data, target, 4)
                    self.valid_metrics.update(met.__name__, metric_val)

                if self.log_images:
                    self.writer.add_image('input', make_grid(data.cpu(), nrow=8, normalize=True))

        return self.valid_metrics.result()
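For context, a self-contained sketch (toy model, illustrative names) of the distinction the validation loop above relies on: SVI.step applies a gradient update to the param store, while SVI.evaluate_loss only returns the loss estimate and leaves parameters untouched.

import torch
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam


def toy_model(data):
    loc = pyro.sample('loc', dist.Normal(0., 10.))
    with pyro.plate('data', len(data)):
        pyro.sample('obs', dist.Normal(loc, 1.), obs=data)


def toy_guide(data):
    q_loc = pyro.param('q_loc', torch.tensor(0.))
    pyro.sample('loc', dist.Normal(q_loc, 1.))


pyro.clear_param_store()
data = torch.randn(32) + 3.
svi = SVI(toy_model, toy_guide, Adam({'lr': 0.05}), loss=Trace_ELBO())

svi.evaluate_loss(data)            # estimates the loss, no parameter update
print(pyro.param('q_loc').item())  # still 0.0
svi.step(data)                     # one gradient step on the ELBO
print(pyro.param('q_loc').item())  # moved toward the data mean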
Example #3
def main(args):
    # load data
    print('loading training data...')
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            pass
        wget.download(
            'https://d2hg8soec8ck9v.cloudfront.net/datasets/faces_training.csv',
            dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    sparse_gamma_def = SparseGammaDEF()

    # Due to the special logic in the custom guide (e.g. parameter clipping), the custom guide
    # seems to be more amenable to higher learning rates.
    # Nevertheless, the easy guide performs the best (presumably because of numerical instabilities
    # related to the gamma distribution in the custom guide).
    learning_rate = 0.2 if args.guide in ['auto', 'easy'] else 4.5
    momentum = 0.05 if args.guide in ['auto', 'easy'] else 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # use one of our three different guide types
    if args.guide == 'auto':
        guide = AutoDiagonalNormal(sparse_gamma_def.model,
                                   init_loc_fn=init_to_feasible)
    elif args.guide == 'easy':
        guide = MyEasyGuide(sparse_gamma_def.model)
    else:
        guide = sparse_gamma_def.guide

    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(sparse_gamma_def.model, guide, opt, loss=TraceMeanField_ELBO())

    # we use svi_eval during evaluation; since we took care to write down our model in
    # a fully vectorized way, this computation can be done efficiently with large tensor ops
    svi_eval = SVI(sparse_gamma_def.model,
                   guide,
                   opt,
                   loss=TraceMeanField_ELBO(num_particles=args.eval_particles,
                                            vectorize_particles=True))

    print('\nbeginning training with %s guide...' % args.guide)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        # for the custom guide we clip parameters after each gradient step
        if args.guide == 'custom':
            clip_params()

        if (k % args.eval_frequency == 0 and k > 0) or k == args.num_epochs - 1:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
Example #4
def train_vae(
    model: BaseAutoEncoder,
    epochs: int,
    train_loader: DataLoader,
    test_loader: DataLoader,
    lr: float,
    loss_fn: callable,
) -> Tuple[Dict[str, List[float]], Dict[str, List[float]]]:
    """ Train VAE model.

    :param model: VAE model
    :param epochs: number of epochs to train
    :param train_loader: train dataset loader
    :param test_loader: test dataset loader
    :param lr: learning rate
    :param loss_fn: loss function to be applied
    :return: training results; see train_metrics and test_metrics
    """
    train_metrics = {
        "loss": [],
        "step": [],
    }
    test_metrics = {
        "loss": [],
        "step": [],
    }

    global_step = 0

    optimizer = optim.Adam({"lr": lr})
    svi = SVI(model.model, model.guide, optimizer, loss=loss_fn)

    for epoch in trange(epochs):
        print(f"Epoch: {epoch + 1} / {epochs}.")

        # training step
        pbar = tqdm(train_loader)
        for inputs, _ in pbar:  # we are not using labels for training
            inputs = inputs.view((-1, 28 * 28))

            loss = svi.step(inputs)

            train_metrics["loss"].append(loss / 32)
            train_metrics["step"].append(global_step)

            global_step += 1
            pbar.update(1)
        pbar.close()

        # validation step
        val_loss = 0.0

        for inputs, _ in test_loader:
            inputs = inputs.view((-1, 28 * 28))
            val_loss += svi.evaluate_loss(inputs)
        test_metrics["loss"].append(val_loss / len(test_loader.dataset))
        test_metrics["step"].append(global_step)

    return train_metrics, test_metrics
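A note on the optimizer above: pyro.optim.Adam accepts either a fixed dict of optimizer arguments, as in this example, or a callable returning per-parameter arguments. A minimal sketch of the callable form (the module/parameter names in the rule are made up):

from pyro.optim import Adam


def per_param_args(module_name, param_name):
    # hypothetical rule: use a smaller learning rate for decoder parameters
    if 'decoder' in module_name:
        return {'lr': 1e-4}
    return {'lr': 1e-3}


optimizer = Adam(per_param_args)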
Example #5
File: cvae.py  Project: yufengwa/pyro
def train(device, dataloaders, dataset_sizes, learning_rate, num_epochs,
          early_stop_patience, model_path, pre_trained_baseline_net):

    # clear param store
    pyro.clear_param_store()

    cvae_net = CVAE(200, 500, 500, pre_trained_baseline_net)
    cvae_net.to(device)
    optimizer = pyro.optim.Adam({"lr": learning_rate})
    svi = SVI(cvae_net.model, cvae_net.guide, optimizer, loss=Trace_ELBO())

    best_loss = np.inf
    early_stop_count = 0
    Path(model_path).parent.mkdir(parents=True, exist_ok=True)

    for epoch in range(num_epochs):
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            running_loss = 0.0
            num_preds = 0

            # Iterate over data.
            bar = tqdm(dataloaders[phase],
                       desc='CVAE Epoch {} {}'.format(epoch, phase).ljust(20))
            for i, batch in enumerate(bar):
                inputs = batch['input'].to(device)
                outputs = batch['output'].to(device)

                if phase == 'train':
                    loss = svi.step(inputs, outputs)
                else:
                    loss = svi.evaluate_loss(inputs, outputs)

                # statistics
                running_loss += loss / inputs.size(0)
                num_preds += 1
                if i % 10 == 0:
                    bar.set_postfix(loss='{:.2f}'.format(running_loss /
                                                         num_preds),
                                    early_stop_count=early_stop_count)

            epoch_loss = running_loss / dataset_sizes[phase]
            # deep copy the model
            if phase == 'val':
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    torch.save(cvae_net.state_dict(), model_path)
                    early_stop_count = 0
                else:
                    early_stop_count += 1

        if early_stop_count >= early_stop_patience:
            break

    # Save model weights
    cvae_net.load_state_dict(torch.load(model_path))
    cvae_net.eval()
    return cvae_net
Example #6
def main(args):
	train_loader, test_loader = get_data()

	vae = VAE(use_cuda=False)
	optimizer = Adam({"lr": 0.0001})
	svi = SVI(vae.model, vae.guide, optimizer, loss="ELBO")

	# setup visdom for visualization
	if args.visdom_flag:
		vis = visdom.Visdom()

	train_elbo = []
	test_elbo = []

	# training loop
	for epoch in range(args.num_epochs):
		# initialize loss accumulator
		epoch_loss = 0.
		# do a training epoch over each mini-batch x returned
		# by the data loader
		for _, (x, _) in enumerate(train_loader):
			# wrap the mini-batch in a PyTorch Variable
			x = Variable(x)
			# do ELBO gradient and accumulate loss
			epoch_loss += svi.step(x)

		# report training diagnostics
		normalizer_train = len(train_loader.dataset)
		total_epoch_loss_train = epoch_loss / normalizer_train
		train_elbo.append(total_epoch_loss_train)
		print("[epoch %03d]  average training loss: %.4f" % (epoch, total_epoch_loss_train))

		if epoch % args.test_frequency == 0:
			# initialize loss accumulator
			test_loss = 0.
			# compute the loss over the entire test set
			for i, (x, _) in enumerate(test_loader):
				# wrap the mini-batch in a PyTorch Variable
				x = Variable(x)
				# compute ELBO estimate and accumulate loss
				test_loss += svi.evaluate_loss(x)
				# visualize how well we're reconstructing them
				if i == 0:
					if args.visdom_flag:
						plot_vae_samples(vae, vis)
			# report test diagnostics
			normalizer_test = len(test_loader.dataset)
			total_epoch_loss_test = test_loss / normalizer_test
			test_elbo.append(total_epoch_loss_test)
			print("[epoch %03d]  average test loss: %.4f" % (epoch, total_epoch_loss_test))
Example #7
def main(args):
    # load data
    print('loading training data...')
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            pass
        wget.download('https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv', dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    sparse_gamma_def = SparseGammaDEF()

    # due to the special logic in the custom guide (e.g. parameter clipping), the custom guide
    # is more numerically stable and enables us to use a larger learning rate (and consequently
    # achieves better results)
    learning_rate = 0.2 if args.auto_guide else 4.5
    momentum = 0.05 if args.auto_guide else 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # either use an automatically constructed guide (see pyro.contrib.autoguide for details) or our custom guide
    guide = AutoDiagonalNormal(sparse_gamma_def.model) if args.auto_guide else sparse_gamma_def.guide

    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(sparse_gamma_def.model, guide, opt, loss=TraceMeanField_ELBO())

    # we use svi_eval during evaluation; since we took care to write down our model in
    # a fully vectorized way, this computation can be done efficiently with large tensor ops
    svi_eval = SVI(sparse_gamma_def.model, guide, opt,
                   loss=TraceMeanField_ELBO(num_particles=args.eval_particles, vectorize_particles=True))

    guide_description = 'automatically constructed' if args.auto_guide else 'custom'
    print('\nbeginning training with %s guide...' % guide_description)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        if not args.auto_guide:
            # for the custom guide we clip parameters after each gradient step
            sparse_gamma_def.clip_params()

        if (k % args.eval_frequency == 0 and k > 0) or k == args.num_epochs - 1:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
Example #8
File: vi.py  Project: BratChar/vipsy
    def fit(self, optim=Adam({'lr': 1e-3}), loss=Trace_ELBO(num_particles=1), max_iter=5000, random_instance=None):
        svi = SVI(self.model, self.guide, optim=optim, loss=loss)
        with trange(max_iter) as t:
            for i in t:
                t.set_description(f'Iteration: {i}')
                svi.step(self.data)
                loss = svi.evaluate_loss(self.data)
                with torch.no_grad():
                    postfix_kwargs = {}
                    if random_instance is not None:
                        g = pyro.param('g')
                        s = pyro.param('s')
                        postfix_kwargs.update({
                            'g': '{0}'.format((g - random_instance.g).abs().mean()),
                            's': '{0}'.format((s - random_instance.s).abs().mean())
                        })
                    t.set_postfix(loss=loss, **postfix_kwargs)
Example #9
def main(args):
    # load data
    print('loading training data...')
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            pass
        wget.download(
            'https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv',
            dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    learning_rate = 4.5
    momentum = 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(model, guide, opt, loss=TraceMeanField_ELBO())

    # we use svi_eval during evaluation; since we took care to write down our model in
    # a fully vectorized way, this computation can be done efficiently with large tensor ops
    svi_eval = SVI(model_original,
                   guide,
                   opt,
                   loss=TraceMeanField_ELBO(num_particles=args.eval_particles,
                                            vectorize_particles=True))

    guide_description = 'custom'
    print('\nbeginning training with %s guide...' % guide_description)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        clip_params()

        if (k % args.eval_frequency == 0 and k > 0) or k == args.num_epochs - 1:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
Example #10
File: vi.py  Project: BratChar/vipsy
    def fit(self, optim=Adam({'lr': 5e-2}), loss=Trace_ELBO(num_particles=1), max_iter=5000, random_instance=None):
        svi = SVI(self.model, self.guide, optim=optim, loss=loss)
        with trange(max_iter) as t:
            for i in t:
                t.set_description(f'Iteration: {i}')
                svi.step(self.data)
                loss = svi.evaluate_loss(self.data)
                with torch.no_grad():
                    postfix_kwargs = {}
                    if random_instance is not None:
                        b = pyro.param('b')
                        postfix_kwargs['threshold_error'] = '{0}'.format((b - random_instance.b).abs().mean())
                        if self._model in ('irt_2pl', 'irt_3pl', 'irt_4pl'):
                            a = pyro.param('a')
                            postfix_kwargs['slop_error'] = '{0}'.format((a - random_instance.a).abs().mean())
                        if self._model in ('irt_3pl', 'irt_4pl'):
                            c = pyro.param('c')
                            postfix_kwargs['guess_error'] = '{0}'.format((c - random_instance.c).abs().mean())
                        if self._model == 'irt_4pl':
                            d = pyro.param('d')
                            postfix_kwargs['slip_error'] = '{0}'.format((d - random_instance.d).abs().mean())
                    t.set_postfix(loss=loss, **postfix_kwargs)
Example #11
    def _valid_epoch(self, epoch):
        """
        Validate after training an epoch

        :param epoch: Integer, current training epoch.
        :return: A log that contains information about validation
        """
        if self.jit:
            elbo = JitTraceGraph_ELBO(vectorize_particles=False,
                                      num_particles=self.num_particles)
        else:
            elbo = TraceGraph_ELBO(vectorize_particles=False,
                                   num_particles=self.num_particles)
        svi = SVI(self.model.model,
                  self.model.guide,
                  self.optimizer,
                  loss=elbo)

        self.model.eval()
        self.valid_metrics.reset()
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(self.valid_data_loader):
                data, target = data.to(self.device), target.to(self.device)
                loss = svi.evaluate_loss(observations=data)

                self.writer.set_step(
                    (epoch - 1) * len(self.valid_data_loader) + batch_idx,
                    'valid')
                self.valid_metrics.update('loss', loss)  # evaluate_loss already returns a Python float
                for met in self.metric_ftns:
                    self.valid_metrics.update(met.__name__, met(target))
                self.writer.add_image(
                    'input', make_grid(data.cpu(), nrow=8, normalize=True))

        # add histogram of model parameters to the tensorboard
        for name, p in self.model.named_parameters():
            self.writer.add_histogram(name, p, bins='auto')
        return self.valid_metrics.result()
Example #12
def main(args):
    # clear param store
    pyro.clear_param_store()

    # setup MNIST data loaders
    # train_loader, test_loader
    train_loader, test_loader = setup_data_loaders(MNIST,
                                                   use_cuda=args.cuda,
                                                   batch_size=256)

    # setup the VAE
    vae = VAE(use_cuda=args.cuda)

    # setup the optimizer
    adam_args = {"lr": args.learning_rate}
    optimizer = Adam(adam_args)

    # setup the inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(vae.model, vae.guide, optimizer, loss=elbo)

    # setup visdom for visualization
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo = []
    test_elbo = []
    # training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do a training epoch over each mini-batch x returned
        # by the data loader
        for x, _ in train_loader:
            # if on GPU put mini-batch into CUDA memory
            if args.cuda:
                x = x.cuda()
            # do ELBO gradient and accumulate loss
            epoch_loss += svi.step(x)

        # report training diagnostics
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d]  average training loss: %.4f" %
              (epoch, total_epoch_loss_train))

        if epoch % args.test_frequency == 0:
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for i, (x, _) in enumerate(test_loader):
                # if on GPU put mini-batch into CUDA memory
                if args.cuda:
                    x = x.cuda()
                # compute ELBO estimate and accumulate loss
                test_loss += svi.evaluate_loss(x)

                # pick three random test images from the first mini-batch and
                # visualize how well we're reconstructing them
                if i == 0:
                    if args.visdom_flag:
                        plot_vae_samples(vae, vis)
                        reco_indices = np.random.randint(0, x.shape[0], 3)
                        for index in reco_indices:
                            test_img = x[index, :]
                            reco_img = vae.reconstruct_img(test_img)
                            vis.image(test_img.reshape(
                                28, 28).detach().cpu().numpy(),
                                      opts={'caption': 'test image'})
                            vis.image(reco_img.reshape(
                                28, 28).detach().cpu().numpy(),
                                      opts={'caption': 'reconstructed image'})

            # report test diagnostics
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d]  average test loss: %.4f" %
                  (epoch, total_epoch_loss_test))

        if epoch == args.tsne_iter:
            mnist_test_tsne(vae=vae, test_loader=test_loader)
            plot_llk(np.array(train_elbo), np.array(test_elbo))

    return vae
Example #13
            train_loss += svi.step(x)

            if args.verbose and i % 1e3 == 0:
                print(">>> [{:03d}%] current training ELBO: {:.3f}".format(
                    np.round(100 * (i+1) * args.batch_size / len(train_loader.dataset)).astype(int),
                    train_loss))

        # testing
        for i, (x, _) in enumerate(test_loader):
            if opts['use_cuda']:
                x = x.cuda()

            # wrap mini-batch in pytorch variable,
            # compute ELBO estimate and accumulate loss
            x = Variable(x)
            test_loss += svi.evaluate_loss(x)

            if args.verbose and i % 1e3 == 0:
                print(">>> [{:03d}%] current testing ELBO: {:.3f}".format(
                    np.round(100 * (i+1) * args.batch_size / len(test_loader.dataset)).astype(int),
                    test_loss))

        # record mean training and testing losses
        train_elbo[epoch] = -train_loss / len(train_loader.dataset)
        test_elbo[epoch] = -test_loss / len(test_loader.dataset)

        # logging
        log = '[Epoch {:03d}/{:03d}] Training ELBO: {:.4f}, Testing ELBO: {:.4f}, Mins: {:.1f}'.format(
            epoch + 1, args.epochs, train_elbo[epoch], test_elbo[epoch],
            (dt.now() - start_time).total_seconds() / 60)
        print('>>> {}'.format(log))
Example #14
            for i, data in enumerate(train_loader):
                x, targets = data
                targets = targets.view(-1)
                loss = svi.step(x.to(device), targets.to(device))
                train_props['loss'] += loss
            L = len(train_loader)
            train_props = {k: v / L for k, v in train_props.items()}

            cv_props = {k: 0 for k in status_properties}
            for j, data in enumerate(cv_loader):
                x, targets = data
                targets = targets.view(-1)
                x = x.to(device)
                targets = targets.to(device)
                preds = clf.predict(x)
                cv_props['loss'] += svi.evaluate_loss(x, targets)
                cv_props['accuracy'] += accuracy(preds, targets)
            L = len(cv_loader)
            cv_props = {k: v / L for k, v in cv_props.items()}
            if cv_props['loss'] < best_loss:
                print('Saving state')
                state = {
                    'state_dict': clf.state_dict(),
                    'train_props': train_props,
                    'cv_props': cv_props
                }
                torch.save(state, 'nn_state.pth.tar')
                torch.save(opt, 'nn_opt.pth.tar')
            status(epoch, train_props, cv_props)
    except KeyboardInterrupt:
Example #15
        train_elbo.append(-total_epoch_loss_train)
    
    
        # --------------------------Do testing for each epoch here--------------------------------
        test_loss = 0.
        # compute the loss over the entire test set
        for x_test,y_test in test_loader:
            x_test = x_test.cuda()
            y_test = y_test.cuda()
            # compute ELBO estimate and accumulate loss
            # convert the integer class labels to one-hot vectors (2 classes)
            y_test_2 = y_test.reshape(-1).cpu().numpy().astype(int)
            labels_y_test = torch.from_numpy(np.eye(2)[y_test_2])

            test_loss += svi.evaluate_loss(x_test.reshape(-1, 10000), labels_y_test.cuda().float())
        
        
        normalizer_test = len(test_loader.dataset)
        total_epoch_loss_test = test_loss / normalizer_test
    
        print("[epoch %03d]  average training loss: %.4f testing loss: %.4f" % (epoch, total_epoch_loss_train,total_epoch_loss_test))
    df['learning_rate'][count] = LEARNING_RATE
    df['train_loss'][count] = total_epoch_loss_train
    count = count + 1

    print('+++++++++++++++++++++++++++++++++++++Incrementing Learning Rate++++++++++++++++++++++++++++++++++++')
    learning_rates.append(LEARNING_RATE)
    train_losses.append(total_epoch_loss_train)
    
    df.to_csv('data_lr_experiment_sup_d'+str(d)+'.csv')
Example #16
File: vae.py  Project: Magica-Chen/pyro
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-n', '--num-epochs', default=101, type=int, help='number of training epochs')
    parser.add_argument('-tf', '--test-frequency', default=5, type=int, help='how often we evaluate the test set')
    parser.add_argument('-lr', '--learning-rate', default=1.0e-3, type=float, help='learning rate')
    parser.add_argument('-b1', '--beta1', default=0.95, type=float, help='beta1 adam hyperparameter')
    parser.add_argument('--cuda', action='store_true', default=False, help='whether to use cuda')
    parser.add_argument('-visdom', '--visdom_flag', default=False, help='Whether plotting in visdom is desired')
    parser.add_argument('-i-tsne', '--tsne_iter', default=100, type=int, help='epoch when tsne visualization runs')
    args = parser.parse_args()

    # setup MNIST data loaders
    # train_loader, test_loader
    train_loader, test_loader = setup_data_loaders(MNIST, use_cuda=args.cuda, batch_size=256)

    # setup the VAE
    vae = VAE(use_cuda=args.cuda)

    # setup the optimizer
    adam_args = {"lr": args.learning_rate}
    optimizer = Adam(adam_args)

    # setup the inference algorithm
    svi = SVI(vae.model, vae.guide, optimizer, loss="ELBO")

    # setup visdom for visualization
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo = []
    test_elbo = []
    # training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do a training epoch over each mini-batch x returned
        # by the data loader
        for _, (x, _) in enumerate(train_loader):
            # if on GPU put mini-batch into CUDA memory
            if args.cuda:
                x = x.cuda()
            # wrap the mini-batch in a PyTorch Variable
            x = Variable(x)
            # do ELBO gradient and accumulate loss
            epoch_loss += svi.step(x)

        # report training diagnostics
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d]  average training loss: %.4f" % (epoch, total_epoch_loss_train))

        if epoch % args.test_frequency == 0:
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for i, (x, _) in enumerate(test_loader):
                # if on GPU put mini-batch into CUDA memory
                if args.cuda:
                    x = x.cuda()
                # wrap the mini-batch in a PyTorch Variable
                x = Variable(x)
                # compute ELBO estimate and accumulate loss
                test_loss += svi.evaluate_loss(x)

                # pick three random test images from the first mini-batch and
                # visualize how well we're reconstructing them
                if i == 0:
                    if args.visdom_flag:
                        plot_vae_samples(vae, vis)
                        reco_indices = np.random.randint(0, x.size(0), 3)
                        for index in reco_indices:
                            test_img = x[index, :]
                            reco_img = vae.reconstruct_img(test_img)
                            vis.image(test_img.contiguous().view(28, 28).data.cpu().numpy(),
                                      opts={'caption': 'test image'})
                            vis.image(reco_img.contiguous().view(28, 28).data.cpu().numpy(),
                                      opts={'caption': 'reconstructed image'})

            # report test diagnostics
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d]  average test loss: %.4f" % (epoch, total_epoch_loss_test))

        if epoch == args.tsne_iter:
            mnist_test_tsne(vae=vae, test_loader=test_loader)
            plot_llk(np.array(train_elbo), np.array(test_elbo))

    return vae
Example #17
def evaluate(dmm: nn.Module, svi: SVI, data_loader: DataLoader) -> float:
    dmm.eval()
    return sum(svi.evaluate_loss(x)
               for x in data_loader) / len(data_loader.dataset)
Example #18
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-n',
                        '--num-epochs',
                        default=101,
                        type=int,
                        help='number of training epochs')
    parser.add_argument('-tf',
                        '--test-frequency',
                        default=5,
                        type=int,
                        help='how often we evaluate the test set')
    parser.add_argument('-lr',
                        '--learning-rate',
                        default=1.0e-3,
                        type=float,
                        help='learning rate')
    parser.add_argument('-b1',
                        '--beta1',
                        default=0.95,
                        type=float,
                        help='beta1 adam hyperparameter')
    parser.add_argument('--cuda',
                        action='store_true',
                        default=False,
                        help='whether to use cuda')
    parser.add_argument('-visdom',
                        '--visdom_flag',
                        default=False,
                        help='Whether plotting in visdom is desired')
    parser.add_argument('-i-tsne',
                        '--tsne_iter',
                        default=100,
                        type=int,
                        help='epoch when tsne visualization runs')
    args = parser.parse_args()

    # setup MNIST data loaders
    # train_loader, test_loader
    train_loader, test_loader = setup_data_loaders(MNIST,
                                                   use_cuda=args.cuda,
                                                   batch_size=256)

    # setup the VAE
    vae = VAE(use_cuda=args.cuda)

    # setup the optimizer
    adam_args = {"lr": args.learning_rate}
    optimizer = Adam(adam_args)

    # setup the inference algorithm
    svi = SVI(vae.model, vae.guide, optimizer, loss="ELBO")

    # setup visdom for visualization
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo = []
    test_elbo = []
    # training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do a training epoch over each mini-batch x returned
        # by the data loader
        for _, (x, _) in enumerate(train_loader):
            # if on GPU put mini-batch into CUDA memory
            if args.cuda:
                x = x.cuda()
            # wrap the mini-batch in a PyTorch Variable
            x = Variable(x)
            # do ELBO gradient and accumulate loss
            epoch_loss += svi.step(x)

        # report training diagnostics
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d]  average training loss: %.4f" %
              (epoch, total_epoch_loss_train))

        if epoch % args.test_frequency == 0:
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for i, (x, _) in enumerate(test_loader):
                # if on GPU put mini-batch into CUDA memory
                if args.cuda:
                    x = x.cuda()
                # wrap the mini-batch in a PyTorch Variable
                x = Variable(x)
                # compute ELBO estimate and accumulate loss
                test_loss += svi.evaluate_loss(x)

                # pick three random test images from the first mini-batch and
                # visualize how well we're reconstructing them
                if i == 0:
                    if args.visdom_flag:
                        plot_vae_samples(vae, vis)
                        reco_indices = np.random.randint(0, x.size(0), 3)
                        for index in reco_indices:
                            test_img = x[index, :]
                            reco_img = vae.reconstruct_img(test_img)
                            vis.image(test_img.contiguous().view(
                                28, 28).data.cpu().numpy(),
                                      opts={'caption': 'test image'})
                            vis.image(reco_img.contiguous().view(
                                28, 28).data.cpu().numpy(),
                                      opts={'caption': 'reconstructed image'})

            # report test diagnostics
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d]  average test loss: %.4f" %
                  (epoch, total_epoch_loss_test))

        if epoch == args.tsne_iter:
            mnist_test_tsne(vae=vae, test_loader=test_loader)
            plot_llk(np.array(train_elbo), np.array(test_elbo))

    return vae
Example #19
              iaf_dim=50,
              use_cuda=True)

    learning_rate = 0.01
    beta1 = 0.9
    beta2 = 0.999
    clip_norm = 10.0
    lr_decay = 1.0
    weight_decay = 0
    adam_params = {
        "lr": learning_rate,
        "betas": (beta1, beta2),
        "clip_norm": clip_norm,
        "lrd": lr_decay,
        "weight_decay": weight_decay
    }
    adam = ClippedAdam(adam_params)

    elbo = Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)
    for i in range(100):
        loss = svi.step(input_tensor, input_tensor_reversed, input_tensor_mask)
        val_nll = svi.evaluate_loss(input_tensor, input_tensor_reversed,
                                    input_tensor_mask)
        print(val_nll)
        _, _, loss_loc, loss_scale = do_prediction(dmm, pred_tensor,
                                                   pred_tensor_reversed,
                                                   pred_tensor_mask, 5,
                                                   ground_truth)
        print(loss_loc, loss_scale)
Example #20
class SVIExperiment(BaseCovariateExperiment):
    def __init__(self, hparams, pyro_model: BaseSEM):
        super().__init__(hparams, pyro_model)

        self.svi_loss = CustomELBO(num_particles=hparams.num_svi_particles)

        self._build_svi()

    def _build_svi(self, loss=None):
        def per_param_callable(module_name, param_name):
            params = {
                'eps': 1e-5,
                'amsgrad': self.hparams.use_amsgrad,
                'weight_decay': self.hparams.l2
            }
            if 'flow_components' in module_name or 'sex_logits' in param_name:
                params['lr'] = self.hparams.pgm_lr
            else:
                params['lr'] = self.hparams.lr

            print(
                f'building opt for {module_name} - {param_name} with p: {params}'
            )
            return params

        if loss is None:
            loss = self.svi_loss

        if self.hparams.use_cf_guide:

            def guide(*args, **kwargs):
                return self.pyro_model.counterfactual_guide(
                    *args,
                    **kwargs,
                    counterfactual_type=self.hparams.cf_elbo_type)

            self.svi = SVI(self.pyro_model.svi_model, guide,
                           Adam(per_param_callable), loss)
        else:
            self.svi = SVI(self.pyro_model.svi_model,
                           self.pyro_model.svi_guide, Adam(per_param_callable),
                           loss)
        self.svi.loss_class = loss

    def backward(self, *args, **kwargs):
        pass  # No loss to backpropagate since we're using Pyro's optimisation machinery

    def print_trace_updates(self, batch):
        with torch.no_grad():
            print('Traces:\n' + ('#' * 10))

            guide_trace = pyro.poutine.trace(
                self.pyro_model.svi_guide).get_trace(**batch)
            model_trace = pyro.poutine.trace(
                pyro.poutine.replay(self.pyro_model.svi_model,
                                    trace=guide_trace)).get_trace(**batch)

            guide_trace = pyro.poutine.util.prune_subsample_sites(guide_trace)
            model_trace = pyro.poutine.util.prune_subsample_sites(model_trace)

            model_trace.compute_log_prob()
            guide_trace.compute_score_parts()

            print(f'model: {model_trace.nodes.keys()}')
            for name, site in model_trace.nodes.items():
                if site["type"] == "sample":
                    fn = site['fn']
                    if isinstance(fn, Independent):
                        fn = fn.base_dist
                    print(f'{name}: {fn} - {fn.support}')
                    log_prob_sum = site["log_prob_sum"]
                    is_obs = site["is_observed"]
                    print(
                        f'model - log p({name}) = {log_prob_sum} | obs={is_obs}'
                    )
                    if torch.isnan(log_prob_sum):
                        value = site['value'][0]
                        conc0 = fn.concentration0
                        conc1 = fn.concentration1

                        print(f'got:\n{value}\n{conc0}\n{conc1}')

                        raise Exception()

            print(f'guide: {guide_trace.nodes.keys()}')

            for name, site in guide_trace.nodes.items():
                if site["type"] == "sample":
                    fn = site['fn']
                    if isinstance(fn, Independent):
                        fn = fn.base_dist
                    print(f'{name}: {fn} - {fn.support}')
                    entropy = site["score_parts"].entropy_term.sum()
                    is_obs = site["is_observed"]
                    print(f'guide - log q({name}) = {entropy} | obs={is_obs}')

    def get_trace_metrics(self, batch):
        metrics = {}

        model = self.svi.loss_class.trace_storage['model']
        guide = self.svi.loss_class.trace_storage['guide']

        metrics['log p(x)'] = model.nodes['x']['log_prob'].mean()
        metrics['log p(age)'] = model.nodes['age']['log_prob'].mean()
        metrics['log p(sex)'] = model.nodes['sex']['log_prob'].mean()
        metrics['log p(ventricle_volume)'] = model.nodes['ventricle_volume'][
            'log_prob'].mean()
        metrics['log p(brain_volume)'] = model.nodes['brain_volume'][
            'log_prob'].mean()
        metrics['p(z)'] = model.nodes['z']['log_prob'].mean()
        metrics['q(z)'] = guide.nodes['z']['log_prob'].mean()
        metrics['log p(z) - log q(z)'] = metrics['p(z)'] - metrics['q(z)']

        return metrics

    def prep_batch(self, batch):
        x = batch['image'] * 255.
        age = batch['age'].unsqueeze(1).float()
        sex = batch['sex'].unsqueeze(1).float()
        ventricle_volume = batch['ventricle_volume'].unsqueeze(1).float()
        brain_volume = batch['brain_volume'].unsqueeze(1).float()

        x = x.float()

        if self.training:
            x += torch.rand_like(x)

        return {
            'x': x,
            'age': age,
            'sex': sex,
            'ventricle_volume': ventricle_volume,
            'brain_volume': brain_volume
        }

    def training_step(self, batch, batch_idx):
        batch = self.prep_batch(batch)

        if self.hparams.validate:
            print('Validation:')
            self.print_trace_updates(batch)

        loss = self.svi.step(**batch)

        metrics = self.get_trace_metrics(batch)

        if np.isnan(loss):
            self.logger.experiment.add_text(
                'nan', f'nand at {self.current_epoch}:\n{metrics}')
            raise ValueError(
                'loss went to nan with metrics:\n{}'.format(metrics))

        tensorboard_logs = {('train/' + k): v for k, v in metrics.items()}
        tensorboard_logs['train/loss'] = loss

        return {'loss': torch.Tensor([loss]), 'log': tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        batch = self.prep_batch(batch)

        loss = self.svi.evaluate_loss(**batch)

        metrics = self.get_trace_metrics(batch)

        return {'loss': loss, **metrics}

    def test_step(self, batch, batch_idx):
        batch = self.prep_batch(batch)

        loss = self.svi.evaluate_loss(**batch)

        metrics = self.get_trace_metrics(batch)

        samples = self.build_test_samples(batch)

        return {'loss': loss, **metrics, 'samples': samples}

    @classmethod
    def add_arguments(cls, parser):
        parser = super().add_arguments(parser)

        parser.add_argument(
            '--num_svi_particles',
            default=4,
            type=int,
            help="number of particles to use for ELBO (default: %(default)s)")
        parser.add_argument(
            '--num_sample_particles',
            default=32,
            type=int,
            help=
            "number of particles to use for MC sampling (default: %(default)s)"
        )
        parser.add_argument(
            '--use_cf_guide',
            default=False,
            action='store_true',
            help="whether to use counterfactual guide (default: %(default)s)")
        parser.add_argument(
            '--cf_elbo_type',
            default=-1,
            choices=[-1, 0, 1, 2],
            help=
            "-1: randomly select per batch, 0: shuffle thickness, 1: shuffle intensity, 2: shuffle both (default: %(default)s)"
        )

        return parser
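get_trace_metrics above reads per-site log-probabilities from self.svi.loss_class.trace_storage, so CustomELBO must stash the traces it builds. A hypothetical sketch of such a loss class (the real one may differ):

from pyro.infer import Trace_ELBO


class CustomELBO(Trace_ELBO):
    # stores the most recent model/guide traces so they can be inspected after a step
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.trace_storage = {'model': None, 'guide': None}

    def _get_trace(self, model, guide, args, kwargs):
        model_trace, guide_trace = super()._get_trace(model, guide, args, kwargs)
        self.trace_storage['model'] = model_trace
        self.trace_storage['guide'] = guide_trace
        return model_trace, guide_trace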
Example #21
                train_props['accuracy'] += a.item()
                # train_props['accuracy_1'] += a1
                # train_props['accuracy_2'] += a2
                # train_props['accuracy_3'] += a3
            L = len(train_loader)
            train_props = {k:v/L for k,v in train_props.items()}

            cv_props = {k:0 for k in status_properties}
            for j, data in enumerate(cv_loader):
                x, targets = data
                targets = targets.view(-1)
                x = x.to(device)
                targets = targets.to(device)
                clf.eval()
                preds = clf.predict(x)
                cv_props['loss'] += svi.evaluate_loss(x, targets)
                # preds = F.log_softmax(preds, dim=1)
                # preds = torch.argmax(preds, dim=1)
                # a, a1, a2, a3 = accuracy(preds, targets)
                # a = accuracy(preds, targets)
                a = (preds == targets).float().mean()
                cv_props['accuracy'] += a.item()
                # cv_props['accuracy_1'] += a1
                # cv_props['accuracy_2'] += a2
                # cv_props['accuracy_3'] += a3
            L = len(cv_loader)
            cv_props = {k:v/L for k,v in cv_props.items()}
            # if cv_props['loss'] < best_loss:
            if cv_props['accuracy'] > best_acc:
                print('Saving state')
                state = {'state_dict': clf.state_dict(), 'train_props': train_props, 'cv_props': cv_props, 'epoch': epoch}
Example #22
class SVIExperiment(BaseCovariateExperiment):
    def __init__(self, hparams, pyro_model: BaseSEM):
        super().__init__(hparams, pyro_model)
        if hparams.tracegraph_elbo:
            self.svi_loss = StorageTraceGraph_ELBO(
                num_particles=hparams.num_svi_particles)
        else:
            self.svi_loss = StorageTrace_ELBO(
                num_particles=hparams.num_svi_particles)
        self._build_svi()

    def _build_svi(self, loss=None):
        def per_param_callable(module_name, param_name):
            if self.hparams.use_adagrad_rmsprop:
                params = {
                    'eta': self.hparams.eta,
                    'delta': self.hparams.delta,
                    't': self.hparams.t
                }
            else:
                params = {
                    'weight_decay': self.hparams.weight_decay,
                    'betas': self.hparams.betas,
                    'eps': 1e-5
                }
                if any([(pn in module_name)
                        for pn in ('prior_flow', 'posterior_flow')]):
                    params['lr'] = self.hparams.lr
                elif 'affine' in module_name:
                    params['lr'] = self.hparams.lr
                    params['weight_decay'] = 0.
                elif 'flow_components' in module_name:
                    params['lr'] = self.hparams.pgm_lr
                elif 'sex_logits' in param_name:
                    params['lr'] = self.hparams.pgm_lr
                    params['weight_decay'] = 0.
                elif 'decoder' in module_name and 'logstd_head' in param_name:
                    params['weight_decay'] = self.hparams.logstd_weight_decay
                else:
                    params['lr'] = self.hparams.lr
                logger.info(
                    f'building opt for {module_name} - {param_name} with p: {params}'
                )
            return params

        def per_param_clip_args(module_name, param_name):
            clip_args = defaultdict(lambda: None)
            if any([(pn in module_name)
                    for pn in ('prior_flow', 'posterior_flow')]):
                clip_args['clip_norm'] = self.hparams.flow_clip_norm
            elif any([(pn in param_name)
                      for pn in ('affine', 'sex_logits', 'flow_components')]):
                clip_args['clip_norm'] = self.hparams.pgm_clip_norm
            else:
                clip_args['clip_norm'] = self.hparams.clip_norm
            logger.info(
                f'building clip args for {module_name} - {param_name} with p: {clip_args}'
            )
            return clip_args

        if loss is None:
            loss = self.svi_loss

        optimizer = AdagradRMSProp if self.hparams.use_adagrad_rmsprop else AdamW
        verbose = self.hparams.verbosity > 1  # only print lr in debug mode
        if self.hparams.use_exponential_lr:
            self.scheduler = ExponentialLR(
                {
                    'optimizer': optimizer,
                    'optim_args': per_param_callable,
                    'gamma': self.hparams.lrd,
                    'verbose': verbose
                },
                clip_args=per_param_clip_args)
        else:
            self.scheduler = OneCycleLR(
                {
                    'optimizer': optimizer,
                    'optim_args': per_param_callable,
                    'epochs': self.hparams.n_epochs,
                    'steps_per_epoch': self._steps_per_epoch(),
                    'pct_start': self.hparams.pct_start,
                    'div_factor': self.hparams.div_factor,
                    'final_div_factor': self.hparams.final_div_factor,
                    'verbose': verbose
                },
                clip_args=per_param_clip_args)
        if self.hparams.use_cf_guide:

            def guide(*args, **kwargs):
                return self.pyro_model.counterfactual_guide(
                    *args,
                    **kwargs,
                    counterfactual_type=self.hparams.cf_elbo_type)

            self.svi = SVI(self.pyro_model.svi_model, guide, self.scheduler,
                           loss)
        else:
            self.svi = SVI(self.pyro_model.svi_model,
                           self.pyro_model.svi_guide, self.scheduler, loss)
        self.svi.loss_class = loss

    def backward(self, *args, **kwargs):
        pass  # No loss to backpropagate since we're using Pyro's optimisation machinery

    def print_trace_updates(self, batch):
        with torch.no_grad():
            logger.info('Traces:\n' + ('#' * 10))

            guide_trace = pyro.poutine.trace(
                self.pyro_model.svi_guide).get_trace(batch)
            model_trace = pyro.poutine.trace(
                pyro.poutine.replay(self.pyro_model.svi_model,
                                    trace=guide_trace)).get_trace(batch)

            guide_trace = pyro.poutine.util.prune_subsample_sites(guide_trace)
            model_trace = pyro.poutine.util.prune_subsample_sites(model_trace)

            model_trace.compute_log_prob()
            guide_trace.compute_score_parts()

            logging.info(f'model: {model_trace.nodes.keys()}')
            for name, site in model_trace.nodes.items():
                if site["type"] == "sample":
                    fn = site['fn']
                    if isinstance(fn, Independent):
                        fn = fn.base_dist
                    try:
                        logging.info(f'{name}: {fn} - {fn.support}')
                    except NotImplementedError:
                        logging.info(f'{name}: {fn}')
                    log_prob_sum = site["log_prob_sum"]
                    is_obs = site["is_observed"]
                    logging.info(
                        f'model - log p({name}) = {log_prob_sum} | obs={is_obs}'
                    )
                    if torch.isnan(log_prob_sum):
                        value = site['value'][0]
                        conc0 = fn.concentration0
                        conc1 = fn.concentration1
                        raise RuntimeError(
                            f'Error: \n{value}\n{conc0}\n{conc1}')

            logging.info(f'guide: {guide_trace.nodes.keys()}')

            for name, site in guide_trace.nodes.items():
                if site["type"] == "sample":
                    fn = site['fn']
                    if isinstance(fn, Independent):
                        fn = fn.base_dist
                    try:
                        logging.info(f'{name}: {fn} - {fn.support}')
                    except NotImplementedError:
                        logging.info(f'{name}: {fn}')
                    entropy = site["score_parts"].entropy_term.sum()
                    is_obs = site["is_observed"]
                    logging.info(
                        f'guide - log q({name}) = {entropy} | obs={is_obs}')

    def get_trace_metrics(self, batch):
        metrics = {}
        model = self.svi.loss_class.trace_storage['model']
        guide = self.svi.loss_class.trace_storage['guide']
        for k in self.required_data:
            metrics[f'log p({k})'] = model.nodes[k]['log_prob'].mean()
        if self.pyro_model.n_levels > 0:
            metrics['log p(z) - log q(z)'] = 0.
            for i in range(self.pyro_model.n_levels):
                metrics[f'log p(z{i})'] = model.nodes[f'z{i}'][
                    'log_prob'].mean()
                metrics[f'log q(z{i})'] = guide.nodes[f'z{i}'][
                    'log_prob'].mean()
                metrics['log p(z) - log q(z)'] += metrics[
                    f'log p(z{i})'] - metrics[f'log q(z{i})']
        else:
            metrics['log p(z)'] = model.nodes['z']['log_prob'].mean()
            metrics['log q(z)'] = guide.nodes['z']['log_prob'].mean()
            metrics['log p(z) - log q(z)'] = metrics['log p(z)'] - metrics[
                'log q(z)']
        return metrics

    def _theis_noise(self, obs):
        """ add noise to discrete variables per Theis 2016 """
        if self.training:
            obs['x'] += (torch.rand_like(obs['x']) - 0.5)
            obs['slice_number'] += (torch.rand_like(obs['slice_number']) - 0.5)
            obs['duration'] += (torch.rand_like(obs['duration']) - 0.5)
            obs['duration'].clamp_(min=1e-4)
            obs['edss'] += ((torch.rand_like(obs['edss']) / 2.) - 0.25)
            obs['edss'].clamp_(min=1e-4)
        return obs

    @property
    def pseudo3d(self):
        return self.pyro_model.pseudo3d

    def prep_batch(self, batch):
        x = 255. * batch['image'].float()  # multiply by 255 b/c preprocess tfms
        out = dict(x=x)
        for k in self.required_data:
            if k in batch:
                out[k] = batch[k].unsqueeze(1).float()
        out = self._theis_noise(out)
        return out

    def _steps_per_epoch(self):
        return len(self.calabresi_train
                   ) // self.train_batch_size  # integer div b/c drop_last used

    def _set_annealing_factor(self, batch_idx=None):
        steps_per_epoch = self._steps_per_epoch()
        if batch_idx is None:
            batch_idx = steps_per_epoch
        not_in_sanity_check = self.hparams.annealing_epochs > 0
        in_annealing_epochs = self.current_epoch < self.hparams.annealing_epochs
        n_levels = max(self.pyro_model.n_levels, 1)
        self.pyro_model.annealing_factor = [1. for _ in range(n_levels)]
        for i in range(n_levels):
            if not_in_sanity_check and in_annealing_epochs and self.training:
                min_af = self.hparams.min_annealing_factor[i]
                max_af = self.hparams.max_annealing_factor[i]
                self.pyro_model.annealing_factor[i] = min_af + (max_af - min_af) * \
                                   (float(batch_idx + self.current_epoch * steps_per_epoch + 1) /
                                    float(self.hparams.annealing_epochs * steps_per_epoch))
            else:
                self.pyro_model.annealing_factor[
                    i] = self.hparams.max_annealing_factor[i]
            if self.training:
                self.log(f'annealing_factor/af{i}',
                         self.pyro_model.annealing_factor[i],
                         on_step=False,
                         on_epoch=True)

    def training_step(self, batch, batch_idx):
        self._set_annealing_factor(batch_idx)
        batch = self.prep_batch(batch)
        if self.hparams.validate:
            logging.info('Validation:')
            self.print_trace_updates(batch)
        loss = self.svi.step(batch)
        self.scheduler.step()
        loss = torch.as_tensor(loss)
        self.log('train_loss', loss, on_step=False, on_epoch=True)
        metrics = self.get_trace_metrics(batch)
        if np.isnan(loss):
            self.logger.experiment.add_text(
                'nan', f'nand at {self.current_epoch}:\n{metrics}')
            raise ValueError(
                'loss went to nan with metrics:\n{}'.format(metrics))
        for k, v in metrics.items():
            self.log('train/' + k, v, on_step=False, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        self._set_annealing_factor()
        batch = self.prep_batch(batch)
        loss = self.svi.evaluate_loss(batch)
        self.log('val_loss', loss, on_step=False, on_epoch=True)
        metrics = self.get_trace_metrics(batch)
        for k, v in metrics.items():
            self.log('val/' + k, v, on_step=False, on_epoch=True)
        return metrics

    def test_step(self, batch, batch_idx):
        import nibabel as nib
        self._set_annealing_factor()
        subject = int(batch['subject'][0])
        scan = int(batch['scan'][0])
        batch = self.prep_batch(batch)
        loss = self.svi.evaluate_loss(batch)
        self.log('test_loss', loss, on_step=False, on_epoch=True)
        metrics = self.get_trace_metrics(batch)
        for k, v in metrics.items():
            self.log('test/' + k, v, on_step=False, on_epoch=True)
        samples = self.build_test_samples(batch)
        for intervention, data in samples.items():
            cf = data['x'].detach().cpu().numpy()
            if self.hparams.pseudo3d:
                cf = cf[:, 1, ...]  # get the middle slices
            cf = cf.squeeze()
            fn = os.path.join(self.hparams.test_dir,
                              f'{subject}_{scan}_{intervention}.nii.gz')
            nib.Nifti1Image(cf, None).to_filename(fn)
        return {'samples': samples, 'metrics': metrics}

    @classmethod
    def add_arguments(cls, parser):
        parser = super().add_arguments(parser)
        parser.add_argument(
            '--num-svi-particles',
            default=4,
            type=int,
            help="number of particles to use for ELBO (default: %(default)s)")
        parser.add_argument(
            '--num-sample-particles',
            default=32,
            type=int,
            help=
            "number of particles to use for MC sampling (default: %(default)s)"
        )
        parser.add_argument(
            '--use-cf-guide',
            default=False,
            action='store_true',
            help="whether to use counterfactual guide (default: %(default)s)")
        parser.add_argument(
            '--cf-elbo-type',
            default=-1,
            type=int,
            choices=[-1, 0, 1, 2],
            help=
            "-1: randomly select per batch, 0: shuffle thickness, 1: shuffle intensity, 2: shuffle both (default: %(default)s)"
        )
        parser.add_argument(
            '--annealing-epochs',
            default=50,
            type=int,
            help="anneal kl div in z for this # epochs (default: %(default)s)")
        parser.add_argument(
            '--min-annealing-factor',
            default=[0.2],
            type=float,
            nargs='+',
            help=
            "anneal kl div in z starting here (per level for hierarchical) (default: %(default)s)"
        )
        parser.add_argument(
            '--max-annealing-factor',
            default=[1.0],
            type=float,
            nargs='+',
            help=
            "anneal kl div in z ending here (per level for hierarchical) (default: %(default)s)"
        )
        parser.add_argument(
            '--tracegraph-elbo',
            default=False,
            action='store_true',
            help=
            "use tracegraph elbo (much more computationally expensive) (default: %(default)s)"
        )
        return parser
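
The annealing factor used in `_set_annealing_factor` above is a standard linear KL warm-up: it ramps from a minimum to a maximum weight over the first `annealing_epochs` epochs. A minimal, self-contained sketch of the same schedule (the function and argument names here are illustrative, not part of the class above):

def linear_annealing_factor(min_af, max_af, epoch, batch_idx,
                            steps_per_epoch, annealing_epochs):
    """Linearly ramp the KL weight from min_af to max_af over annealing_epochs."""
    if annealing_epochs <= 0 or epoch >= annealing_epochs:
        return max_af
    global_step = epoch * steps_per_epoch + batch_idx + 1
    total_steps = annealing_epochs * steps_per_epoch
    return min_af + (max_af - min_af) * (global_step / total_steps)
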
Example #23
def train(args, DATA_PATH):
    # clear param store
    pyro.clear_param_store()
    #pyro.enable_validation(True)

    # train_loader, test_loader
    transform = {}
    transform["train"] = transforms.Compose([
        transforms.Resize((400, 400)),
        transforms.ToTensor(),
    ])
    transform["test"] = transforms.Compose(
        [transforms.Resize((400, 400)),
         transforms.ToTensor()])

    train_loader, test_loader = setup_data_loaders(
        dataset=GameCharacterFullData,
        root_path=DATA_PATH,
        batch_size=32,
        transforms=transform)

    # setup the VAE
    vae = VAE(use_cuda=args.cuda, num_labels=17)

    # setup the exponential learning rate scheduler
    optimizer = torch.optim.Adam
    scheduler = pyro.optim.ExponentialLR({
        'optimizer': optimizer,
        'optim_args': {
            'lr': args.learning_rate
        },
        'gamma': 0.1
    })

    # setup the inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(vae.model, vae.guide, scheduler, loss=elbo)

    # setup visdom for visualization
    if args.visdom_flag:
        vis = visdom.Visdom(port='8097')

    train_elbo = []
    test_elbo = []
    # training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do a training epoch over each mini-batch x returned
        # by the data loader
        for x, y, actor, reactor, actor_type, reactor_type, action, reaction in train_loader:
            # if on GPU put mini-batch into CUDA memory
            if args.cuda:
                x = x.cuda()
                y = y.cuda()
                actor = actor.cuda()
                reactor = reactor.cuda()
                actor_type = actor_type.cuda()
                reactor_type = reactor_type.cuda()
                action = action.cuda()
                reaction = reaction.cuda()
            # do ELBO gradient and accumulate loss
            epoch_loss += svi.step(x, y, actor, reactor, actor_type,
                                   reactor_type, action, reaction)

        # report training diagnostics
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d]  average training loss: %.4f" %
              (epoch, total_epoch_loss_train))

        if epoch % args.test_frequency == 0:
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for i, (x, y, actor, reactor, actor_type, reactor_type, action,
                    reaction) in enumerate(test_loader):
                # if on GPU put mini-batch into CUDA memory
                if args.cuda:
                    x = x.cuda()
                    y = y.cuda()
                    actor = actor.cuda()
                    reactor = reactor.cuda()
                    actor_type = actor_type.cuda()
                    reactor_type = reactor_type.cuda()
                    action = action.cuda()
                    reaction = reaction.cuda()
                # compute ELBO estimate and accumulate loss
                test_loss += svi.evaluate_loss(x, y, actor, reactor,
                                               actor_type, reactor_type,
                                               action, reaction)
                # pick three random test images from the first mini-batch and
                # visualize how well we're reconstructing them
                if i == 0:
                    if args.visdom_flag:
                        plot_vae_samples(vae, vis)
                        reco_indices = np.random.randint(0, x.shape[0], 3)
                        for index in reco_indices:
                            test_img = x[index, :]
                            reco_img = vae.reconstruct_img(test_img)
                            vis.image(test_img.reshape(
                                400, 400).detach().cpu().numpy(),
                                      opts={'caption': 'test image'})
                            vis.image(reco_img.reshape(
                                400, 400).detach().cpu().numpy(),
                                      opts={'caption': 'reconstructed image'})
            # report test diagnostics
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d]  average test loss: %.4f" %
                  (epoch, total_epoch_loss_test))

    return vae, optimizer
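
One thing worth noting about the training loop above: with pyro.optim.ExponentialLR, the learning rate only decays when the scheduler itself is stepped, typically once per epoch. A small runnable sketch of that pattern with a toy model (all names here are illustrative):

import torch
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO

def model(x):
    loc = pyro.param("loc", torch.tensor(0.))
    with pyro.plate("data", x.shape[0]):
        pyro.sample("obs", dist.Normal(loc, 1.), obs=x)

def guide(x):
    pass  # no latent variables in this toy model

scheduler = pyro.optim.ExponentialLR({'optimizer': torch.optim.Adam,
                                      'optim_args': {'lr': 1e-2},
                                      'gamma': 0.9})
svi = SVI(model, guide, scheduler, loss=Trace_ELBO())
data = torch.randn(64)
for epoch in range(5):
    svi.step(data)
    scheduler.step()  # the learning rate decays only when the scheduler is stepped
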
Example #24
def main(args):
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # load data
    if args.dataset == "dipper":
        capture_history_file = os.path.dirname(
            os.path.abspath(__file__)) + '/dipper_capture_history.csv'
    elif args.dataset == "vole":
        capture_history_file = os.path.dirname(
            os.path.abspath(__file__)) + '/meadow_voles_capture_history.csv'
    else:
        raise ValueError("Available datasets are \'dipper\' and \'vole\'.")

    capture_history = torch.tensor(
        np.genfromtxt(capture_history_file, delimiter=',')).float()[:, 1:]
    N, T = capture_history.shape
    print(
        "Loaded {} capture history for {} individuals collected over {} time periods."
        .format(args.dataset, N, T))

    if args.dataset == "dipper" and args.model in ["4", "5"]:
        sex_file = os.path.dirname(
            os.path.abspath(__file__)) + '/dipper_sex.csv'
        sex = torch.tensor(np.genfromtxt(sex_file, delimiter=',')).float()[:, 1]
        print("Loaded dipper sex data.")
    elif args.dataset == "vole" and args.model in ["4", "5"]:
        raise ValueError(
            "Cannot run model_{} on meadow voles data, since we lack sex "
            "information for these animals.".format(args.model))
    else:
        sex = None

    model = models[args.model]

    # we use poutine.block to only expose the continuous latent variables
    # in the models to AutoDiagonalNormal (all of which begin with 'phi'
    # or 'rho')
    def expose_fn(msg):
        return msg["name"][0:3] in ['phi', 'rho']

    # we use a mean field diagonal normal variational distributions (i.e. guide)
    # for the continuous latent variables.
    guide = AutoDiagonalNormal(poutine.block(model, expose_fn=expose_fn))

    # since we enumerate the discrete random variables,
    # we need to use TraceEnum_ELBO or TraceTMC_ELBO.
    optim = Adam({'lr': args.learning_rate})
    if args.tmc:
        elbo = TraceTMC_ELBO(max_plate_nesting=1)
        tmc_model = poutine.infer_config(
            model,
            lambda msg: ({"num_samples": args.tmc_num_samples, "expand": False}
                         if msg["infer"].get("enumerate", None) == "parallel"
                         else {}))
        svi = SVI(tmc_model, guide, optim, elbo)
    else:
        elbo = TraceEnum_ELBO(max_plate_nesting=1,
                              num_particles=20,
                              vectorize_particles=True)
        svi = SVI(model, guide, optim, elbo)

    losses = []

    print(
        "Beginning training of model_{} with Stochastic Variational Inference."
        .format(args.model))

    for step in range(args.num_steps):
        loss = svi.step(capture_history, sex)
        losses.append(loss)
        if (step % 20 == 0 and step > 0) or step == args.num_steps - 1:
            print("[iteration %03d] loss: %.3f" %
                  (step, np.mean(losses[-20:])))

    # evaluate final trained model
    elbo_eval = TraceEnum_ELBO(max_plate_nesting=1,
                               num_particles=2000,
                               vectorize_particles=True)
    svi_eval = SVI(model, guide, optim, elbo_eval)
    print("Final loss: %.4f" % svi_eval.evaluate_loss(capture_history, sex))
Example #25
    total_epoch_loss_train = epoch_loss / normalizer_train

    train_elbo.append(-total_epoch_loss_train)

    # --------------------------Do testing for each epoch here--------------------------------
    # initialize loss accumulator
    test_loss = 0.
    # compute the loss over the entire test set
    for x_test in test_loader:
        # if on GPU, put the mini-batch into CUDA memory
        x_test = x_test[0].cuda()
        # compute ELBO estimate and accumulate loss
        test_loss += svi.evaluate_loss(x_test)  # data entry point
    normalizer_test = len(test_loader.dataset)
    total_epoch_loss_test = test_loss / normalizer_test
    incept_score = 0

    # Set the limits for the random number generator used by the inception scoring
    limits = np.zeros((2, d))
    limits[0, :] = -4
    limits[1, :] = 4

    incept_score = inception_scoring(d, limits)  # compute the inception score
def main_sVAE(arr):
    
    X_DIM = 10000
    Y_DIM = 2
    Z_DIM=16
    ALPHA_ENCO = int("".join(str(i) for i in arr[0:10]),2)
    BETA_ENCO = int("".join(str(i) for i in arr[10:18]),2)
    ALPHA_DECO = int("".join(str(i) for i in arr[18:28]),2)
    BETA_DECO = int("".join(str(i) for i in arr[28:37]),2)
    
    H_DIM_ENCO_1 = ALPHA_ENCO + BETA_ENCO
    
    H_DIM_ENCO_2 = ALPHA_ENCO
    
    H_DIM_DECO_1 = ALPHA_DECO
    
    H_DIM_DECO_2 = ALPHA_DECO + BETA_DECO
       
    print(str(H_DIM_ENCO_1))
    print(str(H_DIM_ENCO_2))
    print(str(H_DIM_DECO_1))
    print(str(H_DIM_DECO_2))
    print('-----------')
    
    

    # Run options
    LEARNING_RATE = 1.0e-3
    USE_CUDA = True

    # Number of training epochs and test frequency
    NUM_EPOCHS = 501
    TEST_FREQUENCY = 5

    train_loader,test_loader = dataloader_first()
    # clear param store
    pyro.clear_param_store()

    # setup the VAE
    vae = VAE(x_dim=X_DIM, y_dim=Y_DIM,
              h_dim_enco_1=H_DIM_ENCO_1, h_dim_enco_2=H_DIM_ENCO_2,
              h_dim_deco_1=H_DIM_DECO_1, h_dim_deco_2=H_DIM_DECO_2,
              z_dim=Z_DIM, use_cuda=USE_CUDA)

    # setup the optimizer
    adagrad_params = {"lr": 0.00003}
    optimizer = Adagrad(adagrad_params)


    svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())

    train_elbo = []
    test_elbo = []
    # training loop
    for epoch in range(NUM_EPOCHS):
        total_epoch_loss_train = train(svi, train_loader,  use_cuda=USE_CUDA)
    
        train_elbo.append(-total_epoch_loss_train)
        
        print("[epoch %03d]  average training loss: %.4f" % (epoch, total_epoch_loss_train))
    
        if epoch == NUM_EPOCHS - 1:
            # --------------------------Do testing for the final epoch here--------------------------------
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for x_test,y_test in test_loader:
 
                x_test = x_test.cuda()
                y_test = y_test.cuda()
                # compute ELBO estimate and accumulate loss
                y_test_int = torch.Tensor.cpu(y_test.reshape(1, y_test.size()[0])[0]).numpy().astype(int)
                labels_y_test = torch.from_numpy(np.eye(2)[y_test_int])  # one-hot encode the labels

                test_loss += svi.evaluate_loss(x_test.reshape(-1, 10000),
                                               labels_y_test.cuda().float())  # data entry point
            
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            print("[epoch %03d]  average training loss: %.4f" % (epoch, total_epoch_loss_test))
            return total_epoch_loss_test
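
The label handling above round-trips through NumPy to build one-hot targets. The same encoding can be done directly on the tensor; a small sketch (the example labels here are illustrative):

import torch
import torch.nn.functional as F

y_test = torch.tensor([0, 1, 1, 0])                       # integer class labels
labels_y_test = F.one_hot(y_test, num_classes=2).float()  # shape (batch, 2)
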
Example #27
def main(args):
    # clear param store
    pyro.clear_param_store()

    ### SETUP
    train_loader, test_loader = get_data()

    # setup the VAE
    vae = VAE(use_cuda=args.cuda)

    # setup the optimizer
    adam_args = {"lr": args.learning_rate}
    optimizer = Adam(adam_args)

    # setup the inference algorithm
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(vae.model, vae.guide, optimizer, loss=elbo)

    inputSize = 0

    # setup visdom for visualization
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo = []
    test_elbo = []

    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do a training epoch over each mini-batch x returned
        # by the data loader

        for step, batch in enumerate(train_loader):
            x, adj = 0, 0
            # if on GPU put mini-batch into CUDA memory
            if args.cuda:
                x = batch['x'].cuda()
                adj = batch['edge_index'].cuda()
            else:

                x = batch['x']
                adj = batch['edge_index']
            print("x_shape", x.shape)
            print("adj_shape", adj.shape)

            inputSize = x.shape[0] * x.shape[1]
            epoch_loss += svi.step(x, adj)

        # report training diagnostics
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d]  average training loss: %.4f" %
              (epoch, total_epoch_loss_train))

        if True:
            # if epoch % args.test_frequency == 0:
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for step, batch in enumerate(test_loader):
                x, adj = 0, 0
                # if on GPU put mini-batch into CUDA memory
                if args.cuda:
                    x = batch['x'].cuda()
                    adj = batch['edge_index'].cuda()
                else:
                    x = batch['x']
                    adj = batch['edge_index']

                # compute ELBO estimate and accumulate loss
                # print('before evaluating test loss')
                test_loss += svi.evaluate_loss(x, adj)
                # print('after evaluating test loss')

                # pick three random test images from the current mini-batch and
                # visualize how well we're reconstructing them

                if args.visdom_flag:
                    plot_vae_samples(vae, vis)
                    reco_indices = np.random.randint(0, x.shape[0], 3)
                    for index in reco_indices:
                        test_img = x[index, :]
                        reco_img = vae.reconstruct_graph(test_img)
                        vis.image(test_img.reshape(28,
                                                   28).detach().cpu().numpy(),
                                  opts={'caption': 'test image'})
                        vis.image(reco_img.reshape(28,
                                                   28).detach().cpu().numpy(),
                                  opts={'caption': 'reconstructed image'})

            # report test diagnostics
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d]  average test loss: %.4f" %
                  (epoch, total_epoch_loss_test))

        # if epoch == args.tsne_iter:
        #     mnist_test_tsne(vae=vae, test_loader=test_loader)
        #     plot_llk(np.array(train_elbo), np.array(test_elbo))

    if args.save:
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': vae.state_dict(),
                'optimizer_state_dict': optimizer.get_state(),
                'train_loss': total_epoch_loss_train,
                'test_loss': total_epoch_loss_test
            }, 'vae_' + args.name + str(args.time) + '.pt')

    return vae
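
The checkpoint above saves the module weights and the Pyro optimizer state; to resume SVI exactly, the Pyro parameter store usually needs to be saved and restored as well. A minimal sketch of that pattern, assuming `vae` and `optimizer` as defined in the example above (the file name is illustrative):

import pyro
import torch

# save
torch.save({'model_state_dict': vae.state_dict(),
            'optimizer_state': optimizer.get_state(),
            'param_store': pyro.get_param_store().get_state()},
           'checkpoint.pt')

# load
checkpoint = torch.load('checkpoint.pt')
vae.load_state_dict(checkpoint['model_state_dict'])
optimizer.set_state(checkpoint['optimizer_state'])
pyro.get_param_store().set_state(checkpoint['param_store'])
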
Example #28
def train(device, dataloaders, dataset_sizes, learning_rate, num_epochs,
          early_stop_patience, model_path, pre_trained_baseline_net):

    # clear param store
    pyro.clear_param_store()

    cvae_net = CVAE(200, 500, 500, pre_trained_baseline_net)
    cvae_net.to(device)
    optimizer = pyro.optim.Adam({"lr": learning_rate})
    svi = SVI(cvae_net.model, cvae_net.guide, optimizer, loss=Trace_ELBO())

    best_loss = np.inf
    early_stop_count = 0
    Path(model_path).parent.mkdir(parents=True, exist_ok=True)

    # to track evolution
    val_inp, digits = get_val_images(num_quadrant_inputs=1,
                                     num_images=30, shuffle=False)
    val_inp = val_inp.to(device)
    samples = []
    losses = []

    for epoch in range(num_epochs):
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            running_loss = 0.0

            # Iterate over data.
            bar = tqdm(dataloaders[phase],
                       desc='CVAE Epoch {} {}'.format(epoch, phase).ljust(20))
            for i, batch in enumerate(bar):
                inputs = batch['input'].to(device)
                outputs = batch['output'].to(device)

                if phase == 'train':
                    loss = svi.step(inputs, outputs) / inputs.size(0)
                else:
                    loss = svi.evaluate_loss(inputs, outputs) / inputs.size(0)

                # statistics
                running_loss += loss
                if i % 10 == 0:
                    bar.set_postfix(loss='{:.2f}'.format(loss),
                                    early_stop_count=early_stop_count)

                # track evolution
                if phase == 'train':
                    df = pd.DataFrame(columns=['epoch', 'loss'])
                    df.loc[0] = [epoch + float(i) / len(dataloaders[phase]), loss]
                    losses.append(df)
                    if i % 47 == 0:  # every 10% of training (469)
                        dfs = predict_samples(
                            val_inp, digits, cvae_net,
                            epoch + float(i) / len(dataloaders[phase]),
                        )
                        samples.append(dfs)

            epoch_loss = running_loss / dataset_sizes[phase]
            # deep copy the model
            if phase == 'val':
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    cvae_net.save(model_path)
                    early_stop_count = 0
                else:
                    early_stop_count += 1

        if early_stop_count >= early_stop_patience:
            break

    # Save model weights
    cvae_net.load(model_path)

    # record evolution
    samples = pd.concat(samples, axis=0, ignore_index=True)
    samples.to_csv('samples.csv', index=False)

    losses = pd.concat(losses, axis=0, ignore_index=True)
    losses.to_csv('losses.csv', index=False)

    return cvae_net
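
For reference, the training/validation split above hinges on the two SVI entry points: svi.step takes a gradient step and returns a loss estimate, while svi.evaluate_loss only estimates the loss without updating any parameters. A minimal illustration, assuming `svi`, `inputs`, and `outputs` as in the loop above:

train_loss = svi.step(inputs, outputs) / inputs.size(0)         # gradient step + loss estimate
val_loss = svi.evaluate_loss(inputs, outputs) / inputs.size(0)  # loss estimate only, no update
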
Example #29
def main(args):
    # Init tensorboard
    writer = SummaryWriter('./runs/' + args.runname + str(args.trialnumber))
    model_name = 'VanillaDMM'

    # Set evaluation log file
    evaluation_logpath = './logs/{}/evaluation_result.log'.format(
        model_name.lower())
    log_evaluation(evaluation_logpath,
                   'Evaluation Trial - {}\n'.format(args.trialnumber))

    # Constants
    time_length = 30
    input_length_for_pred = 20
    pred_length = time_length - input_length_for_pred
    train_batch_size = 16
    valid_batch_size = 1

    # For model
    input_channels = 1
    z_channels = 50
    emission_channels = [64, 32]
    transition_channels = 64
    encoder_channels = [32, 64]
    rnn_input_dim = 256
    rnn_channels = 128
    kernel_size = 3
    pred_length = 0

    # Device checking
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Make dataset
    logging.info("Generate data")
    train_datapath = args.datapath / 'train'
    valid_datapath = args.datapath / 'valid'
    train_dataset = DiffusionDataset(train_datapath)
    valid_dataset = DiffusionDataset(valid_datapath)

    # Create data loaders from pickle data
    logging.info("Generate data loaders")
    train_dataloader = DataLoader(
        train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8)
    valid_dataloader = DataLoader(
        valid_dataset, batch_size=valid_batch_size, num_workers=4)

    # Training parameters
    width = 100
    height = 100
    input_dim = width * height

    # Create model
    logging.warning("Generate model")
    logging.warning(input_dim)
    pred_input_dim = 10
    dmm = DMM(input_channels=input_channels, z_channels=z_channels,
              emission_channels=emission_channels,
              transition_channels=transition_channels,
              encoder_channels=encoder_channels, rnn_input_dim=rnn_input_dim,
              rnn_channels=rnn_channels, kernel_size=kernel_size,
              height=height, width=width, pred_input_dim=pred_input_dim,
              num_layers=1, rnn_dropout_rate=0.0, num_iafs=0, iaf_dim=50,
              use_cuda=use_cuda)

    # Initialize model
    logging.info("Initialize model")
    epochs = args.endepoch
    learning_rate = 0.0001
    beta1 = 0.9
    beta2 = 0.999
    clip_norm = 10.0
    lr_decay = 1.0
    weight_decay = 0
    adam_params = {"lr": learning_rate, "betas": (beta1, beta2),
                   "clip_norm": clip_norm, "lrd": lr_decay,
                   "weight_decay": weight_decay}
    adam = ClippedAdam(adam_params)
    elbo = Trace_ELBO()
    svi = SVI(dmm.model, dmm.guide, adam, loss=elbo)

    # saves the model and optimizer states to disk
    save_model = Path('./checkpoints/' + model_name)

    def save_checkpoint(epoch):
        save_dir = save_model / '{}.model'.format(epoch)
        save_opt_dir = save_model / '{}.opt'.format(epoch)
        logging.info("saving model to %s..." % save_dir)
        torch.save(dmm.state_dict(), save_dir)
        logging.info("saving optimizer states to %s..." % save_opt_dir)
        adam.save(save_opt_dir)
        logging.info("done saving model and optimizer checkpoints to disk.")

    # Starting epoch
    start_epoch = args.startepoch

    # loads the model and optimizer states from disk
    if start_epoch != 0:
        load_opt = './checkpoints/' + model_name + \
            '/e{}-i188-opt-tn{}.opt'.format(start_epoch - 1, args.trialnumber)
        load_model = './checkpoints/' + model_name + \
            '/e{}-i188-tn{}.pt'.format(start_epoch - 1, args.trialnumber)

        def load_checkpoint():
            # assert exists(load_opt) and exists(load_model), \
            #     "--load-model and/or --load-opt misspecified"
            logging.info("loading model from %s..." % load_model)
            dmm.load_state_dict(torch.load(load_model, map_location=device))
            # logging.info("loading optimizer states from %s..." % load_opt)
            # adam.load(load_opt)
            # logging.info("done loading model and optimizer states.")

        if load_model != '':
            logging.info('Load checkpoint')
            load_checkpoint()

    # Validation only?
    validation_only = args.validonly

    # Train the model
    if not validation_only:
        logging.info("Training model")
        annealing_epochs = 1000
        minimum_annealing_factor = 0.2
        N_train_size = 3000
        N_mini_batches = int(N_train_size / train_batch_size +
                             int(N_train_size % train_batch_size > 0))
        for epoch in tqdm(range(start_epoch, epochs), desc='Epoch', leave=True):
            r_loss_train = 0
            dmm.train(True)
            idx = 0
            mov_avg_loss = 0
            mov_data_len = 0
            for which_mini_batch, data in enumerate(tqdm(train_dataloader, desc='Train', leave=True)):
                if annealing_epochs > 0 and epoch < annealing_epochs:
                    # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
                    min_af = minimum_annealing_factor
                    annealing_factor = min_af + (1.0 - min_af) * \
                        (float(which_mini_batch + epoch * N_mini_batches + 1) /
                         float(annealing_epochs * N_mini_batches))
                else:
                    # by default the KL annealing factor is unity
                    annealing_factor = 1.0

                data['observation'] = normalize(
                    data['observation'].unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                data_mask = torch.ones(
                    batch_size, length, input_channels, w, h).to(device)

                loss = svi.step(data['observation'],
                                data_reversed, data_mask, annealing_factor)

                # Running losses
                mov_avg_loss += loss
                mov_data_len += batch_size

                r_loss_train += loss
                idx += 1

            # Average losses
            train_loss_avg = r_loss_train / (len(train_dataset) * time_length)
            writer.add_scalar('Loss/train', train_loss_avg, epoch)
            logging.info("Epoch: %d, Training loss: %1.5f",
                         epoch, train_loss_avg)

            # Evaluate on the validation set at the final epoch
            if epoch == epochs - 1:
                for temp_pred_length in [20]:
                    r_loss_valid = 0
                    r_loss_loc_valid = 0
                    r_loss_scale_valid = 0
                    r_loss_latent_valid = 0
                    dmm.train(False)
                    val_pred_length = temp_pred_length
                    val_pred_input_length = 10
                    with torch.no_grad():
                        for i, data in enumerate(tqdm(valid_dataloader, desc='Eval', leave=True)):
                            data['observation'] = normalize(
                                data['observation'].unsqueeze(2).to(device))
                            batch_size, length, _, w, h = data['observation'].shape
                            data_reversed = reverse_sequences(
                                data['observation'])
                            data_mask = torch.ones(
                                batch_size, length, input_channels, w, h).to(device)

                            pred_tensor = data['observation'][:,
                                                              :input_length_for_pred, :, :, :]
                            pred_tensor_reversed = reverse_sequences(
                                pred_tensor)
                            pred_tensor_mask = torch.ones(
                                batch_size, input_length_for_pred, input_channels, w, h).to(device)

                            ground_truth = data['observation'][:,
                                                               input_length_for_pred:, :, :, :]

                            val_nll = svi.evaluate_loss(
                                data['observation'], data_reversed, data_mask)

                            preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                                dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation'])

                            ground_truth = denormalize(
                                data['observation'].squeeze().cpu().detach()
                            )
                            pred_with_input = denormalize(
                                torch.cat(
                                    [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                                     preds.squeeze()], dim=0
                                ).cpu().detach()
                            )

                            # Running losses
                            r_loss_valid += val_nll
                            r_loss_loc_valid += loss_loc
                            r_loss_scale_valid += loss_scale

                    # Average losses
                    valid_loss_avg = r_loss_valid / \
                        (len(valid_dataset) * time_length)
                    valid_loss_loc_avg = r_loss_loc_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    valid_loss_scale_avg = r_loss_scale_valid / \
                        (len(valid_dataset) * val_pred_length * width * height)
                    writer.add_scalar('Loss/test', valid_loss_avg, epoch)
                    writer.add_scalar(
                        'Loss/test_obs', valid_loss_loc_avg, epoch)
                    writer.add_scalar('Loss/test_scale',
                                      valid_loss_scale_avg, epoch)
                    logging.info("Validation loss: %1.5f", valid_loss_avg)
                    logging.info("Validation obs loss: %1.5f",
                                 valid_loss_loc_avg)
                    logging.info("Validation scale loss: %1.5f",
                                 valid_loss_scale_avg)
                    log_evaluation(evaluation_logpath, "Validation obs loss for {}s pred {}: {}\n".format(
                        val_pred_length, args.trialnumber, valid_loss_loc_avg))
                    log_evaluation(evaluation_logpath, "Validation scale loss for {}s pred {}: {}\n".format(
                        val_pred_length, args.trialnumber, valid_loss_scale_avg))

            # Save model
            if epoch % 50 == 0 or epoch == epochs - 1:
                torch.save(dmm.state_dict(), args.modelsavepath / model_name /
                           'e{}-i{}-tn{}.pt'.format(epoch, idx, args.trialnumber))
                adam.save(args.modelsavepath / model_name /
                          'e{}-i{}-opt-tn{}.opt'.format(epoch, idx, args.trialnumber))

    # Last validation after training
    test_samples_indices = range(100)
    total_n = 0
    if validation_only:
        r_loss_loc_valid = 0
        r_loss_scale_valid = 0
        r_loss_latent_valid = 0
        dmm.train(False)
        val_pred_length = args.validpredlength
        val_pred_input_length = 10
        with torch.no_grad():
            for i in tqdm(test_samples_indices, desc='Valid', leave=True):
                # Data processing
                data = valid_dataset[i]
                if torch.isnan(torch.sum(data['observation'])):
                    print("Skip {}".format(i))
                    continue
                else:
                    total_n += 1
                data['observation'] = normalize(
                    data['observation'].unsqueeze(0).unsqueeze(2).to(device))
                batch_size, length, _, w, h = data['observation'].shape
                data_reversed = reverse_sequences(data['observation'])
                data_mask = torch.ones(
                    batch_size, length, input_channels, w, h).to(device)

                # Prediction
                pred_tensor_mask = torch.ones(
                    batch_size, input_length_for_pred, input_channels, w, h).to(device)
                preds, _, loss_loc, loss_scale = do_prediction_rep_inference(
                    dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation'])

                ground_truth = denormalize(
                    data['observation'].squeeze().cpu().detach()
                )
                pred_with_input = denormalize(
                    torch.cat(
                        [data['observation'][:, :-val_pred_length, :, :, :].squeeze(),
                         preds.squeeze()], dim=0
                    ).cpu().detach()
                )

                # Save samples
                if i < 5:
                    save_dir_samples = Path('./samples/more_variance_long')
                    with open(save_dir_samples / '{}-gt-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(ground_truth, fout)
                    with open(save_dir_samples / '{}-vanilladmm-pred-test.pkl'.format(i), 'wb') as fout:
                        pickle.dump(pred_with_input, fout)

                # Running losses
                r_loss_loc_valid += loss_loc
                r_loss_scale_valid += loss_scale
                r_loss_latent_valid += np.sum((preds.squeeze().detach().cpu().numpy(
                ) - data['latent'][time_length - val_pred_length:, :, :].detach().cpu().numpy()) ** 2)

        # Average losses
        print("Validation samples used:", total_n)
        valid_loss_loc_avg = r_loss_loc_valid / \
            (total_n * val_pred_length * width * height)
        valid_loss_scale_avg = r_loss_scale_valid / \
            (total_n * val_pred_length * width * height)
        valid_loss_latent_avg = r_loss_latent_valid / \
            (total_n * val_pred_length * width * height)
        logging.info("Validation obs loss for %ds pred VanillaDMM: %f",
                     val_pred_length, valid_loss_loc_avg)
        logging.info("Validation latent loss: %f", valid_loss_latent_avg)

        with open('VanillaDMMResult.log', 'a+') as fout:
            validation_log = 'Pred {}s VanillaDMM: {}\n'.format(
                val_pred_length, valid_loss_loc_avg)
            fout.write(validation_log)
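
The annealing_factor passed into svi.step above only has an effect if the model and guide scale their latent sample sites by it. A short sketch of how that is usually wired up with poutine.scale, using a toy latent site named z (illustrative, not the DMM above):

import torch
import pyro
import pyro.distributions as dist
from pyro import poutine

def model(data, annealing_factor=1.0):
    with pyro.plate("data", data.shape[0]):
        # down-weight the KL contribution of z while annealing_factor < 1
        with poutine.scale(scale=annealing_factor):
            z = pyro.sample("z", dist.Normal(0., 1.))
        pyro.sample("obs", dist.Normal(z, 0.1), obs=data)

def guide(data, annealing_factor=1.0):
    loc = pyro.param("z_loc", torch.tensor(0.))
    with pyro.plate("data", data.shape[0]):
        with poutine.scale(scale=annealing_factor):  # match the scaling in the model
            pyro.sample("z", dist.Normal(loc, 1.))

# svi.step(data, annealing_factor) forwards the extra argument to both model and guide
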
Example #30
def train():
    parser = argparse.ArgumentParser(description='Train VAE.')
    parser.add_argument('-c', '--config', default='train_config.json', help='Config file.')
    args = parser.parse_args()
    print(args)
    c = json.load(open(args.config))
    print(c)

    pyro.clear_param_store()

    # TODO: Move to config file.
    lookback = 50
    max_n_files = None

    train_start_date = datetime.strptime(c['train_start_date'], '%Y/%m/%d')
    train_end_date = datetime.strptime(c['train_end_date'], '%Y/%m/%d')
    val_start_date = datetime.strptime(c['val_start_date'], '%Y/%m/%d')
    val_end_date = datetime.strptime(c['val_end_date'], '%Y/%m/%d')
    min_sequence_length_train = 2 * (c['series_length'] + lookback)
    min_sequence_length_test = 2 * (c['series_length'] + lookback)

    out_path = Path(c['out_dir'])
    out_path.mkdir(exist_ok=True)

    dataset_train = create_ticker_dataset(c['in_dir'], c['series_length'], lookback, min_sequence_length_train,
                                          start_date=train_start_date, end_date=train_end_date,
                                          normalised_returns=c['normalised_returns'], max_n_files=max_n_files)
    dataset_val = create_ticker_dataset(c['in_dir'], c['series_length'], lookback, min_sequence_length_test,
                                        start_date=val_start_date, end_date=val_end_date, fixed_start_date=True,
                                        normalised_returns=c['normalised_returns'], max_n_files=max_n_files)
    train_loader = DataLoader(dataset_train, batch_size=c['batch_size'], shuffle=True, num_workers=0, drop_last=True)
    val_loader = DataLoader(dataset_val, batch_size=c['batch_size'], shuffle=False, num_workers=0, drop_last=True)

    N_train_data = len(dataset_train)
    N_val_data = len(dataset_val)
    N_mini_batches = N_train_data // c['batch_size']
    N_train_time_slices = c['batch_size'] * N_mini_batches

    print(f'N_train_data: {N_train_data}, N_val_data: {N_val_data}')

    # setup the VAE
    vae = VAE(c['series_length'], z_dim=c['z_dim'], hidden_dims=c['hidden_dims'], use_cuda=c['cuda'])

    # setup the optimizer
    adam_args = {"lr": c['learning_rate']}
    optimizer = Adam(adam_args)

    # setup the inference algorithm
    elbo = JitTrace_ELBO() if c['jit'] else Trace_ELBO()
    svi = SVI(vae.model, vae.guide, optimizer, loss=elbo)

    if c['checkpoint_load']:
        checkpoint = torch.load(c['checkpoint_load'])
        vae.load_state_dict(checkpoint['model_state_dict'])
        # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    train_elbo = []
    val_elbo = []
    # training loop
    for epoch in range(c['n_epochs']):
        # initialize loss accumulator
        epoch_loss = 0.
        # do a training epoch over each mini-batch x returned
        # by the data loader
        for batch in train_loader:
            x = batch['series']
            # if on GPU put mini-batch into CUDA memory
            if c['cuda']:
                x = x.cuda()
            # do ELBO gradient and accumulate loss
            epoch_loss += svi.step(x.float())

        # report training diagnostics
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d]  average training loss: %.4f" % (epoch, total_epoch_loss_train))

        torch.save({
            'epoch': epoch,
            'model_state_dict': vae.state_dict(),
            # 'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': epoch_loss
        }, out_path / c['checkpoint_save'].format(epoch))

        if epoch % c['val_frequency'] == 0:
            # initialize loss accumulator
            val_loss = 0.
            # compute the loss over the entire test set
            for i, batch in enumerate(val_loader):
                x = batch['series']
                # if on GPU put mini-batch into CUDA memory
                if c['cuda']:
                    x = x.cuda()
                x = x.float()
                # compute ELBO estimate and accumulate loss
                val_loss += svi.evaluate_loss(x)

                if i == 0:
                    # Visualise first batch.
                    x_reconst = vae.reconstruct_img(x)
                    x = x.cpu().numpy()
                    x_reconst = x_reconst.cpu().detach().numpy()

                    n = min(5, x.shape[0])
                    fig, axes = plt.subplots(n, 1, squeeze=False)
                    for s in range(n):
                        ax = axes[s, 0]
                        ax.plot(x[s])
                        ax.plot(x_reconst[s])
                    fig.savefig(out_path / f'val_{epoch:03d}.png')
                    plt.close(fig)

            # report test diagnostics
            normalizer_val = len(val_loader.dataset)
            total_epoch_loss_val = val_loss / normalizer_val
            val_elbo.append(total_epoch_loss_val)
            print("[epoch %03d]  average val loss: %.4f" % (epoch, total_epoch_loss_val))

            # t-SNE.
            all_z_latents = []
            for batch in val_loader:
                x = batch['series']
                # z_latents = minibatch_inference(dmm, test_batch)
                # z_latents = encode_x_to_z(dmm, test_batch, sample_z_t=False)
                # x, z, x_reconst = test_minibatch(dmm, test_batch, args, sample_z=True)

                if c['cuda']:
                    x = x.cuda()

                z_loc, z_scale, z = vae.encode_x(x.float())
                all_z_latents.append(z.cpu().numpy())

            # all_latents = torch.cat(all_z_latents, dim=0)
            all_latents = np.concatenate(all_z_latents, axis=0)

            # Run t-SNE with 2 output dimensions.
            from sklearn.manifold import TSNE
            model_tsne = TSNE(n_components=2, random_state=0)
            # z_states = all_latents.detach().cpu().numpy()
            z_states = all_latents
            z_embed = model_tsne.fit_transform(z_states)
            # Plot t-SNE embedding.
            fig = plt.figure()
            plt.scatter(z_embed[:, 0], z_embed[:, 1], s=10)

            fig.savefig(out_path / f'tsne_{epoch:03d}.png')
            plt.close(fig)

    print('Finished training.')
class SVILossCompute(LossCompute):
    """A simple loss compute and train function."""
    def __init__(self,
                 generator,
                 model,
                 guide,
                 optimizer,
                 optim_params,
                 elbo_type='TraceELBO',
                 num_particles=1,
                 eval=False,
                 step=1. / 30000.0,
                 aux_model=None,
                 aux_guide=None):
        optim = self.getOptimizer(optimizer, optim_params)
        elbo = self.getELBO(elbo_type, num_particles)
        criterion = SVI(model, guide, optim, loss=elbo)
        super(SVILossCompute, self).__init__(generator, criterion, optim)

        self.eval = eval
        self.guide = guide
        self.model = model
        self.kl_anneal = step
        self.step = step
        self.aux_criterion = None
        # hack: hide the observation sites so evaluate_loss returns only the KL term
        self.model_no_obs = poutine.block(model, hide=["preds", 'lm_preds'])
        optim = self.getOptimizer(optimizer, optim_params)
        elbo = self.getELBO(elbo_type, num_particles)
        self.kl_eval_svi = SVI(self.model_no_obs, self.guide, optim, elbo)

        #aux model and guide are for calculating additional loss terms...
        if aux_model is not None and aux_guide is not None:
            print('setting aux loss, ')
            logging.info("setting aux loss")
            optim = self.getOptimizer(optimizer, optim_params)
            elbo = self.getELBO(elbo_type, num_particles)
            self.aux_criterion = SVI(aux_model, aux_guide, optim, loss=elbo)

        self.aux_guide = aux_guide
        self.aux_model = aux_model

    def setKLAnnealingSchedule(self, step_size, kl_anneal):
        """
            step_size: how much to increase weight of KL term at each step
            beta: current weight of kl term
        """
        self.step = step_size
        self.kl_anneal = kl_anneal

    def getKLAnnealingSchedule(self):
        return self.step, self.kl_anneal

    def getOptimizerStateDict(self):
        return self.criterion.optim.get_state()

    def setOptimizerStateDict(self, state_dict):
        return self.criterion.optim.set_state(state_dict)

    def getELBO(self, elbo_type, particles):
        if elbo_type == 'TraceELBO':
            return Trace_ELBO(num_particles=particles)
        elif elbo_type == "MeanFieldELBO":
            return TraceMeanField_ELBO(num_particles=particles)
        else:
            raise ValueError("{} ELBO not supported".format(elbo_type))

    def getOptimizer(self, optimizer, optim_params):
        if optimizer == 'clippedadam':
            return PyroOptim(ClippedAdam, optim_params)
        elif optimizer == 'adadelta':
            # pyro.optim.Adadelta accepts the optim_args dict directly
            return Adadelta(optim_params)
        elif optimizer == 'clippedadadelta':
            # custom optimizer, so wrap it with PyroOptim as Pyro expects
            return PyroOptim(ClippedAdadelta, optim_params)
        else:
            raise ValueError("{} optimizer not supported".format(optimizer))

    def __call__(self, src, trg, src_mask, trg_mask, src_lengths, trg_lengths,
                 trg_y, norm):
        #x = self.generator(x)
        kl_anneal = self.kl_anneal
        if self.eval:
            # evaluate the loss under no_grad so the ELBO can be decomposed into KL and NLL terms below
            with torch.no_grad():
                elbo = self.criterion.evaluate_loss(
                    src, trg, src_mask, trg_mask, src_lengths, trg_lengths,
                    trg_y) * norm
                kl_term = self.kl_eval_svi.evaluate_loss(
                    src, trg, src_mask, trg_mask, src_lengths, trg_lengths,
                    trg_y) * norm
                nll = elbo - kl_term

                def torch_item(x):
                    return x if isinstance(x, numbers.Number) else x.item()

            if self.aux_criterion is not None:
                aux_loss = self.aux_criterion.evaluate_loss(
                    src, trg, src_mask, trg_mask, src_lengths, trg_lengths,
                    trg_y)
            else:
                aux_loss = -1.0

            loss = {
                'elbo': elbo,
                'nll': nll,
                'approx_kl': kl_term,
                'aux_loss': aux_loss
            }

        else:
            loss = self.criterion.step(src, trg, src_mask, trg_mask,
                                       src_lengths, trg_lengths, trg_y,
                                       kl_anneal)
            if self.aux_criterion is not None:
                aux_loss = self.aux_criterion.step(src, trg, src_mask,
                                                   trg_mask, src_lengths,
                                                   trg_lengths, trg_y,
                                                   kl_anneal)
            loss = loss * norm
            self.kl_anneal = min(self.kl_anneal + self.step, 1.0)

        return loss
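
A rough usage sketch of the class above, assuming `generator`, `model`, `guide`, and the batch tensors (`src`, `trg`, the masks, lengths, and `trg_y`) are defined elsewhere; the optimizer settings are illustrative:

loss_compute = SVILossCompute(generator, model, guide,
                              optimizer='clippedadam',
                              optim_params={'lr': 1e-3, 'clip_norm': 10.0},
                              elbo_type='TraceELBO', num_particles=1)
loss_compute.setKLAnnealingSchedule(step_size=1. / 30000.0, kl_anneal=0.0)

# one training call: takes an SVI step and returns the batch loss (scaled by norm)
loss = loss_compute(src, trg, src_mask, trg_mask,
                    src_lengths, trg_lengths, trg_y, norm=1.0)
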
Example #32
File: vae.py Project: lewisKit/pyro
def main(args):
    # setup MNIST data loaders
    # train_loader, test_loader
    train_loader, test_loader = setup_data_loaders(MNIST, use_cuda=args.cuda, batch_size=256)

    # setup the VAE
    vae = VAE(use_cuda=args.cuda)

    # setup the optimizer
    adam_args = {"lr": args.learning_rate}
    optimizer = Adam(adam_args)

    # setup the inference algorithm
    svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())

    # setup visdom for visualization
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo = []
    test_elbo = []
    # training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do a training epoch over each mini-batch x returned
        # by the data loader
        for _, (x, _) in enumerate(train_loader):
            # if on GPU put mini-batch into CUDA memory
            if args.cuda:
                x = x.cuda()
            # do ELBO gradient and accumulate loss
            epoch_loss += svi.step(x)

        # report training diagnostics
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d]  average training loss: %.4f" % (epoch, total_epoch_loss_train))

        if epoch % args.test_frequency == 0:
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for i, (x, _) in enumerate(test_loader):
                # if on GPU put mini-batch into CUDA memory
                if args.cuda:
                    x = x.cuda()
                # compute ELBO estimate and accumulate loss
                test_loss += svi.evaluate_loss(x)

                # pick three random test images from the first mini-batch and
                # visualize how well we're reconstructing them
                if i == 0:
                    if args.visdom_flag:
                        plot_vae_samples(vae, vis)
                        reco_indices = np.random.randint(0, x.size(0), 3)
                        for index in reco_indices:
                            test_img = x[index, :]
                            reco_img = vae.reconstruct_img(test_img)
                            vis.image(test_img.reshape(28, 28).detach().cpu().numpy(),
                                      opts={'caption': 'test image'})
                            vis.image(reco_img.reshape(28, 28).detach().cpu().numpy(),
                                      opts={'caption': 'reconstructed image'})

            # report test diagnostics
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d]  average test loss: %.4f" % (epoch, total_epoch_loss_test))

        if epoch == args.tsne_iter:
            mnist_test_tsne(vae=vae, test_loader=test_loader)
            plot_llk(np.array(train_elbo), np.array(test_elbo))

    return vae