Example 1
    def log_prior(self, x):
        # Laplace priors on the two root variables x0 and x1
        PI  = Laplace(5, 1.0).log_prob(x[:, 0])
        PI += Laplace(-2, 1.0).log_prob(x[:, 1])

        # x2 and x3 are noisy functions of the roots
        PI += Normal(torch.tanh(x[:, 0] + x[:, 1] - 2.8), 0.1).log_prob(x[:, 2])
        PI += Normal(x[:, 0] * x[:, 1], 0.1).log_prob(x[:, 3])

        # x4 is a third root; x5 depends on x3 and x4
        PI += Normal(7.0, 2.0).log_prob(x[:, 4])
        PI += Normal(torch.tanh(x[:, 3] + x[:, 4]), 0.1).log_prob(x[:, 5])

        return PI
Example 2
    def sample(self):
        x0 = Laplace(5, 1.0).sample()
        x1 = Laplace(-2, 1.0).sample()

        x2 = Normal(torch.tanh(x0 + x1 - 2.8), 0.1).sample()
        x3 = Normal(x0 * x1, 0.1).sample()

        x4 = Normal(7.0, 2.0).sample()
        x5 = Normal(torch.tanh(x3 + x4), 0.1).sample()

        y0 = Normal(x3, 0.1).sample()
        y1 = Normal(x5, 0.1).sample()

        return torch.stack([x0, x1, x2, x3, x4, x5]), torch.stack([y0, y1])
Example 3
    def update_hidden(self):
        self._iter = self._iter + 1
        self.update_decay()

        wlasso, wridge, num_pars = 0, 0, 0
        for name, param in self.named_parameters():
            if not name.endswith('weight'):
                continue
            a_star = Normal(torch.tensor([0.0]), np.sqrt(
                self.v1)).log_prob(param).exp() * self.theta[name]
            b_star = Laplace(torch.tensor(
                [0.0]), self.v0).log_prob(param).exp() * (1 - self.theta[name])
            self.p_star[name] = (1 - self.decay) * self.p_star[
                name] + self.decay * a_star / (a_star + b_star)
            self.d_star0[name] = (
                1 - self.decay) * self.d_star0[name] + self.decay * (
                    (1 - self.p_star[name]) / self.v0)
            self.d_star1[name] = (1 - self.decay) * self.d_star1[
                name] + self.decay * (self.p_star[name] / self.v1)
            self.theta[name] = (1 - self.decay) * self.theta[name] \
                    + self.decay * ((self.p_star[name].sum() + self.a - 1) / (self.a + self.b + np.prod(param.data.size()) - 2)).item()
            wlasso += (param.abs() * self.d_star0[name]).sum().item()
            wridge += (param.pow(2) * self.d_star1[name]).sum().item()
            if self.thres > 0 and self._iter >= self.warm:
                """ one-shot mask """
                if self._iter == self.warm:
                    self.mask[name] = self.p_star[name] < self.thres
                param.data[self.mask[name]] = 0

        wridge = 4 * (self.N + self.num_pars + self.nu) * (
            self.likelihood + wridge + self.nu * self.lamda)
        new_sd = (wlasso + np.sqrt(wlasso**2 + wridge)) / (
            self.N + self.num_pars + self.nu) / 2
        self.sd = np.sqrt((1 - self.decay) * self.sd**2 + self.decay *
                          (new_sd**2))
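The loop above maintains, for every weight tensor, a decayed estimate p_star of the posterior probability that each weight comes from the Gaussian "slab" N(0, v1) rather than the Laplace "spike" Laplace(0, v0). A minimal, self-contained sketch of that responsibility computation (the standalone function and float hyperparameters are illustrative, not part of the original class):

import torch
from torch.distributions import Laplace, Normal

def slab_responsibility(param, v0, v1, theta):
    # slab: N(0, v1) with prior weight theta; spike: Laplace(0, v0) with weight 1 - theta
    a_star = Normal(0.0, v1 ** 0.5).log_prob(param).exp() * theta
    b_star = Laplace(0.0, v0).log_prob(param).exp() * (1 - theta)
    return a_star / (a_star + b_star)  # posterior probability of the slab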
Example 4
 def test_laplace_shape_tensor_params(self):
     laplace = Laplace(torch.Tensor([0, 0]), torch.Tensor([1, 1]))
     self.assertEqual(laplace._batch_shape, torch.Size((2,)))
     self.assertEqual(laplace._event_shape, torch.Size(()))
     self.assertEqual(laplace.sample().size(), torch.Size((2,)))
     self.assertEqual(laplace.sample((3, 2)).size(), torch.Size((3, 2, 2)))
     self.assertEqual(laplace.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
     self.assertRaises(ValueError, laplace.log_prob, self.tensor_sample_2)
Example 5
    def _init_and_train_flow(data,
                             nh,
                             l,
                             prior_dist,
                             epochs,
                             device,
                             opt_method='adam',
                             verbose=False):
        # init and save 2 normalizing flows, 1 for each direction
        d = data.shape[1]
        if d > 2:
            print('using higher D implementation')
            affine_flow = AffineFullFlowGeneral
        else:
            affine_flow = AffineFullFlow
        if prior_dist == 'laplace':
            prior = Laplace(torch.zeros(d), torch.ones(d))
        else:
            prior = TransformedDistribution(
                Uniform(torch.zeros(d), torch.ones(d)),
                SigmoidTransform().inv)
        flows = [
            affine_flow(dim=d, nh=nh, parity=False, net_class=MLP1layer)
            for _ in range(l)
        ]
        flow = NormalizingFlowModel(prior, flows).to(device)

        dset = CustomSyntheticDatasetDensity(data.astype(np.float32),
                                             device=device)
        train_loader = DataLoader(dset, shuffle=True, batch_size=128)
        optimizer = optim.Adam(flow.parameters(), lr=1e-4, weight_decay=1e-5)
        if opt_method == 'scheduler':
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                             factor=0.1,
                                                             patience=3,
                                                             verbose=verbose)

        flow.train()
        loss_vals = []
        for e in range(epochs):
            loss_val = 0
            for _, x in enumerate(train_loader):
                x = x.to(device)
                # compute loss
                _, prior_logprob, log_det = flow(x)
                loss = -torch.sum(prior_logprob + log_det)
                loss_val += loss.item()
                # optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            if opt_method == 'scheduler':
                scheduler.step(loss_val / len(train_loader))
            if verbose:
                print('epoch {}/{} \tloss: {}'.format(e, epochs, loss_val))
            loss_vals.append(loss_val)
        return flow, loss_vals
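A hedged usage sketch for the helper above (the toy data and hyperparameters are illustrative; AffineFullFlow, MLP1layer, CustomSyntheticDatasetDensity and NormalizingFlowModel must be importable from the surrounding project):

import numpy as np

data = np.random.randn(512, 2)  # two-dimensional toy data
flow, losses = _init_and_train_flow(data, nh=10, l=5, prior_dist='laplace',
                                    epochs=20, device='cpu', verbose=True)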
Example 6
 def test_laplace_shape_scalar_params(self):
     laplace = Laplace(0, 1)
     self.assertEqual(laplace._batch_shape, torch.Size())
     self.assertEqual(laplace._event_shape, torch.Size())
     self.assertEqual(laplace.sample().size(), torch.Size((1,)))
     self.assertEqual(laplace.sample((3, 2)).size(), torch.Size((3, 2)))
     self.assertRaises(ValueError, laplace.log_prob, self.scalar_sample)
     self.assertEqual(laplace.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
     self.assertEqual(laplace.log_prob(self.tensor_sample_2).size(), torch.Size((3, 2, 3)))
Example 7
    def update_hidden(self, prune=False, adaptive_sparse=False):
        self.dcoef['t'] = self.dcoef['t'] + 1.
        self.adaptive_sparse = self.target_sparse * (
            1 - self.cut**(self.dcoef['t'] / self.gap))
        self.update_decay()

        sparse_items, wlasso, wridge = 0, 0, 0

        for name, param in self.named_parameters():
            if not name.endswith(
                    'weight') or 'conv' not in name or name == 'conv1.weight':
                sparse_items += (param.data == 0).sum().item()
                continue
            a_star = Normal(torch.tensor([0.0], device='cuda'), np.sqrt(
                self.v1)).log_prob(param.data).exp() * self.theta[name]
            b_star = Laplace(torch.tensor([0.0], device='cuda'),
                             self.v0).log_prob(
                                 param.data).exp() * (1 - self.theta[name])
            self.p_star[name] = (1 - self.decay) * self.p_star[
                name] + self.decay * a_star / (a_star + b_star)
            self.d_star0[name] = (
                1 - self.decay) * self.d_star0[name] + self.decay * (
                    (1 - self.p_star[name]) / self.v0)
            self.d_star1[name] = (1 - self.decay) * self.d_star1[
                name] + self.decay * (self.p_star[name] / self.v1)
            self.theta[name] = (1 - self.decay) * self.theta[name] + self.decay * \
                    ((self.p_star[name].sum() + self.a - 1) / (self.a + self.b + np.prod(param.data.size()) - 2)).item()
            kept_ratio = (self.p_star[name] > 0.5
                          ).sum().item() * 100.0 / np.prod(param.data.size())
            if prune:
                threshold = self.binary_search_threshold(
                    param.data, self.adaptive_sparse,
                    np.prod(param.data.size()))
                param.data[abs(param.data) < threshold] = 0
                wlasso += (param.data.abs() * self.d_star0[name]).sum().item()
                wridge += (param.data**2 * self.d_star1[name]).sum().item()

            if self.dcoef['t'] % 500 == 0:
                print(
                    '{:s} | P max: {:5.1f} min: {:5.1f} | Keep ratio: {:.1f}'.
                    format(name, self.p_star[name].max() * 100,
                           self.p_star[name].min() * 100, kept_ratio))
            sparse_items += (param.data == 0).sum().item()

        self.sparse_rate = sparse_items * 100.0 / self.total_no_pars
        wridge = 4 * (self.sparse_no_pars + self.nu +
                      2) * (wridge + self.nu * self.lamda)
        new_sd = (wlasso + np.sqrt(wlasso**2 + wridge)) / (
            self.sparse_no_pars + self.nu + 2) / 2
        self.sd = np.sqrt((1 - self.decay) * self.sd**2 + self.decay *
                          (new_sd**2))
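For intuition, the adaptive sparsity target computed at the top of update_hidden ramps up geometrically with the iteration counter t. With illustrative values (target_sparse=0.9, cut=0.5, gap=1000; these are assumed, not taken from the original) the schedule evaluates as follows:

target_sparse, cut, gap = 0.9, 0.5, 1000  # assumed values for illustration
for t in (1000, 2000, 3000):
    print(t, target_sparse * (1 - cut ** (t / gap)))  # 0.45, 0.675, 0.7875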
Example 8
    def test_laplace(self):
        loc = Variable(torch.randn(5, 5), requires_grad=True)
        scale = Variable(torch.randn(5, 5).abs(), requires_grad=True)
        loc_1d = Variable(torch.randn(1), requires_grad=True)
        scale_1d = Variable(torch.randn(1), requires_grad=True)
        loc_delta = torch.Tensor([1.0, 0.0])
        scale_delta = torch.Tensor([1e-5, 1e-5])
        self.assertEqual(Laplace(loc, scale).sample().size(), (5, 5))
        self.assertEqual(Laplace(loc, scale).sample_n(7).size(), (7, 5, 5))
        self.assertEqual(Laplace(loc_1d, scale_1d).sample_n(1).size(), (1, 1))
        self.assertEqual(Laplace(loc_1d, scale_1d).sample().size(), (1, ))
        self.assertEqual(Laplace(0.2, .6).sample_n(1).size(), (1, ))
        self.assertEqual(Laplace(-0.7, 50.0).sample_n(1).size(), (1, ))

        # sample check for extreme value of mean, std
        self._set_rng_seed()
        self.assertEqual(Laplace(loc_delta,
                                 scale_delta).sample(sample_shape=(1, 2)),
                         torch.Tensor([[[1.0, 0.0], [1.0, 0.0]]]),
                         prec=1e-4)

        self._gradcheck_log_prob(Laplace, (loc, scale))
        self._gradcheck_log_prob(Laplace, (loc, 1.0))
        self._gradcheck_log_prob(Laplace, (0.0, scale))

        state = torch.get_rng_state()
        eps = torch.ones_like(loc).uniform_(-.5, .5)
        torch.set_rng_state(state)
        z = Laplace(loc, scale).rsample()
        z.backward(torch.ones_like(z))
        self.assertEqual(loc.grad, torch.ones_like(loc))
        self.assertEqual(scale.grad, -eps.sign() * torch.log1p(-2 * eps.abs()))
        loc.grad.zero_()
        scale.grad.zero_()
        self.assertEqual(z.size(), (5, 5))

        def ref_log_prob(idx, x, log_prob):
            m = loc.data.view(-1)[idx]
            s = scale.data.view(-1)[idx]
            expected = (-math.log(2 * s) - abs(x - m) / s)
            self.assertAlmostEqual(log_prob, expected, places=3)

        self._check_log_prob(Laplace(loc, scale), ref_log_prob)
Example 9
    def _get_flow_arch(self, parity=False):
        """
        Returns a normalizing flow according to the config file.

        Parameters:
        ----------
        parity: bool
            If True, the flow follows the (1, 2) permutation; otherwise it follows the (2, 1) permutation.
        """
        # this method only gets called by _train, which in turn is only called after self.dim has been initialized
        dim = self.dim
        # prior
        if self.config.flow.prior_dist == 'laplace':
            prior = Laplace(torch.zeros(dim).to(self.device), torch.ones(dim).to(self.device))
        else:
            prior = TransformedDistribution(Uniform(torch.zeros(dim).to(self.device), torch.ones(dim).to(self.device)),
                                            SigmoidTransform().inv)
        # net type for flow parameters
        if self.config.flow.net_class.lower() == 'mlp':
            net_class = MLP1layer
        elif self.config.flow.net_class.lower() == 'mlp4':
            net_class = MLP4
        elif self.config.flow.net_class.lower() == 'armlp':
            net_class = ARMLP
        else:
            raise NotImplementedError('net_class {} not understood.'.format(self.config.flow.net_class))

        # flow type
        def ar_flow(hidden_dim):
            if self.config.flow.architecture.lower() in ['cl', 'realnvp']:
                return AffineCL(dim=dim, nh=hidden_dim, scale_base=self.config.flow.scale_base,
                                shift_base=self.config.flow.shift_base, net_class=net_class, parity=parity,
                                scale=self.config.flow.scale)
            elif self.config.flow.architecture.lower() == 'maf':
                return MAF(dim=dim, nh=hidden_dim, net_class=net_class, parity=parity)
            elif self.config.flow.architecture.lower() == 'spline':
                return NSF_AR(dim=dim, hidden_dim=hidden_dim, base_network=net_class)
            else:
                raise NotImplementedError('Architecture {} not understood.'.format(self.config.flow.architecture))

        # support training multiple flows for varying depth and width, and keep only best
        self.n_layers = self.n_layers if type(self.n_layers) is list else [self.n_layers]
        self.n_hidden = self.n_hidden if type(self.n_hidden) is list else [self.n_hidden]
        normalizing_flows = []
        for nl in self.n_layers:  # typically a single item, e.g. self.n_layers = [5]
            for nh in self.n_hidden:  # typically a single item, e.g. self.n_hidden = [10]
                # construct normalizing flows
                flow_list = [ar_flow(nh) for _ in range(nl)]
                normalizing_flows.append(NormalizingFlowModel(prior, flow_list).to(self.device))
        return normalizing_flows
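A hedged sketch of the config fields this method reads (only the attribute names are taken from the accesses above; the SimpleNamespace layout and values are illustrative):

from types import SimpleNamespace

config = SimpleNamespace(flow=SimpleNamespace(
    prior_dist='laplace',    # anything else falls back to the logistic prior
    net_class='mlp',         # 'mlp', 'mlp4' or 'armlp'
    architecture='realnvp',  # 'cl'/'realnvp', 'maf' or 'spline'
    scale_base=True, shift_base=True, scale=True))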
Example 10
 def random_point(self, shape):
     """
     Sample uniformly from the constraint set.
     L1 and L2 are implemented here.
     Linf implemented in the subclass.
     https://arxiv.org/abs/math/0503650
     """
     if self.p == 2:
         distrib = Normal(0, 1)
     elif self.p == 1:
         distrib = Laplace(0, 1)
     x = distrib.sample(shape)
     e = expon(.5).rvs()
     denom = torch.sqrt(e + (x**2).sum())
     return self.alpha * x / denom
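For reference, a minimal sketch of the construction in the cited paper (arXiv:math/0503650): draw coordinates with density proportional to exp(-|t|**p) plus an independent exponential variable, then normalize. The standalone function below is illustrative, not part of the original class:

import torch
from torch.distributions import Exponential, Laplace, Normal

def uniform_lp_ball(n, p, radius=1.0):
    if p == 2:
        g = Normal(0.0, 0.5 ** 0.5).sample((n,))  # density ~ exp(-t**2)
    elif p == 1:
        g = Laplace(0.0, 1.0).sample((n,))        # density ~ exp(-|t|)
    else:
        raise NotImplementedError
    z = Exponential(1.0).sample()
    return radius * g / (z + g.abs().pow(p).sum()) ** (1.0 / p)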
Example 11
    def test_valid_parameter_broadcasting(self):
        # Test correct broadcasting of parameter sizes for distributions that have multiple
        # parameters.
        # example type (distribution instance, expected sample shape)
        valid_examples = [
            (Normal(mean=torch.Tensor([0, 0]), std=1),
             (2,)),
            (Normal(mean=0, std=torch.Tensor([1, 1])),
             (2,)),
            (Normal(mean=torch.Tensor([0, 0]), std=torch.Tensor([1])),
             (2,)),
            (Normal(mean=torch.Tensor([0, 0]), std=torch.Tensor([[1], [1]])),
             (2, 2)),
            (Normal(mean=torch.Tensor([0, 0]), std=torch.Tensor([[1]])),
             (1, 2)),
            (Normal(mean=torch.Tensor([0]), std=torch.Tensor([[1]])),
             (1, 1)),
            (Gamma(alpha=torch.Tensor([1, 1]), beta=1),
             (2,)),
            (Gamma(alpha=1, beta=torch.Tensor([1, 1])),
             (2,)),
            (Gamma(alpha=torch.Tensor([1, 1]), beta=torch.Tensor([[1], [1], [1]])),
             (3, 2)),
            (Gamma(alpha=torch.Tensor([1, 1]), beta=torch.Tensor([[1], [1]])),
             (2, 2)),
            (Gamma(alpha=torch.Tensor([1, 1]), beta=torch.Tensor([[1]])),
             (1, 2)),
            (Gamma(alpha=torch.Tensor([1]), beta=torch.Tensor([[1]])),
             (1, 1)),
            (Laplace(loc=torch.Tensor([0, 0]), scale=1),
             (2,)),
            (Laplace(loc=0, scale=torch.Tensor([1, 1])),
             (2,)),
            (Laplace(loc=torch.Tensor([0, 0]), scale=torch.Tensor([1])),
             (2,)),
            (Laplace(loc=torch.Tensor([0, 0]), scale=torch.Tensor([[1], [1]])),
             (2, 2)),
            (Laplace(loc=torch.Tensor([0, 0]), scale=torch.Tensor([[1]])),
             (1, 2)),
            (Laplace(loc=torch.Tensor([0]), scale=torch.Tensor([[1]])),
             (1, 1)),
        ]

        for dist, expected_size in valid_examples:
            dist_sample_size = dist.sample().size()
            self.assertEqual(dist_sample_size, expected_size,
                             'actual size: {} != expected size: {}'.format(dist_sample_size, expected_size))
Example 12
    def training_step(self, batch, batch_idx):
        # x, y = torch.split(batch, split_size_or_sections=1, dim=0)
        x = batch
        eps = torch.randn(batch.shape[0], 1)

        zs, log_ratio = self.model(eps=eps, s_span=self.s_ext_span)
        zs = zs[1:-1]

        likelihood = Laplace(loc=zs, scale=self.scale)

        # Hack specific to this case, where every tensor in the batch is identical
        logp = likelihood.log_prob(x.mean(dim=0).unsqueeze(1).to(self.device)).sum(dim=0).mean(dim=0)
        loss = -logp + log_ratio * self.kl_scheduler()

        # loss.backward()
        # self.optimizer.step()
        # self.scheduler.step()
        self.logp_metric.step(logp)
        self.log_ratio_metric.step(log_ratio)
        self.loss_metric.step(loss)

        logs = {'train_loss': loss}
        return {'loss': loss, 'log': logs}
Example 13
    def sample(self, z):

        mu = self.conditional_param(z)
        return Laplace(mu, self.noise_std.exp()).sample()
Example 14
 def __init__(self, loc, scale_diag):
     dist = Independent(Laplace(loc, scale_diag), 1)
     super().__init__(dist)
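A short usage note (hedged): wrapping the factorized Laplace in Independent(..., 1) moves the last dimension into the event shape, so log_prob sums over the coordinates:

import torch
from torch.distributions import Independent, Laplace

dist = Independent(Laplace(torch.zeros(2), torch.ones(2)), 1)
print(dist.batch_shape, dist.event_shape)      # torch.Size([]) torch.Size([2])
print(dist.log_prob(torch.randn(3, 2)).shape)  # torch.Size([3])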
Example 15
def runCEPair(pair_id,
              Nlayers,
              Nhidden,
              priorDist='laplace',
              TrainSplit=1.,
              epochs=100,
              optMethod='adam',
              removeOutliers=False,
              scaleDat=True,
              verbose=False):
    """
    Run cause-effect discovery for a given pair id.
    """

    # check input
    assert priorDist in ['laplace', 'uniform']

    # polish format of pair_id
    pair_id = str(pair_id)
    pair_id = '0' * (4 - len(pair_id)) + pair_id

    # load in the data
    # os.chdir(PairDataDir)
    dat_id = np.loadtxt(PairDataDir + 'pair' + str(pair_id) + '.txt')
    dir_id = open(PairDataDir + 'pair' + str(pair_id) + '_des.txt', 'r').read(
    ).lower()  # .split('ground truth:')[1].strip() #split('\n')[1]

    # determine causal direction (from dir_id file):
    dir_id = dir_id.replace('\n', '')
    dir_id = dir_id.replace(':', '')
    dir_id = dir_id.replace(' ', '')

    if ('x-->y' in dir_id) | ('x->y' in dir_id):
        dir_id = 'x-->y'
    elif ('y-->x' in dir_id) | ('y->x' in dir_id) | ('x<-y' in dir_id):
        dir_id = 'y-->x'

    if removeOutliers:
        print('removing outliers')
        clf = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
        y_pred = clf.fit_predict(dat_id)
        dat_id = dat_id[np.where(y_pred == 1)[0], ]

    # scale data:
    if scaleDat:
        dat_id = scale(dat_id)
    # dat_id = MinMaxScaler().fit_transform( dat_id )

    if dat_id.shape[1] > 2:
        dat_id = dat_id[:, :2]

    if TrainSplit == 1.:
        testDat_id = np.copy(dat_id)
    else:
        testDat_id = np.copy(dat_id[int(TrainSplit * dat_id.shape[0]):, :])
        dat_id = dat_id[:int(TrainSplit * dat_id.shape[0]), :]

    if verbose:
        print('Running experiments for CE Pair: ' + pair_id + ' with n=' +
              str(dat_id.shape[0]) + ' samples')
        print('True causal direction: ' + dir_id)
        print('baseline dist: ' + priorDist)

    # define final variables
    Ncomp = 2
    single_flow = AffineCL  # AffineHalfFlow
    net_class = MLP1layer

    # now start running LR methods
    results = pd.DataFrame({
        'L': np.repeat(Nlayers, len(Nhidden)),
        'nh': Nhidden * len(Nlayers),
        'x->y': [0] * len(Nlayers) * len(Nhidden),
        'y->x': [0] * len(Nlayers) * len(Nhidden)
    })

    for l in Nlayers:
        for nh in Nhidden:
            # -------------------------------------------------------------------------------
            #         Conditional Flow Model: X->Y
            # -------------------------------------------------------------------------------
            torch.manual_seed(0)
            if priorDist == 'laplace':
                prior = Laplace(
                    torch.zeros(Ncomp), torch.ones(Ncomp)
                )  # TransformedDistribution(Laplace(torch.zeros( Ncomp ), torch.ones( Ncomp )), SigmoidTransform().inv)
            else:
                print('.')
                prior = TransformedDistribution(
                    Uniform(torch.zeros(Ncomp), torch.ones(Ncomp)),
                    SigmoidTransform().inv)  # Logistic distribution

            flows = [
                single_flow(dim=Ncomp,
                            nh=nh,
                            parity=False,
                            net_class=net_class,
                            shift_base=True,
                            scale_base=True) for _ in range(l)
            ]
            # cflows = [ [segment_flow(dim=Ncomp) ] ]

            # flow_mod_cond = ClassCondFlow( prior, flows, cflows, device='cpu' )
            flow_mod_cond = Flow(prior, flows, device='cpu')
            flow_mod_cond.load_data(
                data=dat_id)  # , labels= to_one_hot( label )[0] )

            # now we train this model and store the likelihood:
            loss_cond = flow_mod_cond.train(epochs=epochs,
                                            optMethod=optMethod,
                                            verbose=False)
            # print(np.nanmean( flow_mod_cond.EvalLL( dat_pca, to_one_hot(label)[0] ) ))

            # -------------------------------------------------------------------------------
            #         Conditional Flow Model: Y->X
            # -------------------------------------------------------------------------------
            torch.manual_seed(0)
            if priorDist == 'laplace':
                prior_rev = Laplace(torch.zeros(Ncomp), torch.ones(Ncomp))
                # TransformedDistribution(Laplace(torch.zeros( Ncomp ), torch.ones( Ncomp )), SigmoidTransform().inv)
                # MultivariateNormal(loc=np.zeros((Ncomp,)), covariance_matrix = np.eye( Ncomp )).inv)  # SigmoidTransform().inv)
            else:
                print('.')
                prior_rev = TransformedDistribution(
                    Uniform(torch.zeros(Ncomp), torch.ones(Ncomp)),
                    SigmoidTransform().inv)  # Logistic distribution

            flows_rev = [
                single_flow(dim=Ncomp,
                            nh=nh,
                            parity=False,
                            net_class=net_class) for _ in range(l)
            ]
            # cflows_rev = [ [ segment_flow(dim=Ncomp) ] ]

            flow_mod_cond_rev = Flow(prior_rev, flows_rev, device='cpu')
            flow_mod_cond_rev.load_data(
                data=dat_id[:, [1, 0]])  # , labels= to_one_hot( label )[0] )

            # now we train this model and store the likelihood:
            loss_cond_rev = flow_mod_cond_rev.train(epochs=epochs,
                                                    optMethod=optMethod,
                                                    verbose=False)

            # evaluate on test data
            results.loc[(results.L == l) & (results.nh == nh),
                        'x->y'] = np.nanmean(flow_mod_cond.EvalLL(testDat_id))
            results.loc[(results.L == l) & (results.nh == nh),
                        'y->x'] = np.nanmean(
                            flow_mod_cond_rev.EvalLL(testDat_id[:, [1, 0]]))

    print(results)
    # compute the consensus
    p = results['x->y'].max() - results['y->x'].max(
    )  # np.mean( results['x->y'] > results['y->x'] )
    predModel = 'x->y' if p >= 0 else 'y->x'

    return results, predModel, dir_id, np.minimum(
        np.unique(dat_id[:, 0]).shape[0] / float(dat_id.shape[0]),
        np.unique(dat_id[:, 1]).shape[0] / float(dat_id.shape[0]))
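A hedged usage sketch (the pair id and the layer/width grids are illustrative; PairDataDir and the flow classes must already be defined in the surrounding module):

results, pred_dir, true_dir, uniq_ratio = runCEPair(
    1, Nlayers=[5], Nhidden=[10], priorDist='laplace', epochs=50, verbose=True)
print(pred_dir, true_dir)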
Example 16
def laplace_dist(mu, var):
    return Independent(Laplace(loc=mu, scale=var), 1)
Example 17
def laplace_loss(x_hat, scale=0.08):
    return Laplace(loc=x_hat, scale=scale)
Example 18
 def test_laplace_sample(self):
     self._set_rng_seed(1)
     for loc, scale in product([-1.0, 0.0, 1.0], [0.1, 1.0, 10.0]):
         self._check_sampler_sampler(
             Laplace(loc, scale), scipy.stats.laplace(loc=loc, scale=scale),
             'Laplace(loc={}, scale={})'.format(loc, scale))
Example 19
def main():
    # Dataset.
    ts_, ts_ext_, ts_vis_, ts, ts_ext, ts_vis, ys, ys_ = make_data()

    # Plotting parameters.
    vis_batch_size = 1024
    ylims = (-1.75, 1.75)
    alphas = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55]
    percentiles = [0.999, 0.99, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
    vis_idx = npr.permutation(vis_batch_size)
    # From https://colorbrewer2.org/.
    if args.color == "blue":
        sample_colors = ('#8c96c6', '#8c6bb1', '#810f7c')
        fill_color = '#9ebcda'
        mean_color = '#4d004b'
        num_samples = len(sample_colors)
    else:
        sample_colors = ('#fc4e2a', '#e31a1c', '#bd0026')
        fill_color = '#fd8d3c'
        mean_color = '#800026'
        num_samples = len(sample_colors)

    # Fix seed for the random draws used in the plots.
    eps = torch.randn(vis_batch_size, 1).to(device)
    bm = BrownianPath(t0=ts_vis[0],
                      w0=torch.zeros(vis_batch_size, 1).to(device))

    # Model.
    model = LatentSDE().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=.999)
    kl_scheduler = utils.LinearScheduler(iters=args.kl_anneal_iters)

    logp_metric = utils.EMAMetric()
    log_ratio_metric = utils.EMAMetric()
    loss_metric = utils.EMAMetric()

    if args.show_prior:
        with torch.no_grad():
            zs = model.sample_p(ts=ts_vis,
                                batch_size=vis_batch_size,
                                eps=eps,
                                bm=bm).squeeze()
            ts_vis_, zs_ = ts_vis.cpu().numpy(), zs.cpu().numpy()
            zs_ = np.sort(zs_, axis=1)

            img_dir = os.path.join(args.train_dir, 'prior.png')
            plt.subplot(frameon=False)
            for alpha, percentile in zip(alphas, percentiles):
                idx = int((1 - percentile) / 2. * vis_batch_size)
                zs_bot_ = zs_[:, idx]
                zs_top_ = zs_[:, -idx]
                plt.fill_between(ts_vis_,
                                 zs_bot_,
                                 zs_top_,
                                 alpha=alpha,
                                 color=fill_color)

            # `zorder` controls stacking order; larger values are drawn on top.
            plt.scatter(ts_, ys_, marker='x', zorder=3, color='k',
                        s=35)  # Data.
            plt.ylim(ylims)
            plt.xlabel('$t$')
            plt.ylabel('$Y_t$')
            plt.tight_layout()
            plt.savefig(img_dir, dpi=args.dpi)
            plt.close()
            logging.info(f'Saved prior figure at: {img_dir}')

    for global_step in tqdm.tqdm(range(args.train_iters)):
        # Plot and save.
        if global_step % args.pause_iters == 0:
            img_path = os.path.join(args.train_dir,
                                    f'global_step_{global_step}.png')

            with torch.no_grad():
                zs = model.sample_q(ts=ts_vis,
                                    batch_size=vis_batch_size,
                                    eps=eps,
                                    bm=bm).squeeze()
                samples = zs[:, vis_idx]
                ts_vis_, zs_, samples_ = ts_vis.cpu().numpy(), zs.cpu().numpy(
                ), samples.cpu().numpy()
                zs_ = np.sort(zs_, axis=1)
                plt.subplot(frameon=False)

                if args.show_percentiles:
                    for alpha, percentile in zip(alphas, percentiles):
                        idx = int((1 - percentile) / 2. * vis_batch_size)
                        zs_bot_, zs_top_ = zs_[:, idx], zs_[:, -idx]
                        plt.fill_between(ts_vis_,
                                         zs_bot_,
                                         zs_top_,
                                         alpha=alpha,
                                         color=fill_color)

                if args.show_mean:
                    plt.plot(ts_vis_, zs_.mean(axis=1), color=mean_color)

                if args.show_samples:
                    for j in range(num_samples):
                        plt.plot(ts_vis_,
                                 samples_[:, j],
                                 color=sample_colors[j],
                                 linewidth=1.0)

                if args.show_arrows:
                    num, dt = 12, 0.12
                    t, y = torch.meshgrid([
                        torch.linspace(0.2, 1.8, num).to(device),
                        torch.linspace(-1.5, 1.5, num).to(device)
                    ])
                    t, y = t.reshape(-1, 1), y.reshape(-1, 1)
                    fty = model.f(t=t, y=y).reshape(num, num)
                    dt = torch.zeros(num, num).fill_(dt).to(device)
                    dy = fty * dt
                    dt_, dy_, t_, y_ = dt.cpu().numpy(), dy.cpu().numpy(
                    ), t.cpu().numpy(), y.cpu().numpy()
                    plt.quiver(t_,
                               y_,
                               dt_,
                               dy_,
                               alpha=0.3,
                               edgecolors='k',
                               width=0.0035,
                               scale=50)

                if args.hide_ticks:
                    plt.xticks([], [])
                    plt.yticks([], [])

                plt.scatter(ts_, ys_, marker='x', zorder=3, color='k',
                            s=35)  # Data.
                plt.ylim(ylims)
                plt.xlabel('$t$')
                plt.ylabel('$Y_t$')
                plt.tight_layout()
                plt.savefig(img_path, dpi=args.dpi)
                plt.close()
                logging.info(f'Saved figure at: {img_path}')

                if args.save_ckpt:
                    torch.save({'model': model.state_dict()},
                               os.path.join(ckpt_dir,
                                            f'global_step_{global_step}.ckpt'))

        # Train.
        optimizer.zero_grad()
        zs, log_ratio = model(ts=ts_ext, batch_size=args.batch_size)
        zs = zs.squeeze()
        # Drop the first and last steps, which are only used to penalize the
        # out-of-data region and spread uncertainty.
        zs = zs[1:-1]

        likelihood = {
            "laplace": Laplace(loc=zs, scale=args.scale),
            "normal": Normal(loc=zs, scale=args.scale)
        }[args.likelihood]
        logp = likelihood.log_prob(ys).sum(dim=0).mean(dim=0)

        loss = -logp + log_ratio * kl_scheduler()
        loss.backward()
        optimizer.step()
        scheduler.step()
        kl_scheduler.step()

        logp_metric.step(logp)
        log_ratio_metric.step(log_ratio)
        loss_metric.step(loss)

        logging.info(
            f'global_step: {global_step}, '
            f'logp: {logp_metric.val():.3f}, log_ratio: {log_ratio_metric.val():.3f}, loss: {loss_metric.val():.3f}'
        )
Example 20
 def step_n(self, zt, t, n):
     mu = self.conditional_param(zt)
     return Laplace(mu, self.noise_std.exp()).sample([n])
Example 21
 def func(mu, var):
     return Laplace(mu, var.sqrt()).rsample()
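A hedged aside, not the original author's intent: for Laplace(mu, b) the variance is 2 * b**2, so if var is meant to be a true variance the matching scale would be (var / 2).sqrt() rather than var.sqrt():

def func_variance_matched(mu, var):
    # Laplace(mu, b) has variance 2 * b**2, hence b = sqrt(var / 2)
    return Laplace(mu, (var / 2).sqrt()).rsample()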
Example 22
 def logp(self, z, x, t):
     mu = self.conditional_param(z)
     l = Laplace(mu, self.noise_std.exp()).log_prob(x).sum(-1)
     return l