def test_laplace_shape_tensor_params(self):
    laplace = Laplace(torch.Tensor([0, 0]), torch.Tensor([1, 1]))
    self.assertEqual(laplace._batch_shape, torch.Size((2,)))
    self.assertEqual(laplace._event_shape, torch.Size(()))
    self.assertEqual(laplace.sample().size(), torch.Size((2,)))
    self.assertEqual(laplace.sample((3, 2)).size(), torch.Size((3, 2, 2)))
    self.assertEqual(laplace.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
    self.assertRaises(ValueError, laplace.log_prob, self.tensor_sample_2)
def test_laplace_shape_scalar_params(self):
    laplace = Laplace(0, 1)
    self.assertEqual(laplace._batch_shape, torch.Size())
    self.assertEqual(laplace._event_shape, torch.Size())
    self.assertEqual(laplace.sample().size(), torch.Size((1,)))
    self.assertEqual(laplace.sample((3, 2)).size(), torch.Size((3, 2)))
    self.assertRaises(ValueError, laplace.log_prob, self.scalar_sample)
    self.assertEqual(laplace.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
    self.assertEqual(laplace.log_prob(self.tensor_sample_2).size(), torch.Size((3, 2, 3)))
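# A small, self-contained illustration of the shape conventions the two tests above assert,
# assuming a reasonably recent PyTorch: length-2 tensor parameters give batch_shape (2,) and
# an empty event_shape, and the requested sample shape is prepended to the batch shape.
import torch
from torch.distributions import Laplace

laplace = Laplace(torch.zeros(2), torch.ones(2))
print(laplace.batch_shape, laplace.event_shape)    # torch.Size([2]) torch.Size([])
print(laplace.sample((3, 2)).shape)                # torch.Size([3, 2, 2])
print(laplace.log_prob(torch.zeros(3, 2)).shape)   # torch.Size([3, 2]) via broadcasting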
def log_prior(self, x):
    PI = Laplace(5, 1.0).log_prob(x[:, 0])
    PI += Laplace(-2, 1.0).log_prob(x[:, 1])
    PI += Normal(torch.tanh(x[:, 0] + x[:, 1] - 2.8), 0.1).log_prob(x[:, 2])
    PI += Normal(x[:, 0] * x[:, 1], 0.1).log_prob(x[:, 3])
    PI += Normal(7.0, 2.0).log_prob(x[:, 4])
    PI += Normal(torch.tanh(x[:, 3] + x[:, 4]), 0.1).log_prob(x[:, 5])
    return PI
def sample(self):
    x0 = Laplace(5, 1.0).sample()
    x1 = Laplace(-2, 1.0).sample()
    x2 = Normal(torch.tanh(x0 + x1 - 2.8), 0.1).sample()
    x3 = Normal(x0 * x1, 0.1).sample()
    x4 = Normal(7.0, 2.0).sample()
    x5 = Normal(torch.tanh(x3 + x4), 0.1).sample()
    y0 = Normal(x3, 0.1).sample()
    y1 = Normal(x5, 0.1).sample()
    # stack the scalar tensors instead of re-wrapping them with torch.tensor,
    # which warns (or errors) when given existing tensors
    return torch.stack([x0, x1, x2, x3, x4, x5]), torch.stack([y0, y1])
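# Hedged usage note for the two methods above (they appear to define a small structural
# causal model with a 6-dimensional latent x and a 2-dimensional observation y):
# `sample()` returns one joint draw, while `log_prior()` scores a batch of latents
# column-wise, so a single draw needs a leading batch dimension before scoring.
# Names below are illustrative only:
#
#   x, y = model.sample()                   # x has 6 entries, y has 2
#   lp = model.log_prior(x.unsqueeze(0))    # log_prior indexes x[:, 0] ... x[:, 5]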
def update_hidden(self):
    self._iter = self._iter + 1
    self.update_decay()
    wlasso, wridge, num_pars = 0, 0, 0
    for name, param in self.named_parameters():
        if not name.endswith('weight'):
            continue
        a_star = Normal(torch.tensor([0.0]),
                        np.sqrt(self.v1)).log_prob(param).exp() * self.theta[name]
        b_star = Laplace(torch.tensor([0.0]),
                         self.v0).log_prob(param).exp() * (1 - self.theta[name])
        self.p_star[name] = (1 - self.decay) * self.p_star[name] \
            + self.decay * a_star / (a_star + b_star)
        self.d_star0[name] = (1 - self.decay) * self.d_star0[name] \
            + self.decay * ((1 - self.p_star[name]) / self.v0)
        self.d_star1[name] = (1 - self.decay) * self.d_star1[name] \
            + self.decay * (self.p_star[name] / self.v1)
        self.theta[name] = (1 - self.decay) * self.theta[name] \
            + self.decay * ((self.p_star[name].sum() + self.a - 1) /
                            (self.a + self.b + np.prod(param.data.size()) - 2)).item()
        wlasso += (param.abs() * self.d_star0[name]).sum().item()
        wridge += (param.pow(2) * self.d_star1[name]).sum().item()
        if self.thres > 0 and self._iter >= self.warm:
            # one-shot mask
            if self._iter == self.warm:
                self.mask[name] = self.p_star[name] < self.thres
            param.data[self.mask[name]] = 0
    wridge = 4 * (self.N + self.num_pars + self.nu) * (
        self.likelihood + wridge + self.nu * self.lamda)
    new_sd = (wlasso + np.sqrt(wlasso**2 + wridge)) / (self.N + self.num_pars + self.nu) / 2
    self.sd = np.sqrt((1 - self.decay) * self.sd**2 + self.decay * (new_sd**2))
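# A minimal standalone sketch of the inclusion-probability update used above, under the
# assumptions taken from the code: the "slab" is Normal(0, sqrt(v1)), the "spike" is
# Laplace(0, v0), and theta is the current inclusion probability. Values are illustrative.
import torch
from torch.distributions import Normal, Laplace

v0, v1, theta = 0.005, 0.1, 0.5
w = torch.randn(4, 4) * 0.05                              # stand-in weight tensor
a = Normal(0.0, v1 ** 0.5).log_prob(w).exp() * theta      # slab responsibility
b = Laplace(0.0, v0).log_prob(w).exp() * (1 - theta)      # spike responsibility
p_star = a / (a + b)                                      # posterior prob. a weight is kept
print(p_star)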
def _init_and_train_flow(data, nh, l, prior_dist, epochs, device,
                         opt_method='adam', verbose=False):
    # init and save 2 normalizing flows, 1 for each direction
    d = data.shape[1]
    if d > 2:
        print('using higher D implementation')
        affine_flow = AffineFullFlowGeneral
    else:
        affine_flow = AffineFullFlow
    if prior_dist == 'laplace':
        prior = Laplace(torch.zeros(d), torch.ones(d))
    else:
        prior = TransformedDistribution(
            Uniform(torch.zeros(d), torch.ones(d)), SigmoidTransform().inv)
    flows = [
        affine_flow(dim=d, nh=nh, parity=False, net_class=MLP1layer)
        for _ in range(l)
    ]
    flow = NormalizingFlowModel(prior, flows).to(device)

    dset = CustomSyntheticDatasetDensity(data.astype(np.float32), device=device)
    train_loader = DataLoader(dset, shuffle=True, batch_size=128)
    optimizer = optim.Adam(flow.parameters(), lr=1e-4, weight_decay=1e-5)
    if opt_method == 'scheduler':
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=0.1, patience=3, verbose=verbose)

    flow.train()
    loss_vals = []
    for e in range(epochs):
        loss_val = 0
        for _, x in enumerate(train_loader):
            x = x.to(device)  # fix: Tensor.to is not in-place, keep the moved tensor
            # compute loss
            _, prior_logprob, log_det = flow(x)
            loss = -torch.sum(prior_logprob + log_det)
            loss_val += loss.item()
            # optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if opt_method == 'scheduler':
            scheduler.step(loss_val / len(train_loader))
        if verbose:
            print('epoch {}/{} \tloss: {}'.format(e, epochs, loss_val))
        loss_vals.append(loss_val)
    return flow, loss_vals
def update_hidden(self, prune=False, adaptive_sparse=False):
    self.dcoef['t'] = self.dcoef['t'] + 1.
    self.adaptive_sparse = self.target_sparse * (1 - self.cut**(self.dcoef['t'] / self.gap))
    self.update_decay()
    sparse_items, wlasso, wridge = 0, 0, 0
    for name, param in self.named_parameters():
        if not name.endswith('weight') or 'conv' not in name or name == 'conv1.weight':
            sparse_items += (param.data == 0).sum().item()
            continue
        a_star = Normal(torch.tensor([0.0], device='cuda'),
                        np.sqrt(self.v1)).log_prob(param.data).exp() * self.theta[name]
        b_star = Laplace(torch.tensor([0.0], device='cuda'),
                         self.v0).log_prob(param.data).exp() * (1 - self.theta[name])
        self.p_star[name] = (1 - self.decay) * self.p_star[name] \
            + self.decay * a_star / (a_star + b_star)
        self.d_star0[name] = (1 - self.decay) * self.d_star0[name] \
            + self.decay * ((1 - self.p_star[name]) / self.v0)
        self.d_star1[name] = (1 - self.decay) * self.d_star1[name] \
            + self.decay * (self.p_star[name] / self.v1)
        self.theta[name] = (1 - self.decay) * self.theta[name] + self.decay * \
            ((self.p_star[name].sum() + self.a - 1) /
             (self.a + self.b + np.prod(param.data.size()) - 2)).item()
        kept_ratio = (self.p_star[name] > 0.5).sum().item() * 100.0 / np.prod(param.data.size())
        if prune:
            threshold = self.binary_search_threshold(
                param.data, self.adaptive_sparse, np.prod(param.data.size()))
            param.data[abs(param.data) < threshold] = 0
        wlasso += (param.data.abs() * self.d_star0[name]).sum().item()
        wridge += (param.data**2 * self.d_star1[name]).sum().item()
        if self.dcoef['t'] % 500 == 0:
            print('{:s} | P max: {:5.1f} min: {:5.1f} | Keep ratio: {:.1f}'.format(
                name, self.p_star[name].max() * 100,
                self.p_star[name].min() * 100, kept_ratio))
        sparse_items += (param.data == 0).sum().item()
    self.sparse_rate = sparse_items * 100.0 / self.total_no_pars
    wridge = 4 * (self.sparse_no_pars + self.nu + 2) * (wridge + self.nu * self.lamda)
    new_sd = (wlasso + np.sqrt(wlasso**2 + wridge)) / (self.sparse_no_pars + self.nu + 2) / 2
    self.sd = np.sqrt((1 - self.decay) * self.sd**2 + self.decay * (new_sd**2))
def _get_flow_arch(self, parity=False):
    """
    Returns a normalizing flow according to the config file.

    Parameters
    ----------
    parity: bool
        If True, the flow follows the (1, 2) permutation, otherwise it follows
        the (2, 1) permutation.
    """
    # this method only gets called by _train, which in turn is only called after
    # self.dim has been initialized
    dim = self.dim

    # prior
    if self.config.flow.prior_dist == 'laplace':
        prior = Laplace(torch.zeros(dim).to(self.device), torch.ones(dim).to(self.device))
    else:
        prior = TransformedDistribution(
            Uniform(torch.zeros(dim).to(self.device), torch.ones(dim).to(self.device)),
            SigmoidTransform().inv)

    # net type for flow parameters
    if self.config.flow.net_class.lower() == 'mlp':
        net_class = MLP1layer
    elif self.config.flow.net_class.lower() == 'mlp4':
        net_class = MLP4
    elif self.config.flow.net_class.lower() == 'armlp':
        net_class = ARMLP
    else:
        raise NotImplementedError('net_class {} not understood.'.format(self.config.flow.net_class))

    # flow type
    def ar_flow(hidden_dim):
        if self.config.flow.architecture.lower() in ['cl', 'realnvp']:
            return AffineCL(dim=dim, nh=hidden_dim, scale_base=self.config.flow.scale_base,
                            shift_base=self.config.flow.shift_base, net_class=net_class,
                            parity=parity, scale=self.config.flow.scale)
        elif self.config.flow.architecture.lower() == 'maf':
            return MAF(dim=dim, nh=hidden_dim, net_class=net_class, parity=parity)
        elif self.config.flow.architecture.lower() == 'spline':
            return NSF_AR(dim=dim, hidden_dim=hidden_dim, base_network=net_class)
        else:
            raise NotImplementedError('Architecture {} not understood.'.format(self.config.flow.architecture))

    # support training multiple flows for varying depth and width, and keep only the best
    self.n_layers = self.n_layers if type(self.n_layers) is list else [self.n_layers]
    self.n_hidden = self.n_hidden if type(self.n_hidden) is list else [self.n_hidden]
    normalizing_flows = []
    for nl in self.n_layers:  # only 1 item in list, e.g. self.n_layers = [5]
        for nh in self.n_hidden:  # only 1 item in list, e.g. self.n_hidden = [10]
            # construct normalizing flows
            flow_list = [ar_flow(nh) for _ in range(nl)]
            normalizing_flows.append(NormalizingFlowModel(prior, flow_list).to(self.device))
    return normalizing_flows
def training_step(self, batch, batch_idx):
    # x, y = torch.split(batch, split_size_or_sections=1, dim=0)
    x = batch
    eps = torch.randn(batch.shape[0], 1)
    zs, log_ratio = self.model(eps=eps, s_span=self.s_ext_span)
    zs = zs[1:-1]
    likelihood = Laplace(loc=zs, scale=self.scale)
    # Bad hack, only valid here because every tensor in the batch is identical
    logp = likelihood.log_prob(x.mean(dim=0).unsqueeze(1).to(self.device)).sum(dim=0).mean(dim=0)
    loss = -logp + log_ratio * self.kl_scheduler()
    # loss.backward()
    # self.optimizer.step()
    # self.scheduler.step()
    self.logp_metric.step(logp)
    self.log_ratio_metric.step(log_ratio)
    self.loss_metric.step(loss)
    logs = {'train_loss': loss}
    return {'loss': loss, 'log': logs}
def random_point(self, shape):
    """
    Sample uniformly from the constraint set. L1 and L2 are implemented here;
    Linf is implemented in the subclass.
    https://arxiv.org/abs/math/0503650
    """
    if self.p == 2:
        distrib = Normal(0, 1)
    elif self.p == 1:
        distrib = Laplace(0, 1)
    x = distrib.sample(shape)
    e = expon(.5).rvs()
    denom = torch.sqrt(e + (x**2).sum())
    return self.alpha * x / denom
def test_valid_parameter_broadcasting(self):
    # Test correct broadcasting of parameter sizes for distributions that have
    # multiple parameters.
    # example type (distribution instance, expected sample shape)
    valid_examples = [
        (Normal(mean=torch.Tensor([0, 0]), std=1), (2,)),
        (Normal(mean=0, std=torch.Tensor([1, 1])), (2,)),
        (Normal(mean=torch.Tensor([0, 0]), std=torch.Tensor([1])), (2,)),
        (Normal(mean=torch.Tensor([0, 0]), std=torch.Tensor([[1], [1]])), (2, 2)),
        (Normal(mean=torch.Tensor([0, 0]), std=torch.Tensor([[1]])), (1, 2)),
        (Normal(mean=torch.Tensor([0]), std=torch.Tensor([[1]])), (1, 1)),
        (Gamma(alpha=torch.Tensor([1, 1]), beta=1), (2,)),
        (Gamma(alpha=1, beta=torch.Tensor([1, 1])), (2,)),
        (Gamma(alpha=torch.Tensor([1, 1]), beta=torch.Tensor([[1], [1], [1]])), (3, 2)),
        (Gamma(alpha=torch.Tensor([1, 1]), beta=torch.Tensor([[1], [1]])), (2, 2)),
        (Gamma(alpha=torch.Tensor([1, 1]), beta=torch.Tensor([[1]])), (1, 2)),
        (Gamma(alpha=torch.Tensor([1]), beta=torch.Tensor([[1]])), (1, 1)),
        (Laplace(loc=torch.Tensor([0, 0]), scale=1), (2,)),
        (Laplace(loc=0, scale=torch.Tensor([1, 1])), (2,)),
        (Laplace(loc=torch.Tensor([0, 0]), scale=torch.Tensor([1])), (2,)),
        (Laplace(loc=torch.Tensor([0, 0]), scale=torch.Tensor([[1], [1]])), (2, 2)),
        (Laplace(loc=torch.Tensor([0, 0]), scale=torch.Tensor([[1]])), (1, 2)),
        (Laplace(loc=torch.Tensor([0]), scale=torch.Tensor([[1]])), (1, 1)),
    ]

    for dist, expected_size in valid_examples:
        dist_sample_size = dist.sample().size()
        self.assertEqual(dist_sample_size, expected_size,
                         'actual size: {} != expected size: {}'.format(dist_sample_size, expected_size))
def __init__(self, loc, scale_diag):
    dist = Independent(Laplace(loc, scale_diag), 1)
    super().__init__(dist)
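# A short, self-contained check of what the Independent wrapper above buys: reinterpreting
# the last batch dimension as an event dimension makes log_prob sum over that dimension,
# which is the behaviour expected of a diagonal ("factorized") Laplace. Shapes are illustrative.
import torch
from torch.distributions import Laplace, Independent

loc, scale = torch.zeros(3, 4), torch.ones(3, 4)
x = torch.randn(3, 4)
diag = Independent(Laplace(loc, scale), 1)
print(diag.log_prob(x).shape)                                   # torch.Size([3])
print(torch.allclose(diag.log_prob(x),
                     Laplace(loc, scale).log_prob(x).sum(-1)))  # True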
def sample(self, z):
    mu = self.conditional_param(z)
    return Laplace(mu, self.noise_std.exp()).sample()
def main():
    # Dataset.
    ts_, ts_ext_, ts_vis_, ts, ts_ext, ts_vis, ys, ys_ = make_data()

    # Plotting parameters.
    vis_batch_size = 1024
    ylims = (-1.75, 1.75)
    alphas = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55]
    percentiles = [0.999, 0.99, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
    vis_idx = npr.permutation(vis_batch_size)
    # From https://colorbrewer2.org/.
    if args.color == "blue":
        sample_colors = ('#8c96c6', '#8c6bb1', '#810f7c')
        fill_color = '#9ebcda'
        mean_color = '#4d004b'
        num_samples = len(sample_colors)
    else:
        sample_colors = ('#fc4e2a', '#e31a1c', '#bd0026')
        fill_color = '#fd8d3c'
        mean_color = '#800026'
        num_samples = len(sample_colors)

    # Fix seed for the random draws used in the plots.
    eps = torch.randn(vis_batch_size, 1).to(device)
    bm = BrownianPath(t0=ts_vis[0], w0=torch.zeros(vis_batch_size, 1).to(device))

    # Model.
    model = LatentSDE().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=.999)
    kl_scheduler = utils.LinearScheduler(iters=args.kl_anneal_iters)

    logp_metric = utils.EMAMetric()
    log_ratio_metric = utils.EMAMetric()
    loss_metric = utils.EMAMetric()

    if args.show_prior:
        with torch.no_grad():
            zs = model.sample_p(ts=ts_vis, batch_size=vis_batch_size, eps=eps, bm=bm).squeeze()
            ts_vis_, zs_ = ts_vis.cpu().numpy(), zs.cpu().numpy()
            zs_ = np.sort(zs_, axis=1)

            img_dir = os.path.join(args.train_dir, 'prior.png')
            plt.subplot(frameon=False)
            for alpha, percentile in zip(alphas, percentiles):
                idx = int((1 - percentile) / 2. * vis_batch_size)
                zs_bot_ = zs_[:, idx]
                zs_top_ = zs_[:, -idx]
                plt.fill_between(ts_vis_, zs_bot_, zs_top_, alpha=alpha, color=fill_color)

            # `zorder` determines who's on top; the larger the more at the top.
            plt.scatter(ts_, ys_, marker='x', zorder=3, color='k', s=35)  # Data.
            plt.ylim(ylims)
            plt.xlabel('$t$')
            plt.ylabel('$Y_t$')
            plt.tight_layout()
            plt.savefig(img_dir, dpi=args.dpi)
            plt.close()
            logging.info(f'Saved prior figure at: {img_dir}')

    for global_step in tqdm.tqdm(range(args.train_iters)):
        # Plot and save.
        if global_step % args.pause_iters == 0:
            img_path = os.path.join(args.train_dir, f'global_step_{global_step}.png')

            with torch.no_grad():
                zs = model.sample_q(ts=ts_vis, batch_size=vis_batch_size, eps=eps, bm=bm).squeeze()
                samples = zs[:, vis_idx]
                ts_vis_, zs_, samples_ = ts_vis.cpu().numpy(), zs.cpu().numpy(), samples.cpu().numpy()
                zs_ = np.sort(zs_, axis=1)
                plt.subplot(frameon=False)

                if args.show_percentiles:
                    for alpha, percentile in zip(alphas, percentiles):
                        idx = int((1 - percentile) / 2. * vis_batch_size)
                        zs_bot_, zs_top_ = zs_[:, idx], zs_[:, -idx]
                        plt.fill_between(ts_vis_, zs_bot_, zs_top_, alpha=alpha, color=fill_color)

                if args.show_mean:
                    plt.plot(ts_vis_, zs_.mean(axis=1), color=mean_color)

                if args.show_samples:
                    for j in range(num_samples):
                        plt.plot(ts_vis_, samples_[:, j], color=sample_colors[j], linewidth=1.0)

                if args.show_arrows:
                    num, dt = 12, 0.12
                    t, y = torch.meshgrid([
                        torch.linspace(0.2, 1.8, num).to(device),
                        torch.linspace(-1.5, 1.5, num).to(device)
                    ])
                    t, y = t.reshape(-1, 1), y.reshape(-1, 1)
                    fty = model.f(t=t, y=y).reshape(num, num)
                    dt = torch.zeros(num, num).fill_(dt).to(device)
                    dy = fty * dt
                    dt_, dy_, t_, y_ = dt.cpu().numpy(), dy.cpu().numpy(), t.cpu().numpy(), y.cpu().numpy()
                    plt.quiver(t_, y_, dt_, dy_, alpha=0.3, edgecolors='k', width=0.0035, scale=50)

                if args.hide_ticks:
                    plt.xticks([], [])
                    plt.yticks([], [])

                plt.scatter(ts_, ys_, marker='x', zorder=3, color='k', s=35)  # Data.
                plt.ylim(ylims)
                plt.xlabel('$t$')
                plt.ylabel('$Y_t$')
                plt.tight_layout()
                plt.savefig(img_path, dpi=args.dpi)
                plt.close()
                logging.info(f'Saved figure at: {img_path}')

                if args.save_ckpt:
                    torch.save({'model': model.state_dict()},
                               os.path.join(ckpt_dir, f'global_step_{global_step}.ckpt'))

        # Train.
        optimizer.zero_grad()
        zs, log_ratio = model(ts=ts_ext, batch_size=args.batch_size)
        zs = zs.squeeze()
        # Drop first and last, which are only used to penalize the out-of-data region
        # and spread uncertainty.
        zs = zs[1:-1]
        likelihood = {
            "laplace": Laplace(loc=zs, scale=args.scale),
            "normal": Normal(loc=zs, scale=args.scale)
        }[args.likelihood]
        logp = likelihood.log_prob(ys).sum(dim=0).mean(dim=0)
        loss = -logp + log_ratio * kl_scheduler()
        loss.backward()

        optimizer.step()
        scheduler.step()
        kl_scheduler.step()

        logp_metric.step(logp)
        log_ratio_metric.step(log_ratio)
        loss_metric.step(loss)

        logging.info(
            f'global_step: {global_step}, '
            f'logp: {logp_metric.val():.3f}, log_ratio: {log_ratio_metric.val():.3f}, loss: {loss_metric.val():.3f}'
        )
def runCEPair(pair_id, Nlayers, Nhidden, priorDist='laplace', TrainSplit=1.,
              epochs=100, optMethod='adam', removeOutliers=False, scaleDat=True,
              verbose=False):
    """
    Run cause-effect discovery for a given pair id.
    """
    # check input
    assert priorDist in ['laplace', 'uniform']

    # polish format of pair_id
    pair_id = str(pair_id)
    pair_id = '0' * (4 - len(pair_id)) + pair_id

    # load in the data
    # os.chdir(PairDataDir)
    dat_id = np.loadtxt(PairDataDir + 'pair' + str(pair_id) + '.txt')
    dir_id = open(PairDataDir + 'pair' + str(pair_id) + '_des.txt', 'r').read().lower()
    # .split('ground truth:')[1].strip()  # split('\n')[1]

    # determine causal direction (from dir_id file):
    dir_id = dir_id.replace('\n', '')
    dir_id = dir_id.replace(':', '')
    dir_id = dir_id.replace(' ', '')
    if ('x-->y' in dir_id) | ('x->y' in dir_id):
        dir_id = 'x-->y'
    elif ('y-->x' in dir_id) | ('y->x' in dir_id) | ('x<-y' in dir_id):
        dir_id = 'y-->x'

    if removeOutliers:
        print('removing outliers')
        clf = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
        y_pred = clf.fit_predict(dat_id)
        dat_id = dat_id[np.where(y_pred == 1)[0], ]

    # scale data:
    if scaleDat:
        dat_id = scale(dat_id)  # dat_id = MinMaxScaler().fit_transform(dat_id)
    if dat_id.shape[1] > 2:
        dat_id = dat_id[:, :2]
    if TrainSplit == 1.:
        testDat_id = np.copy(dat_id)
    else:
        testDat_id = np.copy(dat_id[int(TrainSplit * dat_id.shape[0]):, :])
        dat_id = dat_id[:int(TrainSplit * dat_id.shape[0]), :]

    if verbose:
        print('Running experiments for CE Pair: ' + pair_id + ' with n=' + str(dat_id.shape[0]) + ' samples')
        print('True causal direction: ' + dir_id)
        print('baseline dist: ' + priorDist)

    # define final variables
    Ncomp = 2
    single_flow = AffineCL  # AffineHalfFlow
    net_class = MLP1layer

    # now start running LR methods
    results = pd.DataFrame({
        'L': np.repeat(Nlayers, len(Nhidden)),
        'nh': Nhidden * len(Nlayers),
        'x->y': [0] * len(Nlayers) * len(Nhidden),
        'y->x': [0] * len(Nlayers) * len(Nhidden)
    })

    for l in Nlayers:
        for nh in Nhidden:
            # -------------------------------------------------------------------------------
            # Conditional Flow Model: X->Y
            # -------------------------------------------------------------------------------
            torch.manual_seed(0)
            if priorDist == 'laplace':
                prior = Laplace(torch.zeros(Ncomp), torch.ones(Ncomp))
                # TransformedDistribution(Laplace(torch.zeros(Ncomp), torch.ones(Ncomp)), SigmoidTransform().inv)
            else:
                print('.')
                prior = TransformedDistribution(
                    Uniform(torch.zeros(Ncomp), torch.ones(Ncomp)),
                    SigmoidTransform().inv)  # Logistic distribution
            flows = [
                single_flow(dim=Ncomp, nh=nh, parity=False, net_class=net_class,
                            shift_base=True, scale_base=True) for _ in range(l)
            ]
            # cflows = [[segment_flow(dim=Ncomp)]]
            # flow_mod_cond = ClassCondFlow(prior, flows, cflows, device='cpu')
            flow_mod_cond = Flow(prior, flows, device='cpu')
            flow_mod_cond.load_data(data=dat_id)  # , labels=to_one_hot(label)[0]

            # now we train this model and store the likelihood:
            loss_cond = flow_mod_cond.train(epochs=epochs, optMethod=optMethod, verbose=False)
            # print(np.nanmean(flow_mod_cond.EvalLL(dat_pca, to_one_hot(label)[0])))

            # -------------------------------------------------------------------------------
            # Conditional Flow Model: Y->X
            # -------------------------------------------------------------------------------
            torch.manual_seed(0)
            if priorDist == 'laplace':
                prior_rev = Laplace(torch.zeros(Ncomp), torch.ones(Ncomp))
                # TransformedDistribution(Laplace(torch.zeros(Ncomp), torch.ones(Ncomp)), SigmoidTransform().inv)
                # MultivariateNormal(loc=np.zeros((Ncomp,)), covariance_matrix=np.eye(Ncomp))
            else:
                print('.')
                prior_rev = TransformedDistribution(
                    Uniform(torch.zeros(Ncomp), torch.ones(Ncomp)),
                    SigmoidTransform().inv)  # Logistic distribution
            flows_rev = [
                single_flow(dim=Ncomp, nh=nh, parity=False, net_class=net_class)
                for _ in range(l)
            ]
            # cflows_rev = [[segment_flow(dim=Ncomp)]]
            flow_mod_cond_rev = Flow(prior_rev, flows_rev, device='cpu')
            flow_mod_cond_rev.load_data(data=dat_id[:, [1, 0]])  # , labels=to_one_hot(label)[0]

            # now we train this model and store the likelihood:
            loss_cond_rev = flow_mod_cond_rev.train(epochs=epochs, optMethod=optMethod, verbose=False)

            # evaluate on test data
            results.loc[(results.L == l) & (results.nh == nh), 'x->y'] = np.nanmean(
                flow_mod_cond.EvalLL(testDat_id))
            results.loc[(results.L == l) & (results.nh == nh), 'y->x'] = np.nanmean(
                flow_mod_cond_rev.EvalLL(testDat_id[:, [1, 0]]))

            print(results)

    # compute the consensus
    p = results['x->y'].max() - results['y->x'].max()  # np.mean(results['x->y'] > results['y->x'])
    predModel = 'x->y' if p >= 0 else 'y->x'

    return results, predModel, dir_id, np.minimum(
        np.unique(dat_id[:, 0]).shape[0] / float(dat_id.shape[0]),
        np.unique(dat_id[:, 1]).shape[0] / float(dat_id.shape[0]))
def laplace_dist(mu, var):
    return Independent(Laplace(loc=mu, scale=var), 1)
def laplace_loss(x_hat, scale=0.08):
    return Laplace(loc=x_hat, scale=scale)
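# A brief sanity check, relying only on torch.distributions, of why a Laplace observation
# model like the one returned above behaves as an "L1-style" reconstruction loss: its
# negative log-likelihood is |x - x_hat| / scale plus the constant log(2 * scale).
import math
import torch
from torch.distributions import Laplace

x_hat, x, scale = torch.randn(5), torch.randn(5), 0.08
nll = -Laplace(loc=x_hat, scale=scale).log_prob(x)
print(torch.allclose(nll, (x - x_hat).abs() / scale + math.log(2 * scale)))  # True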
def test_laplace_sample(self):
    self._set_rng_seed(1)
    for loc, scale in product([-1.0, 0.0, 1.0], [0.1, 1.0, 10.0]):
        self._check_sampler_sampler(Laplace(loc, scale),
                                    scipy.stats.laplace(loc=loc, scale=scale),
                                    'Laplace(loc={}, scale={})'.format(loc, scale))
def test_laplace(self):
    loc = Variable(torch.randn(5, 5), requires_grad=True)
    scale = Variable(torch.randn(5, 5).abs(), requires_grad=True)
    loc_1d = Variable(torch.randn(1), requires_grad=True)
    scale_1d = Variable(torch.randn(1), requires_grad=True)
    loc_delta = torch.Tensor([1.0, 0.0])
    scale_delta = torch.Tensor([1e-5, 1e-5])
    self.assertEqual(Laplace(loc, scale).sample().size(), (5, 5))
    self.assertEqual(Laplace(loc, scale).sample_n(7).size(), (7, 5, 5))
    self.assertEqual(Laplace(loc_1d, scale_1d).sample_n(1).size(), (1, 1))
    self.assertEqual(Laplace(loc_1d, scale_1d).sample().size(), (1,))
    self.assertEqual(Laplace(0.2, .6).sample_n(1).size(), (1,))
    self.assertEqual(Laplace(-0.7, 50.0).sample_n(1).size(), (1,))

    # sample check for extreme value of mean, std
    self._set_rng_seed()
    self.assertEqual(Laplace(loc_delta, scale_delta).sample(sample_shape=(1, 2)),
                     torch.Tensor([[[1.0, 0.0], [1.0, 0.0]]]),
                     prec=1e-4)

    self._gradcheck_log_prob(Laplace, (loc, scale))
    self._gradcheck_log_prob(Laplace, (loc, 1.0))
    self._gradcheck_log_prob(Laplace, (0.0, scale))

    state = torch.get_rng_state()
    eps = torch.ones_like(loc).uniform_(-.5, .5)
    torch.set_rng_state(state)
    z = Laplace(loc, scale).rsample()
    z.backward(torch.ones_like(z))
    self.assertEqual(loc.grad, torch.ones_like(loc))
    self.assertEqual(scale.grad, -eps.sign() * torch.log1p(-2 * eps.abs()))
    loc.grad.zero_()
    scale.grad.zero_()
    self.assertEqual(z.size(), (5, 5))

    def ref_log_prob(idx, x, log_prob):
        m = loc.data.view(-1)[idx]
        s = scale.data.view(-1)[idx]
        expected = (-math.log(2 * s) - abs(x - m) / s)
        self.assertAlmostEqual(log_prob, expected, places=3)

    self._check_log_prob(Laplace(loc, scale), ref_log_prob)
def step_n(self, zt, t, n):
    mu = self.conditional_param(zt)
    return Laplace(mu, self.noise_std.exp()).sample([n])
def func(mu, var):
    # fix: call .sqrt() rather than passing the bound method as the scale
    return Laplace(mu, var.sqrt()).rsample()
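# A minimal check of the reparameterized sampling used above: `rsample` keeps the draw
# differentiable with respect to the distribution parameters, so gradients flow back to
# `mu` (and `var`). The specific values are illustrative.
import torch
from torch.distributions import Laplace

mu = torch.zeros(3, requires_grad=True)
var = torch.ones(3, requires_grad=True)
z = Laplace(mu, var.sqrt()).rsample()
z.sum().backward()
print(mu.grad)  # all ones: dz/dmu = 1 under the Laplace location-scale reparameterization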
def logp(self, z, x, t):
    mu = self.conditional_param(z)
    l = Laplace(mu, self.noise_std.exp()).log_prob(x).sum(-1)
    return l