def __init__(self, latent_size, hidden_size, flow_depth, num_components):
    super().__init__()
    modules = []
    for _ in range(flow_depth):
        modules.append(
            flow.AutoregressiveInverseAndLogProb(num_input=latent_size,
                                                 num_hidden=hidden_size,
                                                 num_context=latent_size,
                                                 use_tanh=True))
        modules.append(flow.Reverse(latent_size))
    self.r_nu_first = flow.FlowSequential(*modules)
    self.r_nu_last = flow.BinaryMixtureTransform(latent_size, num_components)
    self.log_r_0 = distributions.StandardNormalLogProb()
def __init__(self, latent_size, data_size, flow_depth):
    super().__init__()
    hidden_size = latent_size * 2
    self.inference_network = NeuralNetwork(input_size=data_size,
                                           output_size=latent_size * 3,
                                           hidden_size=hidden_size)
    modules = []
    for _ in range(flow_depth):
        modules.append(
            flow.InverseAutoregressiveFlow(num_input=latent_size,
                                           num_hidden=hidden_size,
                                           num_context=latent_size))
        modules.append(flow.Reverse(latent_size))
    self.q_z_flow = flow.FlowSequential(*modules)
    self.log_q_z_0 = NormalLogProb()
    self.softplus = nn.Softplus()
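# `NormalLogProb` is referenced above but not defined in this excerpt. A
# minimal sketch of a diagonal-Gaussian log-density consistent with how
# log_q_z_0 is used here; the (loc, scale, z) signature is an assumption
# for illustration, not necessarily this repository's exact API.
import math

import torch
import torch.nn as nn


class NormalLogProb(nn.Module):
    def forward(self, loc, scale, z):
        # Elementwise log N(z; loc, scale^2).
        var = torch.pow(scale, 2)
        return (-0.5 * torch.log(2 * math.pi * var)
                - torch.pow(z - loc, 2) / (2 * var))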
def __init__(self, latent_size, flow_depth, hidden_size, hidden_degrees,
             activation, reverse, flow_std):
    super().__init__()
    modules = []
    self.latent_size = latent_size
    for _ in range(flow_depth):
        module = flow.AutoregressiveSampleAndLogProb(
            num_input=latent_size,
            use_context=False,
            hidden_size=hidden_size,
            hidden_degrees=hidden_degrees,
            activation=activation,
            flow_std=flow_std)
        modules.append(module)
        if reverse:
            modules.append(flow.Reverse(latent_size))
    self.q_nu = flow.FlowSequential(*modules)
    self.q_nu_0 = distributions.Normal(loc=0.0, scale=flow_std)
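# A hypothetical sketch of drawing nu ~ q(nu) with the module above: sample
# from the base Normal, push the sample through the flow, and apply the
# change of variables log q(nu') = log q_0(nu) - log|det df/dnu|. The method
# name and the (output, log_det) return convention of FlowSequential are
# assumptions; the real sampling code is not shown in this excerpt.
def sample_q_nu(self, num_samples):
    nu = self.q_nu_0.sample((num_samples, self.latent_size))
    log_q = self.q_nu_0.log_prob(nu).sum(-1)   # base log-density
    nu, log_det = self.q_nu(nu)                # assumed flow contract
    return nu, log_q - log_det                 # change of variables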
def __init__(self, latent_size, flow_depth, flow_std, hidden_size,
             hidden_degrees, activation):
    super().__init__()
    self.r_nu_0 = torch_dist.Normal(loc=0, scale=1)
    self.register_buffer('zero', torch.Tensor([0]))
    modules = []
    for _ in range(flow_depth):
        modules.append(
            flow.AutoregressiveInverseAndLogProb(
                num_input=latent_size,
                use_context=True,
                use_tanh=True,
                hidden_size=hidden_size,
                hidden_degrees=hidden_degrees,
                flow_std=flow_std,
                activation=activation))
        modules.append(flow.Reverse(latent_size))
    self.r_nu = flow.FlowSequential(*modules)
def __init__(self, latent_size, flow_depth, flow_std, hidden_size,
             hidden_degrees, reverse, activation):
    super().__init__()
    self.log_r_nu_0 = BinaryDistribution(latent_size, scale=flow_std)
    modules = []
    for _ in range(flow_depth):
        modules.append(
            flow.AutoregressiveInverseAndLogProb(
                num_input=latent_size,
                use_context=True,
                use_tanh=True,
                hidden_size=hidden_size,
                hidden_degrees=hidden_degrees,
                flow_std=flow_std,
                activation=activation))
        if reverse:
            modules.append(flow.Reverse(latent_size))
    self.r_nu = flow.FlowSequential(*modules)
def __init__(self, latent_size, flow_depth=2, logprob=False):
    super().__init__()
    if logprob:
        self.encode_func = self.encode_logprob
    else:
        self.encode_func = self.encode
    DIM = 64
    self.main = nn.Sequential(
        nn.Conv2d(1, DIM, 5, stride=2, padding=2),
        nn.ReLU(True),
        nn.Conv2d(DIM, 2 * DIM, 5, stride=2, padding=2),
        nn.ReLU(True),
        nn.Conv2d(2 * DIM, 4 * DIM, 5, stride=2, padding=2),
        nn.ReLU(True),
    )
    if flow_depth > 0:
        # IAF
        hidden_size = latent_size * 2
        flow_layers = [
            flow.InverseAutoregressiveFlow(latent_size, hidden_size,
                                           latent_size)
            for _ in range(flow_depth)
        ]
        flow_layers.append(flow.Reverse(latent_size))
        self.q_z_flow = flow.FlowSequential(*flow_layers)
        self.enc_chunk = 3
    else:
        self.q_z_flow = None
        self.enc_chunk = 2
    fc_out_size = latent_size * self.enc_chunk
    conv_out_size = 4 * 4 * 4 * DIM
    self.fc = nn.Sequential(
        nn.Linear(conv_out_size, fc_out_size),
        nn.LayerNorm(fc_out_size),
        nn.LeakyReLU(0.2),
        nn.Linear(fc_out_size, fc_out_size),
    )
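# A hypothetical encode() showing how enc_chunk is meant to be consumed: the
# fc output splits into loc, a pre-activation scale, and (when a flow is
# present) a context vector for the IAF layers. The method name, the
# softplus on the scale, and the q_z_flow call signature are assumptions for
# illustration; the real encoder is not shown in this excerpt.
def encode(self, x):
    h = self.main(x).view(x.size(0), -1)
    chunks = self.fc(h).chunk(self.enc_chunk, dim=1)
    loc = chunks[0]
    scale = torch.nn.functional.softplus(chunks[1])  # positive std (assumed)
    z = loc + scale * torch.randn_like(loc)          # reparameterized sample
    if self.q_z_flow is not None:
        # IAF refines q(z|x); assumed to return (z, log_det) like the
        # FlowSequential modules above.
        z, _ = self.q_z_flow(z, context=chunks[2])
    return z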
def __init__(self, cconv, latent_size, channels, flow_depth=2):
    super().__init__()
    self.cconv = cconv
    if flow_depth > 0:
        hidden_size = latent_size * 2
        flow_layers = [
            flow.InverseAutoregressiveFlow(latent_size, hidden_size,
                                           latent_size)
            for _ in range(flow_depth)
        ]
        flow_layers.append(flow.Reverse(latent_size))
        self.q_z_flow = flow.FlowSequential(*flow_layers)
        self.enc_chunk = 3
    else:
        self.q_z_flow = None
        self.enc_chunk = 2
    self.grid_encoder = GridEncoder(channels, latent_size * self.enc_chunk)
def __init__(self, latent_size, data_size, flow_depth):
    super().__init__()
    hidden_size = latent_size * 2
    self.inference_network = NeuralNetwork(
        input_size=data_size,
        # loc, scale, and context
        output_size=latent_size * 3,
        hidden_size=hidden_size)
    modules = []
    for _ in range(flow_depth):
        modules.append(
            flow.InverseAutoregressiveGate(num_input=latent_size,
                                           hidden_size=hidden_size,
                                           use_context=True,
                                           hidden_degrees='random',
                                           flow_std=1.0,
                                           activation='relu'))
        modules.append(flow.Reverse(latent_size))
    self.q_z_flow = flow.FlowSequential(*modules)
    self.log_q_z_0 = NormalLogProb()
    self.softplus = nn.Softplus()
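# The gated IAF variant of Kingma et al. (2016) updates each dimension with
# a sigmoid gate, z <- sigmoid(s) * z + (1 - sigmoid(s)) * m, which tends to
# be more numerically stable than a raw affine transform. A sketch of one
# such step; `made_net` is a hypothetical autoregressive network producing
# (m, s), not this repository's module.
def gated_iaf_step(z, made_net, context=None):
    m, s = made_net(z, context)                # autoregressive in z
    gate = torch.sigmoid(s)
    z_new = gate * z + (1 - gate) * m
    # The Jacobian is lower-triangular with the gates on its diagonal, so
    # log|det| is the sum of log-gates (epsilon added for stability).
    log_det = torch.log(gate + 1e-8).sum(-1)
    return z_new, log_det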
def __init__(self, latent_size, flow_depth=2, logprob=False):
    super().__init__()
    if logprob:
        self.encode_func = self.encode_logprob
    else:
        self.encode_func = self.encode
    dim = 64
    self.ls = nn.Sequential(
        nn.Conv2d(3, dim, 5, 2, 2),
        nn.LeakyReLU(0.2),
        conv_ln_lrelu(dim, dim * 2),
        conv_ln_lrelu(dim * 2, dim * 4),
        conv_ln_lrelu(dim * 4, dim * 8),
        nn.Conv2d(dim * 8, latent_size, 4))
    if flow_depth > 0:
        # IAF
        hidden_size = latent_size * 2
        flow_layers = [
            flow.InverseAutoregressiveFlow(latent_size, hidden_size,
                                           latent_size)
            for _ in range(flow_depth)
        ]
        flow_layers.append(flow.Reverse(latent_size))
        self.q_z_flow = flow.FlowSequential(*flow_layers)
        self.enc_chunk = 3
    else:
        self.q_z_flow = None
        self.enc_chunk = 2
    fc_out_size = latent_size * self.enc_chunk
    self.fc = nn.Sequential(
        nn.Linear(latent_size, fc_out_size),
        nn.LayerNorm(fc_out_size),
        nn.LeakyReLU(0.2),
        nn.Linear(fc_out_size, fc_out_size),
    )
def main(cfg):
    np.random.seed(cfg.seed)
    torch.manual_seed(cfg.seed)
    random.seed(48283)
    device = torch.device("cuda:0" if cfg.cuda else "cpu")

    dataset = moons.MOONS()
    kwargs = {'num_workers': 4, 'pin_memory': True} if cfg.cuda else {}
    train_tensor = torch.from_numpy(dataset.trn.x)
    train_dataset = torch.utils.data.TensorDataset(train_tensor)
    valid_tensor = torch.from_numpy(dataset.val.x)
    valid_dataset = torch.utils.data.TensorDataset(valid_tensor)
    test_tensor = torch.from_numpy(dataset.tst.x)
    test_dataset = torch.utils.data.TensorDataset(test_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               **kwargs)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=cfg.test_batch_size,
                                               shuffle=False,
                                               drop_last=False,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=cfg.test_batch_size,
                                              shuffle=False,
                                              drop_last=False,
                                              **kwargs)

    # plot real data
    fig, ax = plt.subplots()
    ax.plot(dataset.val.x[:, 0], dataset.val.x[:, 1], '.')
    ax.set_title('Real data')
    plt.savefig(cfg.out_dir / 'data.png')

    modules = []
    mask = network.mask.checkerboard((1, 2))
    base_mask = torch.from_numpy(mask)
    for flow_num in range(cfg.num_blocks):
        if cfg.flow_type == 'realnvp':
            if flow_num % 2 == 0:
                # alternate the mask relative to the previous coupling layer
                mask = 1 - base_mask
            else:
                mask = base_mask
            modules.append(
                flow.RealNVPCoupling(input_shape=(1, 2),
                                     hidden_size=10,
                                     mask=mask,
                                     kernel_size=1))
        elif cfg.flow_type == 'maf':
            modules.append(
                flow.MaskedAutoregressiveFlow(dataset.n_dims,
                                              cfg.num_hidden,
                                              use_context=False,
                                              use_tanh=cfg.use_tanh))
            modules.append(flow.BatchNormalization(dataset.n_dims))
            modules.append(flow.Reverse(dataset.n_dims))
    model = flow.FlowSequential(*modules)

    # orthogonal initialization helps
    for module in model.modules():
        if isinstance(module, nn.Linear):
            nn.init.orthogonal_(module.weight)
            module.bias.data.fill_(0)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.learning_rate,
                                 weight_decay=1e-6)
    p_u = torch.distributions.Normal(
        loc=torch.zeros(dataset.n_dims, device=device),
        scale=torch.ones(dataset.n_dims, device=device))

    train_iter = iter(train_loader)
    for step in range(cfg.max_iteration):
        try:
            data = next(train_iter)
        except StopIteration:
            train_iter = iter(train_loader)
            data = next(train_iter)
        data = data[0].to(device)
        optimizer.zero_grad()
        log_prob = log_prob_fn(model, p_u, data).sum() / data.shape[0]
        loss = -log_prob
        loss.backward()
        optimizer.step()

        if step % cfg.log_interval == 0:
            if np.isnan(loss.item()):
                raise ValueError("Loss hit nan!")
            print(f'epoch: {step * cfg.batch_size // len(train_dataset)}')
            print(f"step: {step}\tlog_lik: {log_prob.item():.2f}")
            for module in model.modules():
                if isinstance(module, flow.BatchNormalization):
                    module.momentum = 0.
            # initialize the moving averages with the full dataset
            all_data = train_loader.dataset.tensors[0].to(data.device)
            with torch.no_grad():
                model(all_data)
            # restore momentum so evaluation uses the updated moving averages
            for module in model.modules():
                if isinstance(module, flow.BatchNormalization):
                    module.momentum = 1.0
            valid_log_lik = evaluate(model, p_u, log_prob_fn, valid_loader,
                                     device)
            print(f"\tvalid log-lik: {valid_log_lik:.10f}")
            model.eval()
            plot(step, cfg.out_dir, model, dataset, device)
            model.train()
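# `log_prob_fn` is used above but not defined in this excerpt. A sketch of
# the standard change-of-variables objective for a density-estimation flow,
# assuming model(x) returns the base-space variable together with the
# per-example sum of log|det Jacobian| terms (a common convention in flow
# implementations; an assumption here, not this repository's verified API).
def log_prob_fn(model, p_u, x):
    # log p(x) = log p_u(u) + log|det du/dx|
    u, log_det = model(x)
    return p_u.log_prob(u).sum(-1) + log_det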