def _create_layers(self):
    # Each coupling layer predicts its transformation parameters with a
    # small Transformer conditioned on the untransformed half of the input.
    model_func = lambda c_out: CouplingTransformerNet(
        vocab_size=self.vocab_size,
        c_out=c_out,
        num_layers=self.model_params["coupling_hidden_layers"],
        hidden_size=self.model_params["coupling_hidden_size"])

    # Checkerboard mask over positions, alternated between flows so that
    # every position is transformed in half of the coupling layers.
    coupling_mask = CouplingLayer.create_chess_mask()
    coupling_mask_func = lambda flow_index: coupling_mask if flow_index % 2 == 0 else 1 - coupling_mask

    num_flows = self.model_params["coupling_num_flows"]
    layers = []
    for flow_index in range(num_flows):
        layers += [
            DiscreteCouplingLayer(c_in=self.vocab_size,
                                  mask=coupling_mask_func(flow_index),
                                  model_func=model_func,
                                  block_type="Transformer",
                                  temp=0.1)
        ]
    self.flow_layers = nn.ModuleList(layers)

    # Learnable base distribution: one logit vector per set element.
    self.prior = nn.Parameter(0.2 * torch.randn(self.set_size, self.vocab_size),
                              requires_grad=True)
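
# Hedged illustration (not part of the model code above): a runnable sketch
# of how the alternating checkerboard mask behaves. `chess_mask` is a
# stand-in for CouplingLayer.create_chess_mask(); the real tensor's shape is
# dataset-specific, and whether mask == 1 marks the conditioned or the
# transformed half depends on the coupling layer's convention.
import torch

chess_mask = torch.tensor([1., 0., 1., 0., 1., 0.])
mask_func = lambda flow_index: chess_mask if flow_index % 2 == 0 else 1 - chess_mask

for i in range(4):
    print(i, mask_func(i).tolist())
# 0 [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
# 1 [0.0, 1.0, 0.0, 1.0, 0.0, 1.0]
# 2 [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
# 3 [0.0, 1.0, 0.0, 1.0, 0.0, 1.0]
# Every position is held fixed in half of the couplings and transformed in
# the other half.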
def _create_block(flow_index):
    # For variational dequantization we apply a combination of activation
    # normalization and coupling layers. Invertible convolutions are not
    # useful here as our dimensionality is 1 anyway.
    mask = CouplingLayer.create_chess_mask()
    if flow_index % 2 == 0:
        mask = 1 - mask
    return [
        ActNormFlow(c_in=1, data_init=False),
        CouplingLayer(c_in=1,
                      mask=mask,
                      model_func=model_func,
                      block_type=block_type)
    ]
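
# Hedged usage sketch: _create_block reads model_func and block_type from its
# enclosing scope, so it is presumably defined inside a _create_layers-style
# method and called once per flow. build_dequant_flow below is a hypothetical
# helper illustrating how the two-layer blocks stack into one flow:
def build_dequant_flow(num_flows):
    layers = []
    for flow_index in range(num_flows):
        layers += _create_block(flow_index)
    return nn.ModuleList(layers)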
def _create_layers(self):
    self.latent_dim = self.model_params["categ_encoding"]["num_dimensions"]
    model_func = lambda c_out: CouplingTransformerNet(
        c_in=self.latent_dim,
        c_out=c_out,
        num_layers=self.model_params["coupling_hidden_layers"],
        hidden_size=self.model_params["coupling_hidden_size"])

    # The categorical encoding reuses the same network constructor and
    # block type for its own internal flows.
    self.model_params["categ_encoding"]["flow_config"]["model_func"] = model_func
    self.model_params["categ_encoding"]["flow_config"]["block_type"] = "Transformer"
    self.encoding_layer = create_encoding(self.model_params["categ_encoding"],
                                          dataset_class=self.dataset_class,
                                          vocab_size=self.vocab_size)

    num_flows = self.model_params["coupling_num_flows"]
    if self.latent_dim > 1:
        # Multiple latent dimensions: split across channels. A single fixed
        # mask suffices because InvertibleConv re-mixes the channels between
        # coupling layers.
        coupling_mask = CouplingLayer.create_channel_mask(
            self.latent_dim, ratio=self.model_params["coupling_mask_ratio"])
        coupling_mask_func = lambda flow_index: coupling_mask
    else:
        # Single latent dimension: checkerboard mask over positions,
        # alternated between flows.
        coupling_mask = CouplingLayer.create_chess_mask()
        coupling_mask_func = lambda flow_index: coupling_mask if flow_index % 2 == 0 else 1 - coupling_mask

    layers = []
    for flow_index in range(num_flows):
        layers += [
            ActNormFlow(self.latent_dim),
            InvertibleConv(self.latent_dim),
            MixtureCDFCoupling(
                c_in=self.latent_dim,
                mask=coupling_mask_func(flow_index),
                model_func=model_func,
                block_type="Transformer",
                num_mixtures=self.model_params["coupling_num_mixtures"])
        ]
    self.flow_layers = nn.ModuleList([self.encoding_layer] + layers)
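
# Hedged sketch of a model_params fragment consumed by the method above. The
# keys mirror its lookups; the values are illustrative only and not taken
# from the repository's configs:
model_params = {
    "coupling_hidden_layers": 2,
    "coupling_hidden_size": 128,
    "coupling_num_flows": 4,
    "coupling_num_mixtures": 8,
    "coupling_mask_ratio": 0.5,
    "categ_encoding": {
        "num_dimensions": 4,   # becomes self.latent_dim
        "flow_config": {}      # model_func / block_type are injected above
    },
}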
def _create_flows(num_dims, embed_dims, config):
    num_flows = get_param_val(config, "num_flows", 0)
    model_func = get_param_val(config, "model_func", allow_default=False)
    block_type = get_param_val(config, "block_type", None)
    num_mixtures = get_param_val(config, "num_mixtures", 8)

    # For the activation normalization, we map an embedding to scaling and
    # bias with a single layer
    block_fun_actn = lambda: SimpleLinearLayer(c_in=embed_dims,
                                               c_out=2 * num_dims,
                                               data_init=True)

    permut_layer = lambda flow_index: InvertibleConv(c_in=num_dims)
    actnorm_layer = lambda flow_index: ExtActNormFlow(c_in=num_dims,
                                                      net=block_fun_actn())

    if num_dims > 1:
        mask = CouplingLayer.create_channel_mask(c_in=num_dims)
        mask_func = lambda _: mask
    else:
        mask = CouplingLayer.create_chess_mask()
        mask_func = lambda flow_index: mask if flow_index % 2 == 0 else 1 - mask
    coupling_layer = lambda flow_index: MixtureCDFCoupling(
        c_in=num_dims,
        mask=mask_func(flow_index),
        block_type=block_type,
        model_func=model_func,
        num_mixtures=num_mixtures)

    flow_layers = []
    if num_flows == 0:  # num_flows == 0 => mixture model
        flow_layers += [actnorm_layer(flow_index=0)]
    else:
        for flow_index in range(num_flows):
            flow_layers += [
                actnorm_layer(flow_index),
                permut_layer(flow_index),
                coupling_layer(flow_index)
            ]

    return nn.ModuleList(flow_layers)
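
# Hedged usage sketch for _create_flows. The keys match the get_param_val
# lookups above; my_model_func is a hypothetical network constructor in the
# style of the CouplingTransformerNet lambdas used earlier:
my_model_func = lambda c_out: CouplingTransformerNet(
    c_in=3, c_out=c_out, num_layers=2, hidden_size=128)
config = {
    "num_flows": 4,
    "model_func": my_model_func,  # required: allow_default=False
    "block_type": "Transformer",
    "num_mixtures": 8,
}
flows = _create_flows(num_dims=3, embed_dims=64, config=config)
# With num_flows == 0 the function instead returns a single ExtActNormFlow,
# i.e. a pure mixture model without coupling transformations.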