コード例 #1
0
	def _create_layers(self):
		"""Build embedding, graph encoder and graph decoder for the node VAE."""
		# Global model dimensions
		self.latent_dim = 4
		self.embed_dim = self.latent_dim
		self.num_node_types = self.dataset_class.num_node_types()

		self.hidden_size = get_param_val(self.model_params, "coupling_hidden_size", default_val=512)
		self.hidden_layers = get_param_val(self.model_params, "coupling_hidden_layers", default_val=4)
		dp_rate = get_param_val(self.model_params, "coupling_dropout", default_val=0.0)

		self.embed_layer = nn.Embedding(self.num_node_types, self.hidden_size)
		# Encoder and decoder share every hyperparameter except in/out sizes.
		shared_kwargs = dict(num_edges=1,
							 num_layers=self.hidden_layers,
							 hidden_size=self.hidden_size,
							 dp_rate=dp_rate,
							 rgc_layer_fun=RelationGraphAttention)
		# Encoder outputs 2*latent_dim (presumably mean and log-std — confirm).
		self.graph_encoder = RGCNNet(c_in=self.hidden_size,
									 c_out=2 * self.latent_dim,
									 **shared_kwargs)
		self.graph_decoder = RGCNNet(c_in=self.latent_dim,
									 c_out=self.num_node_types,
									 **shared_kwargs)
コード例 #2
0
ファイル: lstm_model.py プロジェクト: phlippe/CategoricalNF
    def __init__(self,
                 num_classes,
                 hidden_size=64,
                 num_layers=2,
                 embedding_dim=32,
                 dp_rate=0.0,
                 input_dp_rate=0.0,
                 max_seq_len=-1,
                 vocab=None,
                 model_params=None):
        """LSTM baseline language model.

        Args:
            num_classes: Output vocabulary size.
            hidden_size: LSTM hidden state size (may be overridden by model_params).
            num_layers: Number of stacked LSTM layers.
            embedding_dim: Token embedding size (overridden by model_params or
                by pretrained vocab vectors).
            dp_rate: Dropout rate before the output projection.
            input_dp_rate: Input dropout rate passed to TimeConcat.
            max_seq_len: Maximum sequence length, used to size the time embedding.
            vocab: Optional torchtext-style vocabulary; if it carries pretrained
                vectors, they initialize the embedding table.
            model_params: Optional dict overriding the hyperparameters above.
        """
        super().__init__()
        if model_params is not None:
            hidden_size = get_param_val(model_params, "coupling_hidden_size",
                                        hidden_size)
            embedding_dim = hidden_size // 4
            num_layers = get_param_val(model_params, "coupling_hidden_layers",
                                       num_layers)
            dp_rate = get_param_val(model_params, "coupling_dropout", dp_rate)
            input_dp_rate = get_param_val(model_params,
                                          "coupling_input_dropout",
                                          input_dp_rate)
            max_seq_len = get_param_val(model_params, "max_seq_len",
                                        max_seq_len)

        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embed_dim = 1  # Not equal to embedding_dim, is needed for making sampling equal to flows

        if vocab is not None and vocab.vectors is not None:
            # Pretrained vectors fix the embedding width and initialize weights.
            embedding_dim = vocab.vectors.shape[1]
            self.embeddings = nn.Embedding(num_embeddings=len(vocab),
                                           embedding_dim=embedding_dim)
            self.embeddings.weight.data.copy_(vocab.vectors)
            self.vocab_size = len(vocab)
        else:
            self.embeddings = nn.Embedding(num_embeddings=num_classes,
                                           embedding_dim=embedding_dim)
            self.vocab_size = num_classes

        # BUGFIX: the original tested the undefined name `time_dp_rate`,
        # which raises a NameError at construction time. `input_dp_rate` is
        # the parameter actually configured above and forwarded to TimeConcat.
        if input_dp_rate < 1.0:
            time_embed_dim = embedding_dim // 4
            time_embed = nn.Linear(2 * max_seq_len, time_embed_dim)
            self.max_seq_len = max_seq_len
            self.time_concat = TimeConcat(time_embed=time_embed,
                                          input_dp_rate=input_dp_rate)
        else:
            self.time_concat = None
            time_embed_dim = 0
        self.lstm = nn.LSTM(input_size=embedding_dim + time_embed_dim,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True,
                            bidirectional=False)

        # Learnable initial hidden state, broadcast over the batch at runtime.
        self.init_state = nn.Parameter(torch.zeros(num_layers, 1, hidden_size))

        self.output_layer = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2), nn.GELU(),
            nn.Dropout(dp_rate), nn.Linear(hidden_size // 2, num_classes),
            nn.LogSoftmax(dim=-1))
コード例 #3
0
ファイル: decoder.py プロジェクト: shawntan/CategoricalNF
def create_decoder(num_categories, num_dims, config, **kwargs):
    """Instantiate a linear decoder with layer count and width taken from `config`."""
    layer_count = get_param_val(config, "num_layers", 1)
    hidden_dim = get_param_val(config, "hidden_size", 64)
    return DecoderLinear(num_categories,
                         embed_dim=num_dims,
                         hidden_size=hidden_dim,
                         num_layers=layer_count,
                         **kwargs)
コード例 #4
0
ファイル: train.py プロジェクト: phlippe/CategoricalNF
	def _create_model(self, model_params):
		"""Build the language model: LSTM baseline or normalizing flow."""
		dataset_name = get_param_val(self.model_params, "dataset", default_val="penntreebank")
		dataset_class = TaskLanguageModeling.get_dataset_class(dataset_name)
		vocab_dict = dataset_class.get_vocabulary()
		vocab_torchtext = dataset_class.get_torchtext_vocab()

		# The RNN is an ablation baseline; the flow is the default model.
		if get_param_val(self.model_params, "use_rnn", default_val=False):
			return LSTMModel(num_classes=len(vocab_dict), vocab=vocab_torchtext, model_params=model_params)
		return FlowLanguageModeling(model_params=model_params, vocab_size=len(vocab_dict), vocab=vocab_torchtext, dataset_class=dataset_class)
コード例 #5
0
 def _create_model(self, model_params):
     """Build the set-modeling flow, discrete or continuous per the config."""
     dataset_name = get_param_val(self.model_params,
                                  "dataset",
                                  default_val="shuffling")
     dataset_class = TaskSetModeling.get_dataset_class(dataset_name)
     use_discrete = get_param_val(self.model_params,
                                  "use_discrete",
                                  default_val=False)
     if use_discrete:
         return DiscreteFlowSetModeling(model_params=model_params,
                                        dataset_class=dataset_class)
     return FlowSetModeling(model_params=model_params,
                            dataset_class=dataset_class)
コード例 #6
0
    def __init__(self,
                 model,
                 model_params,
                 load_data=True,
                 debug=False,
                 batch_size=64):
        """Task wrapper for set modeling.

        Args:
            model: The model trained by this task.
            model_params: Hyperparameter dict; must contain a
                "prior_distribution" entry and a "beta" scheduler config.
            load_data: Forwarded to the base task (whether to load datasets).
            debug: Forwarded to the base task.
            batch_size: Forwarded to the base task.
        """
        super().__init__(model,
                         model_params,
                         load_data=load_data,
                         debug=debug,
                         batch_size=batch_size,
                         name="TaskSetModeling")

        # The prior is mandatory here (allow_default=False): fail loudly
        # with the given error location if it is missing from the config.
        prior_dist_params = get_param_val(
            self.model_params,
            "prior_distribution",
            allow_default=False,
            error_location="TaskSetModeling - init")
        self.prior_distribution = create_prior_distribution(prior_dist_params)

        # Scheduler for the "beta" hyperparameter (its semantics come from
        # the scheduler config; presumably a loss weight — confirm).
        self.beta_scheduler = create_scheduler(self.model_params["beta"],
                                               "beta")

        # Per-batch statistics accumulated under these keys; lists collect
        # values over iterations, "beta" holds the current scalar.
        self.summary_dict = {
            "log_prob": list(),
            "ldj": list(),
            "z": list(),
            "beta": 0
        }
コード例 #7
0
ファイル: train.py プロジェクト: shawntan/CategoricalNF
    def _create_model(self, model_params):
        """Build the graph-coloring model: RNN or VAE baseline, or the flow."""
        dataset_name = get_param_val(self.model_params,
                                     "dataset",
                                     default_val="tiny_3")
        dataset_class = TaskGraphColoring.get_dataset_class(dataset_name)

        # Both flags are read up front; RNN takes precedence over VAE.
        use_rnn = get_param_val(model_params, "use_rnn", default_val=False)
        use_vae = get_param_val(model_params, "use_vae", default_val=False)
        if use_rnn:
            return GraphNodeRNN(model_params, dataset_class)
        if use_vae:
            return GraphNodeVAE(model_params, dataset_class)
        return GraphNodeFlow(model_params, dataset_class)
コード例 #8
0
ファイル: task.py プロジェクト: shawntan/CategoricalNF
    def __init__(self,
                 model,
                 model_params,
                 load_data=True,
                 debug=False,
                 batch_size=64):
        """Task wrapper for graph coloring.

        Args:
            model: The model trained by this task.
            model_params: Hyperparameter dict; "prior_distribution" is
                optional (defaults to an empty dict), "beta" and "gamma"
                scheduler configs are required.
            load_data: Forwarded to the base task (whether to load datasets).
            debug: Forwarded to the base task.
            batch_size: Forwarded to the base task.
        """
        super().__init__(model,
                         model_params,
                         load_data=load_data,
                         debug=debug,
                         batch_size=batch_size,
                         name="TaskGraphColoring")

        # Unlike TaskSetModeling, a missing prior config falls back to
        # an empty dict (i.e. the prior's own defaults).
        prior_dist_params = get_param_val(self.model_params,
                                          "prior_distribution", dict())
        self.prior_distribution = create_prior_distribution(prior_dist_params)

        # Schedulers for the "beta" and "gamma" hyperparameters (their
        # semantics come from the scheduler configs — confirm with caller).
        self.beta_scheduler = create_scheduler(self.model_params["beta"],
                                               "beta")
        self.gamma_scheduler = create_scheduler(self.model_params["gamma"],
                                                "gamma")

        # Per-batch statistics accumulated under these keys; lists collect
        # values over iterations, "beta"/"gamma" hold the current scalars.
        self.summary_dict = {
            "log_prob": list(),
            "ldj": list(),
            "z": list(),
            "beta": 0,
            "gamma": 0
        }
        # Set later when a checkpoint is saved/loaded; None until then.
        self.checkpoint_path = None
コード例 #9
0
ファイル: train.py プロジェクト: shawntan/CategoricalNF
 def _create_model(self, model_params):
     """Create the GraphCNF model for the configured molecule dataset."""
     chosen_dataset = get_param_val(self.model_params,
                                    "dataset",
                                    default_val="zinc250k")
     dataset_cls = TaskMoleculeGeneration.get_dataset_class(chosen_dataset)
     return GraphCNF(model_params, dataset_cls)
コード例 #10
0
ファイル: task.py プロジェクト: ling-cai/CategoricalNF
    def _load_datasets(self):
        """Instantiate train/val/test splits of the configured language dataset."""
        self.max_seq_len = get_param_val(self.model_params,
                                         "max_seq_len",
                                         allow_default=False)

        dataset_name = get_param_val(self.model_params,
                                     "dataset",
                                     default_val="penntreebank")
        dataset_class = TaskLanguageModeling.get_dataset_class(dataset_name)
        print("Loading dataset %s..." % dataset_name)

        def _split(**flags):
            # Every split shares max_seq_len; the flag selects the split.
            return dataset_class(max_seq_len=self.max_seq_len, **flags)

        self.train_dataset = _split(train=True)
        self.val_dataset = _split(val=True)
        self.test_dataset = _split(test=True)
コード例 #11
0
    def _load_datasets(self):
        """Instantiate train/val/test splits of the configured set dataset."""
        self.set_size = get_param_val(self.model_params,
                                      "set_size",
                                      allow_default=False)

        dataset_name = get_param_val(self.model_params,
                                     "dataset",
                                     default_val="shuffling")
        dataset_class, dataset_kwargs = TaskSetModeling.get_dataset_class(
            dataset_name, return_kwargs=True)
        print("Loading dataset %s..." % dataset_name)

        def _split(**flags):
            # Every split shares set_size and the dataset-specific kwargs.
            return dataset_class(set_size=self.set_size,
                                 **flags,
                                 **dataset_kwargs)

        self.train_dataset = _split(train=True)
        self.val_dataset = _split(val=True)
        self.test_dataset = _split(test=True)
コード例 #12
0
def _create_flows(num_dims, embed_dims, config):
    """Build a stack of actnorm / 1x1-conv / mixture-CDF coupling flows.

    With num_flows == 0 the stack degenerates to a single activation
    normalization, i.e. a pure mixture model.
    """
    num_flows = get_param_val(config, "num_flows", 0)
    model_func = get_param_val(config, "model_func", allow_default=False)
    block_type = get_param_val(config, "block_type", None)
    num_mixtures = get_param_val(config, "num_mixtures", 8)

    def _actnorm_net():
        # A single linear layer maps an embedding to scaling and bias.
        return SimpleLinearLayer(c_in=embed_dims,
                                 c_out=2 * num_dims,
                                 data_init=True)

    def _permut_layer(flow_index):
        return InvertibleConv(c_in=num_dims)

    def _actnorm_layer(flow_index):
        return ExtActNormFlow(c_in=num_dims, net=_actnorm_net())

    if num_dims > 1:
        # Multi-dimensional input: one fixed channel mask for all couplings.
        mask = CouplingLayer.create_channel_mask(c_in=num_dims)

        def _mask_func(_):
            return mask
    else:
        # 1-D input: alternate the chess mask between consecutive couplings.
        mask = CouplingLayer.create_chess_mask()

        def _mask_func(flow_index):
            return mask if flow_index % 2 == 0 else 1 - mask

    def _coupling_layer(flow_index):
        return MixtureCDFCoupling(c_in=num_dims,
                                  mask=_mask_func(flow_index),
                                  block_type=block_type,
                                  model_func=model_func,
                                  num_mixtures=num_mixtures)

    flow_layers = []
    if num_flows == 0:  # num_flows == 0 => mixture model
        flow_layers.append(_actnorm_layer(flow_index=0))
    else:
        for flow_index in range(num_flows):
            flow_layers.extend([
                _actnorm_layer(flow_index),
                _permut_layer(flow_index),
                _coupling_layer(flow_index)
            ])

    return nn.ModuleList(flow_layers)
コード例 #13
0
def _create_flows(num_dims, embed_dims, config):
    """Build a stack of actnorm / 1x1-conv / linear coupling flows.

    With num_flows == 0, or num_dims == 1 (where a channel-masked coupling
    would be a no-op), the stack degenerates to a single activation
    normalization, i.e. a mixture model.
    """
    num_flows = get_param_val(config, "num_flows", 0)
    num_hidden_layers = get_param_val(config, "hidden_layers", 2)
    hidden_size = get_param_val(config, "hidden_size", 256)

    # We apply a linear net in the coupling layers for linear flows
    block_type_name = "LinearNet"
    block_fun_coup = lambda c_out: LinearNet(c_in=num_dims,
                                             c_out=c_out,
                                             num_layers=num_hidden_layers,
                                             hidden_size=hidden_size,
                                             ext_input_dims=embed_dims)

    # For the activation normalization, we map an embedding to scaling and bias with a single layer
    block_fun_actn = lambda: SimpleLinearLayer(
        c_in=embed_dims, c_out=2 * num_dims, data_init=True)

    permut_layer = lambda flow_index: InvertibleConv(c_in=num_dims)
    actnorm_layer = lambda flow_index: ExtActNormFlow(c_in=num_dims,
                                                      net=block_fun_actn())
    # We do not use mixture coupling layers here as we need the inverse to be differentiable as well
    coupling_layer = lambda flow_index: CouplingLayer(
        c_in=num_dims,
        mask=CouplingLayer.create_channel_mask(c_in=num_dims),
        block_type=block_type_name,
        model_func=block_fun_coup)

    flow_layers = []
    if num_flows == 0 or num_dims == 1:  # Num_flows == 0 => mixture model, num_dims == 1 => coupling layers have no effect
        flow_layers += [actnorm_layer(flow_index=0)]
    else:
        for flow_index in range(num_flows):
            flow_layers += [
                actnorm_layer(flow_index),
                permut_layer(flow_index),
                coupling_layer(flow_index)
            ]

    return nn.ModuleList(flow_layers)
コード例 #14
0
ファイル: graphCNF.py プロジェクト: shawntan/CategoricalNF
 def _create_layers(self):
     """Cache dataset-level sizes, build the edge prior, then all flow layers."""
     # Load global model params
     self.max_num_nodes = self.dataset_class.max_num_nodes()
     self.num_node_types = self.dataset_class.num_node_types()
     self.num_edge_types = self.dataset_class.num_edge_types()
     self.num_max_neighbours = self.dataset_class.num_max_neighbours()
     # Prior distribution is needed here for edges; an absent config falls
     # back to an empty dict (the prior's own defaults).
     prior_config = get_param_val(self.model_params,
                                  "prior_distribution",
                                  default_val=dict())
     self.prior_distribution = create_prior_distribution(prior_config)
     # Create encoding and flow layers
     self._create_encoding_layers()
     self._create_step_flows()
コード例 #15
0
def _create_flows(config, embed_dims):
    """Build flows for variational dequantization.

    Each block is an activation normalization followed by a 1-D coupling
    layer; invertible convolutions are not useful here as the
    dimensionality is 1 anyway. The chess mask is flipped on even blocks
    so consecutive couplings transform complementary positions.
    """
    num_flows = get_param_val(config, "num_flows", 4)
    model_func = get_param_val(config, "model_func", allow_default=False)
    block_type = get_param_val(config, "block_type", None)

    layers = []
    for idx in range(num_flows):
        mask = CouplingLayer.create_chess_mask()
        if idx % 2 == 0:
            mask = 1 - mask
        layers.append(ActNormFlow(c_in=1, data_init=False))
        layers.append(CouplingLayer(c_in=1,
                                    mask=mask,
                                    model_func=model_func,
                                    block_type=block_type))

    return nn.ModuleList(layers)
コード例 #16
0
def create_prior_distribution(distribution_params):
    """Create the prior distribution described by `distribution_params`.

    Falls back to a logistic prior when "distribution_type" is absent;
    terminates the program on an unknown type.
    """
    distribution_type = get_param_val(distribution_params, "distribution_type",
                                      PriorDistribution.LOGISTIC)
    # Forward all non-None config entries as constructor kwargs.
    # NOTE(review): "distribution_type" itself is not filtered out, so it is
    # also passed to the constructor — presumably absorbed by **kwargs there;
    # confirm against the distribution classes.
    input_params = {
        key: val
        for key, val in distribution_params.items() if val is not None
    }

    if PriorDistribution.GAUSSIAN == distribution_type:
        return GaussianDistribution(**input_params)
    elif PriorDistribution.LOGISTIC == distribution_type:
        return LogisticDistribution(**input_params)
    else:
        print("[!] ERROR: Unknown distribution type %s" %
              str(distribution_type))
        sys.exit(1)
コード例 #17
0
    def _create_node_flow_layers(self):
        """Build the actnorm / 1x1-conv / mixture-coupling stack over node embeddings."""
        num_flows = get_param_val(self.model_params,
                                  "coupling_num_flows",
                                  default_val=8)
        hidden_size = get_param_val(self.model_params,
                                    "coupling_hidden_size",
                                    default_val=384)
        hidden_layers = get_param_val(self.model_params,
                                      "coupling_hidden_layers",
                                      default_val=4)
        num_mixtures = get_param_val(self.model_params,
                                     "coupling_num_mixtures",
                                     default_val=16)
        mask_ratio = get_param_val(self.model_params,
                                   "coupling_mask_ratio",
                                   default_val=0.5)
        dropout = get_param_val(self.model_params,
                                "coupling_dropout",
                                default_val=0.0)

        # One fixed channel mask shared by all coupling layers.
        coupling_mask = CouplingLayer.create_channel_mask(self.embed_dim,
                                                          ratio=mask_ratio)

        def _coupling_net(c_out):
            # Graph-attention RGCN used as the coupling transformation network.
            return RGCNNet(c_in=self.embed_dim,
                           c_out=c_out,
                           num_edges=1,
                           num_layers=hidden_layers,
                           hidden_size=hidden_size,
                           dp_rate=dropout,
                           rgc_layer_fun=RelationGraphAttention)

        layers = []
        for _ in range(num_flows):
            layers.append(ActNormFlow(self.embed_dim))
            layers.append(InvertibleConv(self.embed_dim))
            layers.append(MixtureCDFCoupling(
                c_in=self.embed_dim,
                mask=coupling_mask,
                model_func=_coupling_net,
                block_type="GraphAttentionNet",
                num_mixtures=num_mixtures,
                regularizer_max=3.5,  # To ensure an accurate reversibility
                regularizer_factor=2))
        # Final normalization after the last coupling block.
        layers.append(ActNormFlow(c_in=self.embed_dim))
        return layers
コード例 #18
0
ファイル: task.py プロジェクト: shawntan/CategoricalNF
    def _load_datasets(self):
        """Create train/val/test graph datasets; the RNN baseline may fix node order."""
        self.dataset_class = self.model.dataset_class

        dataset_kwargs = {}
        if isinstance(self.model, GraphNodeRNN):
            # The autoregressive baseline needs an explicit graph ordering.
            dataset_kwargs["order_graphs"] = get_param_val(
                self.model_params, "rnn_graph_ordering", default_val="rand")

        def _split(train, val, test):
            return self.dataset_class(train=train,
                                      val=val,
                                      test=test,
                                      **dataset_kwargs)

        self.train_dataset = _split(True, False, False)
        self.val_dataset = _split(False, True, False)
        self.test_dataset = _split(False, False, True)
コード例 #19
0
def create_scheduler(scheduler_params, param_name=None):
	"""Instantiate a hyperparameter scheduler from its config dictionary.

	All keys are mandatory (allow_default=False); unknown scheduler types
	abort the program.
	"""
	sched_type = get_param_val(scheduler_params, "scheduler_type", allow_default=False)
	end_val = get_param_val(scheduler_params, "scheduler_end_val", allow_default=False)
	start_val = get_param_val(scheduler_params, "scheduler_start_val", allow_default=False)
	stepsize = get_param_val(scheduler_params, "scheduler_step_size", allow_default=False)
	logit = get_param_val(scheduler_params, "scheduler_logit", allow_default=False)
	delay = get_param_val(scheduler_params, "scheduler_delay", allow_default=False)

	if sched_type == "constant":
		return ConstantScheduler(const_val=end_val, param_name=param_name)
	# The remaining schedulers share the same base arguments.
	common = dict(start_val=start_val, end_val=end_val, stepsize=stepsize, delay=delay, param_name=param_name)
	if sched_type == "linear":
		return LinearScheduler(**common)
	if sched_type == "sigmoid":
		return SigmoidScheduler(logit_factor=logit, **common)
	if sched_type == "exponential":
		return ExponentialScheduler(logit_factor=logit, **common)
	print("[!] ERROR: Unknown scheduler type \"%s\"" % str(sched_type))
	sys.exit(1)
コード例 #20
0
ファイル: graphCNF.py プロジェクト: ling-cai/CategoricalNF
    def _create_step_flows(self):
        """Build the three flow steps of GraphCNF.

        Step 1 transforms node encodings only, with RGCN-based mixture-CDF
        couplings. Steps 2 and 3 jointly transform node and edge encodings
        via an EdgeGNN; they differ only in the edge-to-node attention layer
        (Edge2NodeAttnLayer for step 2, Edge2NodeQKVAttnLayer for step 3).
        """
        ## Get hyperparameters from model_params dictionary
        hidden_size_nodes = get_param_val(self.model_params,
                                          "coupling_hidden_size_nodes",
                                          default_val=64)
        hidden_size_edges = get_param_val(self.model_params,
                                          "coupling_hidden_size_edges",
                                          default_val=16)
        # Comma-separated flow counts, one per step, e.g. "4,6,6".
        num_flows = get_param_val(self.model_params,
                                  "coupling_num_flows",
                                  default_val="4,6,6")
        num_flows = [int(k) for k in num_flows.split(",")]
        # Hidden-layer counts may be a single int, a single string, or a
        # comma-separated string; normalize to a list of three ints.
        hidden_layers = get_param_val(self.model_params,
                                      "coupling_hidden_layers",
                                      default_val=4)
        if isinstance(hidden_layers, str):
            if "," in hidden_layers:
                hidden_layers = [int(l) for l in hidden_layers.split(",")]
            else:
                hidden_layers = [int(hidden_layers)] * 3
        else:
            hidden_layers = [hidden_layers] * 3
        num_mixtures_nodes = get_param_val(self.model_params,
                                           "coupling_num_mixtures_nodes",
                                           default_val=16)
        num_mixtures_edges = get_param_val(self.model_params,
                                           "coupling_num_mixtures_edges",
                                           default_val=16)
        mask_ratio = get_param_val(self.model_params,
                                   "coupling_mask_ratio",
                                   default_val=0.5)
        dropout = get_param_val(self.model_params,
                                "coupling_dropout",
                                default_val=0.0)

        #----------------#
        #- Step 1 flows -#
        #----------------#

        coupling_mask_nodes = CouplingLayer.create_channel_mask(
            self.encoding_dim_nodes, ratio=mask_ratio
        )  # 1*self.encoding_dim_nodes, where the first half is 1 and the last half is 0.
        step1_model_func = lambda c_out: RGCNNet(
            c_in=self.encoding_dim_nodes,
            c_out=c_out,
            num_edges=self.num_edge_types,
            num_layers=hidden_layers[0],
            hidden_size=hidden_size_nodes,
            max_neighbours=self.dataset_class.num_max_neighbours(),
            dp_rate=dropout,
            rgc_layer_fun=RelationGraphConv)
        step1_flows = []
        for _ in range(num_flows[0]):
            step1_flows += [
                ActNormFlow(self.encoding_dim_nodes),
                InvertibleConv(self.encoding_dim_nodes),
                MixtureCDFCoupling(
                    c_in=self.encoding_dim_nodes,
                    mask=coupling_mask_nodes,
                    model_func=step1_model_func,
                    block_type="RelationGraphConv",
                    num_mixtures=num_mixtures_nodes,
                    regularizer_max=3.5,  # To ensure an accurate reversibility
                    regularizer_factor=2)
            ]
        self.step1_flows = nn.ModuleList(step1_flows)

        #------------------#
        #- Step 2+3 flows -#
        #------------------#

        coupling_mask_edges = CouplingLayer.create_channel_mask(
            self.encoding_dim_edges, ratio=mask_ratio)

        # Definition of the Edge-GNN network
        def edge2node_layer_func(step_idx):
            # step_idx selects the edge-to-node aggregation layer type.
            if step_idx == 1:
                return lambda: Edge2NodeAttnLayer(
                    hidden_size_nodes=hidden_size_nodes,
                    hidden_size_edges=hidden_size_edges,
                    skip_config=2)
            else:
                return lambda: Edge2NodeQKVAttnLayer(
                    hidden_size_nodes=hidden_size_nodes,
                    hidden_size_edges=hidden_size_edges,
                    skip_config=2)

        node2edge_layer_func = lambda: Node2EdgePlainLayer(
            hidden_size_nodes=hidden_size_nodes,
            hidden_size_edges=hidden_size_edges,
            skip_config=2)

        def edge_gnn_layer_func(step_idx):
            # One EdgeGNN layer combining edge->node and node->edge passes.
            return lambda: EdgeGNNLayer(
                edge2node_layer_func=edge2node_layer_func(step_idx),
                node2edge_layer_func=node2edge_layer_func)

        def get_model_func(step_idx):
            # Coupling network over nodes and edges jointly.
            return lambda c_out_nodes, c_out_edges: EdgeGNN(
                c_in_nodes=self.encoding_dim_nodes,
                c_in_edges=self.encoding_dim_edges,
                c_out_nodes=c_out_nodes,
                c_out_edges=c_out_edges,
                edge_gnn_layer_func=edge_gnn_layer_func(step_idx),
                max_neighbours=self.dataset_class.num_max_neighbours(),
                num_layers=hidden_layers[step_idx])

        # Activation normalization and invertible 1x1 convolution need to be applied on both nodes and edges independently.
        # The "NodeEdgeFlowWrapper" handles the forward pass for such flows
        actnorm_layer = lambda: NodeEdgeFlowWrapper(
            node_flow=ActNormFlow(c_in=self.encoding_dim_nodes),
            edge_flow=ActNormFlow(c_in=self.encoding_dim_edges))
        permut_layer = lambda: NodeEdgeFlowWrapper(
            node_flow=InvertibleConv(c_in=self.encoding_dim_nodes),
            edge_flow=InvertibleConv(c_in=self.encoding_dim_edges))
        coupling_layer = lambda step_idx: NodeEdgeCoupling(
            c_in_nodes=self.encoding_dim_nodes,
            c_in_edges=self.encoding_dim_edges,
            mask_nodes=coupling_mask_nodes,
            mask_edges=coupling_mask_edges,
            num_mixtures_nodes=num_mixtures_nodes,
            num_mixtures_edges=num_mixtures_edges,
            model_func=get_model_func(step_idx),
            regularizer_max=3.5,  # To ensure an accurate reversibility
            regularizer_factor=2)

        step2_flows = []
        for _ in range(num_flows[1]):
            step2_flows += [
                actnorm_layer(),
                permut_layer(),
                coupling_layer(
                    step_idx=1)  # step 2 uses Edge2NodeAttnLayer (step_idx=1)
            ]
        self.step2_flows = nn.ModuleList(step2_flows)

        step3_flows = []
        for _ in range(num_flows[2]):
            step3_flows += [
                actnorm_layer(),
                permut_layer(),
                coupling_layer(
                    step_idx=2
                )  # step 3 uses Edge2NodeQKVAttnLayer (step_idx=2)
            ]
        self.step3_flows = nn.ModuleList(step3_flows)