def _create_layers(self):
    """Create the embedding, graph encoder and graph decoder networks.

    The encoder outputs 2*latent_dim channels per node (presumably mean and
    log-std of the latent posterior — TODO confirm against the sampling code);
    the decoder maps latents back to node-type logits.
    """
    # Global model parameters
    self.latent_dim = 4
    self.embed_dim = self.latent_dim
    self.num_node_types = self.dataset_class.num_node_types()
    self.hidden_size = get_param_val(self.model_params, "coupling_hidden_size", default_val=512)
    self.hidden_layers = get_param_val(self.model_params, "coupling_hidden_layers", default_val=4)
    dropout = get_param_val(self.model_params, "coupling_dropout", default_val=0.0)

    self.embed_layer = nn.Embedding(self.num_node_types, self.hidden_size)

    # Encoder and decoder share every hyperparameter except input/output widths
    shared_kwargs = dict(num_edges=1,
                         num_layers=self.hidden_layers,
                         hidden_size=self.hidden_size,
                         dp_rate=dropout,
                         rgc_layer_fun=RelationGraphAttention)
    self.graph_encoder = RGCNNet(c_in=self.hidden_size,
                                 c_out=2 * self.latent_dim,
                                 **shared_kwargs)
    self.graph_decoder = RGCNNet(c_in=self.latent_dim,
                                 c_out=self.num_node_types,
                                 **shared_kwargs)
def __init__(self, num_classes, hidden_size=64, num_layers=2, embedding_dim=32,
             dp_rate=0.0, input_dp_rate=0.0, max_seq_len=-1, vocab=None, model_params=None):
    """LSTM baseline language model.

    Args:
        num_classes: Number of output categories (vocabulary size if no vocab given).
        hidden_size: LSTM hidden state size.
        num_layers: Number of stacked LSTM layers.
        embedding_dim: Token embedding size (overridden by model_params or vocab vectors).
        dp_rate: Dropout rate inside the output head.
        input_dp_rate: Input dropout rate, forwarded to TimeConcat.
        max_seq_len: Maximum sequence length, needed for the time embedding.
        vocab: Optional torchtext vocabulary; if it carries pretrained vectors,
            they initialize the embedding table.
        model_params: Optional parameter dict overriding the keyword defaults.
    """
    super().__init__()
    if model_params is not None:
        hidden_size = get_param_val(model_params, "coupling_hidden_size", hidden_size)
        embedding_dim = hidden_size // 4
        num_layers = get_param_val(model_params, "coupling_hidden_layers", num_layers)
        dp_rate = get_param_val(model_params, "coupling_dropout", dp_rate)
        input_dp_rate = get_param_val(model_params, "coupling_input_dropout", input_dp_rate)
        max_seq_len = get_param_val(model_params, "max_seq_len", max_seq_len)
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embed_dim = 1  # Not equal to embedding_dim, is needed for making sampling equal to flows

    if vocab is not None and vocab.vectors is not None:
        # Initialize embeddings from the pretrained vocabulary vectors
        embedding_dim = vocab.vectors.shape[1]
        self.embeddings = nn.Embedding(num_embeddings=len(vocab), embedding_dim=embedding_dim)
        self.embeddings.weight.data.copy_(vocab.vectors)
        self.vocab_size = len(vocab)
    else:
        self.embeddings = nn.Embedding(num_embeddings=num_classes, embedding_dim=embedding_dim)
        self.vocab_size = num_classes

    # BUG FIX: the original tested the undefined name `time_dp_rate`, which
    # raised a NameError on every construction. The intended variable is
    # `input_dp_rate` (read above and forwarded to TimeConcat below).
    if input_dp_rate < 1.0:
        time_embed_dim = embedding_dim // 4
        time_embed = nn.Linear(2 * max_seq_len, time_embed_dim)
        self.max_seq_len = max_seq_len
        self.time_concat = TimeConcat(time_embed=time_embed, input_dp_rate=input_dp_rate)
    else:
        self.time_concat = None
        time_embed_dim = 0

    self.lstm = nn.LSTM(input_size=embedding_dim + time_embed_dim,
                        hidden_size=hidden_size,
                        num_layers=num_layers,
                        batch_first=True,
                        bidirectional=False)
    # Learnable initial hidden state, shared across the batch
    self.init_state = nn.Parameter(torch.zeros(num_layers, 1, hidden_size))
    self.output_layer = nn.Sequential(
        nn.Linear(hidden_size, hidden_size // 2),
        nn.GELU(),
        nn.Dropout(dp_rate),
        nn.Linear(hidden_size // 2, num_classes),
        nn.LogSoftmax(dim=-1))
def create_decoder(num_categories, num_dims, config, **kwargs):
    """Factory for a linear decoder configured from a parameter dict.

    Args:
        num_categories: Number of output categories.
        num_dims: Embedding dimensionality fed into the decoder.
        config: Parameter dict with optional "num_layers" and "hidden_size".
        **kwargs: Forwarded verbatim to DecoderLinear.
    """
    layer_count = get_param_val(config, "num_layers", 1)
    width = get_param_val(config, "hidden_size", 64)
    return DecoderLinear(num_categories,
                         embed_dim=num_dims,
                         hidden_size=width,
                         num_layers=layer_count,
                         **kwargs)
def _create_model(self, model_params):
    """Instantiate either the LSTM baseline or the flow-based language model."""
    dataset_name = get_param_val(self.model_params, "dataset", default_val="penntreebank")
    dataset_class = TaskLanguageModeling.get_dataset_class(dataset_name)
    vocab_dict = dataset_class.get_vocabulary()
    vocab_torchtext = dataset_class.get_torchtext_vocab()
    # "use_rnn" switches to the LSTM baseline instead of the flow model
    if get_param_val(self.model_params, "use_rnn", default_val=False):
        return LSTMModel(num_classes=len(vocab_dict),
                         vocab=vocab_torchtext,
                         model_params=model_params)
    return FlowLanguageModeling(model_params=model_params,
                                vocab_size=len(vocab_dict),
                                vocab=vocab_torchtext,
                                dataset_class=dataset_class)
def _create_model(self, model_params):
    """Instantiate the set-modeling model (discrete or continuous flow)."""
    dataset_name = get_param_val(self.model_params, "dataset", default_val="shuffling")
    dataset_class = TaskSetModeling.get_dataset_class(dataset_name)
    # "use_discrete" selects the discrete-flow variant
    if get_param_val(self.model_params, "use_discrete", default_val=False):
        return DiscreteFlowSetModeling(model_params=model_params, dataset_class=dataset_class)
    return FlowSetModeling(model_params=model_params, dataset_class=dataset_class)
def __init__(self, model, model_params, load_data=True, debug=False, batch_size=64):
    """Task wrapper for set-modeling experiments.

    Sets up the prior distribution (mandatory in the config), the beta
    scheduler and the running summary dictionary.
    """
    super().__init__(model, model_params,
                     load_data=load_data,
                     debug=debug,
                     batch_size=batch_size,
                     name="TaskSetModeling")
    # The prior distribution must be specified explicitly for this task
    prior_dist_params = get_param_val(self.model_params,
                                      "prior_distribution",
                                      allow_default=False,
                                      error_location="TaskSetModeling - init")
    self.prior_distribution = create_prior_distribution(prior_dist_params)
    self.beta_scheduler = create_scheduler(self.model_params["beta"], "beta")
    self.summary_dict = {"log_prob": [], "ldj": [], "z": [], "beta": 0}
def _create_model(self, model_params):
    """Instantiate the graph-coloring model: RNN baseline, VAE baseline, or flow."""
    dataset_name = get_param_val(self.model_params, "dataset", default_val="tiny_3")
    dataset_class = TaskGraphColoring.get_dataset_class(dataset_name)
    use_rnn = get_param_val(model_params, "use_rnn", default_val=False)
    use_vae = get_param_val(model_params, "use_vae", default_val=False)
    # RNN takes precedence over VAE if both flags are set (matches flag order)
    if use_rnn:
        return GraphNodeRNN(model_params, dataset_class)
    if use_vae:
        return GraphNodeVAE(model_params, dataset_class)
    return GraphNodeFlow(model_params, dataset_class)
def __init__(self, model, model_params, load_data=True, debug=False, batch_size=64):
    """Task wrapper for graph-coloring experiments.

    Sets up the (optional) prior distribution, the beta and gamma schedulers,
    and the running summary dictionary.
    """
    super().__init__(model, model_params,
                     load_data=load_data,
                     debug=debug,
                     batch_size=batch_size,
                     name="TaskGraphColoring")
    # Prior distribution config is optional here; an empty dict yields defaults
    prior_dist_params = get_param_val(self.model_params, "prior_distribution", dict())
    self.prior_distribution = create_prior_distribution(prior_dist_params)
    self.beta_scheduler = create_scheduler(self.model_params["beta"], "beta")
    self.gamma_scheduler = create_scheduler(self.model_params["gamma"], "gamma")
    self.summary_dict = {"log_prob": [], "ldj": [], "z": [], "beta": 0, "gamma": 0}
    self.checkpoint_path = None
def _create_model(self, model_params):
    """Instantiate the GraphCNF model for the configured molecule dataset."""
    dataset_name = get_param_val(self.model_params, "dataset", default_val="zinc250k")
    dataset_class = TaskMoleculeGeneration.get_dataset_class(dataset_name)
    return GraphCNF(model_params, dataset_class)
def _load_datasets(self):
    """Load the train/val/test splits of the configured language dataset."""
    # max_seq_len is mandatory — the dataset truncates/pads to this length
    self.max_seq_len = get_param_val(self.model_params, "max_seq_len", allow_default=False)
    dataset_name = get_param_val(self.model_params, "dataset", default_val="penntreebank")
    dataset_class = TaskLanguageModeling.get_dataset_class(dataset_name)
    print("Loading dataset %s..." % dataset_name)
    make_split = lambda **flags: dataset_class(max_seq_len=self.max_seq_len, **flags)
    self.train_dataset = make_split(train=True)
    self.val_dataset = make_split(val=True)
    self.test_dataset = make_split(test=True)
def _load_datasets(self):
    """Load the train/val/test splits of the configured set dataset."""
    # set_size is mandatory — every sample is a set of this cardinality
    self.set_size = get_param_val(self.model_params, "set_size", allow_default=False)
    dataset_name = get_param_val(self.model_params, "dataset", default_val="shuffling")
    dataset_class, dataset_kwargs = TaskSetModeling.get_dataset_class(
        dataset_name, return_kwargs=True)
    print("Loading dataset %s..." % dataset_name)
    make_split = lambda **flags: dataset_class(set_size=self.set_size, **flags, **dataset_kwargs)
    self.train_dataset = make_split(train=True)
    self.val_dataset = make_split(val=True)
    self.test_dataset = make_split(test=True)
def _create_flows(num_dims, embed_dims, config):
    """Assemble a stack of (actnorm, invertible conv, mixture coupling) flows.

    With num_flows == 0 the stack degenerates to a single externally
    conditioned actnorm, i.e. a plain mixture model.
    """
    num_flows = get_param_val(config, "num_flows", 0)
    model_func = get_param_val(config, "model_func", allow_default=False)
    block_type = get_param_val(config, "block_type", None)
    num_mixtures = get_param_val(config, "num_mixtures", 8)

    # For the activation normalization, a single linear layer maps an
    # embedding to the scaling and bias parameters
    make_actnorm_net = lambda: SimpleLinearLayer(c_in=embed_dims,
                                                 c_out=2 * num_dims,
                                                 data_init=True)
    make_permut = lambda flow_index: InvertibleConv(c_in=num_dims)
    make_actnorm = lambda flow_index: ExtActNormFlow(c_in=num_dims, net=make_actnorm_net())

    # Channel mask when there is more than one dimension; otherwise an
    # alternating chess mask (flipped on every second flow)
    if num_dims > 1:
        mask = CouplingLayer.create_channel_mask(c_in=num_dims)
        mask_func = lambda _: mask
    else:
        mask = CouplingLayer.create_chess_mask()
        mask_func = lambda flow_index: mask if flow_index % 2 == 0 else 1 - mask

    make_coupling = lambda flow_index: MixtureCDFCoupling(c_in=num_dims,
                                                          mask=mask_func(flow_index),
                                                          block_type=block_type,
                                                          model_func=model_func,
                                                          num_mixtures=num_mixtures)

    flow_layers = []
    if num_flows == 0:
        # num_flows == 0 => mixture model
        flow_layers.append(make_actnorm(flow_index=0))
    else:
        for flow_index in range(num_flows):
            flow_layers.extend([make_actnorm(flow_index),
                                make_permut(flow_index),
                                make_coupling(flow_index)])
    return nn.ModuleList(flow_layers)
def _create_flows(num_dims, embed_dims, config):
    """Assemble linear-flow layers (actnorm, invertible conv, linear coupling).

    Degenerates to a single actnorm when num_flows == 0 (mixture model) or
    num_dims == 1 (coupling layers would have no effect).
    """
    num_flows = get_param_val(config, "num_flows", 0)
    num_hidden_layers = get_param_val(config, "hidden_layers", 2)
    hidden_size = get_param_val(config, "hidden_size", 256)

    # The coupling layers of linear flows use a plain linear network
    block_type_name = "LinearNet"
    make_coupling_net = lambda c_out: LinearNet(c_in=num_dims,
                                                c_out=c_out,
                                                num_layers=num_hidden_layers,
                                                hidden_size=hidden_size,
                                                ext_input_dims=embed_dims)
    # For the activation normalization, a single linear layer maps an
    # embedding to the scaling and bias parameters
    make_actnorm_net = lambda: SimpleLinearLayer(c_in=embed_dims,
                                                 c_out=2 * num_dims,
                                                 data_init=True)
    make_permut = lambda flow_index: InvertibleConv(c_in=num_dims)
    make_actnorm = lambda flow_index: ExtActNormFlow(c_in=num_dims, net=make_actnorm_net())
    # No mixture couplings here, as the inverse must be differentiable as well
    make_coupling = lambda flow_index: CouplingLayer(
        c_in=num_dims,
        mask=CouplingLayer.create_channel_mask(c_in=num_dims),
        block_type=block_type_name,
        model_func=make_coupling_net)

    flow_layers = []
    if num_flows == 0 or num_dims == 1:
        flow_layers.append(make_actnorm(flow_index=0))
    else:
        for flow_index in range(num_flows):
            flow_layers.extend([make_actnorm(flow_index),
                                make_permut(flow_index),
                                make_coupling(flow_index)])
    return nn.ModuleList(flow_layers)
def _create_layers(self):
    """Cache dataset statistics, build the prior, and create all sub-layers."""
    # Global model parameters derived from the dataset
    dataset = self.dataset_class
    self.max_num_nodes = dataset.max_num_nodes()
    self.num_node_types = dataset.num_node_types()
    self.num_edge_types = dataset.num_edge_types()
    self.num_max_neighbours = dataset.num_max_neighbours()
    # The prior distribution is needed already here for the edges
    prior_config = get_param_val(self.model_params, "prior_distribution", default_val=dict())
    self.prior_distribution = create_prior_distribution(prior_config)
    # Encoding layers and flow layers
    self._create_encoding_layers()
    self._create_step_flows()
def _create_flows(config, embed_dims):
    """Build variational-dequantization flows: actnorm + coupling pairs.

    The chess mask is flipped on every even flow index so consecutive
    couplings transform complementary halves of the input.
    """
    num_flows = get_param_val(config, "num_flows", 4)
    model_func = get_param_val(config, "model_func", allow_default=False)
    block_type = get_param_val(config, "block_type", None)

    def _block(flow_index):
        # Invertible 1x1 convolutions are not useful at dimensionality 1, so
        # each block combines activation normalization with a coupling layer
        mask = CouplingLayer.create_chess_mask()
        if flow_index % 2 == 0:
            mask = 1 - mask
        return [ActNormFlow(c_in=1, data_init=False),
                CouplingLayer(c_in=1,
                              mask=mask,
                              model_func=model_func,
                              block_type=block_type)]

    layers = []
    for idx in range(num_flows):
        layers += _block(idx)
    return nn.ModuleList(layers)
def create_prior_distribution(distribution_params):
    """Create a prior distribution object from a parameter dict.

    Defaults to a logistic prior; exits the process on an unknown type.
    Keys with None values are dropped so the distribution's own defaults apply.
    """
    distribution_type = get_param_val(distribution_params,
                                      "distribution_type",
                                      PriorDistribution.LOGISTIC)
    input_params = {k: v for k, v in distribution_params.items() if v is not None}
    if distribution_type == PriorDistribution.GAUSSIAN:
        return GaussianDistribution(**input_params)
    if distribution_type == PriorDistribution.LOGISTIC:
        return LogisticDistribution(**input_params)
    print("[!] ERROR: Unknown distribution type %s" % str(distribution_type))
    sys.exit(1)
def _create_node_flow_layers(self):
    """Build the per-node flow stack: repeated (actnorm, 1x1 conv, mixture
    coupling with a graph-attention network) blocks plus a final actnorm."""
    params = self.model_params
    num_flows = get_param_val(params, "coupling_num_flows", default_val=8)
    hidden_size = get_param_val(params, "coupling_hidden_size", default_val=384)
    hidden_layers = get_param_val(params, "coupling_hidden_layers", default_val=4)
    num_mixtures = get_param_val(params, "coupling_num_mixtures", default_val=16)
    mask_ratio = get_param_val(params, "coupling_mask_ratio", default_val=0.5)
    dropout = get_param_val(params, "coupling_dropout", default_val=0.0)

    coupling_mask = CouplingLayer.create_channel_mask(self.embed_dim, ratio=mask_ratio)
    model_func = lambda c_out: RGCNNet(c_in=self.embed_dim,
                                       c_out=c_out,
                                       num_edges=1,
                                       num_layers=hidden_layers,
                                       hidden_size=hidden_size,
                                       dp_rate=dropout,
                                       rgc_layer_fun=RelationGraphAttention)

    layers = []
    for _ in range(num_flows):
        layers.append(ActNormFlow(self.embed_dim))
        layers.append(InvertibleConv(self.embed_dim))
        layers.append(MixtureCDFCoupling(c_in=self.embed_dim,
                                         mask=coupling_mask,
                                         model_func=model_func,
                                         block_type="GraphAttentionNet",
                                         num_mixtures=num_mixtures,
                                         # Regularization ensures an accurate inverse
                                         regularizer_max=3.5,
                                         regularizer_factor=2))
    layers.append(ActNormFlow(c_in=self.embed_dim))
    return layers
def _load_datasets(self):
    """Instantiate train/val/test splits of the model's dataset class."""
    self.dataset_class = self.model.dataset_class
    dataset_kwargs = {}
    if isinstance(self.model, GraphNodeRNN):
        # RNN baselines need an explicit graph/node ordering
        dataset_kwargs["order_graphs"] = get_param_val(self.model_params,
                                                       "rnn_graph_ordering",
                                                       default_val="rand")
    make_split = lambda **flags: self.dataset_class(**flags, **dataset_kwargs)
    self.train_dataset = make_split(train=True, val=False, test=False)
    self.val_dataset = make_split(train=False, val=True, test=False)
    self.test_dataset = make_split(train=False, val=False, test=True)
def create_scheduler(scheduler_params, param_name=None):
    """Create a value scheduler (constant/linear/sigmoid/exponential).

    All scheduler_* keys are mandatory in the config; an unknown type
    aborts the program.
    """
    read = lambda key: get_param_val(scheduler_params, key, allow_default=False)
    sched_type = read("scheduler_type")
    end_val = read("scheduler_end_val")
    start_val = read("scheduler_start_val")
    stepsize = read("scheduler_step_size")
    logit = read("scheduler_logit")
    delay = read("scheduler_delay")

    if sched_type == "constant":
        return ConstantScheduler(const_val=end_val, param_name=param_name)

    # The remaining schedulers share the ramp parameters
    ramp_kwargs = dict(start_val=start_val,
                       end_val=end_val,
                       stepsize=stepsize,
                       delay=delay,
                       param_name=param_name)
    if sched_type == "linear":
        return LinearScheduler(**ramp_kwargs)
    if sched_type == "sigmoid":
        return SigmoidScheduler(logit_factor=logit, **ramp_kwargs)
    if sched_type == "exponential":
        return ExponentialScheduler(logit_factor=logit, **ramp_kwargs)

    print("[!] ERROR: Unknown scheduler type \"%s\"" % str(sched_type))
    sys.exit(1)
def _create_step_flows(self):
    """Build the flow layers for the three generation steps.

    Step 1 transforms node encodings only, using RGCN-based mixture
    couplings. Steps 2 and 3 transform node and edge encodings jointly via
    EdgeGNN-based couplings; step 2 uses the plain attention edge->node
    layer, step 3 the QKV attention variant.
    """
    ## Hyperparameters from the model_params dictionary
    params = self.model_params
    hidden_size_nodes = get_param_val(params, "coupling_hidden_size_nodes", default_val=64)
    hidden_size_edges = get_param_val(params, "coupling_hidden_size_edges", default_val=16)
    num_flows = [int(v) for v in
                 get_param_val(params, "coupling_num_flows", default_val="4,6,6").split(",")]
    hidden_layers = get_param_val(params, "coupling_hidden_layers", default_val=4)
    # Accept a single value or a comma-separated triple (one entry per step)
    if not isinstance(hidden_layers, str):
        hidden_layers = [hidden_layers] * 3
    elif "," in hidden_layers:
        hidden_layers = [int(v) for v in hidden_layers.split(",")]
    else:
        hidden_layers = [int(hidden_layers)] * 3
    num_mixtures_nodes = get_param_val(params, "coupling_num_mixtures_nodes", default_val=16)
    num_mixtures_edges = get_param_val(params, "coupling_num_mixtures_edges", default_val=16)
    mask_ratio = get_param_val(params, "coupling_mask_ratio", default_val=0.5)
    dropout = get_param_val(params, "coupling_dropout", default_val=0.0)

    #----------------#
    #- Step 1 flows -#
    #----------------#
    # Channel mask over node encodings: first half ones, second half zeros
    coupling_mask_nodes = CouplingLayer.create_channel_mask(
        self.encoding_dim_nodes, ratio=mask_ratio)

    step1_model_func = lambda c_out: RGCNNet(
        c_in=self.encoding_dim_nodes,
        c_out=c_out,
        num_edges=self.num_edge_types,
        num_layers=hidden_layers[0],
        hidden_size=hidden_size_nodes,
        max_neighbours=self.dataset_class.num_max_neighbours(),
        dp_rate=dropout,
        rgc_layer_fun=RelationGraphConv)

    step1_flows = []
    for _ in range(num_flows[0]):
        step1_flows += [
            ActNormFlow(self.encoding_dim_nodes),
            InvertibleConv(self.encoding_dim_nodes),
            MixtureCDFCoupling(c_in=self.encoding_dim_nodes,
                               mask=coupling_mask_nodes,
                               model_func=step1_model_func,
                               block_type="RelationGraphConv",
                               num_mixtures=num_mixtures_nodes,
                               # Regularization ensures an accurate inverse
                               regularizer_max=3.5,
                               regularizer_factor=2)
        ]
    self.step1_flows = nn.ModuleList(step1_flows)

    #------------------#
    #- Step 2+3 flows -#
    #------------------#
    coupling_mask_edges = CouplingLayer.create_channel_mask(
        self.encoding_dim_edges, ratio=mask_ratio)

    # Edge-GNN network definition. Step 2 (step_idx=1) uses the plain
    # attention edge->node layer, step 3 (step_idx=2) the QKV variant.
    def edge2node_layer_func(step_idx):
        if step_idx == 1:
            return lambda: Edge2NodeAttnLayer(hidden_size_nodes=hidden_size_nodes,
                                              hidden_size_edges=hidden_size_edges,
                                              skip_config=2)
        return lambda: Edge2NodeQKVAttnLayer(hidden_size_nodes=hidden_size_nodes,
                                             hidden_size_edges=hidden_size_edges,
                                             skip_config=2)

    node2edge_layer_func = lambda: Node2EdgePlainLayer(
        hidden_size_nodes=hidden_size_nodes,
        hidden_size_edges=hidden_size_edges,
        skip_config=2)

    def edge_gnn_layer_func(step_idx):
        return lambda: EdgeGNNLayer(edge2node_layer_func=edge2node_layer_func(step_idx),
                                    node2edge_layer_func=node2edge_layer_func)

    def get_model_func(step_idx):
        return lambda c_out_nodes, c_out_edges: EdgeGNN(
            c_in_nodes=self.encoding_dim_nodes,
            c_in_edges=self.encoding_dim_edges,
            c_out_nodes=c_out_nodes,
            c_out_edges=c_out_edges,
            edge_gnn_layer_func=edge_gnn_layer_func(step_idx),
            max_neighbours=self.dataset_class.num_max_neighbours(),
            num_layers=hidden_layers[step_idx])

    # Activation normalization and invertible 1x1 convolutions are applied to
    # nodes and edges independently; NodeEdgeFlowWrapper routes the forward
    # pass for such paired flows.
    actnorm_layer = lambda: NodeEdgeFlowWrapper(
        node_flow=ActNormFlow(c_in=self.encoding_dim_nodes),
        edge_flow=ActNormFlow(c_in=self.encoding_dim_edges))
    permut_layer = lambda: NodeEdgeFlowWrapper(
        node_flow=InvertibleConv(c_in=self.encoding_dim_nodes),
        edge_flow=InvertibleConv(c_in=self.encoding_dim_edges))
    coupling_layer = lambda step_idx: NodeEdgeCoupling(
        c_in_nodes=self.encoding_dim_nodes,
        c_in_edges=self.encoding_dim_edges,
        mask_nodes=coupling_mask_nodes,
        mask_edges=coupling_mask_edges,
        num_mixtures_nodes=num_mixtures_nodes,
        num_mixtures_edges=num_mixtures_edges,
        model_func=get_model_func(step_idx),
        # Regularization ensures an accurate inverse
        regularizer_max=3.5,
        regularizer_factor=2)

    # Step 2: EdgeGNN couplings with plain attention (step_idx=1)
    step2_flows = []
    for _ in range(num_flows[1]):
        step2_flows += [actnorm_layer(), permut_layer(), coupling_layer(step_idx=1)]
    self.step2_flows = nn.ModuleList(step2_flows)

    # Step 3: EdgeGNN couplings with QKV attention (step_idx=2)
    step3_flows = []
    for _ in range(num_flows[2]):
        step3_flows += [actnorm_layer(), permut_layer(), coupling_layer(step_idx=2)]
    self.step3_flows = nn.ModuleList(step3_flows)