Code Example #1
    def extract_tree_expression(self, node, index_mark='_'):
        if node is None or node.data is None:
            return self.expression_str
        feature, parentheses, action, wl_scalar, wl_power, parentheses_bias, wl_activation, parentheses_activation, wl_bias, parentheses_power = Individual.get_all_merged_values(
            node.data)

        if parentheses == 1:
            self.expression_str += utils.get_activation(
                parentheses_activation) + '('
        self.expression_str += utils.get_activation(wl_activation)
        self.expression_str += '({}'.format(wl_scalar) + '*'  ## add wl scalar
        self.expression_str += '{}{}{}'.format(index_mark, feature, index_mark)
        self.expression_str += '**{}'.format(wl_power) + '+{}'.format(wl_bias)
        self.expression_str += ')'
        self.expression_str += utils.get_action(action)

        self.expression_str = self.extract_tree_expression(
            node.left, index_mark)
        self.expression_str = self.extract_tree_expression(
            node.right, index_mark)

        if parentheses == 1:
            self.expression_str = self.expression_str[:-1] + '+{})'.format(
                parentheses_bias) + '**{}'.format(
                    parentheses_power) + self.expression_str[
                        -1]  ## put closing parenthesis before action
        return self.expression_str
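Note: most of the examples on this page call a project-specific get_activation helper that maps an activation name to a callable (Examples #29 and #30 use the name differently; see the note before Example #30). As rough orientation only, a minimal sketch of such a lookup is given below; the function name matches the examples, but the supported keys and the PyTorch flavour are assumptions for illustration, not any particular project's API.

import torch
import torch.nn.functional as F

def get_activation(name):
    # Hypothetical sketch: map an activation name (or None) to a callable.
    # Each project on this page ships its own variant of this helper.
    if name is None:
        return lambda x: x  # identity when no activation is requested
    activations = {
        "relu": torch.relu,
        "leaky_relu": F.leaky_relu,
        "tanh": torch.tanh,
        "sigmoid": torch.sigmoid,
    }
    if name not in activations:
        raise ValueError("Unknown activation: %s" % name)
    return activations[name]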
Code Example #2
 def __init__(
     self,
     rating_vals,
     user_in_units,
     movie_in_units,
     msg_units,
     out_units,
     dropout_rate=0.0,
     agg="stack",  # or 'sum'
     agg_act=None,
     out_act=None,
     share_user_item_param=False,
 ):
     super(GCMCLayer, self).__init__()
     self.rating_vals = rating_vals
     self.agg = agg
     self.share_user_item_param = share_user_item_param
     if agg == "stack":
         # divide the original msg unit size by number of ratings to keep
         # the dimensionality
         assert msg_units % len(rating_vals) == 0
         msg_units = msg_units // len(rating_vals)
     with self.name_scope():
         self.dropout = nn.Dropout(dropout_rate)
         self.W_r = {}
         for rating in rating_vals:
             rating = str(rating)
             if share_user_item_param and user_in_units == movie_in_units:
                 self.W_r[rating] = self.params.get(
                     "W_r_%s" % rating,
                     shape=(user_in_units, msg_units),
                     dtype=np.float32,
                     allow_deferred_init=True,
                 )
                 self.W_r["rev-%s" % rating] = self.W_r[rating]
             else:
                 self.W_r[rating] = self.params.get(
                     "W_r_%s" % rating,
                     shape=(user_in_units, msg_units),
                     dtype=np.float32,
                     allow_deferred_init=True,
                 )
                 self.W_r["rev-%s" % rating] = self.params.get(
                     "revW_r_%s" % rating,
                     shape=(movie_in_units, msg_units),
                     dtype=np.float32,
                     allow_deferred_init=True,
                 )
         self.ufc = nn.Dense(out_units)
         if share_user_item_param:
             self.ifc = self.ufc
         else:
             self.ifc = nn.Dense(out_units)
         self.agg_act = get_activation(agg_act)
         self.out_act = get_activation(out_act)
Code Example #3
    def train(self, batch, labels=None, loss="quadratic", learning_rate=0.01, epochs=1, mini_batch_size=1):
        if labels is not None:
            batch = np.c_[batch, labels]

        amount_of_labels = len(set(batch[:, -1]))
        for epoch in range(epochs):
            print("Epoch: ", epoch, end=", ")
            np.random.shuffle(batch)  # avoids correlated mini batches or memorization of order
            avg_loss_epoch = []  # average loss over all samples in batch for this epoch
            sample_i = 0
            while sample_i < (len(batch) - mini_batch_size):
                mini_batch = batch[sample_i:sample_i + mini_batch_size]
                input_values, labels = mini_batch[:, :-1], mini_batch[:, -1]
                # one-hot-encoding of numerical labels:
                labels = np.eye(amount_of_labels)[labels.astype(int)]
                raw_outputs, activations, activated_outputs = \
                    self.inference(input_values, save_outputs=True)

                ''' Get loss function and its derivatives:
                 ("dx_y" means the partial derivative of y with respect to x) '''
                minibatch_loss = get_loss(loss)(activated_outputs[-1], labels)
                avg_loss_epoch.append(minibatch_loss)
                try:
                    da_loss = get_loss(loss, d="da_")(activated_outputs[-1], labels)
                    dz_a = get_activation(activations[-1], d="dz_")(raw_outputs[-1])
                    dz_loss = np.multiply(da_loss, dz_a)  # Hadamard product
                except AttributeError as e:
                    dz_loss = get_loss(loss, d="dz_")(activated_outputs[-1], labels)

                for l in range(1, len(self.weights)):
                    m, n = activated_outputs[-l-1].shape
                    # faster than stacking ones to our activated outputs:
                    activated_outputs_with_ones = np.ones((m, n + 1))
                    activated_outputs_with_ones[:, :-1] = activated_outputs[-l-1]
                    dw_loss = np.matmul(activated_outputs_with_ones.T, dz_loss)
                    self.weights[-l] = self.weights[-l] - learning_rate * dw_loss / len(batch)

                    dz_a = get_activation(activations[-l-1], d="dz_")(raw_outputs[-l-1])
                    dz_loss = np.multiply(
                        np.matmul(dz_loss, self.weights[-l][:-1, :].T),  # removed biases
                        dz_a
                    )

                m, n = activated_outputs[0].shape
                activated_outputs_with_ones = np.ones((m, n + 1))
                activated_outputs_with_ones[:, :-1] = activated_outputs[0]
                dw_loss = np.matmul(activated_outputs_with_ones.T, dz_loss)
                self.weights[0] = self.weights[0] - learning_rate * dw_loss / len(batch)

                sample_i += mini_batch_size

            avg_loss_epoch = np.mean(np.array(avg_loss_epoch))
            print("Loss: ", avg_loss_epoch)
Code Example #4
    def __init__(self, config: Dict):
        super().__init__(config)

        torch.manual_seed(0)

        default_config = {
            "input_size": 33,
            "num_actions": 4,
            "activation": "relu",

            "hidden_sizes": (64, 64),
        }
        self.config = with_default_config(config, default_config)

        input_size: int = self.config.get("input_size")
        num_actions: int = self.config.get("num_actions")
        hidden_sizes: Tuple[int] = self.config.get("hidden_sizes")
        self.activation: Callable = get_activation(self.config.get("activation"))

        layer_sizes = (input_size,) + hidden_sizes

        self.hidden_layers = nn.ModuleList([
            nn.Linear(in_size, out_size)
            for in_size, out_size in zip(layer_sizes, layer_sizes[1:])
        ])
        self.policy_mu_head = nn.Linear(layer_sizes[-1], num_actions)

        self.v_hidden_layers = nn.ModuleList([
            nn.Linear(in_size, out_size)
            for in_size, out_size in zip(layer_sizes, layer_sizes[1:])
        ])

        self.std = nn.Parameter(torch.ones(1, num_actions))

        self.value_head = nn.Linear(layer_sizes[-1], 1)
Code Example #5
    def __init__(self, config: Dict):
        super().__init__(config)

        default_config = {
            "input_shape": (100, 100),
            "num_actions": 5,
            "activation": "relu",
        }
        self.config = with_default_config(config, default_config)
        self.activation = get_activation(self.config["activation"])

        input_shape: Tuple[int, int] = self.config["input_shape"]

        self.conv_layers = nn.ModuleList([
            nn.Conv2d(4, 32, kernel_size=8, stride=4),  # 24x24x32
            nn.Conv2d(32, 64, kernel_size=7, stride=3),  # 6x6x64
            nn.Conv2d(64, 64, kernel_size=3, stride=1)
        ])  # 4x4x64

        _coords_i = torch.linspace(-1, 1, input_shape[0]).view(-1, 1).repeat(
            1, input_shape[1])
        _coords_j = torch.linspace(-1, 1, input_shape[1]).view(1, -1).repeat(
            input_shape[0], 1)
        self.coords = torch.stack([_coords_i, _coords_j])

        # flatten

        self.policy_head = nn.Linear(4 * 4 * 64, self.config["num_actions"])
        self.value_head = nn.Linear(4 * 4 * 64, 1)
Code Example #6
def DNN_regressor(params, model_dir, feature_columns, config):
    '''Returns a tf.contrib.learn.DNNRegressor estimator object.'''
    hidden_units = params['layers'] * [params['units']]
    weight_column_name = utils.get_param(params, 'weight_column_name')
    optimizer = utils.get_optimizer(utils.get_param(params, 'optimizer'),
                                    params['learning_rate'])
    activation_fn = utils.get_activation(
        utils.get_param(params, 'activation_fn'))
    dropout = float(utils.get_param(params, 'dropout'))
    gradient_clip_norm = utils.get_param(params, 'gradient_clip_norm')
    enable_centered_bias = False  # keep false
    feature_engineering_fn = utils.get_param(params, 'feature_engineering_fn')
    embedding_lr_multipliers = utils.get_param(params,
                                               'embedding_lr_multipliers')
    input_layer_min_slice_size = utils.get_param(params,
                                                 'input_layer_min_slice_size')
    label_keys = utils.get_param(params, 'label_keys')

    return tf.contrib.learn.DNNRegressor(
        hidden_units=hidden_units,
        feature_columns=feature_columns,
        model_dir=model_dir,
        weight_column_name=weight_column_name,
        optimizer=optimizer,
        activation_fn=activation_fn,
        dropout=dropout,
        gradient_clip_norm=gradient_clip_norm,
        enable_centered_bias=enable_centered_bias,
        config=config,
        feature_engineering_fn=feature_engineering_fn,
        embedding_lr_multipliers=embedding_lr_multipliers,
        input_layer_min_slice_size=input_layer_min_slice_size)
Code Example #7
    def __init__(self, config: Dict):
        super().__init__(config)

        default_config = {
            "input_size": 15,
            "num_actions": 5,
            "hidden_sizes": (64, 64),
            "activation": "leaky_relu",
        }
        self.config = with_default_config(config, default_config)

        input_size: int = self.config.get("input_size")
        num_actions: int = self.config.get("num_actions")
        hidden_sizes: Tuple[int] = self.config.get("hidden_sizes")
        self.activation: Callable = get_activation(
            self.config.get("activation"))

        layer_sizes = (input_size, ) + hidden_sizes

        self.hidden_layers = nn.ModuleList([
            nn.Linear(in_size, out_size)
            for in_size, out_size in zip(layer_sizes, layer_sizes[1:])
        ])

        self.policy_head = nn.Linear(layer_sizes[-1], num_actions)
        self.value_head = nn.Linear(layer_sizes[-1], 1)
Code Example #8
File: layers.py  Project: RedTachyon/rl-pong
    def __init__(self, config: Dict[str, Any]):
        super().__init__()

        default_config = {
            "num_subgoals": 2,
            "emb_size": 4,
            "rel_hiddens": (16, 16, ),
            "mlp_hiddens": (16, ),
            "activation": "leaky_relu"
        }

        self.config = with_default_config(config, default_config)

        self.activation: Callable[[Tensor], Tensor] = get_activation(self.config["activation"])

        self.own_embedding = nn.Parameter(torch.randn(self.config["emb_size"])/10., requires_grad=True)
        self.agent_embedding = nn.Parameter(torch.randn(self.config["emb_size"])/10., requires_grad=True)
        self.subgoal_embedding = nn.Parameter(torch.randn(self.config["emb_size"])/10., requires_grad=True)
        self.goal_embedding = nn.Parameter(torch.randn(self.config["emb_size"])/10., requires_grad=True)

        rel_sizes = (2 * (self.config["emb_size"] + 3), ) + self.config["rel_hiddens"]
        mlp_sizes = (self.config["rel_hiddens"][-1], ) + self.config["mlp_hiddens"]

        self.relation_layers = nn.ModuleList([
            nn.Linear(in_size, out_size)
            for in_size, out_size in zip(rel_sizes, rel_sizes[1:])
        ])

        self.mlp_layers = nn.ModuleList([
            nn.Linear(in_size, out_size)
            for in_size, out_size in zip(mlp_sizes, mlp_sizes[1:])
        ])
Code Example #9
    def __init__(self, config: Dict):
        super().__init__(config)

        default_config = {
            "input_shape": (100, 100),
            "num_actions": 5,
            "activation": "relu",
        }

        self.config = with_default_config(config, default_config)

        input_shape: Tuple[int, int] = self.config["input_shape"]
        input_size: int = self.config.get("input_size")
        num_actions: int = self.config.get("num_actions")
        hidden_sizes: Tuple[int] = self.config.get("hidden_sizes")

        self.activation: Callable = get_activation(
            self.config.get("activation"))

        self.conv = nn.Conv2d(3, 3, kernel_size=3, padding=1)

        layer_sizes = (input_size, ) + hidden_sizes

        self.hidden_layers = nn.ModuleList([
            nn.Linear(in_size, out_size)
            for in_size, out_size in zip(layer_sizes, layer_sizes[1:])
        ])

        self.policy_head = nn.Linear(layer_sizes[-1], num_actions)
        self.value_head = nn.Linear(layer_sizes[-1], 1)
Code Example #10
    def _propagate_one_layer(self, input_values, weights, activation="identity"):
        m, n = input_values.shape
        input_with_ones = np.ones((m, n + 1))
        input_with_ones[:, :-1] = input_values  # faster than stacking
        raw_output = np.matmul(input_with_ones, weights)

        activated_output = get_activation(activation)(raw_output)
        return raw_output, activated_output
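The helper above relies on a bias-as-extra-row convention: inputs of shape (m, n) are multiplied by weights of shape (n + 1, k), with the appended column of ones selecting the last weight row as the bias. A self-contained NumPy sketch of that convention follows (a plain sigmoid stands in for the get_activation lookup, which is an assumption here):

import numpy as np

rng = np.random.default_rng(0)
input_values = rng.normal(size=(5, 3))    # m=5 samples, n=3 features
weights = rng.normal(size=(3 + 1, 2))     # n+1 rows: 3 weight rows + 1 bias row, k=2 outputs

# Append a column of ones so the last weight row acts as the bias.
input_with_ones = np.ones((5, 3 + 1))
input_with_ones[:, :-1] = input_values

raw_output = np.matmul(input_with_ones, weights)        # shape (5, 2)
activated_output = 1.0 / (1.0 + np.exp(-raw_output))    # sigmoid in place of get_activation(...)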
Code Example #11
    def __init__(self, args):
        super(Net, self).__init__()
        self._act = get_activation(args.model_activation)
        self.encoder = nn.ModuleList()
        self.encoder.append(
            GCMCLayer(args.rating_vals,
                      args.src_in_units,
                      args.dst_in_units,
                      args.gcn_agg_units,
                      args.gcn_out_units,
                      args.gcn_dropout,
                      args.gcn_agg_accum,
                      agg_act=self._act,
                      share_user_item_param=args.share_param,
                      device=args.device))
        self.gcn_agg_accum = args.gcn_agg_accum
        self.rating_vals = args.rating_vals
        self.device = args.device
        self.gcn_agg_units = args.gcn_agg_units
        self.src_in_units = args.src_in_units
        for i in range(1, args.layers):
            if args.gcn_agg_accum == 'stack':
                gcn_out_units = args.gcn_out_units * len(args.rating_vals)
            else:
                gcn_out_units = args.gcn_out_units
            self.encoder.append(
                GCMCLayer(args.rating_vals,
                          args.gcn_out_units,
                          args.gcn_out_units,
                          gcn_out_units,
                          args.gcn_out_units,
                          args.gcn_dropout - i * 0.1,
                          args.gcn_agg_accum,
                          agg_act=self._act,
                          share_user_item_param=args.share_param,
                          ini=False,
                          device=args.device))

        if args.decoder == "Bi":
            self.decoder = BiDecoder(
                in_units=args.gcn_out_units,  #* args.layers,
                num_classes=len(args.rating_vals),
                num_basis=args.gen_r_num_basis_func)
            '''
            self.decoder2 = MLPDecoder(in_units= args.gcn_out_units * 2,
                                     num_classes=len(args.rating_vals),
                                     num_basis=args.gen_r_num_basis_func)
            '''
        elif args.decoder == "MLP":
            if args.loss_func == "CE":
                num_classes = len(args.rating_vals)
            else:
                num_classes = 1
            self.decoder = MLPDecoder(in_units=args.gcn_out_units *
                                      args.layers,
                                      num_classes=num_classes,
                                      num_basis=args.gen_r_num_basis_func)
        self.rating_vals = args.rating_vals
Code Example #12
    def __init__(self, args, dev_id):
        super(Net, self).__init__()
        self._act = get_activation(args.model_activation)
        self.encoder = nn.ModuleList()
        self.encoder.append(
            GCMCLayer(args.rating_vals,
                      args.src_in_units,
                      args.dst_in_units,
                      args.gcn_agg_units,
                      args.gcn_out_units,
                      args.gcn_dropout,
                      args.gcn_agg_accum,
                      agg_act=self._act,
                      share_user_item_param=args.share_param,
                      device=dev_id))

        self.rating_vals = args.rating_vals

        self.gcn_agg_accum = args.gcn_agg_accum
        self.rating_vals = args.rating_vals
        self.device = dev_id
        self.gcn_agg_units = args.gcn_agg_units
        self.src_in_units = args.src_in_units
        self.batch_size = args.minibatch_size
        for i in range(1, args.layers):
            if args.gcn_agg_accum == 'stack':
                gcn_out_units = args.gcn_out_units * len(args.rating_vals)
            else:
                gcn_out_units = args.gcn_out_units
            self.encoder.append(
                GCMCLayer(args.rating_vals,
                          args.gcn_out_units,
                          args.gcn_out_units,
                          gcn_out_units,
                          args.gcn_out_units,
                          args.gcn_dropout - i * 0.1,
                          args.gcn_agg_accum,
                          agg_act=self._act,
                          share_user_item_param=args.share_param,
                          ini=False,
                          device=dev_id))

        if args.mix_cpu_gpu and args.use_one_hot_fea:
            # if use_one_hot_fea, user and movie features are None;
            # W can be extremely large, so with mix_cpu_gpu, W should be stored on the CPU
            self.encoder.partial_to(dev_id)
        else:
            self.encoder.to(dev_id)

        self.decoder = BiDecoder(in_units=args.gcn_out_units,
                                 num_classes=len(args.rating_vals),
                                 num_basis=args.gen_r_num_basis_func)
        self.decoder.to(dev_id)
Code Example #13
File: model.py  Project: hacors/Drug
 def __init__(self,
              rating_vals,
              user_in_units,
              movie_in_units,
              msg_units,
              out_units,
              dropout_rate=0.0,
              agg='stack',  # or 'sum'
              agg_act=None,
              out_act=None,
              share_user_item_param=False):
     super(GCMCLayer, self).__init__()
     self.rating_vals = rating_vals
     self.agg = agg
     self.share_user_item_param = share_user_item_param
     self.ufc = nn.Linear(msg_units, out_units)
     if share_user_item_param:
         self.ifc = self.ufc
     else:
         self.ifc = nn.Linear(msg_units, out_units)
     if agg == 'stack':
         # divide the original msg unit size by number of ratings to keep
         # the dimensionality
         assert msg_units % len(rating_vals) == 0
         msg_units = msg_units // len(rating_vals)
     self.dropout = nn.Dropout(dropout_rate)
     self.W_r = nn.ParameterDict()
     for rating in rating_vals:
         # PyTorch parameter name can't contain "."
         rating = str(rating).replace('.', '_')
         if share_user_item_param and user_in_units == movie_in_units:
             self.W_r[rating] = nn.Parameter(th.randn(user_in_units, msg_units))
             self.W_r['rev-%s' % rating] = self.W_r[rating]
         else:
             self.W_r[rating] = nn.Parameter(th.randn(user_in_units, msg_units))
             self.W_r['rev-%s' % rating] = nn.Parameter(th.randn(movie_in_units, msg_units))
     self.agg_act = get_activation(agg_act)
     self.out_act = get_activation(out_act)
     self.reset_parameters()
Code Example #14
File: model.py  Project: jennyzhang0215/dgl
 def __init__(
     self,
     src_key,
     dst_key,
     src_in_units,
     dst_in_units,
     agg_units,
     out_units,
     num_links,
     dropout_rate=0.0,
     agg_accum='stack',
     agg_act=None,
     out_act=None,
     # agg_ordinal_sharing=False, share_agg_weights=False, share_out_fc_weights=False,
     **kwargs):
     super(GCMCLayer, self).__init__(**kwargs)
     self._out_act = get_activation(out_act)
     self._src_key = src_key
     self._dst_key = dst_key
     with self.name_scope():
         self.dropout = nn.Dropout(dropout_rate)
         self.aggregator = MultiLinkGCNAggregator(src_key=src_key,
                                                  dst_key=dst_key,
                                                  units=agg_units,
                                                  src_in_units=src_in_units,
                                                  dst_in_units=dst_in_units,
                                                  num_links=num_links,
                                                  dropout_rate=dropout_rate,
                                                  accum=agg_accum,
                                                  act=agg_act,
                                                  prefix='agg_')
         self.user_out_fcs = nn.Dense(out_units,
                                      flatten=False,
                                      prefix='user_out_')
         self.item_out_fcs = nn.Dense(out_units,
                                      flatten=False,
                                      prefix='item_out_')
         self._out_act = get_activation(out_act)
Code Example #15
File: train.py  Project: hacors/Drug
 def __init__(self, args):
     super(Net, self).__init__()
     self._act = get_activation(args.model_activation)
     self.encoder = GCMCLayer(args.rating_vals,
                              args.src_in_units,
                              args.dst_in_units,
                              args.gcn_agg_units,
                              args.gcn_out_units,
                              args.gcn_dropout,
                              args.gcn_agg_accum,
                              agg_act=self._act,
                              share_user_item_param=args.share_param)
     self.decoder = BiDecoder(args.rating_vals,
                              in_units=args.gcn_out_units,
                              num_basis_functions=args.gen_r_num_basis_func)
Code Example #16
    def __build_graph_propagation_model(self) -> tf.Tensor:
        h_dim = self.params['hidden_size']
        activation_fn = get_activation(self.params['graph_model_activation_function']) # tanh
        # if the initial node feature size does not match the hidden size, we create a densely connected layer
        #  to project the features to the correct size (h_dim). This densely connected layer has
        #  h_dim nodes, and uses the activation function specified above (tanh).
        if self.task.initial_node_feature_size != self.params['hidden_size']: # projects features to the specified hidden size
            self.__ops['projected_node_features'] = \
                tf.keras.layers.Dense(units=h_dim,
                                      use_bias=False,
                                      activation=activation_fn,
                                      )(self.__ops['initial_node_features'])
        else:
            self.__ops['projected_node_features'] = self.__ops['initial_node_features']

        cur_node_representations = self.__ops['projected_node_features']
        last_residual_representations = tf.zeros_like(cur_node_representations)
        for layer_idx in range(self.params['graph_num_layers']):
            with tf.variable_scope('gnn_layer_%i' % layer_idx):
                # with some probability, set neurons to zero in current node representations
                cur_node_representations = \
                    tf.nn.dropout(cur_node_representations, rate=1.0 - self.__placeholders['graph_layer_input_dropout_keep_prob'])
                # every `graph_residual_connection_every_num_layers` layers, add back the
                # previously saved node representations; this helps address vanishing or
                # exploding gradients
                if layer_idx % self.params['graph_residual_connection_every_num_layers'] == 0:
                    t = cur_node_representations
                    if layer_idx > 0:
                        cur_node_representations += last_residual_representations
                        cur_node_representations /= 2
                    last_residual_representations = t
                # finally, we construct the gnn layer.
                cur_node_representations = \
                    self._apply_gnn_layer(
                        cur_node_representations,
                        self.__ops['adjacency_lists'],
                        self.__ops['type_to_num_incoming_edges'],
                        self.params['graph_num_timesteps_per_layer'])
                if self.params['graph_inter_layer_norm']:
                    cur_node_representations = tf.contrib.layers.layer_norm(cur_node_representations)
                if layer_idx % self.params['graph_dense_between_every_num_gnn_layers'] == 0:
                    cur_node_representations = \
                        tf.keras.layers.Dense(units=h_dim,
                                              use_bias=False,
                                              activation=activation_fn,
                                              name="Dense",
                                              )(cur_node_representations)

        self.__ops['final_node_representations'] = cur_node_representations
Code Example #17
File: train.py  Project: ghk829/dgl
    def __init__(self, args):
        super(Net, self).__init__()
        self._act = get_activation(args.model_activation)
        self.encoder = GraphSageLayer(args.rating_vals,
                                      args.src_in_units,
                                      args.dst_in_units,
                                      args.gcn_agg_units,
                                      args.gcn_out_units,
                                      args.gcn_dropout,
                                      args.gcn_agg_accum,
                                      agg_act=self._act,
                                      share_user_item_param=args.share_param,
                                      device=args.device)

        self.decoder = DotProduct(args.gcn_agg_units,
                                  args.gcn_out_units)
Code Example #18
    def __build_graph_propagation_model(self) -> tf.Tensor:
        h_dim = self.params['hidden_size']
        activation_fn = get_activation(
            self.params['graph_model_activation_function'])
        if self.task.initial_node_feature_size != self.params['hidden_size']:
            self.__ops['projected_node_features'] = \
                tf.keras.layers.Dense(units=h_dim,
                                      use_bias=False,
                                      activation=activation_fn,
                                      )(self.__ops['initial_node_features'])
        else:
            self.__ops['projected_node_features'] = self.__ops[
                'initial_node_features']

        cur_node_representations = self.__ops['projected_node_features']
        last_residual_representations = tf.zeros_like(cur_node_representations)
        for layer_idx in range(self.params['graph_num_layers']):
            with tf.variable_scope('gnn_layer_%i' % layer_idx):
                cur_node_representations = \
                    tf.nn.dropout(cur_node_representations, rate=1.0 - self.__placeholders['graph_layer_input_dropout_keep_prob'])
                if layer_idx % self.params[
                        'graph_residual_connection_every_num_layers'] == 0:
                    t = cur_node_representations
                    if layer_idx > 0:
                        cur_node_representations += last_residual_representations
                        cur_node_representations /= 2
                    last_residual_representations = t
                cur_node_representations = \
                    self._apply_gnn_layer(
                        cur_node_representations,
                        self.__ops['adjacency_lists'],
                        self.__ops['type_to_num_incoming_edges'],
                        self.params['graph_num_timesteps_per_layer'])
                if self.params['graph_inter_layer_norm']:
                    cur_node_representations = tf.contrib.layers.layer_norm(
                        cur_node_representations)
                if layer_idx % self.params[
                        'graph_dense_between_every_num_gnn_layers'] == 0:
                    cur_node_representations = \
                        tf.keras.layers.Dense(units=h_dim,
                                              use_bias=False,
                                              activation=activation_fn,
                                              name="Dense",
                                              )(cur_node_representations)

        self.__ops['final_node_representations'] = cur_node_representations
Code Example #19
def get_masked_lm_output(bert_config, input_tensor, output_weights, positions,
                         label_ids, label_weights, prev_bplayers=None):
    """Get loss and log probs for the masked LM."""
    input_tensor = gather_indexes(input_tensor, positions)

    with tf.variable_scope("cls/predictions") as prediction_scope:
        # We apply one more non-linear transformation before the output layer.
        # This matrix is not used after pre-training.
        with tf.variable_scope("transform"):
            input_tensor = tf.layers.dense(
                input_tensor,
                units=bert_config.hidden_size,
                activation=utils.get_activation(bert_config.hidden_act),
                kernel_initializer=utils.create_initializer(
                    bert_config.initializer_range))
            input_tensor = utils.layer_norm(input_tensor)

        # The output weights are the same as the input embeddings, but there is
        # an output-only bias for each token.
        output_bias = tf.get_variable(
            "output_bias",
            shape=[bert_config.vocab_size],
            initializer=tf.zeros_initializer())
        logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        label_ids = tf.reshape(label_ids, [-1])
        label_weights = tf.reshape(label_weights, [-1])

        one_hot_labels = tf.one_hot(
            label_ids, depth=bert_config.vocab_size, dtype=tf.float32)

        # The `positions` tensor might be zero-padded (if the sequence is too
        # short to have the maximum number of predictions). The `label_weights`
        # tensor has a value of 1.0 for every real prediction and 0.0 for the
        # padding predictions.
        per_example_loss = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])
        numerator = tf.reduce_sum(label_weights * per_example_loss)
        denominator = tf.reduce_sum(label_weights) + 1e-5
        loss = numerator / denominator
        loss_bplayer = BPLayer(loss, prediction_scope, backward_layers=prev_bplayers)

    return (loss, per_example_loss, log_probs, loss_bplayer)
Code Example #20
    def __init__(self, n_in, n_out, n_layers, layer_factor, 
            act_fn='relu', dropout_p=0.0):
        super(MLP, self).__init__()
        n_h = int(n_in * layer_factor)
        act_fn_class = utils.get_activation(act_fn)
        # build the net
        net = []
        for i in range(n_layers):
            if i == 0:
                net.append(torch.nn.Linear(n_in, n_h))
            else:
                net.append(torch.nn.Linear(n_h, n_h))
            net.append(act_fn_class())
            net.append(torch.nn.BatchNorm1d(n_h))
            if dropout_p > 0:
                net.append(torch.nn.Dropout(p=dropout_p))

        net.append(torch.nn.Linear(n_h, n_out))
        # convert to sequential
        self.net = torch.nn.Sequential(*net)
Code Example #21
    def __init__(self,
                 layer_sizes,
                 feature_extractor_needed=False,
                 use_dropout=False,
                 activation='relu',
                 dropoutv=0.5,
                 reshape_dims=None,
                 seed=0,
                 session_config=None,
                 it=None,
                 c=1.0,
                 xi=0.1,
                 lr=0.001):

        assert (len(layer_sizes) == 4)
        assert (session_config is not None)
        assert (it is not None)
        self.layer_sizes = layer_sizes
        self.feature_extractor_needed = feature_extractor_needed
        self.use_dropout = use_dropout
        self.dropoutv = dropoutv
        self.reshape_dims = reshape_dims
        self.seed = seed
        self.session_config = session_config
        self.it = it

        self.use_dropout = use_dropout
        self.activation = utils.get_activation(activation)

        print("Using feature extractor: %s" % self.feature_extractor_needed)
        print("Using dropout, bn: %s, %f" % (self.use_dropout, self.dropoutv))

        self.phs = {}
        self.vars = {}
        self.objs = {}
        self.all_predictions = []

        self.c = c
        self.xi = xi
        self.lr = float(lr)
Code Example #22
    def __init__(self, config: Dict):
        super().__init__(config)

        default_config = {
            "input_shape": (100, 100),
            "num_actions": 3,
            "activation": "relu",
            "field_threshold": 6,
            "hidden_sizes": (64, 64),
        }

        self.config = with_default_config(config, default_config)
        self.activation = get_activation(self.config["activation"])
        self.field_threshold = self.config["field_threshold"]

        hidden_sizes: Tuple[int] = self.config.get("hidden_sizes")
        input_shape: Tuple[int, int] = self.config["input_shape"]

        _coords_i = torch.linspace(-1, 1, input_shape[0]).view(-1, 1).repeat(
            1, input_shape[1])
        _coords_j = torch.linspace(-1, 1, input_shape[1]).view(1, -1).repeat(
            input_shape[0], 1)
        self.coords = torch.stack([_coords_i, _coords_j])

        self.bilinear = nn.Bilinear(2, 2, 4)
        self.pool1 = nn.AvgPool2d((100, self.field_threshold))
        self.pool2 = nn.AvgPool2d((100, 100 - 2 * self.field_threshold))
        self.pool3 = nn.AvgPool2d((100, self.field_threshold))

        # concat + flatten to [B, 3*4]
        layer_sizes = (12, ) + hidden_sizes

        self.hidden_layers = nn.ModuleList([
            nn.Linear(in_size, out_size)
            for in_size, out_size in zip(layer_sizes, layer_sizes[1:])
        ])

        self.policy_head = nn.Linear(layer_sizes[-1],
                                     self.config["num_actions"])
        self.value_head = nn.Linear(layer_sizes[-1], 1)
Code Example #23
    def __init__(self,
                 layer_sizes,
                 feature_extractor_needed=False,
                 use_dropout=False,
                 activation='relu',
                 dropoutv=0.5,
                 reshape_dims=None,
                 seed=0,
                 session_config=None,
                 it=None,
                 embedding=False):

        assert (len(layer_sizes) == 4)
        assert (session_config is not None)
        assert (it is not None)
        self.layer_sizes = layer_sizes
        self.feature_extractor_needed = feature_extractor_needed
        self.use_dropout = use_dropout
        self.dropoutv = dropoutv
        self.reshape_dims = reshape_dims
        self.seed = seed
        self.session_config = session_config
        self.it = it
        self.embedding = embedding  # if true, expect x data to be embeddings

        if self.use_dropout:
            self.glob_training_ph = tf.placeholder_with_default(False,
                                                                shape=())
            self.training_ph = tf.placeholder_with_default(False, shape=())

        self.activation = utils.get_activation(activation)

        print("Using feature extractor: %s" % self.feature_extractor_needed)
        print("Using dropout, bn: %s, %f" % (self.use_dropout, self.dropoutv))

        self.phs = {}
        self.vars = {}
        self.objs = {}
        self.all_predictions = []
Code Example #24
File: train.py  Project: jennyzhang0215/dgl
 def __init__(self, args, **kwargs):
     super(Net, self).__init__(**kwargs)
     self._act = get_activation(args.model_activation)
     with self.name_scope():
         self.encoder = GCMCLayer(src_key=args.src_key,
                                  dst_key=args.dst_key,
                                  src_in_units=args.src_in_units,
                                  dst_in_units=args.dst_in_units,
                                  agg_units=args.gcn_agg_units,
                                  out_units=args.gcn_out_units,
                                  num_links=args.nratings,
                                  dropout_rate=args.gcn_dropout,
                                  agg_accum=args.gcn_agg_accum,
                                  agg_act=args.model_activation,
                                  prefix='enc_')
         if args.gen_r_use_classification:
             self.gen_ratings = BiDecoder(
                 in_units=args.gcn_out_units,
                 out_units=args.nratings,
                 num_basis_functions=args.gen_r_num_basis_func,
                 prefix='gen_rating')
         else:
             self.gen_ratings = InnerProductLayer(prefix='gen_rating')
Code Example #25
File: model.py  Project: jennyzhang0215/dgl
    def __init__(self,
                 src_key,
                 dst_key,
                 units,
                 src_in_units,
                 dst_in_units,
                 num_links,
                 dropout_rate=0.0,
                 accum='stack',
                 act=None,
                 **kwargs):
        super(MultiLinkGCNAggregator, self).__init__(**kwargs)
        self._src_key = src_key
        self._dst_key = dst_key
        self._accum = accum
        self._num_links = num_links
        self._units = units
        if accum == "stack":
            assert units % num_links == 0, 'units should be divisible by num_links'
            self._units = self._units // num_links

        with self.name_scope():
            self.dropout = nn.Dropout(
                dropout_rate)  # dropout before feeding the output layer
            self.act = get_activation(act)
            self.src_dst_weights = self.params.get('src_dst_weight',
                                                   shape=(num_links,
                                                          self._units,
                                                          src_in_units),
                                                   dtype=np.float32,
                                                   allow_deferred_init=True)
            self.dst_src_weights = self.params.get('dst_dst_weight',
                                                   shape=(num_links,
                                                          self._units,
                                                          dst_in_units),
                                                   dtype=np.float32,
                                                   allow_deferred_init=True)
Code Example #26
    def __init__(self, args, dev_id):
        super(Net, self).__init__()
        self._act = get_activation(args.model_activation)
        self.encoder = GCMCLayer(args.rating_vals,
                                 args.src_in_units,
                                 args.dst_in_units,
                                 args.gcn_agg_units,
                                 args.gcn_out_units,
                                 args.gcn_dropout,
                                 args.gcn_agg_accum,
                                 agg_act=self._act,
                                 share_user_item_param=args.share_param,
                                 device=dev_id)
        if args.mix_cpu_gpu and args.use_one_hot_fea:
            # if use_one_hot_fea, user and movie features are None;
            # W can be extremely large, so with mix_cpu_gpu, W should be stored on the CPU
            self.encoder.partial_to(dev_id)
        else:
            self.encoder.to(dev_id)

        self.decoder = BiDecoder(in_units=args.gcn_out_units,
                                 num_classes=len(args.rating_vals),
                                 num_basis=args.gen_r_num_basis_func)
        self.decoder.to(dev_id)
Code Example #27
def sparse_rgcn_layer(
    node_embeddings: tf.Tensor,
    adjacency_lists: List[tf.Tensor],
    type_to_num_incoming_edges: tf.Tensor,
    state_dim: Optional[int],
    num_timesteps: int = 1,
    activation_function: Optional[str] = "tanh",
    message_aggregation_function: str = "sum",
    normalize_by_num_incoming: bool = True,
    use_both_source_and_target: bool = False,
) -> tf.Tensor:
    """
    Compute new graph states by neural message passing.
    This implements the R-GCN model (Schlichtkrull et al., https://arxiv.org/pdf/1703.06103.pdf)
    for the case of few relations / edge types, i.e., we do not use the dimensionality-reduction
    tricks from section 2.2 of that paper.
    For this, we assume existing node states h^t_v and a list of per-edge-type adjacency
    matrices A_\ell.

    We compute new states as follows:
        h^{t+1}_v := \sigma(\sum_\ell
                            \sum_{(u, v) \in A_\ell}
                               1/c_{v,\ell} * (W_\ell * h^t_u))
    c_{v,\ell} is usually the number of \ell-type edges going into v.
    The learnable parameters of this are the W_\ell \in R^{D,D}.

    We use the following abbreviations in shape descriptions:
    * V: number of nodes
    * D: state dimension
    * L: number of different edge types
    * E: number of edges of a given edge type

    Arguments:
        node_embeddings: float32 tensor of shape [V, D], the original representation of
            each node in the graph.
        adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape
            [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge
            of type l connects node v to node u.
        type_to_num_incoming_edges: float32 tensor of shape [L, V] representing the number
            of incoming edges of a given type. Concretely, type_to_num_incoming_edges[l, v]
            is the number of edge of type l connecting to node v.
        state_dim: Optional size of output dimension of the GNN layer. If not set, defaults
            to D, the dimensionality of the input. If different from the input dimension,
            parameter num_timesteps has to be 1.
        num_timesteps: Number of repeated applications of this message passing layer.
        activation_function: Type of activation function used.
        message_aggregation_function: Type of aggregation function used for messages.
        normalize_by_num_incoming: Flag indicating if messages should be scaled by 1/(number
            of incoming edges).

    Returns:
        float32 tensor of shape [V, state_dim]
    """
    num_nodes = tf.shape(input=node_embeddings, out_type=tf.int32)[0]
    if state_dim is None:
        state_dim = tf.shape(input=node_embeddings, out_type=tf.int32)[1]

    # === Prepare things we need across all timesteps:
    activation_fn = get_activation(activation_function)
    message_aggregation_fn = get_aggregation_function(
        message_aggregation_function)
    edge_type_to_message_transformation_layers = [
    ]  # Layers to compute the message from a source state
    edge_type_to_message_targets = []  # List of tensors of message targets
    for edge_type_idx, adjacency_list_for_edge_type in enumerate(
            adjacency_lists):
        edge_type_to_message_transformation_layers.append(
            tf.keras.layers.Dense(units=state_dim,
                                  use_bias=False,
                                  activation=None,
                                  name="Edge_%i_Weight" % edge_type_idx))
        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])

    # Let M be the number of messages (sum of all E):
    message_targets = tf.concat(edge_type_to_message_targets,
                                axis=0)  # Shape [M]

    cur_node_states = node_embeddings
    for _ in range(num_timesteps):
        messages_per_type = []  # list of tensors of messages of shape [E, H]
        # Collect incoming messages per edge type
        for edge_type_idx, adjacency_list_for_edge_type in enumerate(
                adjacency_lists):
            edge_sources = adjacency_list_for_edge_type[:, 0]
            edge_targets = adjacency_list_for_edge_type[:, 1]
            edge_source_states = \
                tf.nn.embedding_lookup(params=cur_node_states,
                                       ids=edge_sources)  # Shape [E, H]

            if use_both_source_and_target:
                edge_target_states = \
                    tf.nn.embedding_lookup(params=cur_node_states,
                                           ids=edge_targets)  # Shape [E, H]
                edge_state_pairs = tf.concat(
                    [edge_source_states, edge_target_states],
                    axis=-1)  # Shape [E, 2H]
                messages = edge_type_to_message_transformation_layers[
                    edge_type_idx](edge_state_pairs)  # Shape [E, H]
            else:
                messages = edge_type_to_message_transformation_layers[
                    edge_type_idx](edge_source_states)  # Shape [E, H]

            if normalize_by_num_incoming:
                num_incoming_to_node_per_message = \
                    tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :],
                                           ids=edge_targets)  # Shape [E]
                messages = tf.expand_dims(
                    1.0 / (num_incoming_to_node_per_message + SMALL_NUMBER),
                    axis=-1) * messages

            messages_per_type.append(messages)

        cur_messages = tf.concat(messages_per_type, axis=0)  # Shape [M, H]
        aggregated_messages = \
            message_aggregation_fn(data=cur_messages,
                                   segment_ids=message_targets,
                                   num_segments=num_nodes)  # Shape [V, H]

        new_node_states = activation_fn(aggregated_messages)  # Shape [V, H]
        cur_node_states = new_node_states

    return cur_node_states
Code Example #28
def sparse_rgat_layer(node_embeddings: tf.Tensor,
                      adjacency_lists: List[tf.Tensor],
                      state_dim: Optional[int],
                      num_heads: int = 4,
                      num_timesteps: int = 1,
                      activation_function: Optional[str] = "tanh"
                      ) -> tf.Tensor:
    """
    Compute new graph states by neural message passing using attention. This generalises
    the original GAT model (Velickovic et al., https://arxiv.org/pdf/1710.10903.pdf)
    to multiple edge types by using different weights for different edge types.
    For this, we assume existing node states h^t_v and a list of per-edge-type adjacency
    matrices A_\ell.

    In the setting for a single attention head, we compute new states as follows:
        h^t_{v, \ell} := W_\ell h^t_v
        e_{u, \ell, v} := LeakyReLU(\alpha_\ell^T * concat(h^t_{u, \ell}, h^t_{v, \ell}))
        a_v := softmax_{\ell, u with (u, v) \in A_\ell}(e_{u, \ell, v})
        h^{t+1}_v := \sigma(\sum_{\ell, (u, v) \in A_\ell}
                                (a_v)_{u, \ell} * h^t_{u, \ell})
    The learnable parameters of this are the W_\ell \in R^{D, D} and \alpha_\ell \in R^{2*D}.

    In practice, we use K attention heads, computing separate, partial new states h^{t+1}_{v,k}
    and compute h^{t+1}_v as the concatenation of the partial states.
    For this, we reduce the shape of W_\ell to R^{D, D/K} and \alpha_\ell to R^{2*D/K}.

    We use the following abbreviations in shape descriptions:
    * V: number of nodes
    * D: state dimension
    * K: number of attention heads
    * L: number of different edge types
    * E: number of edges of a given edge type

    Arguments:
        node_embeddings: float32 tensor of shape [V, D], the original representation of
            each node in the graph.
        adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape
            [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge
            of type l connects node v to node u.
        state_dim: Optional size of output dimension of the GNN layer. If not set, defaults
            to D, the dimensionality of the input. If different from the input dimension,
            parameter num_timesteps has to be 1.
        num_heads: Number of attention heads to use.
        num_timesteps: Number of repeated applications of this message passing layer.
        activation_function: Type of activation function used.

    Returns:
        float32 tensor of shape [V, state_dim]
    """
    num_nodes = tf.shape(input=node_embeddings, out_type=tf.int32)[0]
    if state_dim is None:
        state_dim = tf.shape(input=node_embeddings, out_type=tf.int32)[1]
    per_head_dim = state_dim // num_heads

    # === Prepare things we need across all timesteps:
    activation_fn = get_activation(activation_function)
    edge_type_to_state_transformation_layers = []  # Layers to compute the message from a source state
    edge_type_to_attention_parameters = []  # Parameters for the attention mechanism
    edge_type_to_message_targets = []  # List of tensors of message targets
    for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
        edge_type_to_state_transformation_layers.append(
            tf.keras.layers.Dense(units=state_dim,
                                  use_bias=False,
                                  activation=None,
                                  name="Edge_%i_Weight" % edge_type_idx))
        edge_type_to_attention_parameters.append(
            tf.compat.v1.get_variable(shape=(2 * state_dim),
                            name="Edge_%i_Attention_Parameters" % edge_type_idx))
        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])

    # Let M be the number of messages (sum of all E):
    message_targets = tf.concat(edge_type_to_message_targets, axis=0)  # Shape [M]

    cur_node_states = node_embeddings
    for _ in range(num_timesteps):
        edge_type_to_per_head_messages = []  # type: List[tf.Tensor]  # list of tensors of per-head messages, shape [E, K, D/K]
        edge_type_to_per_head_attention_coefficients = []  # type: List[tf.Tensor]  # list of tensors of shape [E, K]

        # Collect incoming messages per edge type
        # Note:
        #  We compute the state transformations (to make use of the wider, faster matrix multiplication),
        #  and then split into the individual attention heads via some reshapes:
        for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
            edge_sources = adjacency_list_for_edge_type[:, 0]
            edge_targets = adjacency_list_for_edge_type[:, 1]

            transformed_states = \
                edge_type_to_state_transformation_layers[edge_type_idx](cur_node_states)  # Shape [V, D]

            edge_transformed_source_states = \
                tf.nn.embedding_lookup(params=transformed_states, ids=edge_sources)  # Shape [E, D]
            edge_transformed_target_states = \
                tf.nn.embedding_lookup(params=transformed_states, ids=edge_targets)  # Shape [E, D]

            per_edge_per_head_transformed_source_states = \
                tf.reshape(edge_transformed_source_states, shape=(-1, num_heads, per_head_dim))

            per_edge_per_head_transformed_states = \
                tf.concat([per_edge_per_head_transformed_source_states,
                           tf.reshape(edge_transformed_target_states, shape=(-1, num_heads, per_head_dim))],
                          axis=-1)  # Shape [E, K, 2*D/K]
            per_head_attention_pars = tf.reshape(edge_type_to_attention_parameters[edge_type_idx],
                                                 shape=(num_heads, 2 * per_head_dim))  # Shape [K, 2*D/K]
            per_edge_per_head_attention_coefficients = \
                tf.nn.leaky_relu(tf.einsum('vki,ki->vk',
                                           per_edge_per_head_transformed_states,
                                           per_head_attention_pars))  # Shape [E, K]

            edge_type_to_per_head_messages.append(per_edge_per_head_transformed_source_states)
            edge_type_to_per_head_attention_coefficients.append(per_edge_per_head_attention_coefficients)

        per_head_messages = tf.concat(edge_type_to_per_head_messages, axis=0)
        per_head_attention_coefficients = tf.concat(edge_type_to_per_head_attention_coefficients, axis=0)

        head_to_aggregated_messages = []  # list of tensors of shape [V, D/K]
        for head_idx in range(num_heads):
            # Compute the softmax over all the attention coefficients for all messages going to this state:
            attention_coefficients = tf.concat(per_head_attention_coefficients[:, head_idx], axis=0)  # Shape [M]
            attention_values = \
                tf.exp(unsorted_segment_log_softmax(logits=attention_coefficients,
                                                    segment_ids=message_targets,
                                                    num_segments=num_nodes))  # Shape [M]
            messages = per_head_messages[:, head_idx, :]  # Shape [M, D/K]
            # Compute weighted sum per target node for this head:
            head_to_aggregated_messages.append(
                tf.math.unsorted_segment_sum(data=tf.expand_dims(attention_values, -1) * messages,
                                        segment_ids=message_targets,
                                        num_segments=num_nodes))

        new_node_states = activation_fn(tf.concat(head_to_aggregated_messages, axis=-1))
        cur_node_states = new_node_states

    return cur_node_states
Code Example #29
    if not os.path.exists(args.result_dir):
        os.mkdir(args.result_dir)

    for set_cur in args.set_names:

        if not os.path.exists(os.path.join(args.result_dir, set_cur)):
            os.mkdir(os.path.join(args.result_dir, set_cur))
        psnrs = []
        ssims = []

        for im in os.listdir(os.path.join(args.set_dir, set_cur)):
            if im.endswith(".jpg") or im.endswith(".bmp") or im.endswith(
                    ".png"):
                # model.conv1.register_forward_hook(get_activation('conv1'))
                model.dncnn.register_forward_hook(
                    get_activation('noise_level'))

                x = np.array(imread(os.path.join(args.set_dir, set_cur, im)),
                             dtype=np.float32) / 255.0
                np.random.seed(seed=0)  # for reproducibility
                y = x + np.random.normal(
                    0, args.sigma / 255.0,
                    x.shape)  # Add Gaussian noise without clipping
                y = y.astype(np.float32)
                y_ = torch.from_numpy(y).view(1, -1, y.shape[0], y.shape[1])

                torch.cuda.synchronize()
                start_time = time.time()
                y_ = y_.cuda()
                x_ = model(y_)  # inference
                noise_level = activation['noise_level'].squeeze()
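In Example #29 (and Example #30 below), get_activation is not an activation-function lookup at all: it is registered via register_forward_hook and its result is later read from activation['noise_level'], so it must be a forward-hook factory that stores a module's output under a name. A plausible sketch of such a helper (the dict name activation matches the later access; everything else is an assumption):

import torch

activation = {}

def get_activation(name):
    # Hypothetical sketch: return a forward hook that records the module's
    # output in the module-level `activation` dict under the given name.
    def hook(module, inputs, output):
        activation[name] = output.detach()
    return hook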
Code Example #30
        model = model.cuda()

    if not os.path.exists(args.result_dir):
        os.mkdir(args.result_dir)

    for set_cur in args.set_names:

        if not os.path.exists(os.path.join(args.result_dir, set_cur)):
            os.mkdir(os.path.join(args.result_dir, set_cur))
        psnrs = []
        ssims = []

        for im in os.listdir(os.path.join(args.set_dir, set_cur)):
            if im.endswith(".jpg") or im.endswith(".bmp") or im.endswith(".png"):
                # model.conv1.register_forward_hook(get_activation('conv1'))
                model.dncnn.register_forward_hook(get_activation('noise_level'))


                x = np.array(imread(os.path.join(args.set_dir, set_cur, im)), dtype=np.float32)/255.0
                np.random.seed(seed=0)  # for reproducibility
                y = x + np.random.normal(0, args.sigma/255.0, x.shape)  # Add Gaussian noise without clipping
                y = y.astype(np.float32)
                y_ = torch.from_numpy(y).view(1, -1, y.shape[0], y.shape[1])


                High_origin, Low_origin = Decomposition(torch.from_numpy(x).unsqueeze(0))
                High_noise, Low_noise = Decomposition(y_.squeeze(0))

                torch.cuda.synchronize()
                start_time = time.time()
                y_ = y_.cuda()