def extract_tree_expression(self, node, index_mark='_'):
    if node is None or node.data is None:
        return self.expression_str
    feature, parentheses, action, wl_scalar, wl_power, parentheses_bias, \
        wl_activation, parentheses_activation, wl_bias, parentheses_power = \
        Individual.get_all_merged_values(node.data)
    if parentheses == 1:
        self.expression_str += utils.get_activation(parentheses_activation) + '('
    self.expression_str += utils.get_activation(wl_activation)
    self.expression_str += '({}'.format(wl_scalar) + '*'  # add wl scalar
    self.expression_str += '{}{}{}'.format(index_mark, feature, index_mark)
    self.expression_str += '**{}'.format(wl_power) + '+{}'.format(wl_bias)
    self.expression_str += ')'
    self.expression_str += utils.get_action(action)
    self.expression_str = self.extract_tree_expression(node.left, index_mark)
    self.expression_str = self.extract_tree_expression(node.right, index_mark)
    if parentheses == 1:
        # put the closing parenthesis before the trailing action character
        self.expression_str = self.expression_str[:-1] \
            + '+{})'.format(parentheses_bias) \
            + '**{}'.format(parentheses_power) \
            + self.expression_str[-1]
    return self.expression_str
def __init__(
    self,
    rating_vals,
    user_in_units,
    movie_in_units,
    msg_units,
    out_units,
    dropout_rate=0.0,
    agg="stack",  # or 'sum'
    agg_act=None,
    out_act=None,
    share_user_item_param=False,
):
    super(GCMCLayer, self).__init__()
    self.rating_vals = rating_vals
    self.agg = agg
    self.share_user_item_param = share_user_item_param
    if agg == "stack":
        # divide the original msg unit size by the number of ratings to keep
        # the dimensionality
        assert msg_units % len(rating_vals) == 0
        msg_units = msg_units // len(rating_vals)
    with self.name_scope():
        self.dropout = nn.Dropout(dropout_rate)
        self.W_r = {}
        for rating in rating_vals:
            rating = str(rating)
            if share_user_item_param and user_in_units == movie_in_units:
                self.W_r[rating] = self.params.get(
                    "W_r_%s" % rating,
                    shape=(user_in_units, msg_units),
                    dtype=np.float32,
                    allow_deferred_init=True,
                )
                self.W_r["rev-%s" % rating] = self.W_r[rating]
            else:
                self.W_r[rating] = self.params.get(
                    "W_r_%s" % rating,
                    shape=(user_in_units, msg_units),
                    dtype=np.float32,
                    allow_deferred_init=True,
                )
                self.W_r["rev-%s" % rating] = self.params.get(
                    "revW_r_%s" % rating,
                    shape=(movie_in_units, msg_units),
                    dtype=np.float32,
                    allow_deferred_init=True,
                )
        self.ufc = nn.Dense(out_units)
        if share_user_item_param:
            self.ifc = self.ufc
        else:
            self.ifc = nn.Dense(out_units)
        self.agg_act = get_activation(agg_act)
        self.out_act = get_activation(out_act)
def train(self, batch, labels=None, loss="quadratic", learning_rate=0.01,
          epochs=1, mini_batch_size=1):
    if labels is not None:
        batch = np.c_[batch, labels]
    amount_of_labels = len(set(batch[:, -1]))
    for epoch in range(epochs):
        print("Epoch: ", epoch, end=", ")
        np.random.shuffle(batch)  # avoids correlated mini batches or memorization of order
        avg_loss_epoch = []  # average loss over all samples in batch for this epoch
        sample_i = 0
        while sample_i < (len(batch) - mini_batch_size):
            mini_batch = batch[sample_i:sample_i + mini_batch_size]
            input_values, labels = mini_batch[:, :-1], mini_batch[:, -1]
            # one-hot encoding of numerical labels:
            labels = np.eye(amount_of_labels)[labels.astype(int)]
            raw_outputs, activations, activated_outputs = \
                self.inference(input_values, save_outputs=True)
            # Get the loss function and its derivatives
            # ("dx_y" means the partial derivative of y with respect to x).
            minibatch_loss = get_loss(loss)(activated_outputs[-1], labels)
            avg_loss_epoch.append(minibatch_loss)
            try:
                da_loss = get_loss(loss, d="da_")(activated_outputs[-1], labels)
                dz_a = get_activation(activations[-1], d="dz_")(raw_outputs[-1])
                dz_loss = np.multiply(da_loss, dz_a)  # Hadamard product
            except AttributeError:
                dz_loss = get_loss(loss, d="dz_")(activated_outputs[-1], labels)
            for l in range(1, len(self.weights)):
                m, n = activated_outputs[-l - 1].shape
                # faster than stacking ones onto our activated outputs:
                activated_outputs_with_ones = np.ones((m, n + 1))
                activated_outputs_with_ones[:, :-1] = activated_outputs[-l - 1]
                dw_loss = np.matmul(activated_outputs_with_ones.T, dz_loss)
                self.weights[-l] = self.weights[-l] - learning_rate * dw_loss / len(batch)
                dz_a = get_activation(activations[-l - 1], d="dz_")(raw_outputs[-l - 1])
                dz_loss = np.multiply(
                    np.matmul(dz_loss, self.weights[-l][:-1, :].T),  # biases removed
                    dz_a)
            m, n = activated_outputs[0].shape
            activated_outputs_with_ones = np.ones((m, n + 1))
            activated_outputs_with_ones[:, :-1] = activated_outputs[0]
            dw_loss = np.matmul(activated_outputs_with_ones.T, dz_loss)
            self.weights[0] = self.weights[0] - learning_rate * dw_loss / len(batch)
            sample_i += mini_batch_size
        avg_loss_epoch = np.sum(np.array(avg_loss_epoch)) / np.array(avg_loss_epoch).size
        print("Loss: ", avg_loss_epoch)
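# The training loop above resolves its loss and activation functions by name through
# `get_loss` and `get_activation`, with a `d=` prefix ("da_", "dz_") selecting a partial
# derivative. Those helpers are not shown in this section; the sketch below is only an
# assumption of how such a lookup could work (quadratic loss and sigmoid only), using
# getattr so that a missing derivative raises AttributeError, as the except clause expects.
import sys

import numpy as np


def quadratic(a, y):
    # mean squared error over the mini-batch
    return 0.5 * np.mean(np.sum((a - y) ** 2, axis=1))


def da_quadratic(a, y):
    # partial derivative of the quadratic loss w.r.t. the activated output a
    return a - y


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def dz_sigmoid(z):
    s = sigmoid(z)
    return s * (1.0 - s)


def get_loss(name, d=""):
    # e.g. get_loss("quadratic", d="da_") resolves to da_quadratic
    return getattr(sys.modules[__name__], d + name)


def get_activation(name, d=""):
    # e.g. get_activation("sigmoid", d="dz_") resolves to dz_sigmoid
    return getattr(sys.modules[__name__], d + name)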
def __init__(self, config: Dict):
    super().__init__(config)
    torch.manual_seed(0)

    default_config = {
        "input_size": 33,
        "num_actions": 4,
        "activation": "relu",
        "hidden_sizes": (64, 64),
    }
    self.config = with_default_config(config, default_config)

    input_size: int = self.config.get("input_size")
    num_actions: int = self.config.get("num_actions")
    hidden_sizes: Tuple[int] = self.config.get("hidden_sizes")
    self.activation: Callable = get_activation(self.config.get("activation"))

    layer_sizes = (input_size,) + hidden_sizes

    self.hidden_layers = nn.ModuleList([
        nn.Linear(in_size, out_size)
        for in_size, out_size in zip(layer_sizes, layer_sizes[1:])
    ])
    self.policy_mu_head = nn.Linear(layer_sizes[-1], num_actions)

    self.v_hidden_layers = nn.ModuleList([
        nn.Linear(in_size, out_size)
        for in_size, out_size in zip(layer_sizes, layer_sizes[1:])
    ])
    self.std = nn.Parameter(torch.ones(1, num_actions))
    self.value_head = nn.Linear(layer_sizes[-1], 1)
def __init__(self, config: Dict): super().__init__(config) default_config = { "input_shape": (100, 100), "num_actions": 5, "activation": "relu", } self.config = with_default_config(config, default_config) self.activation = get_activation(self.config["activation"]) input_shape: Tuple[int, int] = self.config["input_shape"] self.conv_layers = nn.ModuleList([ nn.Conv2d(4, 32, kernel_size=8, stride=4), # 24x24x32 nn.Conv2d(32, 64, kernel_size=7, stride=3), # 6x6x64 nn.Conv2d(64, 64, kernel_size=3, stride=1) ]) # 4x4x64 _coords_i = torch.linspace(-1, 1, input_shape[0]).view(-1, 1).repeat( 1, input_shape[1]) _coords_j = torch.linspace(-1, 1, input_shape[1]).view(1, -1).repeat( input_shape[0], 1) self.coords = torch.stack([_coords_i, _coords_j]) # flatten self.policy_head = nn.Linear(4 * 4 * 64, self.config["num_actions"]) self.value_head = nn.Linear(4 * 4 * 64, 1)
def DNN_regressor(params, model_dir, feature_columns, config):
    """Returns a DNN estimator object."""
    hidden_units = params['layers'] * [params['units']]
    weight_column_name = utils.get_param(params, 'weight_column_name')
    optimizer = utils.get_optimizer(utils.get_param(params, 'optimizer'),
                                    params['learning_rate'])
    activation_fn = utils.get_activation(utils.get_param(params, 'activation_fn'))
    dropout = float(utils.get_param(params, 'dropout'))
    gradient_clip_norm = utils.get_param(params, 'gradient_clip_norm')
    enable_centered_bias = False  # keep false
    feature_engineering_fn = utils.get_param(params, 'feature_engineering_fn')
    embedding_lr_multipliers = utils.get_param(params, 'embedding_lr_multipliers')
    input_layer_min_slice_size = utils.get_param(params, 'input_layer_min_slice_size')
    label_keys = utils.get_param(params, 'label_keys')
    return tf.contrib.learn.DNNRegressor(
        hidden_units=hidden_units,
        feature_columns=feature_columns,
        model_dir=model_dir,
        weight_column_name=weight_column_name,
        optimizer=optimizer,
        activation_fn=activation_fn,
        dropout=dropout,
        gradient_clip_norm=gradient_clip_norm,
        enable_centered_bias=enable_centered_bias,
        config=config,
        feature_engineering_fn=feature_engineering_fn,
        embedding_lr_multipliers=embedding_lr_multipliers,
        input_layer_min_slice_size=input_layer_min_slice_size)
def __init__(self, config: Dict): super().__init__(config) default_config = { "input_size": 15, "num_actions": 5, "hidden_sizes": (64, 64), "activation": "leaky_relu", } self.config = with_default_config(config, default_config) input_size: int = self.config.get("input_size") num_actions: int = self.config.get("num_actions") hidden_sizes: Tuple[int] = self.config.get("hidden_sizes") self.activation: Callable = get_activation( self.config.get("activation")) layer_sizes = (input_size, ) + hidden_sizes self.hidden_layers = nn.ModuleList([ nn.Linear(in_size, out_size) for in_size, out_size in zip(layer_sizes, layer_sizes[1:]) ]) self.policy_head = nn.Linear(layer_sizes[-1], num_actions) self.value_head = nn.Linear(layer_sizes[-1], 1)
def __init__(self, config: Dict[str, Any]):
    super().__init__()

    default_config = {
        "num_subgoals": 2,
        "emb_size": 4,
        "rel_hiddens": (16, 16,),
        "mlp_hiddens": (16,),
        "activation": "leaky_relu",
    }
    self.config = with_default_config(config, default_config)

    self.activation: Callable[[Tensor], Tensor] = get_activation(self.config["activation"])

    self.own_embedding = nn.Parameter(torch.randn(self.config["emb_size"]) / 10., requires_grad=True)
    self.agent_embedding = nn.Parameter(torch.randn(self.config["emb_size"]) / 10., requires_grad=True)
    self.subgoal_embedding = nn.Parameter(torch.randn(self.config["emb_size"]) / 10., requires_grad=True)
    self.goal_embedding = nn.Parameter(torch.randn(self.config["emb_size"]) / 10., requires_grad=True)

    rel_sizes = (2 * (self.config["emb_size"] + 3),) + self.config["rel_hiddens"]
    mlp_sizes = (self.config["rel_hiddens"][-1],) + self.config["mlp_hiddens"]

    self.relation_layers = nn.ModuleList([
        nn.Linear(in_size, out_size)
        for in_size, out_size in zip(rel_sizes, rel_sizes[1:])
    ])
    self.mlp_layers = nn.ModuleList([
        nn.Linear(in_size, out_size)
        for in_size, out_size in zip(mlp_sizes, mlp_sizes[1:])
    ])
def __init__(self, config: Dict): super().__init__(config) default_config = { "input_shape": (100, 100), "num_actions": 5, "activation": "relu", } self.config = with_default_config(config, default_config) input_shape: Tuple[int, int] = self.config["input_shape"] input_size: int = self.config.get("input_size") num_actions: int = self.config.get("num_actions") hidden_sizes: Tuple[int] = self.config.get("hidden_sizes") self.activation: Callable = get_activation( self.config.get("activation")) self.conv = nn.Conv2d(3, 3, kernel_size=3, padding=1) layer_sizes = (input_size, ) + hidden_sizes self.hidden_layers = nn.ModuleList([ nn.Linear(in_size, out_size) for in_size, out_size in zip(layer_sizes, layer_sizes[1:]) ]) self.policy_head = nn.Linear(layer_sizes[-1], num_actions) self.value_head = nn.Linear(layer_sizes[-1], 1)
def _propagate_one_layer(self, input_values, weights, activation="identity"):
    m, n = input_values.shape
    input_with_ones = np.ones((m, n + 1))
    input_with_ones[:, :-1] = input_values  # faster than stacking
    raw_output = np.matmul(input_with_ones, weights)
    activated_output = get_activation(activation)(raw_output)
    return raw_output, activated_output
def __init__(self, args):
    super(Net, self).__init__()
    self._act = get_activation(args.model_activation)
    self.encoder = nn.ModuleList()
    self.encoder.append(
        GCMCLayer(args.rating_vals,
                  args.src_in_units,
                  args.dst_in_units,
                  args.gcn_agg_units,
                  args.gcn_out_units,
                  args.gcn_dropout,
                  args.gcn_agg_accum,
                  agg_act=self._act,
                  share_user_item_param=args.share_param,
                  device=args.device))
    self.gcn_agg_accum = args.gcn_agg_accum
    self.rating_vals = args.rating_vals
    self.device = args.device
    self.gcn_agg_units = args.gcn_agg_units
    self.src_in_units = args.src_in_units
    for i in range(1, args.layers):
        if args.gcn_agg_accum == 'stack':
            gcn_out_units = args.gcn_out_units * len(args.rating_vals)
        else:
            gcn_out_units = args.gcn_out_units
        self.encoder.append(
            GCMCLayer(args.rating_vals,
                      args.gcn_out_units,
                      args.gcn_out_units,
                      gcn_out_units,
                      args.gcn_out_units,
                      args.gcn_dropout - i * 0.1,
                      args.gcn_agg_accum,
                      agg_act=self._act,
                      share_user_item_param=args.share_param,
                      ini=False,
                      device=args.device))
    if args.decoder == "Bi":
        self.decoder = BiDecoder(
            in_units=args.gcn_out_units,  # * args.layers,
            num_classes=len(args.rating_vals),
            num_basis=args.gen_r_num_basis_func)
        # self.decoder2 = MLPDecoder(in_units=args.gcn_out_units * 2,
        #                            num_classes=len(args.rating_vals),
        #                            num_basis=args.gen_r_num_basis_func)
    elif args.decoder == "MLP":
        if args.loss_func == "CE":
            num_classes = len(args.rating_vals)
        else:
            num_classes = 1
        self.decoder = MLPDecoder(in_units=args.gcn_out_units * args.layers,
                                  num_classes=num_classes,
                                  num_basis=args.gen_r_num_basis_func)
    self.rating_vals = args.rating_vals
def __init__(self, args, dev_id):
    super(Net, self).__init__()
    self._act = get_activation(args.model_activation)
    self.encoder = nn.ModuleList()
    self.encoder.append(
        GCMCLayer(args.rating_vals,
                  args.src_in_units,
                  args.dst_in_units,
                  args.gcn_agg_units,
                  args.gcn_out_units,
                  args.gcn_dropout,
                  args.gcn_agg_accum,
                  agg_act=self._act,
                  share_user_item_param=args.share_param,
                  device=dev_id))
    self.rating_vals = args.rating_vals
    self.gcn_agg_accum = args.gcn_agg_accum
    self.device = dev_id
    self.gcn_agg_units = args.gcn_agg_units
    self.src_in_units = args.src_in_units
    self.batch_size = args.minibatch_size
    for i in range(1, args.layers):
        if args.gcn_agg_accum == 'stack':
            gcn_out_units = args.gcn_out_units * len(args.rating_vals)
        else:
            gcn_out_units = args.gcn_out_units
        self.encoder.append(
            GCMCLayer(args.rating_vals,
                      args.gcn_out_units,
                      args.gcn_out_units,
                      gcn_out_units,
                      args.gcn_out_units,
                      args.gcn_dropout - i * 0.1,
                      args.gcn_agg_accum,
                      agg_act=self._act,
                      share_user_item_param=args.share_param,
                      ini=False,
                      device=dev_id))
    if args.mix_cpu_gpu and args.use_one_hot_fea:
        # with use_one_hot_fea, the user and movie features are None;
        # W can be extremely large, so with mix_cpu_gpu W should be stored on the CPU
        self.encoder.partial_to(dev_id)
    else:
        self.encoder.to(dev_id)
    self.decoder = BiDecoder(in_units=args.gcn_out_units,
                             num_classes=len(args.rating_vals),
                             num_basis=args.gen_r_num_basis_func)
    self.decoder.to(dev_id)
def __init__(self,
             rating_vals,
             user_in_units,
             movie_in_units,
             msg_units,
             out_units,
             dropout_rate=0.0,
             agg='stack',  # or 'sum'
             agg_act=None,
             out_act=None,
             share_user_item_param=False):
    super(GCMCLayer, self).__init__()
    self.rating_vals = rating_vals
    self.agg = agg
    self.share_user_item_param = share_user_item_param
    self.ufc = nn.Linear(msg_units, out_units)
    if share_user_item_param:
        self.ifc = self.ufc
    else:
        self.ifc = nn.Linear(msg_units, out_units)
    if agg == 'stack':
        # divide the original msg unit size by the number of ratings to keep
        # the dimensionality
        assert msg_units % len(rating_vals) == 0
        msg_units = msg_units // len(rating_vals)
    self.dropout = nn.Dropout(dropout_rate)
    self.W_r = nn.ParameterDict()
    for rating in rating_vals:
        # PyTorch parameter names can't contain "."
        rating = str(rating).replace('.', '_')
        if share_user_item_param and user_in_units == movie_in_units:
            self.W_r[rating] = nn.Parameter(th.randn(user_in_units, msg_units))
            self.W_r['rev-%s' % rating] = self.W_r[rating]
        else:
            self.W_r[rating] = nn.Parameter(th.randn(user_in_units, msg_units))
            self.W_r['rev-%s' % rating] = nn.Parameter(th.randn(movie_in_units, msg_units))
    self.agg_act = get_activation(agg_act)
    self.out_act = get_activation(out_act)
    self.reset_parameters()
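# Most constructors in this section pass an activation name (or None, or an existing
# callable) through `get_activation` and keep the result as a callable. The helper itself
# is not part of this section; the following is only a plausible PyTorch-flavoured sketch
# of such a name-to-module lookup, with None treated as the identity.
import torch.nn as nn


def get_activation(act):
    """Sketch of a name-to-activation lookup; not the original helper."""
    if act is None:
        return nn.Identity()  # no-op activation
    if callable(act):
        return act  # already a module or function, pass it through
    lookup = {
        "relu": nn.ReLU(),
        "leaky_relu": nn.LeakyReLU(),
        "tanh": nn.Tanh(),
        "sigmoid": nn.Sigmoid(),
    }
    if act not in lookup:
        raise NotImplementedError("unsupported activation: %r" % act)
    return lookup[act]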
def __init__(self,
             src_key,
             dst_key,
             src_in_units,
             dst_in_units,
             agg_units,
             out_units,
             num_links,
             dropout_rate=0.0,
             agg_accum='stack',
             agg_act=None,
             out_act=None,
             # agg_ordinal_sharing=False,
             # share_agg_weights=False,
             # share_out_fc_weights=False,
             **kwargs):
    super(GCMCLayer, self).__init__(**kwargs)
    self._out_act = get_activation(out_act)
    self._src_key = src_key
    self._dst_key = dst_key
    with self.name_scope():
        self.dropout = nn.Dropout(dropout_rate)
        self.aggregator = MultiLinkGCNAggregator(src_key=src_key,
                                                 dst_key=dst_key,
                                                 units=agg_units,
                                                 src_in_units=src_in_units,
                                                 dst_in_units=dst_in_units,
                                                 num_links=num_links,
                                                 dropout_rate=dropout_rate,
                                                 accum=agg_accum,
                                                 act=agg_act,
                                                 prefix='agg_')
        self.user_out_fcs = nn.Dense(out_units, flatten=False, prefix='user_out_')
        self.item_out_fcs = nn.Dense(out_units, flatten=False, prefix='item_out_')
        self._out_act = get_activation(out_act)
def __init__(self, args):
    super(Net, self).__init__()
    self._act = get_activation(args.model_activation)
    self.encoder = GCMCLayer(args.rating_vals,
                             args.src_in_units,
                             args.dst_in_units,
                             args.gcn_agg_units,
                             args.gcn_out_units,
                             args.gcn_dropout,
                             args.gcn_agg_accum,
                             agg_act=self._act,
                             share_user_item_param=args.share_param)
    self.decoder = BiDecoder(args.rating_vals,
                             in_units=args.gcn_out_units,
                             num_basis_functions=args.gen_r_num_basis_func)
def __build_graph_propagation_model(self) -> tf.Tensor:
    h_dim = self.params['hidden_size']
    activation_fn = get_activation(self.params['graph_model_activation_function'])  # tanh
    # If the initial node feature size does not match the hidden size, we create a densely
    # connected layer to project the features to the correct size (h_dim). This dense layer
    # has h_dim units and uses the activation function specified above (tanh).
    if self.task.initial_node_feature_size != self.params['hidden_size']:
        # project features to the specified hidden size
        self.__ops['projected_node_features'] = \
            tf.keras.layers.Dense(units=h_dim,
                                  use_bias=False,
                                  activation=activation_fn,
                                  )(self.__ops['initial_node_features'])
    else:
        self.__ops['projected_node_features'] = self.__ops['initial_node_features']

    cur_node_representations = self.__ops['projected_node_features']
    last_residual_representations = tf.zeros_like(cur_node_representations)
    for layer_idx in range(self.params['graph_num_layers']):
        with tf.variable_scope('gnn_layer_%i' % layer_idx):
            # with some probability, set neurons to zero in the current node representations
            cur_node_representations = \
                tf.nn.dropout(cur_node_representations,
                              rate=1.0 - self.__placeholders['graph_layer_input_dropout_keep_prob'])
            # every `graph_residual_connection_every_num_layers` layers, we add the previously
            # saved node representations; this helps address vanishing or exploding gradients
            if layer_idx % self.params['graph_residual_connection_every_num_layers'] == 0:
                t = cur_node_representations
                if layer_idx > 0:
                    cur_node_representations += last_residual_representations
                    cur_node_representations /= 2
                last_residual_representations = t
            # finally, we apply the GNN layer itself
            cur_node_representations = \
                self._apply_gnn_layer(
                    cur_node_representations,
                    self.__ops['adjacency_lists'],
                    self.__ops['type_to_num_incoming_edges'],
                    self.params['graph_num_timesteps_per_layer'])
            if self.params['graph_inter_layer_norm']:
                cur_node_representations = tf.contrib.layers.layer_norm(cur_node_representations)
            if layer_idx % self.params['graph_dense_between_every_num_gnn_layers'] == 0:
                cur_node_representations = \
                    tf.keras.layers.Dense(units=h_dim,
                                          use_bias=False,
                                          activation=activation_fn,
                                          name="Dense",
                                          )(cur_node_representations)
    self.__ops['final_node_representations'] = cur_node_representations
def __init__(self, args):
    super(Net, self).__init__()
    self._act = get_activation(args.model_activation)
    self.encoder = GraphSageLayer(args.rating_vals,
                                  args.src_in_units,
                                  args.dst_in_units,
                                  args.gcn_agg_units,
                                  args.gcn_out_units,
                                  args.gcn_dropout,
                                  args.gcn_agg_accum,
                                  agg_act=self._act,
                                  share_user_item_param=args.share_param,
                                  device=args.device)
    self.decoder = DotProduct(args.gcn_agg_units, args.gcn_out_units)
def __build_graph_propagation_model(self) -> tf.Tensor:
    h_dim = self.params['hidden_size']
    activation_fn = get_activation(self.params['graph_model_activation_function'])
    if self.task.initial_node_feature_size != self.params['hidden_size']:
        self.__ops['projected_node_features'] = \
            tf.keras.layers.Dense(units=h_dim,
                                  use_bias=False,
                                  activation=activation_fn,
                                  )(self.__ops['initial_node_features'])
    else:
        self.__ops['projected_node_features'] = self.__ops['initial_node_features']

    cur_node_representations = self.__ops['projected_node_features']
    last_residual_representations = tf.zeros_like(cur_node_representations)
    for layer_idx in range(self.params['graph_num_layers']):
        with tf.variable_scope('gnn_layer_%i' % layer_idx):
            cur_node_representations = \
                tf.nn.dropout(cur_node_representations,
                              rate=1.0 - self.__placeholders['graph_layer_input_dropout_keep_prob'])
            if layer_idx % self.params['graph_residual_connection_every_num_layers'] == 0:
                t = cur_node_representations
                if layer_idx > 0:
                    cur_node_representations += last_residual_representations
                    cur_node_representations /= 2
                last_residual_representations = t
            cur_node_representations = \
                self._apply_gnn_layer(
                    cur_node_representations,
                    self.__ops['adjacency_lists'],
                    self.__ops['type_to_num_incoming_edges'],
                    self.params['graph_num_timesteps_per_layer'])
            if self.params['graph_inter_layer_norm']:
                cur_node_representations = tf.contrib.layers.layer_norm(cur_node_representations)
            if layer_idx % self.params['graph_dense_between_every_num_gnn_layers'] == 0:
                cur_node_representations = \
                    tf.keras.layers.Dense(units=h_dim,
                                          use_bias=False,
                                          activation=activation_fn,
                                          name="Dense",
                                          )(cur_node_representations)
    self.__ops['final_node_representations'] = cur_node_representations
def get_masked_lm_output(bert_config, input_tensor, output_weights, positions,
                         label_ids, label_weights, prev_bplayers=None):
    """Get loss and log probs for the masked LM."""
    input_tensor = gather_indexes(input_tensor, positions)

    with tf.variable_scope("cls/predictions") as prediction_scope:
        # We apply one more non-linear transformation before the output layer.
        # This matrix is not used after pre-training.
        with tf.variable_scope("transform"):
            input_tensor = tf.layers.dense(
                input_tensor,
                units=bert_config.hidden_size,
                activation=utils.get_activation(bert_config.hidden_act),
                kernel_initializer=utils.create_initializer(
                    bert_config.initializer_range))
            input_tensor = utils.layer_norm(input_tensor)

        # The output weights are the same as the input embeddings, but there is
        # an output-only bias for each token.
        output_bias = tf.get_variable(
            "output_bias",
            shape=[bert_config.vocab_size],
            initializer=tf.zeros_initializer())
        logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        label_ids = tf.reshape(label_ids, [-1])
        label_weights = tf.reshape(label_weights, [-1])

        one_hot_labels = tf.one_hot(
            label_ids, depth=bert_config.vocab_size, dtype=tf.float32)

        # The `positions` tensor might be zero-padded (if the sequence is too
        # short to have the maximum number of predictions). The `label_weights`
        # tensor has a value of 1.0 for every real prediction and 0.0 for the
        # padding predictions.
        per_example_loss = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])
        numerator = tf.reduce_sum(label_weights * per_example_loss)
        denominator = tf.reduce_sum(label_weights) + 1e-5
        loss = numerator / denominator

    loss_bplayer = BPLayer(loss, prediction_scope, backward_layers=prev_bplayers)
    return (loss, per_example_loss, log_probs, loss_bplayer)
def __init__(self, n_in, n_out, n_layers, layer_factor, act_fn='relu', dropout_p=0.0):
    super(MLP, self).__init__()
    n_h = int(n_in * layer_factor)
    act_fn_class = utils.get_activation(act_fn)

    # build the net
    net = []
    for i in range(n_layers):
        if i == 0:
            net.append(torch.nn.Linear(n_in, n_h))
        else:
            net.append(torch.nn.Linear(n_h, n_h))
        net.append(act_fn_class())
        net.append(torch.nn.BatchNorm1d(n_h))
        if dropout_p > 0:
            net.append(torch.nn.Dropout(p=dropout_p))
    net.append(torch.nn.Linear(n_h, n_out))

    # convert to sequential
    self.net = torch.nn.Sequential(*net)
def __init__(self, layer_sizes, feature_extractor_needed=False, use_dropout=False,
             activation='relu', dropoutv=0.5, reshape_dims=None, seed=0,
             session_config=None, it=None, c=1.0, xi=0.1, lr=0.001):
    assert len(layer_sizes) == 4
    assert session_config is not None
    assert it is not None

    self.layer_sizes = layer_sizes
    self.feature_extractor_needed = feature_extractor_needed
    self.use_dropout = use_dropout
    self.dropoutv = dropoutv
    self.reshape_dims = reshape_dims
    self.seed = seed
    self.session_config = session_config
    self.it = it
    self.activation = utils.get_activation(activation)

    print("Using feature extractor: %s" % self.feature_extractor_needed)
    print("Using dropout, bn: %s, %f" % (self.use_dropout, self.dropoutv))

    self.phs = {}
    self.vars = {}
    self.objs = {}
    self.all_predictions = []
    self.c = c
    self.xi = xi
    self.lr = float(lr)
def __init__(self, config: Dict): super().__init__(config) default_config = { "input_shape": (100, 100), "num_actions": 3, "activation": "relu", "field_threshold": 6, "hidden_sizes": (64, 64), } self.config = with_default_config(config, default_config) self.activation = get_activation(self.config["activation"]) self.field_threshold = self.config["field_threshold"] hidden_sizes: Tuple[int] = self.config.get("hidden_sizes") input_shape: Tuple[int, int] = self.config["input_shape"] _coords_i = torch.linspace(-1, 1, input_shape[0]).view(-1, 1).repeat( 1, input_shape[1]) _coords_j = torch.linspace(-1, 1, input_shape[1]).view(1, -1).repeat( input_shape[0], 1) self.coords = torch.stack([_coords_i, _coords_j]) self.bilinear = nn.Bilinear(2, 2, 4) self.pool1 = nn.AvgPool2d((100, self.field_threshold)) self.pool2 = nn.AvgPool2d((100, 100 - 2 * self.field_threshold)) self.pool3 = nn.AvgPool2d((100, self.field_threshold)) # concat + flatten to [B, 3*4] layer_sizes = (12, ) + hidden_sizes self.hidden_layers = nn.ModuleList([ nn.Linear(in_size, out_size) for in_size, out_size in zip(layer_sizes, layer_sizes[1:]) ]) self.policy_head = nn.Linear(layer_sizes[-1], self.config["num_actions"]) self.value_head = nn.Linear(layer_sizes[-1], 1)
def __init__(self, layer_sizes, feature_extractor_needed=False, use_dropout=False,
             activation='relu', dropoutv=0.5, reshape_dims=None, seed=0,
             session_config=None, it=None, embedding=False):
    assert len(layer_sizes) == 4
    assert session_config is not None
    assert it is not None

    self.layer_sizes = layer_sizes
    self.feature_extractor_needed = feature_extractor_needed
    self.use_dropout = use_dropout
    self.dropoutv = dropoutv
    self.reshape_dims = reshape_dims
    self.seed = seed
    self.session_config = session_config
    self.it = it
    self.embedding = embedding  # if true, then expect x data to be embeddings

    if self.use_dropout:
        self.glob_training_ph = tf.placeholder_with_default(False, shape=())
        self.training_ph = tf.placeholder_with_default(False, shape=())

    self.activation = utils.get_activation(activation)

    print("Using feature extractor: %s" % self.feature_extractor_needed)
    print("Using dropout, bn: %s, %f" % (self.use_dropout, self.dropoutv))

    self.phs = {}
    self.vars = {}
    self.objs = {}
    self.all_predictions = []
def __init__(self, args, **kwargs):
    super(Net, self).__init__(**kwargs)
    self._act = get_activation(args.model_activation)
    with self.name_scope():
        self.encoder = GCMCLayer(src_key=args.src_key,
                                 dst_key=args.dst_key,
                                 src_in_units=args.src_in_units,
                                 dst_in_units=args.dst_in_units,
                                 agg_units=args.gcn_agg_units,
                                 out_units=args.gcn_out_units,
                                 num_links=args.nratings,
                                 dropout_rate=args.gcn_dropout,
                                 agg_accum=args.gcn_agg_accum,
                                 agg_act=args.model_activation,
                                 prefix='enc_')
        if args.gen_r_use_classification:
            self.gen_ratings = BiDecoder(
                in_units=args.gcn_out_units,
                out_units=args.nratings,
                num_basis_functions=args.gen_r_num_basis_func,
                prefix='gen_rating')
        else:
            self.gen_ratings = InnerProductLayer(prefix='gen_rating')
def __init__(self, src_key, dst_key, units, src_in_units, dst_in_units, num_links,
             dropout_rate=0.0, accum='stack', act=None, **kwargs):
    super(MultiLinkGCNAggregator, self).__init__(**kwargs)
    self._src_key = src_key
    self._dst_key = dst_key
    self._accum = accum
    self._num_links = num_links
    self._units = units
    if accum == "stack":
        assert units % num_links == 0, 'units should be divisible by num_links'
        self._units = self._units // num_links
    with self.name_scope():
        self.dropout = nn.Dropout(dropout_rate)  # dropout before feeding the out layer
        self.act = get_activation(act)
        self.src_dst_weights = self.params.get('src_dst_weight',
                                               shape=(num_links, self._units, src_in_units),
                                               dtype=np.float32,
                                               allow_deferred_init=True)
        self.dst_src_weights = self.params.get('dst_dst_weight',
                                               shape=(num_links, self._units, dst_in_units),
                                               dtype=np.float32,
                                               allow_deferred_init=True)
def __init__(self, args, dev_id):
    super(Net, self).__init__()
    self._act = get_activation(args.model_activation)
    self.encoder = GCMCLayer(args.rating_vals,
                             args.src_in_units,
                             args.dst_in_units,
                             args.gcn_agg_units,
                             args.gcn_out_units,
                             args.gcn_dropout,
                             args.gcn_agg_accum,
                             agg_act=self._act,
                             share_user_item_param=args.share_param,
                             device=dev_id)
    if args.mix_cpu_gpu and args.use_one_hot_fea:
        # with use_one_hot_fea, the user and movie features are None;
        # W can be extremely large, so with mix_cpu_gpu W should be stored on the CPU
        self.encoder.partial_to(dev_id)
    else:
        self.encoder.to(dev_id)
    self.decoder = BiDecoder(in_units=args.gcn_out_units,
                             num_classes=len(args.rating_vals),
                             num_basis=args.gen_r_num_basis_func)
    self.decoder.to(dev_id)
def sparse_rgcn_layer(node_embeddings: tf.Tensor,
                      adjacency_lists: List[tf.Tensor],
                      type_to_num_incoming_edges: tf.Tensor,
                      state_dim: Optional[int],
                      num_timesteps: int = 1,
                      activation_function: Optional[str] = "tanh",
                      message_aggregation_function: str = "sum",
                      normalize_by_num_incoming: bool = True,
                      use_both_source_and_target: bool = False,
                      ) -> tf.Tensor:
    """
    Compute new graph states by neural message passing.
    This implements the R-GCN model (Schlichtkrull et al., https://arxiv.org/pdf/1703.06103.pdf)
    for the case of few relations / edge types, i.e., we do not use the dimensionality-reduction
    tricks from section 2.2 of that paper.
    For this, we assume existing node states h^t_v and a list of per-edge-type adjacency
    matrices A_\ell.

    We compute new states as follows:
        h^{t+1}_v := \sigma(\sum_\ell \sum_{(u, v) \in A_\ell} 1/c_{v,\ell} * (W_\ell * h^t_u))
    c_{v,\ell} is usually the number of \ell edges going into v.
    The learnable parameters of this are the W_\ell \in R^{D,D}.

    We use the following abbreviations in shape descriptions:
    * V: number of nodes
    * D: state dimension
    * L: number of different edge types
    * E: number of edges of a given edge type

    Arguments:
        node_embeddings: float32 tensor of shape [V, D], the original representation of
            each node in the graph.
        adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape
            [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge
            of type l connects node v to node u.
        type_to_num_incoming_edges: float32 tensor of shape [L, V] representing the number
            of incoming edges of a given type. Concretely, type_to_num_incoming_edges[l, v]
            is the number of edges of type l connecting to node v.
        state_dim: Optional size of output dimension of the GNN layer. If not set, defaults
            to D, the dimensionality of the input. If different from the input dimension,
            parameter num_timesteps has to be 1.
        num_timesteps: Number of repeated applications of this message passing layer.
        activation_function: Type of activation function used.
        message_aggregation_function: Type of aggregation function used for messages.
        normalize_by_num_incoming: Flag indicating if messages should be scaled by
            1/(number of incoming edges).

    Returns:
        float32 tensor of shape [V, state_dim]
    """
    num_nodes = tf.shape(input=node_embeddings, out_type=tf.int32)[0]
    if state_dim is None:
        state_dim = tf.shape(input=node_embeddings, out_type=tf.int32)[1]

    # === Prepare things we need across all timesteps:
    activation_fn = get_activation(activation_function)
    message_aggregation_fn = get_aggregation_function(message_aggregation_function)
    edge_type_to_message_transformation_layers = []  # Layers to compute the message from a source state
    edge_type_to_message_targets = []  # List of tensors of message targets
    for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
        edge_type_to_message_transformation_layers.append(
            tf.keras.layers.Dense(units=state_dim,
                                  use_bias=False,
                                  activation=None,
                                  name="Edge_%i_Weight" % edge_type_idx))
        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])

    # Let M be the number of messages (sum of all E):
    message_targets = tf.concat(edge_type_to_message_targets, axis=0)  # Shape [M]

    cur_node_states = node_embeddings
    for _ in range(num_timesteps):
        messages_per_type = []  # list of tensors of messages of shape [E, H]
        # Collect incoming messages per edge type
        for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
            edge_sources = adjacency_list_for_edge_type[:, 0]
            edge_targets = adjacency_list_for_edge_type[:, 1]
            edge_source_states = \
                tf.nn.embedding_lookup(params=cur_node_states, ids=edge_sources)  # Shape [E, H]

            if use_both_source_and_target:
                edge_target_states = \
                    tf.nn.embedding_lookup(params=cur_node_states, ids=edge_targets)  # Shape [E, H]
                edge_state_pairs = tf.concat([edge_source_states, edge_target_states],
                                             axis=-1)  # Shape [E, 2H]
                messages = edge_type_to_message_transformation_layers[edge_type_idx](
                    edge_state_pairs)  # Shape [E, H]
            else:
                messages = edge_type_to_message_transformation_layers[edge_type_idx](
                    edge_source_states)  # Shape [E, H]

            if normalize_by_num_incoming:
                num_incoming_to_node_per_message = \
                    tf.nn.embedding_lookup(params=type_to_num_incoming_edges[edge_type_idx, :],
                                           ids=edge_targets)  # Shape [E]
                messages = tf.expand_dims(1.0 / (num_incoming_to_node_per_message + SMALL_NUMBER),
                                          axis=-1) * messages

            messages_per_type.append(messages)

        cur_messages = tf.concat(messages_per_type, axis=0)  # Shape [M, H]
        aggregated_messages = \
            message_aggregation_fn(data=cur_messages,
                                   segment_ids=message_targets,
                                   num_segments=num_nodes)  # Shape [V, H]

        new_node_states = activation_fn(aggregated_messages)  # Shape [V, H]
        cur_node_states = new_node_states

    return cur_node_states
def sparse_rgat_layer(node_embeddings: tf.Tensor,
                      adjacency_lists: List[tf.Tensor],
                      state_dim: Optional[int],
                      num_heads: int = 4,
                      num_timesteps: int = 1,
                      activation_function: Optional[str] = "tanh"
                      ) -> tf.Tensor:
    """
    Compute new graph states by neural message passing using attention. This generalises
    the original GAT model (Velickovic et al., https://arxiv.org/pdf/1710.10903.pdf)
    to multiple edge types by using different weights for different edge types.
    For this, we assume existing node states h^t_v and a list of per-edge-type adjacency
    matrices A_\ell.

    In the setting for a single attention head, we compute new states as follows:
        h^t_{v, \ell} := W_\ell h^t_v
        e_{u, \ell, v} := LeakyReLU(\alpha_\ell^T * concat(h^t_{u, \ell}, h^t_{v, \ell}))
        a_v := softmax_{\ell, u with (u, v) \in A_\ell}(e_{u, \ell, v})
        h^{t+1}_v := \sigma(\sum_{\ell, (u, v) \in A_\ell} a_{v, u, \ell} * h^t_{u, \ell})
    The learnable parameters of this are the W_\ell \in R^{D, D} and \alpha_\ell \in R^{2*D}.

    In practice, we use K attention heads, computing separate, partial new states
    h^{t+1}_{v,k} and compute h^{t+1}_v as the concatenation of the partial states.
    For this, we reduce the shape of W_\ell to R^{D, D/K} and \alpha_\ell to R^{2*D/K}.

    We use the following abbreviations in shape descriptions:
    * V: number of nodes
    * D: state dimension
    * K: number of attention heads
    * L: number of different edge types
    * E: number of edges of a given edge type

    Arguments:
        node_embeddings: float32 tensor of shape [V, D], the original representation of
            each node in the graph.
        adjacency_lists: List of L adjacency lists, represented as int32 tensors of shape
            [E, 2]. Concretely, adjacency_lists[l][k,:] == [v, u] means that the k-th edge
            of type l connects node v to node u.
        state_dim: Optional size of output dimension of the GNN layer. If not set, defaults
            to D, the dimensionality of the input. If different from the input dimension,
            parameter num_timesteps has to be 1.
        num_heads: Number of attention heads to use.
        num_timesteps: Number of repeated applications of this message passing layer.
        activation_function: Type of activation function used.

    Returns:
        float32 tensor of shape [V, state_dim]
    """
    num_nodes = tf.shape(input=node_embeddings, out_type=tf.int32)[0]
    if state_dim is None:
        state_dim = tf.shape(input=node_embeddings, out_type=tf.int32)[1]
    per_head_dim = state_dim // num_heads

    # === Prepare things we need across all timesteps:
    activation_fn = get_activation(activation_function)
    edge_type_to_state_transformation_layers = []  # Layers to compute the message from a source state
    edge_type_to_attention_parameters = []  # Parameters for the attention mechanism
    edge_type_to_message_targets = []  # List of tensors of message targets
    for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
        edge_type_to_state_transformation_layers.append(
            tf.keras.layers.Dense(units=state_dim,
                                  use_bias=False,
                                  activation=None,
                                  name="Edge_%i_Weight" % edge_type_idx))
        edge_type_to_attention_parameters.append(
            tf.compat.v1.get_variable(shape=(2 * state_dim,),
                                      name="Edge_%i_Attention_Parameters" % edge_type_idx))
        edge_type_to_message_targets.append(adjacency_list_for_edge_type[:, 1])

    # Let M be the number of messages (sum of all E):
    message_targets = tf.concat(edge_type_to_message_targets, axis=0)  # Shape [M]

    cur_node_states = node_embeddings
    for _ in range(num_timesteps):
        edge_type_to_per_head_messages = []  # type: List[tf.Tensor]  # tensors of messages of shape [E, K, D/K]
        edge_type_to_per_head_attention_coefficients = []  # type: List[tf.Tensor]  # tensors of shape [E, K]

        # Collect incoming messages per edge type.
        # Note: we compute the state transformations first (to make use of the wider, faster
        # matrix multiplication) and then split into the individual attention heads via reshapes:
        for edge_type_idx, adjacency_list_for_edge_type in enumerate(adjacency_lists):
            edge_sources = adjacency_list_for_edge_type[:, 0]
            edge_targets = adjacency_list_for_edge_type[:, 1]

            transformed_states = \
                edge_type_to_state_transformation_layers[edge_type_idx](cur_node_states)  # Shape [V, D]
            edge_transformed_source_states = \
                tf.nn.embedding_lookup(params=transformed_states, ids=edge_sources)  # Shape [E, D]
            edge_transformed_target_states = \
                tf.nn.embedding_lookup(params=transformed_states, ids=edge_targets)  # Shape [E, D]

            per_edge_per_head_transformed_source_states = \
                tf.reshape(edge_transformed_source_states, shape=(-1, num_heads, per_head_dim))
            per_edge_per_head_transformed_states = \
                tf.concat([per_edge_per_head_transformed_source_states,
                           tf.reshape(edge_transformed_target_states,
                                      shape=(-1, num_heads, per_head_dim))],
                          axis=-1)  # Shape [E, K, 2*D/K]

            per_head_attention_pars = tf.reshape(edge_type_to_attention_parameters[edge_type_idx],
                                                 shape=(num_heads, 2 * per_head_dim))  # Shape [K, 2*D/K]
            per_edge_per_head_attention_coefficients = \
                tf.nn.leaky_relu(tf.einsum('vki,ki->vk',
                                           per_edge_per_head_transformed_states,
                                           per_head_attention_pars))  # Shape [E, K]

            edge_type_to_per_head_messages.append(per_edge_per_head_transformed_source_states)
            edge_type_to_per_head_attention_coefficients.append(per_edge_per_head_attention_coefficients)

        per_head_messages = tf.concat(edge_type_to_per_head_messages, axis=0)
        per_head_attention_coefficients = tf.concat(edge_type_to_per_head_attention_coefficients, axis=0)

        head_to_aggregated_messages = []  # list of tensors of shape [V, D/K]
        for head_idx in range(num_heads):
            # Compute the softmax over all the attention coefficients for all messages going to this state:
            attention_coefficients = tf.concat(per_head_attention_coefficients[:, head_idx],
                                               axis=0)  # Shape [M]
            attention_values = \
                tf.exp(unsorted_segment_log_softmax(logits=attention_coefficients,
                                                    segment_ids=message_targets,
                                                    num_segments=num_nodes))  # Shape [M]
            messages = per_head_messages[:, head_idx, :]  # Shape [M, D/K]
            # Compute weighted sum per target node for this head:
            head_to_aggregated_messages.append(
                tf.math.unsorted_segment_sum(data=tf.expand_dims(attention_values, -1) * messages,
                                             segment_ids=message_targets,
                                             num_segments=num_nodes))

        new_node_states = activation_fn(tf.concat(head_to_aggregated_messages, axis=-1))
        cur_node_states = new_node_states

    return cur_node_states
if not os.path.exists(args.result_dir):
    os.mkdir(args.result_dir)

for set_cur in args.set_names:
    if not os.path.exists(os.path.join(args.result_dir, set_cur)):
        os.mkdir(os.path.join(args.result_dir, set_cur))
    psnrs = []
    ssims = []

    for im in os.listdir(os.path.join(args.set_dir, set_cur)):
        if im.endswith(".jpg") or im.endswith(".bmp") or im.endswith(".png"):
            # model.conv1.register_forward_hook(get_activation('conv1'))
            model.dncnn.register_forward_hook(get_activation('noise_level'))

            x = np.array(imread(os.path.join(args.set_dir, set_cur, im)),
                         dtype=np.float32) / 255.0
            np.random.seed(seed=0)  # for reproducibility
            y = x + np.random.normal(0, args.sigma / 255.0, x.shape)  # add Gaussian noise without clipping
            y = y.astype(np.float32)
            y_ = torch.from_numpy(y).view(1, -1, y.shape[0], y.shape[1])

            torch.cuda.synchronize()
            start_time = time.time()
            y_ = y_.cuda()
            x_ = model(y_)  # inference
            noise_level = activation['noise_level'].squeeze()
model = model.cuda()

if not os.path.exists(args.result_dir):
    os.mkdir(args.result_dir)

for set_cur in args.set_names:
    if not os.path.exists(os.path.join(args.result_dir, set_cur)):
        os.mkdir(os.path.join(args.result_dir, set_cur))
    psnrs = []
    ssims = []

    for im in os.listdir(os.path.join(args.set_dir, set_cur)):
        if im.endswith(".jpg") or im.endswith(".bmp") or im.endswith(".png"):
            # model.conv1.register_forward_hook(get_activation('conv1'))
            model.dncnn.register_forward_hook(get_activation('noise_level'))

            x = np.array(imread(os.path.join(args.set_dir, set_cur, im)),
                         dtype=np.float32) / 255.0
            np.random.seed(seed=0)  # for reproducibility
            y = x + np.random.normal(0, args.sigma / 255.0, x.shape)  # add Gaussian noise without clipping
            y = y.astype(np.float32)
            y_ = torch.from_numpy(y).view(1, -1, y.shape[0], y.shape[1])

            High_origin, Low_origin = Decomposition(torch.from_numpy(x).unsqueeze(0))
            High_noise, Low_noise = Decomposition(y_.squeeze(0))

            torch.cuda.synchronize()
            start_time = time.time()
            y_ = y_.cuda()
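# The two test-script fragments above use `get_activation` in a different sense than the
# rest of this section: there it is a forward-hook factory whose hooks write a module's
# output into a module-level `activation` dict (read back later as activation['noise_level']).
# Neither the dict nor the factory is shown here; the following is a minimal sketch of the
# usual pattern, assuming that is what the scripts rely on.
import torch

# module-level store that the hooks write into and the scripts read back
activation = {}


def get_activation(name):
    """Return a forward hook that records the hooked module's output under `name`."""
    def hook(module, inputs, output):
        activation[name] = output.detach()
    return hook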