def call(self, inputs, **kwargs):
    """Run one halting step and accumulate the halting-weighted output.

    A per-position sigmoid halting value is computed from ``inputs`` and
    converted into a halting probability using the running budget and
    remainder kept on the layer. The layer state (``active_steps``,
    ``ponder_cost``, ``remainder``, ``halt_budget``, ``weighted_output``)
    is updated in place on every call.

    Args:
        inputs: Tensor whose last two static dimensions are read as
            ``(sequence_length, d_model)``.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The list ``[inputs, self.weighted_output]``.
    """
    sequence_length, d_model = K.int_shape(inputs)[-2:]

    # Output of the "sigmoid halting unit" (not the probability yet).
    flat_inputs = K.reshape(inputs, [-1, d_model])
    halting_logits = K.bias_add(
        K.dot(flat_inputs, self.halting_kernel),
        self.halting_biases,
        data_format='channels_last')
    halting = K.sigmoid(K.reshape(halting_logits, [-1, sequence_length]))

    # Control tensors are created lazily, on the first call only.
    if self.zeros_like_halting is None:
        self.initialize_control_tensors(halting)

    # Flags: which positions still have budget, and which run out now.
    has_budget = K.greater(self.halt_budget, 0)
    budget_exhausted = K.less_equal(self.halt_budget - halting, 0)

    # Halting probability is:
    #   a. the halting output while budget remains after this step,
    #   b. the remainder if this step uses up the budget,
    #   c. zero for positions that are already out of budget.
    last_or_regular = K.switch(budget_exhausted, self.remainder, halting)
    halting_prob = K.switch(
        has_budget, last_or_regular, self.zeros_like_halting)

    self.active_steps += K.switch(
        has_budget, self.ones_like_halting, self.zeros_like_halting)

    # The last step is not known in advance, so the loss expression is
    # rebuilt on every call. It must read the remainder *before* the
    # remainder is updated below.
    self.ponder_cost = (
        self.time_penalty_t * K.mean(self.remainder + self.active_steps))

    # Update "the remaining probability" and the halt budget.
    self.remainder = K.switch(
        budget_exhausted, self.remainder, self.remainder - halting)
    self.halt_budget -= halting  # OK to become negative

    # When no position is active, a constant zero tensor is selected
    # instead of multiplying the inputs by an all-zero halting_prob; the
    # original author's intent is to avoid computing those steps.
    if self.zeros_like_input is None:
        self.zeros_like_input = K.zeros_like(
            inputs, name='zeros_like_input')

    # Sum-of-casts is used because K.any(...) doesn't work in PlaidML.
    active_count = K.sum(K.cast(has_budget, 'int32'))
    anything_active = K.greater(active_count, 0)
    step_output = K.switch(
        anything_active,
        K.expand_dims(halting_prob, -1) * inputs,
        self.zeros_like_input)

    if self.weighted_output is None:
        self.weighted_output = step_output
    else:
        self.weighted_output += step_output
    return [inputs, self.weighted_output]
def call(self, inputs):
    """Dense forward pass with freshly sampled noise on kernel and bias.

    Each call draws new ``K.random_uniform`` noise, scales it by the
    layer's sigma parameters and adds it to the kernel (and to the bias
    when ``use_bias`` is set) before the usual dot / bias / activation.

    Args:
        inputs: Input tensor passed to ``K.dot`` with the noisy kernel.

    Returns:
        The (optionally activated) output tensor.
    """
    # Implements Eq.(9) (reference kept from the original author's note).
    kernel_noise = K.random_uniform(shape=self.kernel_shape)
    noisy_kernel = self.kernel + self.sigma_kernel * kernel_noise
    result = K.dot(inputs, noisy_kernel)

    if self.use_bias:
        bias_noise = K.random_uniform(shape=self.bias_shape)
        noisy_bias = self.bias + self.sigma_bias * bias_noise
        result = K.bias_add(result, noisy_bias)

    if self.activation is None:
        return result
    return self.activation(result)
def call(self, x):
    """Add the layer's bias to the first element of ``x``.

    Args:
        x: Indexable container (e.g. a list of tensors); only ``x[0]`` is
            used.

    Returns:
        The result of ``K.bias_add`` applied to ``x[0]`` and ``self.bias``.
    """
    # The original had an unreachable ``exit()`` after the return and
    # commented-out debug prints; both are removed.
    return K.bias_add(x=x[0], bias=self.bias)
def call(self, input):
    """Apply the graph-convolution filters and the kernel to ``input``.

    Args:
        input: Input tensor. The original author's note describes it as
            3D, ``(batch_size x n_nodes x n_features)`` — not checked here.

    Returns:
        The filtered, projected and (optionally) biased/activated tensor.
    """
    # Contract axis 1 of the filters with axis 1 of the input, then
    # axis 2 of that result with axis 0 of the kernel.
    filtered = tf.tensordot(self.graph_conv_filters, input, [1, 1])
    projected = tf.tensordot(filtered, self.kernel, [2, 0])
    # Swap the first two axes of the result.
    result = tf.transpose(projected, [1, 0, 2])

    if self.use_bias:
        result = K.bias_add(result, self.bias)
    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs):
    """Transform the features with the kernel, then apply the stored filter.

    Args:
        inputs: Feature tensor; the filter is taken from ``self.fltr``.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    # Convolution: linear transform followed by the filter product.
    transformed = ops.dot(inputs, self.kernel)
    propagated = ops.mixed_mode_dot(self.fltr, transformed)

    if self.use_bias:
        propagated = K.bias_add(propagated, self.bias)
    if self.activation is None:
        return propagated
    return self.activation(propagated)
def call(self, inputs, mask=None):
    """Apply a partial convolution to an image/mask pair.

    Args:
        inputs: A list (or tuple) of exactly two tensors, ``[img, mask]``.
        mask: Unused; present for interface compatibility.

    Returns:
        The list ``[img_output, mask_output]``, where ``mask_output`` is
        the convolved mask clipped to ``[0, 1]``.

    Raises:
        ValueError: If ``inputs`` is not a two-element list or tuple.
    """
    # Both image and mask must be supplied. isinstance() also accepts
    # list subclasses and tuples, which the exact type check rejected.
    if not isinstance(inputs, (list, tuple)) or len(inputs) != 2:
        raise ValueError(
            'PartialConvolution2D must be called on a list of two tensors [img, mask]. Instead got: '
            + str(inputs))

    # Padding is done explicitly so that it becomes part of the masked
    # partial convolution.
    images = K.spatial_2d_padding(
        inputs[0], self.pconv_padding, self.data_format)
    masks = K.spatial_2d_padding(
        inputs[1], self.pconv_padding, self.data_format)

    # Convolve the mask.
    mask_output = K.conv2d(
        masks, self.kernel_mask,
        strides=self.strides,
        padding='valid',
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    # Convolve the masked image.
    img_output = K.conv2d(
        (images * masks), self.kernel,
        strides=self.strides,
        padding='valid',
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    # Mask ratio for each output position; the epsilon avoids division
    # by zero.
    mask_ratio = self.window_size / (mask_output + 1e-8)

    # Clip the mask output to be between 0 and 1.
    mask_output = K.clip(mask_output, 0, 1)

    # Zero the ratio wherever the clipped mask is zero (holes).
    mask_ratio = mask_ratio * mask_output

    # Normalize the image output.
    img_output = img_output * mask_ratio

    # The bias is applied to the image only.
    if self.use_bias:
        img_output = K.bias_add(
            img_output, self.bias, data_format=self.data_format)

    # The activation is applied to the image only.
    if self.activation is not None:
        img_output = self.activation(img_output)

    return [img_output, mask_output]
def call(self, inputs):
    """Dense forward pass using the kernel scaled by ``runtime_coeff``.

    Args:
        inputs: Input tensor passed to ``K.dot`` with the scaled kernel.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    result = K.dot(inputs, self.kernel * self.runtime_coeff)
    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format='channels_last')
    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs, training=None):
    """Multiply ``inputs`` by weights produced by ``_computeWeights``.

    Args:
        inputs: Input tensor for the matrix multiplication.
        training: Training flag forwarded to ``self._computeWeights``;
            when ``None`` it falls back to ``K.learning_phase()``.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    phase = K.learning_phase() if training is None else training
    weights = self._computeWeights(phase)

    result = math_ops.matmul(inputs, weights)
    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format='channels_last')
    if self.activation is None:
        return result
    return self.activation(result)
def call(self, x, mask=None):
    """Sum ``x`` over axis 0 and map the result to a sigmoid score.

    Args:
        x: Input tensor; ``x.shape`` must unpack into exactly two values.
        mask: Unused; present for interface compatibility.

    Returns:
        The sigmoid of the (optionally biased) projection of the summed
        input.
    """
    # The unpacked values are unused, but the unpacking fails unless
    # x.shape has exactly two entries.
    _rows, _cols = x.shape

    pooled = K.sum(x, axis=0, keepdims=True)

    # Instance-level score.
    score = K.dot(pooled, self.kernel)
    if self.use_bias:
        score = K.bias_add(score, self.bias)

    return K.sigmoid(score)
def call(self, inputs, **kwargs):
    """
    Forward pass: shared experts combined by one gate per output.

    :param inputs: Input tensor
    :param kwargs: Additional keyword arguments (not used here)
    :return: A list with one tensor per gate
    """
    # Shared experts: f_{i}(x) = activation(W_{i} * x + b); the original
    # note says the activation is ReLU according to the paper.
    expert_outputs = tf.tensordot(a=inputs, b=self.expert_kernels, axes=1)
    if self.use_expert_bias:
        expert_outputs = K.bias_add(x=expert_outputs, bias=self.expert_bias)
    expert_outputs = self.expert_activation(expert_outputs)

    # Gates: g^{k}(x) = activation(W_{gk} * x + b); the original note
    # says the activation is softmax according to the paper.
    gate_outputs = []
    for index, gate_kernel in enumerate(self.gate_kernels):
        gate = K.dot(x=inputs, y=gate_kernel)
        if self.use_gate_bias:
            gate = K.bias_add(x=gate, bias=self.gate_bias[index])
        gate_outputs.append(self.gate_activation(gate))

    # Mixture: f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
    def _mix(gate):
        # Repeat the gate along a new axis 1 so it multiplies
        # elementwise with the expert outputs, then sum over axis 2.
        tiled_gate = K.repeat_elements(
            K.expand_dims(gate, axis=1), self.units, axis=1)
        return K.sum(expert_outputs * tiled_gate, axis=2)

    return [_mix(gate) for gate in gate_outputs]
def call(self, inputs, states):
    """Run one step of the recurrent cell.

    The step computes LSTM-style input/forget/output gates and a carry
    update, passes the gated output through an extra projection
    (``easier_kernel`` / ``easier_activation``) and adds a separately
    computed ``r`` branch to it.

    Args:
        inputs: Input tensor for the current step.
        states: Sequence whose first two items are the previous memory
            state and the previous carry state.

    Returns:
        A tuple ``(h, [h, c])`` with the new output and the new states.
    """
    prev_h = states[0]  # previous memory state
    prev_c = states[1]  # previous carry state

    # Input projections, one per branch.
    x_i = dot(inputs, self.kernel_i)
    x_f = dot(inputs, self.kernel_f)
    x_c = dot(inputs, self.kernel_c)
    x_o = dot(inputs, self.kernel_o)
    x_r = dot(inputs, self.kernel_r)

    if self.use_bias:
        x_i = bias_add(x_i, self.bias_i)
        x_f = bias_add(x_f, self.bias_f)
        x_c = bias_add(x_c, self.bias_c)
        x_o = bias_add(x_o, self.bias_o)
        x_r = bias_add(x_r, self.bias_r)

    # Gates and carry update.
    in_gate = self.recurrent_activation(
        x_i + dot(prev_h, self.recurrent_kernel_i))
    forget_gate = self.recurrent_activation(
        x_f + dot(prev_h, self.recurrent_kernel_f))
    candidate = self.activation(x_c + dot(prev_h, self.recurrent_kernel_c))
    c = forget_gate * prev_c + in_gate * candidate
    out_gate = self.recurrent_activation(
        x_o + dot(prev_h, self.recurrent_kernel_o))

    # Gated output, extra projection, then the additive "r" branch.
    h = out_gate * self.activation(c)
    h = self.easier_activation(dot(h, self.easier_kernel))
    identity = self.activation(x_r + dot(prev_h, self.recurrent_kernel_r))
    h = add([h, identity])

    return h, [h, c]
def call(self, inputs):
    """Transform the features with the kernel, then apply the given filter.

    Args:
        inputs: Indexable pair; ``inputs[0]`` holds the features and
            ``inputs[1]`` the filter.

    Returns:
        The (optionally biased and activated) output tensor.
    """
    node_features, graph_filter = inputs[0], inputs[1]

    # Convolution: linear transform followed by the filter product.
    result = ops.filter_dot(graph_filter, ops.dot(node_features, self.kernel))

    if self.use_bias:
        result = K.bias_add(result, self.bias)
    if self.activation is None:
        return result
    return self.activation(result)
def call(self, x, training=False):
    """Pass ``x`` through the stack of dense transforms.

    Every layer applies kernel, bias and an activation; a falsy entry in
    ``self.activations`` falls back to ``'relu'``.

    Args:
        x: Input tensor.
        training: Unused; present for interface compatibility.

    Returns:
        The output of the last layer, or ``x`` unchanged when there are
        no kernels.
    """
    hidden = x
    for idx, kernel in enumerate(self.kernels):
        hidden = K.bias_add(
            K.dot(hidden, kernel),
            self.bias[idx],
            data_format='channels_last')
        # A falsy activation (None, '') means "use relu".
        activation_name = self.activations[idx] or 'relu'
        hidden = tf.keras.layers.Activation(activation_name)(hidden)
    return hidden
def call(self, x, mask=None):
    """Pool ``x`` over axis 1 with learned, optionally masked, weights.

    Args:
        x: Input tensor; the weighted sum is taken over axis 1.
        mask: Optional mask; when given it is cast to float and
            multiplied into the unnormalised weights.

    Returns:
        The attention-weighted sum of ``x`` over axis 1.
    """
    hidden = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
    scores = K.exp(K.squeeze(K.dot(hidden, self.u), axis=-1))

    if mask is not None:
        scores *= K.cast(mask, K.floatx())

    # Normalise over axis 1; the epsilon guards against an all-zero row.
    normaliser = K.sum(scores, axis=1, keepdims=True) + K.epsilon()
    scores /= K.cast(normaliser, K.floatx())

    weights = K.expand_dims(scores)
    return K.sum(x * weights, axis=1)
def call(self, input):
    """Apply the shared kernel to each of the ``self.parts`` input slices.

    Args:
        input: Tensor indexed as ``input[:, i]`` for each part ``i``.

    Returns:
        The per-part projections stacked and transposed with
        ``perm=[1, 0, 2]``, then optionally biased and activated.
    """
    projected_parts = [
        K.dot(input[:, part], self.kernel) for part in range(self.parts)
    ]
    # Stacking puts the part axis first; the transpose swaps it with the
    # following axis.
    result = tf.transpose(tf.stack(projected_parts), perm=[1, 0, 2])

    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format='channels_last')
    if self.activation is None:
        return result
    return self.activation(result)
def decode(self, latent):
    """Map ``latent`` back through the transposed kernels, last one first.

    Args:
        latent: Tensor to decode.

    Returns:
        The reconstruction, or ``latent`` unchanged when
        ``self.layer_sizes`` is empty.
    """
    depth = len(self.layer_sizes)
    recon = latent
    for layer in range(depth):
        if self.dropout > 0:
            recon = self._apply_dropout(recon)
        # Kernels are reused transposed and in reverse order; the biases
        # come from the separate ``biases2`` list in forward order.
        tied_kernel = K.transpose(self.kernels[depth - layer - 1])
        recon = K.dot(recon, tied_kernel)
        if self.use_bias:
            recon = K.bias_add(recon, self.biases2[layer])
        if self.activation is not None:
            recon = self.activation(recon)
    return recon
def encode(self, inputs):
    """Map ``inputs`` through the kernels to the latent representation.

    Args:
        inputs: Tensor to encode.

    Returns:
        The latent tensor, L2-normalised along the last axis when
        ``self.l2_normalize`` is set. With an empty ``self.layer_sizes``
        and no normalisation, ``inputs`` is returned unchanged.
    """
    latent = inputs
    for layer in range(len(self.layer_sizes)):
        if self.dropout > 0:
            latent = self._apply_dropout(latent)
        latent = K.dot(latent, self.kernels[layer])
        if self.use_bias:
            latent = K.bias_add(latent, self.biases[layer])
        if self.activation is not None:
            latent = self.activation(latent)

    if self.l2_normalize:
        # K.l2_normalize already returns the normalised tensor. The
        # original divided ``latent`` by it, which produced the norm
        # broadcast over the features (and NaN at zero entries).
        latent = K.l2_normalize(latent, axis=-1)
    return latent
def call(self, inputs):
    """Graph convolution with an additive skip connection.

    Args:
        inputs: Pair ``(x, a)``; ``x`` is transformed by both kernels and
            ``a`` is applied to the first branch via ``ops.filter_dot``.

    Returns:
        The (optionally biased and activated) sum of both branches.
    """
    x, a = inputs

    propagated = ops.filter_dot(a, K.dot(x, self.kernel_1))
    skip = K.dot(x, self.kernel_2)
    propagated += skip

    if self.use_bias:
        propagated = K.bias_add(propagated, self.bias)
    if self.activation is None:
        return propagated
    return self.activation(propagated)
def call(self, inputs, mask=None):
    """Graph convolution with optional masking of the output.

    Args:
        inputs: Pair ``(x, a)``; ``x`` is transformed by the kernel and
            ``a`` is applied via ``ops.modal_dot``.
        mask: Optional indexable; when given, the output is multiplied
            by ``mask[0]`` before the activation.

    Returns:
        The activated output tensor. The activation is applied
        unconditionally, so it must be callable.
    """
    x, a = inputs

    result = ops.modal_dot(a, K.dot(x, self.kernel))
    if self.use_bias:
        result = K.bias_add(result, self.bias)
    if mask is not None:
        result *= mask[0]

    return self.activation(result)
def call(self, inputs):
    """Compute multi-head graph attention over the node features.

    Shapes in the comments follow the original author's annotations.

    Args:
        inputs: Indexable pair; ``inputs[0]`` holds the node features
            (N x F) and ``inputs[1]`` the adjacency matrix (N x N).

    Returns:
        The activated head outputs, concatenated when
        ``attn_heads_reduction == 'concat'`` and averaged otherwise.
    """
    X, A = inputs[0], inputs[1]

    head_outputs = []
    for head in range(self.attn_heads):
        kernel = self.kernels[head]  # W in the paper (F x F')
        attention_kernel = self.attn_kernels[head]  # a in the paper (2F' x 1)

        # Inputs to the attention network.
        features = K.dot(X, kernel)  # (N x F')

        # [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
        self_scores = K.dot(features, attention_kernel[0])  # (N x 1)
        neigh_scores = K.dot(features, attention_kernel[1])  # (N x 1)

        # Pairwise scores a(Wh_i, Wh_j), (N x N) via broadcasting.
        scores = self_scores + K.transpose(neigh_scores)
        scores = LeakyReLU(alpha=0.2)(scores)

        # A large negative offset is added wherever A is 0, before the
        # softmax (Vaswani et al., 2017).
        scores += -10e9 * (1.0 - A)
        attention = K.softmax(scores)  # (N x N)

        # Dropout on both the attention coefficients and the features.
        dropped_attention = Dropout(self.dropout_rate)(attention)
        dropped_features = Dropout(self.dropout_rate)(features)

        # Linear combination with the neighbours' features.
        combined = K.dot(dropped_attention, dropped_features)  # (N x F')
        if self.use_bias:
            combined = K.bias_add(combined, self.biases[head])

        head_outputs.append(combined)

    # Aggregate the heads' outputs according to the reduction method.
    if self.attn_heads_reduction == 'concat':
        merged = K.concatenate(head_outputs)  # (N x KF')
    else:
        merged = K.mean(K.stack(head_outputs), axis=0)  # (N x F')

    return self.activation(merged)
def call(self, inputs):
    """Convolve a (means, covariances) pair with the layer kernel.

    The means are always convolved with ``self.kernel``. The covariance
    handling depends on ``self.mode``:

    * ``"diag"``: convolved with the squared kernel;
    * ``"half"``: leading axes are flattened, convolved once and
      reshaped to the computed output shape;
    * ``"full"``: convolved twice, with an axis permutation in between
      and after, so that both groups of axes are convolved.

    For any other mode the covariance output stays an empty list.

    Args:
        inputs: Pair ``(means, covariances)`` of tensors.

    Returns:
        ``[means_out, covariances_out]``, or the result of
        ``self.activation(outputs, mode=self.mode)`` when an activation
        is set.
    """
    input_shapes = nest.map_structure(lambda tensor: tensor.shape, inputs)
    output_shapes = self.compute_output_shape(input_shapes)
    means, covariances = inputs

    mean_out = self._convolution_op(means, self.kernel)
    cov_out = []  # placeholder kept when no mode branch matches

    if self.mode == "diag":
        cov_out = self._convolution_op(covariances, K.square(self.kernel))
    elif self.mode == "half":
        cov_shape = covariances.get_shape().as_list()
        flat_cov = K.reshape(covariances, [-1] + cov_shape[2:])
        cov_out = K.reshape(
            self._convolution_op(flat_cov, self.kernel),
            [-1] + output_shapes[1].as_list()[1:],
        )
    elif self.mode == "full":
        rank = self.rank
        cov_shape = covariances.get_shape().as_list()
        out_cov_shape = output_shapes[1].as_list()
        in_block = cov_shape[1:rank + 2]
        out_tail = out_cov_shape[-rank - 1:]
        # Permutation that swaps the two (rank + 1)-sized axis groups
        # that follow the leading axis.
        swap_blocks = (
            [0]
            + list(range(rank + 2, 2 * rank + 3))
            + list(range(1, rank + 2))
        )

        # First pass: fold the first axis group into the leading axis
        # and convolve the second group.
        cov = K.reshape(covariances, [-1] + cov_shape[rank + 2:])
        cov = K.reshape(
            self._convolution_op(cov, self.kernel),
            [-1] + in_block + out_tail,
        )
        cov = K.permute_dimensions(cov, swap_blocks)

        # Second pass: same procedure on the other axis group.
        cov = K.reshape(cov, [-1] + in_block)
        cov = K.reshape(
            self._convolution_op(cov, self.kernel),
            [-1] + out_tail + out_cov_shape[1:rank + 2],
        )
        cov_out = K.permute_dimensions(cov, swap_blocks)

    # The bias is only added to the means.
    if self.use_bias:
        mean_out = K.bias_add(
            mean_out, self.bias, data_format=self.data_format)

    outputs = [mean_out, cov_out]
    if self.activation is not None:
        return self.activation(outputs, mode=self.mode)
    return outputs
def call(self, inputs, **kwargs):
    """Edge-network message passing with attention and aggregation.

    Args:
        inputs (list):
            X (tensor): node feature tensor
            A (tensor): edge pair tensor; the last two entries of its
                final axis are read as (target, source)
            E (tensor): edge feature tensor
            degree (tensor): node degree tensor, passed to the
                attention function
        **kwargs: not used here

    Returns:
        output (tensor): result after the edge network, attention,
            aggregation, averaging over heads and bias
    """
    X, A, E, degree = inputs
    N = K.int_shape(X)[1]
    targets = A[..., -2]
    sources = A[..., -1]

    # The edge network turns edge features into one
    # (state_dim x state_dim) message weight per edge and head.
    edge_weights = tf.reshape(
        self.nn(E),
        [-1, tf.shape(E)[1], self.attn_heads,
         self.state_dim, self.state_dim])

    # Replicate the node features once per attention head.
    X = tf.tile(X[..., None], [1, 1, 1, self.attn_heads])
    X = tf.transpose(X, [0, 1, 3, 2])

    # Attention coefficients for the messages.
    attn_coef = self.attn_func([X, N, targets, sources, degree])

    # Gather the source-node features and apply the edge weights.
    messages = tf.gather(X, sources, batch_dims=1)
    messages = tf.matmul(edge_weights, messages[..., None])[..., 0]
    output = attn_coef * messages

    # Offset the target ids by N per row so that every row gets its own
    # range of segment ids.
    num_rows = tf.shape(targets)[0]
    row_offsets = N * tf.expand_dims(tf.range(num_rows), axis=1)
    segment_ids_per_row = targets + row_offsets

    # Aggregation over neighbouring-node messages; an unrecognised
    # method leaves the messages unaggregated.
    if self.aggr_method == 'max':
        segment_reduce = tf.math.unsorted_segment_max
    elif self.aggr_method == 'mean':
        segment_reduce = tf.math.unsorted_segment_mean
    elif self.aggr_method == 'sum':
        segment_reduce = tf.math.unsorted_segment_sum
    else:
        segment_reduce = None
    if segment_reduce is not None:
        output = segment_reduce(output, segment_ids_per_row, N * num_rows)

    # Output the mean over all attention heads, plus the bias.
    output = tf.reshape(output, [-1, N, self.attn_heads, self.state_dim])
    output = tf.reduce_mean(output, axis=-2)
    return K.bias_add(output, self.bias)
def call(self, inputs):
    """Run the fully connected stack and project onto the two theta outputs.

    Args:
        inputs: Input tensor.

    Returns:
        A tuple ``(theta_b_output, theta_f_output)``.
    """
    hidden = inputs
    for layer in range(self.layers):
        # NOTE: the attribute really is spelled ``weigts`` on the layer.
        hidden = K.dot(hidden, self.weigts[layer])
        hidden = K.bias_add(
            hidden, self.biases[layer], data_format="channels_last")
        hidden = self.activation(hidden)

    theta_b_output = K.dot(hidden, self.theta_b_W)
    theta_f_output = K.dot(hidden, self.theta_f_W)
    return theta_b_output, theta_f_output
def call(self, inputs):
    """Multiply ``inputs`` elementwise by the binarized kernel.

    Args:
        inputs: Input tensor.

    Returns:
        The (optionally biased and activated) product.
    """
    result = inputs * binarize(self.kernel, H=self.H)
    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format=self.data_format)
    if self.activation is None:
        return result
    # TODO: make sure this is not executing (note kept from the original).
    return self.activation(result)
def call(self, x):
    """
    Attention pooling over axis 1.

        ui = tanh(xW+b)
        a = softmax(uV)
        o = sum(a*x)

    :param x: input tensor [batch_size, time_step, feat_len]
    :return: tuple (o, a) with the pooled output [batch_size, feat_len]
        and the attention weights
    """
    # ui = tanh(xW+b)
    projected = K.bias_add(K.dot(x, self.W), self.b)
    ui = K.tanh(projected)  # [B, T, L]

    # a = softmax(uV), normalised along axis 1
    ai = K.softmax(K.dot(ui, self.V), axis=1)  # [B, T, 1]

    # o = sum(a*x)
    pooled = K.sum(x * ai, axis=1, keepdims=False)
    return pooled, ai
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (the same dropout mask is used
            for every temporal slice of the input); only applied when
            strictly between 0 and 1.
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    # Fill in any dimension the caller did not supply (falsy counts as
    # missing).
    input_dim = input_dim or K.shape(x)[2]
    timesteps = timesteps or K.shape(x)[1]
    output_dim = output_dim or K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # Build one dropout mask from the first timestep's shape and
        # repeat it over time, so every timestep gets the same pattern.
        first_step = K.reshape(x[:, 0, :], (-1, input_dim))
        step_mask = K.dropout(K.ones_like(first_step), dropout)
        full_mask = K.repeat(step_mask, timesteps)
        x = K.in_train_phase(x * full_mask, x, training=training)

    # Collapse the time and batch dimensions together for the product.
    flat = K.dot(K.reshape(x, (-1, input_dim)), w)
    if b is not None:
        flat = K.bias_add(flat, b)

    # Reshape back to a 3D tensor.
    if K.backend() == 'tensorflow':
        result = K.reshape(flat, K.stack([-1, timesteps, output_dim]))
        result.set_shape([None, None, output_dim])
        return result
    return K.reshape(flat, (-1, timesteps, output_dim))
def call(self, inputs):
    """Multiply ``inputs`` elementwise by a complex-valued kernel.

    Index 0 of the last axis is treated as the real part and index 1 as
    the imaginary part; the product follows
    ``(a + ib)(c + id) = (ac - bd) + i(ad + bc)``.

    Args:
        inputs: Tensor indexed as ``inputs[:, :, :, 0]`` and
            ``inputs[:, :, :, 1]``.

    Returns:
        Real and imaginary results concatenated on the last axis, then
        optionally biased and activated.
    """
    real_in = tf.expand_dims(inputs[:, :, :, 0], axis=-1)
    imag_in = tf.expand_dims(inputs[:, :, :, 1], axis=-1)

    real_out = (tf.math.multiply(real_in, self.real_kernel)
                - tf.math.multiply(imag_in, self.imag_kernel))
    imag_out = (tf.math.multiply(real_in, self.imag_kernel)
                + tf.math.multiply(imag_in, self.real_kernel))

    result = K.concatenate([real_out, imag_out], axis=-1)
    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format='channels_last')
    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs):
    """Concatenate node features with their aggregation and project them.

    Args:
        inputs: Layer inputs; unpacked by ``self.get_inputs`` into
            ``(x, a, _)``.

    Returns:
        The projected output, L2-normalised along the last axis and
        optionally activated.
    """
    x, a, _ = self.get_inputs(inputs)
    a = ops.add_self_loops(a)

    neighbourhood = self.propagate(x, a)
    result = ops.dot(K.concatenate([x, neighbourhood]), self.kernel)
    if self.use_bias:
        result = K.bias_add(result, self.bias)

    # Normalisation happens before the activation.
    result = K.l2_normalize(result, axis=-1)
    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs):
    """2D convolution using kernel and bias passed through ``scale_weights``.

    Args:
        inputs: Input tensor for ``K.conv2d``.

    Returns:
        The (optionally biased and activated) convolution output.
    """
    result = K.conv2d(
        inputs,
        scale_weights(self.kernel),
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    if self.use_bias:
        # The bias goes through the same scaling helper as the kernel.
        result = K.bias_add(
            result,
            scale_weights(self.bias),
            data_format=self.data_format)

    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs):
    """Apply ``self.local_conv3d``, then the optional bias and the activation.

    Args:
        inputs: Input tensor passed to ``self.local_conv3d``.

    Returns:
        The activated output. The activation is applied unconditionally,
        so it must be callable.
    """
    output_dims = (self.output_row, self.output_col, self.output_z)
    result = self.local_conv3d(
        inputs,
        self.kernel,
        self.kernel_size,
        self.strides,
        output_dims,
        self.data_format)

    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format=self.data_format)

    return self.activation(result)