def generator(self, src_enc):
    """Run the two-layer generator network on an encoded source.

    A ReLU hidden layer (``G_w1``/``G_b1``) is followed by a sigmoid output
    layer (``G_w2``/``G_b2``).

    Args:
        src_enc: Tensor fed to the first dense projection.

    Returns:
        The sigmoid activations of the second layer.
    """
    hidden_pre = K.dot(src_enc, self.G_w1)
    hidden = tf.nn.relu(K.bias_add(hidden_pre, self.G_b1))
    logits = K.bias_add(K.dot(hidden, self.G_w2), self.G_b2)
    return tf.nn.sigmoid(logits)
def step_gru(cell_inputs, cell_state, kernel, recurrent_kernel, input_bias,
             recurrent_bias):
    """Compute one GRU step with fused gate projections.

    Args:
        cell_inputs: Input tensor for the current timestep.
        cell_state: Previous hidden state tensor.
        kernel: Input weights for the three gates, concatenated on axis 1.
        recurrent_kernel: Recurrent weights for the three gates,
            concatenated on axis 1.
        input_bias: Bias added to the input projection.
        recurrent_bias: Bias added to the recurrent projection.

    Returns:
        A pair ``(h, [h])`` with the new hidden state as output and state.
    """
    prev_h = cell_state

    # One matmul per source, then cut into update / reset / candidate parts.
    input_proj = K.bias_add(K.dot(cell_inputs, kernel), input_bias)
    in_z, in_r, in_h = array_ops.split(input_proj, 3, axis=1)

    hidden_proj = K.bias_add(K.dot(prev_h, recurrent_kernel), recurrent_bias)
    rec_z, rec_r, rec_h = array_ops.split(hidden_proj, 3, axis=1)

    update_gate = nn.sigmoid(in_z + rec_z)
    reset_gate = nn.sigmoid(in_r + rec_r)
    candidate = nn.tanh(in_h + reset_gate * rec_h)

    # The update gate blends the previous state with the candidate.
    new_h = update_gate * prev_h + (1 - update_gate) * candidate
    return new_h, [new_h]
def call(self, inputs, prev_projection, states, training=None):
    """Run one step of the cell and project its output.

    The pre-activation is the sum of the input term, an optional recurrent
    term (``use_recurrent``) and an optional feedback term built from the
    previous projection (``use_feedback``).

    Args:
        inputs: Input tensor for this step.
        prev_projection: Projection returned by the previous step.
        states: Sequence whose first element is the previous output.
        training: Forwarded to the dropout-mask helpers.

    Returns:
        A triple ``(output, projection, [output])``.
    """
    prev_output = states[0]
    input_mask = self.get_dropout_mask_for_cell(inputs, training)
    recurrent_mask = self.get_recurrent_dropout_mask_for_cell(
        prev_output, training)

    if input_mask is not None:
        inputs = inputs * input_mask
    output = K.dot(inputs, self.kernel)

    if self.use_recurrent:
        if recurrent_mask is not None:
            prev_output = prev_output * recurrent_mask
        output = output + K.dot(prev_output, self.recurrent_kernel)

    if self.use_feedback:
        feedback = prev_projection
        if self.projection_activation is not None:
            feedback = self.projection_activation(feedback)
        output = output + K.dot(feedback, self.feedback_kernel)

    if self.bias is not None:
        output = K.bias_add(output, self.bias)
    if self.activation is not None:
        output = self.activation(output)

    # The projection is linear: no activation is applied to it here.
    projection = K.dot(output, self.projection_kernel)
    if self.projection_bias is not None:
        projection = K.bias_add(projection, self.projection_bias)

    return output, projection, [output]
def call(self, inputs, states):
    """Run one LSTM step.

    ``self.w``, ``self.u`` and ``self.bias`` each hold the parameters of the
    four gates (input, forget, candidate, output) concatenated in that
    order; they are split here and applied per gate.

    The carry update follows the standard LSTM recurrence
    ``c = f * c_prev + i * activation(candidate)``. The previous code
    computed the forget gate but never used it, and added the candidate
    without gating it by ``i``.

    Args:
        inputs: Input tensor for this step.
        states: Sequence ``(h_prev, c_prev)``.

    Returns:
        A pair ``(h, (h, c))`` with the new hidden state and the new states.
    """
    last_h = states[0]
    last_c = states[1]

    w_i, w_f, w_c, w_o = tf.split(self.w, num_or_size_splits=4, axis=1)
    b_i, b_f, b_c, b_o = tf.split(self.bias, num_or_size_splits=4, axis=0)
    u_i, u_f, u_c, u_o = tf.split(self.u, num_or_size_splits=4, axis=1)

    # Input contribution per gate: W x + b.
    x_i = K.bias_add(K.dot(inputs, w_i), b_i)
    x_f = K.bias_add(K.dot(inputs, w_f), b_f)
    x_c = K.bias_add(K.dot(inputs, w_c), b_c)
    x_o = K.bias_add(K.dot(inputs, w_o), b_o)

    # Gate pre-activations: W x + b + U h_prev.
    i = self.recurrent_activation(x_i + K.dot(last_h, u_i))
    f = self.recurrent_activation(x_f + K.dot(last_h, u_f))
    candidate = self.activation(x_c + K.dot(last_h, u_c))
    o = self.recurrent_activation(x_o + K.dot(last_h, u_o))

    # Forget part of the old carry, admit part of the candidate.
    c = f * last_c + i * candidate

    # Compute the hidden state h.
    h = o * self.activation(c)
    return h, (h, c)
def call(self, inputs, **kwargs):
    """Compute one gated mixture of the expert outputs per gate.

    Experts: ``f_i(x) = expert_activation(W_i x + b)``.
    Gates:   ``g^k(x) = gate_activation(W_gk x + b)``.
    Output k: ``sum_i g^k(x)_i * f_i(x)``.

    Args:
        inputs: Input tensor shared by all experts and gates.
        **kwargs: Unused.

    Returns:
        A list with one output tensor per entry of ``self.gate_kernels``.
    """
    # All experts are evaluated with a single tensor contraction.
    expert_outputs = tf.tensordot(a=inputs, b=self.expert_kernels, axes=1)
    expert_outputs = K.bias_add(x=expert_outputs, bias=self.expert_bias)
    expert_outputs = self.expert_activation(expert_outputs)

    gate_outputs = []
    for index, gate_kernel in enumerate(self.gate_kernels):
        gate_logits = K.bias_add(x=K.dot(x=inputs, y=gate_kernel),
                                 bias=self.gate_bias[index])
        gate_outputs.append(self.gate_activation(gate_logits))

    final_outputs = []
    for gate_output in gate_outputs:
        # Repeat the gate weights self.units times along a new axis 1 so
        # they line up with the expert outputs, then sum over axis 2.
        tiled_gate = K.repeat_elements(
            tf.expand_dims(gate_output, axis=1), self.units, axis=1)
        final_outputs.append(K.sum(expert_outputs * tiled_gate, axis=2))

    return final_outputs
def call(self, inputs, states, training=None):
    """Advance the two-branch gated recurrent cell by one step.

    The first ``self.units`` columns of ``kernel``/``recurrent_kernel`` (and
    entries of ``bias``) feed the gate ``v1``; the remaining columns feed
    the candidate ``v2``. The new state is ``(1 - v1) * vh + v1 * v2``.

    Args:
        inputs: Input tensor for this step.
        states: Sequence whose first element is the previous state.
        training: Forwarded to the dropout-mask helpers.

    Returns:
        A pair ``(vh, [vh])`` with the new state as output and state.
    """
    vh = states[0]
    units = self.units
    dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=2)
    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(
        vh, training, count=2)

    # Each branch gets its own input dropout mask.
    if 0. < self.dropout < 1.:
        gate_in, cand_in = inputs * dp_mask[0], inputs * dp_mask[1]
    else:
        gate_in = cand_in = inputs

    gate_proj = K.dot(gate_in, self.kernel[:, :units])
    cand_proj = K.dot(cand_in, self.kernel[:, units:])
    if self.use_bias:
        gate_proj = K.bias_add(gate_proj, self.bias[:units])
        cand_proj = K.bias_add(cand_proj, self.bias[units:])

    # Each branch gets its own recurrent dropout mask as well.
    if 0. < self.recurrent_dropout < 1.:
        gate_h, cand_h = vh * rec_dp_mask[0], vh * rec_dp_mask[1]
    else:
        gate_h = cand_h = vh

    v1 = self.recurrent_activation(
        gate_proj + K.dot(gate_h, self.recurrent_kernel[:, :units]))
    # The candidate sees the previous state scaled by the gate.
    v2 = self.activation(
        cand_proj + K.dot(cand_h * v1, self.recurrent_kernel[:, units:]))

    new_state = (1 - v1) * vh + v1 * v2
    return new_state, [new_state]
def call(self, inputs, states, training=None):
    """Run one LSTM step using separate per-gate weight attributes.

    Gate order throughout is input (i), forget (f), candidate (c),
    output (o); each gate has its own kernel, recurrent kernel and bias
    attribute (``kernel_i``, ``recurrent_kernel_i``, ``bias_i``, ...).

    Args:
        inputs: Input tensor for this step.
        states: Sequence ``(h_tm1, c_tm1)``: previous memory and carry state.
        training: Forwarded to the dropout-mask helpers.

    Returns:
        A pair ``(h, [h, c])``.
    """
    h_tm1, c_tm1 = states[0], states[1]

    dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(
        h_tm1, training, count=4)

    # One (optionally masked) copy of the input per gate.
    if 0 < self.dropout < 1.:
        gate_inputs = [inputs * dp_mask[k] for k in range(4)]
    else:
        gate_inputs = [inputs] * 4

    kernels = (self.kernel_i, self.kernel_f, self.kernel_c, self.kernel_o)
    x_i, x_f, x_c, x_o = [
        K.dot(gate_in, gate_kernel)
        for gate_in, gate_kernel in zip(gate_inputs, kernels)
    ]

    if self.use_bias:
        x_i = K.bias_add(x_i, self.bias_i)
        x_f = K.bias_add(x_f, self.bias_f)
        x_c = K.bias_add(x_c, self.bias_c)
        x_o = K.bias_add(x_o, self.bias_o)

    # One (optionally masked) copy of the previous hidden state per gate.
    if 0 < self.recurrent_dropout < 1.:
        h_i, h_f, h_c, h_o = [h_tm1 * rec_dp_mask[k] for k in range(4)]
    else:
        h_i = h_f = h_c = h_o = h_tm1

    i = self.recurrent_activation(x_i + K.dot(h_i, self.recurrent_kernel_i))
    f = self.recurrent_activation(x_f + K.dot(h_f, self.recurrent_kernel_f))
    candidate = self.activation(x_c + K.dot(h_c, self.recurrent_kernel_c))
    c = f * c_tm1 + i * candidate
    o = self.recurrent_activation(x_o + K.dot(h_o, self.recurrent_kernel_o))

    h = o * self.activation(c)
    return h, [h, c]
def call(self, inputs):
    """Apply a depthwise 3D convolution followed by a pointwise convolution.

    Each input channel is cut out on its own and convolved with
    ``depth_multiplier`` kernels from ``self.depthwise_kernels`` (consumed in
    channel-major order). The per-kernel results are concatenated along the
    channel axis and passed through ``self.pointwise_kernel``.

    The channels_last branch used to slice axis 1 (``inputs[:, c:c + 1,
    ...]``), which is not the channel axis in that layout; it now slices the
    last axis. Behaviour for channels_first is unchanged.

    Args:
        inputs: 5D input tensor laid out according to ``self.data_format``.

    Returns:
        The pointwise-convolved tensor, passed through ``self.activation``
        when one is set.
    """
    channels_first = self.data_format == 'channels_first'
    channel_axis = 1 if channels_first else 4

    outputs = []
    count = 0
    for c in range(self.input_spec.axes[channel_axis]):
        # Keep the channel dimension (size 1) so conv3d sees a 5D tensor.
        if channels_first:
            channel_slice = inputs[:, c:c + 1, ...]
        else:
            channel_slice = inputs[..., c:c + 1]

        for _ in range(self.depth_multiplier):
            output = K.conv3d(channel_slice,
                              self.depthwise_kernels[count],
                              padding=self.padding,
                              data_format=self.data_format,
                              dilation_rate=self.dilation_rate)
            if self.use_bias:
                output = K.bias_add(output,
                                    self.biases[count],
                                    data_format=self.data_format)
            outputs.append(output)
            count += 1

    outputs = K.concatenate(outputs, axis=channel_axis)

    outputs = K.conv3d(outputs,
                       self.pointwise_kernel,
                       padding=self.padding,
                       data_format=self.data_format,
                       dilation_rate=self.dilation_rate)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def _preprocess_symbolic_input(x, data_format, mode):
    """Preprocesses a tensor encoding a batch of images.

    The mean tensor is built on every call. It used to be cached in the
    module-level ``_IMAGENET_MEAN`` the first time any mode ran, so a
    'torch' call followed by a 'caffe' call (or the reverse) reused the
    wrong mean. The 'torch' standard deviation is also reshaped for
    'channels_first' so it divides along the channel axis, not the last one.

    Arguments:
        x: Input tensor, 3D or 4D.
        data_format: Data format of the image tensor.
        mode: One of "caffe", "tf" or "torch".
            - caffe: will convert the images from RGB to BGR,
                then will zero-center each color channel with
                respect to the ImageNet dataset,
                without scaling.
            - tf: will scale pixels between -1 and 1,
                sample-wise.
            - torch: will scale pixels between 0 and 1 and then
                will normalize each channel with respect to the
                ImageNet dataset.
            Any value other than "tf" or "torch" is handled as "caffe".

    Returns:
        Preprocessed tensor.
    """
    if mode == 'tf':
        x /= 127.5
        x -= 1.
        return x

    if mode == 'torch':
        x /= 255.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        # 'RGB'->'BGR': reverse the channel axis.
        if data_format == 'channels_first':
            if K.ndim(x) == 3:
                x = x[::-1, ...]
            else:
                x = x[:, ::-1, ...]
        else:
            x = x[..., ::-1]
        mean = [103.939, 116.779, 123.68]
        std = None

    # Zero-center by mean pixel (the negated mean is added as a bias).
    mean_tensor = constant_op.constant(-np.array(mean), dtype=K.floatx())
    if K.dtype(x) != K.dtype(mean_tensor):
        mean_tensor = math_ops.cast(mean_tensor, K.dtype(x))
    x = K.bias_add(x, mean_tensor, data_format)

    if std is not None:
        std_array = np.array(std)
        if data_format == 'channels_first':
            # (C, 1, 1) broadcasts over both (C, H, W) and (N, C, H, W).
            std_array = std_array.reshape((-1, 1, 1))
        x /= constant_op.constant(std_array, dtype=K.dtype(x))
    return x
def _preprocess_symbolic_input(x, data_format, mode):
    """Preprocesses a tensor encoding a batch of images.

    The mean tensor is built on every call. It used to be cached in the
    module-level ``_IMAGENET_MEAN`` the first time any mode ran, so a
    'torch' call followed by a 'caffe' call (or the reverse) reused the
    wrong mean. The 'torch' standard deviation is also reshaped for
    'channels_first' so it divides along the channel axis, not the last one.

    Arguments:
        x: Input tensor, 3D or 4D.
        data_format: Data format of the image tensor.
        mode: One of "caffe", "tf" or "torch".
            - caffe: will convert the images from RGB to BGR,
                then will zero-center each color channel with
                respect to the ImageNet dataset,
                without scaling.
            - tf: will scale pixels between -1 and 1,
                sample-wise.
            - torch: will scale pixels between 0 and 1 and then
                will normalize each channel with respect to the
                ImageNet dataset.
            Any value other than "tf" or "torch" is handled as "caffe".

    Returns:
        Preprocessed tensor.
    """
    if mode == 'tf':
        x /= 127.5
        x -= 1.
        return x

    if mode == 'torch':
        x /= 255.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        # 'RGB'->'BGR': reverse the channel axis.
        if data_format == 'channels_first':
            if K.ndim(x) == 3:
                x = x[::-1, ...]
            else:
                x = x[:, ::-1, ...]
        else:
            x = x[..., ::-1]
        mean = [103.939, 116.779, 123.68]
        std = None

    # Zero-center by mean pixel (the negated mean is added as a bias).
    mean_tensor = constant_op.constant(-np.array(mean), dtype=K.floatx())
    if K.dtype(x) != K.dtype(mean_tensor):
        mean_tensor = math_ops.cast(mean_tensor, K.dtype(x))
    x = K.bias_add(x, mean_tensor, data_format)

    if std is not None:
        std_array = np.array(std)
        if data_format == 'channels_first':
            # (C, 1, 1) broadcasts over both (C, H, W) and (N, C, H, W).
            std_array = std_array.reshape((-1, 1, 1))
        x /= constant_op.constant(std_array, dtype=K.dtype(x))
    return x
def call(self, inputs, **kwargs):
    """Apply the two-layer position-wise transition to ``inputs``.

    The input is flattened to 2D on its last dimension, passed through
    ``weights1``/``biases1`` with ``self.activation``, then through
    ``weights2``/``biases2`` without activation, and reshaped back using
    the static sizes of the last two input dimensions.

    Args:
        inputs: Input tensor; its last two dimensions must be statically
            known because they are used to rebuild the output shape.
        **kwargs: Unused.

    Returns:
        Tensor of shape ``(-1,) + input_shape[-2:]``.
    """
    input_shape = K.int_shape(inputs)
    d_model = input_shape[-1]
    weights = self.transition_weights

    flat_inputs = K.reshape(inputs, (-1, d_model))

    hidden = K.dot(flat_inputs, weights['weights1'])
    hidden = K.bias_add(hidden, weights['biases1'],
                        data_format='channels_last')
    hidden = self.activation(hidden)

    projected = K.dot(hidden, weights['weights2'])
    projected = K.bias_add(projected, weights['biases2'],
                           data_format='channels_last')

    return K.reshape(projected, (-1,) + input_shape[-2:])
def call(self, inputs):
    """Encode ``inputs`` into a sampled, tanh-squashed latent vector.

    A tanh hidden layer feeds two linear heads whose results are stored on
    the layer as ``self.mu`` and ``self.logvar``. A sample drawn by
    ``self.sample_z`` is then passed through a final tanh dense layer.

    Args:
        inputs: Input tensor.

    Returns:
        The transformed latent sample.
    """
    hidden = K.tanh(K.bias_add(K.dot(inputs, self.fc_kernel), self.fc_bias))

    # Stored as attributes so code outside call() can read them.
    self.mu = K.bias_add(K.dot(hidden, self.mu_kernel), self.mu_bias)
    self.logvar = K.bias_add(K.dot(hidden, self.sigma_kernel),
                             self.sigma_bias)

    sampled = self.sample_z(self.mu, self.logvar)
    transformed = K.bias_add(K.dot(sampled, self.trans_kernel),
                             self.trans_bias)
    return K.tanh(transformed)
def _compute_carry_and_output(self, x, h_tm1, c_tm1, b):
    """Compute the LSTM carry state and output gate from split inputs.

    ``self.recurrent_kernel`` is cut into four column blocks of width
    ``self.units`` (input, forget, candidate, output); each block is
    transposed before being multiplied with the matching hidden state, and
    the matching recurrent bias from ``b`` is added.

    Args:
        x: 4-tuple of input projections ``(x_i, x_f, x_c, x_o)``.
        h_tm1: 4-tuple of previous hidden states, one per gate.
        c_tm1: Previous carry state.
        b: 4-tuple of recurrent biases, one per gate.

    Returns:
        A pair ``(c, o)``: the new carry state and the output gate.
    """
    x_i, x_f, x_c, x_o = x
    h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = h_tm1
    b_i2, b_f2, b_c2, b_o2 = b
    units = self.units

    def recurrent_term(hidden, start, stop, bias):
        # Column block [start:stop] of the recurrent kernel, transposed.
        block = K.transpose(self.recurrent_kernel[:, start:stop])
        return K.bias_add(K.dot(hidden, block), bias)

    i = self.recurrent_activation(
        x_i + recurrent_term(h_tm1_i, None, units, b_i2))
    f = self.recurrent_activation(
        x_f + recurrent_term(h_tm1_f, units, units * 2, b_f2))
    candidate = self.activation(
        x_c + recurrent_term(h_tm1_c, units * 2, units * 3, b_c2))
    c = f * c_tm1 + i * candidate
    o = self.recurrent_activation(
        x_o + recurrent_term(h_tm1_o, units * 3, None, b_o2))
    return c, o
def call(self, inputs, **kwargs):
    """Score the projected input against every row of an embedding matrix.

    The main input is flattened on its last dimension, projected (with
    optional bias and dropout), and multiplied by the transposed embedding
    matrix. The scores are optionally divided by the square root of the
    embedding width before ``self.activation`` is applied.

    Args:
        inputs: Pair ``(main_input, embedding_matrix)``.
        **kwargs: ``training`` is read and passed to ``K.in_train_phase``.

    Returns:
        Tensor reshaped to ``(dim0, dim1, emb_input_dim)`` where ``dim0`` and
        ``dim1`` are the first two dynamic dimensions of ``main_input``.
    """
    main_input, embedding_matrix = inputs
    dynamic_shape = K.shape(main_input)
    feature_dim = K.int_shape(main_input)[-1]
    emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)

    flat_input = K.reshape(main_input, (-1, feature_dim))
    projected = K.dot(flat_input, self.embedding_weights['projection'])
    if self.add_biases:
        projected = K.bias_add(projected,
                               self.embedding_weights['biases'],
                               data_format='channels_last')

    if 0 < self.projection_dropout < 1:
        undropped = projected
        projected = K.in_train_phase(
            lambda: K.dropout(undropped, self.projection_dropout),
            undropped,
            training=kwargs.get('training'))

    attention = K.dot(projected, K.transpose(embedding_matrix))
    if self.scaled_attention:
        # Scaled dot-product attention, as described in
        # "Attention is all you need" (https://arxiv.org/abs/1706.03762).
        scale = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
        attention = attention / scale

    output_shape = (dynamic_shape[0], dynamic_shape[1], emb_input_dim)
    return K.reshape(self.activation(attention), output_shape)
def call(self, inputs):
    """Aggregate node features with a masked bilinear self-attention.

    Scores are ``tanh(X W X^T)``, optionally biased, masked with the
    adjacency matrix and softmax-normalised; the output is the
    attention-weighted sum of the original node features.

    Args:
        inputs: Sequence ``(X, A)``: node features (B x N x F) and adjacency
            matrix (B x N x N). ``X`` must have static rank 3.

    Returns:
        The aggregated features, or ``(output, attention)`` when
        ``self.return_attention`` is set.
    """
    node_features, adjacency = inputs[0], inputs[1]
    # Unpacking also enforces that X is rank 3; only N is used below.
    _, num_nodes, _ = node_features.get_shape().as_list()

    projected = K.dot(node_features, self.self_kernel)
    scores = tf.matmul(projected, tf.transpose(node_features, (0, 2, 1)))
    attention = K.reshape(tf.nn.tanh(scores), (-1, num_nodes, num_nodes))
    if self.use_bias:
        attention = K.bias_add(attention, self.bias)

    # Large negative offset where A is 0, applied before the softmax.
    attention = attention + -10e9 * (1.0 - adjacency)
    attention = tf.nn.softmax(attention)

    output = tf.matmul(attention, node_features)
    if self.return_attention:
        return (output, attention)
    return output
def call(self, inputs, training=None):
    """Convolve ``inputs``, optionally with a spectrally normalised kernel.

    When ``self.spectral_normalization`` is set, one power-iteration step
    estimates the largest singular value of the flattened kernel and the
    kernel is divided by it. Unless ``training`` is ``0``/``False``, the
    power-iteration vector ``self.u`` is updated as a side effect.

    The normalised kernel is kept in a local variable. It was previously
    assigned back to ``self.kernel``, which replaced the layer's weight
    attribute with a derived tensor, so a later call would normalise an
    already normalised tensor.

    Args:
        inputs: Input tensor for a rank-1, rank-2 or rank-3 convolution.
        training: ``0``/``False`` disables the update of ``self.u``.

    Returns:
        The convolution output with optional bias and activation applied.
    """
    def _l2normalize(v, eps=1e-12):
        return v / (K.sum(v**2)**0.5 + eps)

    def power_iteration(W, u):
        # A single iteration: u -> v -> refreshed u.
        _v = _l2normalize(K.dot(u, K.transpose(W)))
        _u = _l2normalize(K.dot(_v, W))
        return _u, _v

    kernel = self.kernel
    if self.spectral_normalization:
        W_shape = self.kernel.shape.as_list()
        # Flatten the kernel to 2D, keeping the last dimension.
        W_reshaped = K.reshape(self.kernel, [-1, W_shape[-1]])
        _u, _v = power_iteration(W_reshaped, self.u)

        # sigma = v W u^T approximates the spectral norm.
        sigma = K.dot(_v, W_reshaped)
        sigma = K.dot(sigma, K.transpose(_u))
        W_bar = W_reshaped / sigma

        # Restore the kernel shape; tie the update of u to it when training.
        if training in {0, False}:
            W_bar = K.reshape(W_bar, W_shape)
        else:
            with tf.control_dependencies([self.u.assign(_u)]):
                W_bar = K.reshape(W_bar, W_shape)
        kernel = W_bar

    if self.rank == 1:
        outputs = K.conv1d(inputs,
                           kernel,
                           strides=self.strides[0],
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate[0])
    if self.rank == 2:
        outputs = K.conv2d(inputs,
                           kernel,
                           strides=self.strides,
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate)
    if self.rank == 3:
        outputs = K.conv3d(inputs,
                           kernel,
                           strides=self.strides,
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate)

    if self.use_bias:
        outputs = K.bias_add(outputs, self.bias,
                             data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs, params=None):
    """Run the depthwise convolution, optionally with external weights.

    When ``params`` supplies a kernel under
    ``"<layer name>/depthwise_kernel:0"``, that kernel and the bias under
    ``"<layer name>/bias:0"`` are used. Otherwise the call is delegated to
    the parent implementation.

    The lookup no longer fails for the default ``params=None`` (previously
    a ``TypeError``) or for a mapping without the kernel key (previously a
    ``KeyError``); both cases now take the delegation path.

    Args:
        inputs: Input tensor.
        params: Optional mapping from weight names to tensors.

    Returns:
        The convolution output with optional bias and activation applied.
    """
    kernel_key = self.name + '/depthwise_kernel:0'
    depthwise_kernel = None if params is None else params.get(kernel_key)

    if depthwise_kernel is None:
        # NOTE(review): super(layers.DepthwiseConv2D, self) skips
        # DepthwiseConv2D.call itself and resolves to the next class in the
        # MRO - confirm that this is intended.
        return super(layers.DepthwiseConv2D, self).call(inputs)

    bias = params.get(self.name + '/bias:0')

    outputs = backend.depthwise_conv2d(
        inputs,
        depthwise_kernel,
        strides=self.strides,
        padding=self.padding,
        dilation_rate=self.dilation_rate,
        data_format=self.data_format)

    if self.use_bias:
        outputs = backend.bias_add(
            outputs,
            bias,
            data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def call(self, inputs):
    """Apply a dense transform whose kernel is scaled by ``connections``.

    Args:
        inputs: Input tensor.

    Returns:
        ``activation(inputs . (kernel * connections) + bias)``, with the
        bias and activation steps skipped when they are disabled.
    """
    # Element-wise product with the connection tensor before the matmul.
    effective_kernel = self.kernel * self.connections
    result = K.dot(inputs, effective_kernel)

    if self.use_bias:
        result = K.bias_add(result, self.bias)

    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs):
    """Apply the locally connected 2D operation with the configured backend.

    ``self.implementation`` selects the computation: 1 uses
    ``K.local_conv``, 2 uses ``local_conv_matmul`` with a kernel mask, and
    3 uses ``local_conv_sparse_matmul`` with kernel indices.

    Args:
        inputs: Input tensor.

    Returns:
        The activated output tensor.

    Raises:
        ValueError: If ``self.implementation`` is not 1, 2 or 3.
    """
    mode = self.implementation

    if mode == 1:
        output_size = (self.output_row, self.output_col)
        output = K.local_conv(inputs, self.kernel, self.kernel_size,
                              self.strides, output_size, self.data_format)
    elif mode == 2:
        target_shape = self.compute_output_shape(inputs.shape)
        output = local_conv_matmul(inputs, self.kernel, self.kernel_mask,
                                   target_shape)
    elif mode == 3:
        target_shape = self.compute_output_shape(inputs.shape)
        output = local_conv_sparse_matmul(inputs, self.kernel,
                                          self.kernel_idxs,
                                          self.kernel_shape, target_shape)
    else:
        raise ValueError('Unrecognized implementation mode: %d.' %
                         self.implementation)

    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format=self.data_format)

    # The activation is applied unconditionally here.
    return self.activation(output)
def call(self, inputs):
    """Contract the channel axis of ``inputs`` with ``self.kernel``.

    For 'channels_first' the contraction runs over axis 1 and the result is
    transposed so the new channel axis is back at position 1; for
    'channels_last' it runs over axis 3.

    The Theano check now raises. The exception used to be constructed but
    never raised, so the guard had no effect.

    Args:
        inputs: 4D input tensor laid out according to ``self.data_format``.

    Returns:
        The output tensor with optional bias and activation applied.

    Raises:
        Exception: If the active Keras backend is Theano.
    """
    if K.backend() == "theano":
        raise Exception(
            'This version of DeepCell only works with the tensorflow backend'
        )

    if self.data_format == 'channels_first':
        output = tf.tensordot(inputs, self.kernel, axes=[[1], [0]])
        # tensordot puts the new axis last; move it back to position 1.
        output = tf.transpose(output, perm=[0, 3, 1, 2])
    elif self.data_format == 'channels_last':
        output = tf.tensordot(inputs, self.kernel, axes=[[3], [0]])

    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format=self.data_format)

    if self.activation is not None:
        return self.activation(output)
    return output
def call(self, inputs):
    """Apply the locally connected 1D operation.

    Args:
        inputs: Input tensor.

    Returns:
        The ``K.local_conv1d`` result with optional bias and activation.
    """
    result = K.local_conv1d(inputs, self.kernel, self.kernel_size,
                            self.strides)

    if self.use_bias:
        result = K.bias_add(result, self.bias)

    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs):
    """Run a 1D depthwise convolution through the 2D depthwise backend op.

    A dummy spatial dimension of size 1 is inserted before the real one,
    the 2D depthwise convolution is applied, and the dummy dimension is
    squeezed out again.

    Args:
        inputs: Input tensor laid out according to ``self.data_format``.

    Returns:
        The output tensor with optional bias and activation applied.
    """
    if self.padding == 'causal':
        inputs = array_ops.pad(inputs, self._compute_causal_padding())

    # Index of the first spatial axis in the expanded 4D tensor.
    spatial_start_dim = 1 if self.data_format == 'channels_last' else 2

    # Lift strides, inputs, kernel and dilation to their 2D equivalents.
    strides_2d = self.strides * 2
    inputs_4d = array_ops.expand_dims(inputs, spatial_start_dim)
    kernel_4d = array_ops.expand_dims(self.depthwise_kernel, 0)
    dilation_2d = (1, ) + self.dilation_rate

    outputs = backend.depthwise_conv2d(inputs_4d,
                                       kernel_4d,
                                       strides=strides_2d,
                                       padding=self.padding,
                                       dilation_rate=dilation_2d,
                                       data_format=self.data_format)

    if self.use_bias:
        outputs = backend.bias_add(outputs,
                                   self.bias,
                                   data_format=self.data_format)

    outputs = array_ops.squeeze(outputs, [spatial_start_dim])

    if self.activation is None:
        return outputs
    return self.activation(outputs)
def _call_one_layer(self, inputs, flatten_memory, training, ws):
    """Update the slot memory for one layer and sample a latent vector.

    The flat memory is reshaped into ``num_memory_slots`` slots of width
    ``units``, gated against the attended memory, and flattened again. The
    attention output ``hs`` is mapped to means and pre-softplus scales from
    which a Gaussian sample is drawn.

    Args:
        inputs: Input tensor for this layer.
        flatten_memory: Memory tensor with the slot and unit dimensions
            flattened together.
        training: Forwarded to the dropout-mask helpers.
        ws: Mapping of weights; ``"random_kernel"`` and ``"random_bias"``
            are read here and the whole mapping is passed to the gate and
            attention helpers.

    Returns:
        A tuple ``(zs, mus, sigmas, hs, flatten_next_memory)``.
    """
    input_masks = self.get_dropout_mask_for_cell(
        inputs, training, count=1)
    memory_masks = self.get_recurrent_dropout_mask_for_cell(
        flatten_memory, training, count=1)

    if 0 < self.dropout < 1:
        inputs = inputs * input_masks[0]
    if 0 < self.recurrent_dropout < 1:
        flatten_memory = flatten_memory * memory_masks[0]

    slots = self.num_memory_slots
    memory = array_ops.reshape(
        flatten_memory, shape=[-1, slots, self.units])

    input_gate, forget_gate = self._input_and_forget_gates(
        inputs, memory, ws)
    hs, new_memory = self._attend_over_memory(inputs, memory, ws)

    next_memory = input_gate * new_memory + forget_gate * memory
    flatten_next_memory = array_ops.reshape(
        next_memory, shape=[-1, slots * self.units])

    # One projection yields both halves: means, then raw scales.
    stats = K.bias_add(K.dot(hs, ws["random_kernel"]), ws["random_bias"])
    mus, log_sigmas = array_ops.split(stats, 2, axis=-1)

    # log(1 + exp(.)) keeps the scale positive.
    sigmas = K.log(1.0 + K.exp(log_sigmas + self.sigma_bias))
    noise = K.random_normal(shape=K.shape(mus))
    zs = noise * sigmas + mus

    return zs, mus, sigmas, hs, flatten_next_memory
def call(self, inputs, training=None):
    """Apply a dense transform with a spectrally normalised kernel.

    One power-iteration step estimates the largest singular value of the
    flattened kernel, and the kernel is divided by that estimate. Unless
    ``training`` is ``0``/``False``, ``self.u`` is updated as a side effect.

    Args:
        inputs: Input tensor.
        training: ``0``/``False`` disables the update of ``self.u``.

    Returns:
        The dense output with optional bias and activation applied.
    """
    def _unit_norm(vec, eps=1e-12):
        return vec / (K.sum(vec ** 2) ** 0.5 + eps)

    kernel_shape = self.kernel.shape.as_list()
    # Flatten to 2D, keeping the last dimension.
    flat_kernel = K.reshape(self.kernel, [-1, kernel_shape[-1]])

    # Single power-iteration step: u -> v -> refreshed u.
    v_hat = _unit_norm(K.dot(self.u, K.transpose(flat_kernel)))
    u_hat = _unit_norm(K.dot(v_hat, flat_kernel))

    # sigma = v W u^T approximates the spectral norm.
    sigma = K.dot(K.dot(v_hat, flat_kernel), K.transpose(u_hat))
    normalised = flat_kernel / sigma

    if training in {0, False}:
        normalised = K.reshape(normalised, kernel_shape)
    else:
        # Tie the update of u to the reshaped kernel.
        with tf.control_dependencies([self.u.assign(u_hat)]):
            normalised = K.reshape(normalised, kernel_shape)

    result = K.dot(inputs, normalised)
    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format='channels_last')

    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs, prev_states):
    """Compute one step of the reservoir cell.

    Fixes relative to the previous revision, which did not parse:
      * the missing comma in the recurrent ``K.bias_add`` call is restored
        and its arguments now name the two tensors computed just above it
        (``kern_output_1``/``kern_output_2`` were never defined);
      * the readout kernel is read from ``self.cell.kern_3``, matching
        ``kern_1``/``kern_2`` (the bare name ``kern_3`` was undefined).

    NOTE(review): the ``inputs`` and ``prev_states`` arguments are unused;
    the body reads ``self.inputs`` and ``self.prev_states`` instead. This is
    left untouched - confirm which source is intended.

    Args:
        inputs: Unused (see note above).
        prev_states: Unused (see note above).

    Returns:
        A pair ``(output, [output, reservoir_output])``.
    """
    output_fb = self.prev_states[0]
    recur_output = self.prev_states[1]

    if self.cell.use_clock is False:
        projected = K.dot(self.inputs * self.cell.in_dropout_mask,
                          self.cell.kern_1)
    else:
        projected = K.dot(
            self.inputs * self.cell.in_dropout_mask * self.cell.clock_kernel,
            self.cell.kern_1)

    if self.cell.use_out_fb is not False:
        feedback = K.dot(
            _pad(self.cell.out_fb_kern * output_fb,
                 (self.cell.in_row, self.cell.in_col)),
            self.inputs)
        projected = K.bias_add(feedback, projected)

    if self.cell.use_recur is True:
        reservoir_output_1 = recur_output * self.cell.recur_dropout_mask
        reservoir_output_2 = K.dot(projected, self.cell.kern_2)
        reservoir_output = K.bias_add(reservoir_output_1, reservoir_output_2)
    else:
        reservoir_output = K.dot(projected, self.cell.kern_2)

    output = K.dot(reservoir_output, self.cell.kern_3)
    return output, [output, reservoir_output]
def call(self, inputs):
    """Run multi-head graph attention over a single graph.

    For every head the node features are projected, pairwise attention
    logits are formed from a "self" and a "neighbour" term, passed through
    LeakyReLU, masked with the adjacency matrix and softmax-normalised.
    Dropout is applied to both the coefficients and the projected features
    before they are combined.

    Args:
        inputs: Sequence ``(X, A)``: node features (N x F) and adjacency
            matrix (N x N).

    Returns:
        Concatenated head outputs (N x KF') when ``attn_heads_reduction``
        is "concat", with the activation applied per head; otherwise the
        activated mean over heads (N x F').
    """
    X = inputs[0]
    A = inputs[1]
    concat_heads = self.attn_heads_reduction == "concat"

    outputs = []
    for head in range(self.attn_heads):
        kernel = self.kernels[head]            # W in the paper (F x F')
        attn_kernel = self.attn_kernels[head]  # a in the paper (2F' x 1)

        features = K.dot(X, kernel)            # (N x F')

        # a^T [Wh_i || Wh_j] splits into a_1^T Wh_i + a_2^T Wh_j.
        self_term = K.dot(features, attn_kernel[0])       # (N x 1)
        neighbour_term = K.dot(features, attn_kernel[1])  # (N x 1)

        # (N x 1) + (1 x N) broadcasts to all pairs (N x N).
        scores = self_term + K.transpose(neighbour_term)
        scores = LeakyReLU(alpha=0.2)(scores)

        # Mask values before the softmax (Vaswani et al., 2017).
        scores = scores + -10e9 * (1.0 - A)
        coefficients = K.softmax(scores)       # (N x N)

        dropped_coefficients = Dropout(self.dropout_rate)(coefficients)
        dropped_features = Dropout(self.dropout_rate)(features)

        # Linear combination with the neighbours' features.
        node_features = K.dot(dropped_coefficients, dropped_features)
        if self.use_bias:
            node_features = K.bias_add(node_features, self.biases[head])

        if concat_heads:
            # With "concat" the activation is applied per head (Eq. 5).
            node_features = self.activation(node_features)

        outputs.append(node_features)

    if concat_heads:
        return K.concatenate(outputs)          # (N x KF')

    averaged = K.mean(K.stack(outputs), axis=0)  # (N x F')
    return self.activation(averaged)
def call(self, inputs):
    """Run multi-head bilinear graph attention over a batch of graphs.

    Per head, the attention scores are
    ``tanh((X W) A_k (X W_n)^T)`` with dropout on both projections, masked
    by the adjacency matrix and softmax-normalised. The coefficients (after
    dropout) aggregate the projected node features.

    Args:
        inputs: Sequence ``(X, A)``: node features (B x N x F) and adjacency
            matrix (B x N x N). ``X`` must have static rank 3.

    Returns:
        The combined head outputs: concatenated on the last axis for
        "concat", otherwise the mean over heads. ``self.activation`` is
        applied to the combined result in both cases. When
        ``self.return_attention`` is set, a pair ``(output, attentions)``
        with the per-head attention stacked on axis 1.
    """
    X = inputs[0]
    A = inputs[1]
    # Unpacking also enforces that X is rank 3; only N is used below.
    _, num_nodes, _ = X.get_shape().as_list()

    outputs = []
    attentions = []
    for head in range(self.attn_heads):
        # Projected node features, W in the paper (F x F').
        features = K.dot(X, self.kernels[head])
        dropped_features = Dropout(self.dropout_rate)(features)

        neighbor_kernel = self.neighbor_kernels[head]
        attn_kernel = self.attn_kernels[head]

        neighbor_features = K.dot(X, neighbor_kernel)
        dropped_neighbors = Dropout(self.dropout_rate)(neighbor_features)

        scores = tf.matmul(K.dot(dropped_features, attn_kernel),
                           tf.transpose(dropped_neighbors, (0, 2, 1)))
        attention = K.reshape(tf.nn.tanh(scores),
                              (-1, num_nodes, num_nodes))

        # Large negative offset where A is 0, applied before the softmax.
        attention = attention + -10e9 * (1.0 - A)
        attention = tf.nn.softmax(attention)

        dropped_attention = Dropout(self.dropout_rate)(attention)
        node_features = tf.matmul(dropped_attention, dropped_features)

        if self.use_bias:
            node_features = K.bias_add(node_features, self.biases[head])

        if self.return_attention:
            # The pre-dropout coefficients are the ones reported.
            attentions.append(attention)

        outputs.append(node_features)

    if self.attn_heads_reduction == "concat":
        output = K.concatenate(outputs, axis=-1)     # (B x N x KF')
    else:
        output = K.mean(K.stack(outputs), axis=0)    # (B x N x F')

    output = self.activation(output)

    if self.return_attention:
        return (output, K.stack(attentions, axis=1))
    return output
def call(self, inputs):
    """Apply a dense transform using the binarised kernel.

    Args:
        inputs: Input tensor.

    Returns:
        ``inputs . binarize(kernel, H)`` with optional bias and activation.
    """
    result = K.dot(inputs, binarize(self.kernel, H=self.H))

    if self.use_bias:
        result = K.bias_add(result, self.bias)

    if self.activation is None:
        return result
    return self.activation(result)
def call(self, inputs):
    """Apply a transposed 2D convolution with a fused (shift-summed) kernel.

    The kernel is scaled by ``self.runtime_coeff``, its last two axes are
    swapped, it is zero-padded by one on both spatial sides, and four
    one-pixel-shifted views of the padded kernel are summed. The fused
    kernel is then used for ``K.conv2d_transpose``.

    Args:
        inputs: 4D input tensor laid out according to ``self.data_format``.

    Returns:
        The output tensor with optional bias and activation applied.
    """
    dynamic_shape = K.shape(inputs)
    batch_size = dynamic_shape[0]

    channels_first = self.data_format == 'channels_first'
    h_axis, w_axis = (2, 3) if channels_first else (1, 2)
    height = dynamic_shape[h_axis]
    width = dynamic_shape[w_axis]

    kernel_h, kernel_w = self.kernel_size
    stride_h, stride_w = self.strides

    if self.output_padding is None:
        out_pad_h = out_pad_w = None
    else:
        out_pad_h, out_pad_w = self.output_padding

    # Infer the dynamic output shape.
    out_height = conv_utils.deconv_output_length(
        height,
        kernel_h,
        self.padding,
        output_padding=out_pad_h,
        stride=stride_h,
        dilation=self.dilation_rate[0])
    out_width = conv_utils.deconv_output_length(
        width,
        kernel_w,
        self.padding,
        output_padding=out_pad_w,
        stride=stride_w,
        dilation=self.dilation_rate[1])

    if channels_first:
        output_shape = (batch_size, self.filters, out_height, out_width)
    else:
        output_shape = (batch_size, out_height, out_width, self.filters)

    # Scale, swap the two channel axes, then pad the spatial axes by one.
    padded = Ke.transpose(self.kernel * self.runtime_coeff, [0, 1, 3, 2])
    padded = Ke.pad(padded, [[1, 1], [1, 1], [0, 0], [0, 0]])

    # Sum of the four one-pixel shifts of the padded kernel.
    shifted_views = [
        padded[1:, 1:],
        padded[:-1, 1:],
        padded[1:, :-1],
        padded[:-1, :-1],
    ]
    fused_kernel = Ke.add_n(shifted_views)

    outputs = K.conv2d_transpose(inputs,
                                 fused_kernel,
                                 output_shape,
                                 self.strides,
                                 padding=self.padding,
                                 data_format=self.data_format,
                                 dilation_rate=self.dilation_rate)

    if self.use_bias:
        outputs = K.bias_add(outputs,
                             self.bias,
                             data_format=self.data_format)

    if self.activation is None:
        return outputs
    return self.activation(outputs)
def call(self, inputs):
    """Apply the locally connected 2D operation via ``K.local_conv``.

    Args:
        inputs: Input tensor.

    Returns:
        The activated output, with the bias added first when enabled.
    """
    output_size = (self.output_row, self.output_col)
    result = K.local_conv(inputs, self.kernel, self.kernel_size,
                          self.strides, output_size, self.data_format)

    if self.use_bias:
        result = K.bias_add(result, self.bias, data_format=self.data_format)

    # The activation is applied unconditionally here.
    return self.activation(result)
def input_conv(self, x, w, b=None, padding='valid'):
    """Convolve ``x`` with ``w`` using the layer's conv settings.

    Args:
        x: Input tensor.
        w: Convolution kernel.
        b: Optional bias; skipped when ``None``.
        padding: Padding mode passed to the convolution.

    Returns:
        The convolution result, with the bias added when one is given.
    """
    result = backend.conv2d(x,
                            w,
                            strides=self.strides,
                            padding=padding,
                            data_format=self.data_format,
                            dilation_rate=self.dilation_rate)
    if b is None:
        return result
    return backend.bias_add(result, b, data_format=self.data_format)
def input_conv_u(self, x, w, b=None, padding='same'):
    """Convolve ``x`` with ``w``, always in 'channels_last' layout.

    Args:
        x: Input tensor.
        w: Convolution kernel.
        b: Optional bias; skipped when ``None``.
        padding: Padding mode passed to the convolution.

    Returns:
        The convolution result, with the bias added when one is given.
    """
    # The layout is fixed here; self.data_format is not consulted.
    layout = 'channels_last'
    result = K.conv2d(x,
                      w,
                      strides=self.strides,
                      padding=padding,
                      data_format=layout,
                      dilation_rate=self.dilation_rate)
    if b is None:
        return result
    return K.bias_add(result, b, data_format=layout)
def input_conv(self, x, w, b=None, padding='valid'):
    """Convolve ``x`` with ``w`` using the layer's conv settings.

    Args:
        x: Input tensor.
        w: Convolution kernel.
        b: Optional bias; skipped when ``None``.
        padding: Padding mode passed to the convolution.

    Returns:
        The convolution result, with the bias added when one is given.
    """
    result = K.conv2d(x,
                      w,
                      strides=self.strides,
                      padding=padding,
                      data_format=self.data_format,
                      dilation_rate=self.dilation_rate)
    if b is None:
        return result
    return K.bias_add(result, b, data_format=self.data_format)
def step(cell_inputs, cell_states):
    """Compute one GRU step for the Keras RNN backend.

    ``kernel``, ``recurrent_kernel``, ``input_bias``, ``recurrent_bias``,
    ``recurrent_activation`` and ``activation`` come from the enclosing
    scope.

    Args:
        cell_inputs: Input tensor for the current timestep.
        cell_states: Sequence whose first element is the previous hidden
            state.

    Returns:
        A pair ``(h, [h])`` with the new hidden state as output and state.
    """
    prev_h = cell_states[0]

    # One matmul per source, then cut into update / reset / candidate parts.
    input_proj = K.bias_add(K.dot(cell_inputs, kernel), input_bias)
    in_z, in_r, in_h = array_ops.split(input_proj, 3, axis=1)

    hidden_proj = K.bias_add(K.dot(prev_h, recurrent_kernel), recurrent_bias)
    rec_z, rec_r, rec_h = array_ops.split(hidden_proj, 3, axis=1)

    update_gate = recurrent_activation(in_z + rec_z)
    reset_gate = recurrent_activation(in_r + rec_r)
    candidate = activation(in_h + reset_gate * rec_h)

    # The update gate blends the previous state with the candidate.
    new_h = update_gate * prev_h + (1 - update_gate) * candidate
    return new_h, [new_h]
def step(cell_inputs, cell_states):
    """Compute one LSTM step for the Keras RNN backend.

    ``kernel``, ``recurrent_kernel``, ``bias``, ``recurrent_activation`` and
    ``activation`` come from the enclosing scope.

    Args:
        cell_inputs: Input tensor for the current timestep.
        cell_states: Sequence ``(h_tm1, c_tm1)``: previous memory and carry
            state.

    Returns:
        A pair ``(h, [h, c])``.
    """
    prev_h, prev_c = cell_states[0], cell_states[1]

    # Fused pre-activations for all four gates.
    fused = K.dot(cell_inputs, kernel) + K.dot(prev_h, recurrent_kernel)
    fused = K.bias_add(fused, bias)
    pre_i, pre_f, pre_c, pre_o = array_ops.split(fused, 4, axis=1)

    input_gate = recurrent_activation(pre_i)
    forget_gate = recurrent_activation(pre_f)
    new_c = forget_gate * prev_c + input_gate * activation(pre_c)
    output_gate = recurrent_activation(pre_o)

    new_h = output_gate * activation(new_c)
    return new_h, [new_h, new_c]
def call(self, inputs):
    """Apply the locally connected 1D operation with the configured backend.

    ``self.implementation`` selects the computation: 1 uses
    ``K.local_conv`` and 2 uses ``local_conv_matmul`` with a kernel mask.

    Args:
        inputs: Input tensor.

    Returns:
        The activated output tensor.

    Raises:
        ValueError: If ``self.implementation`` is neither 1 nor 2.
    """
    mode = self.implementation

    if mode == 1:
        output_size = (self.output_length,)
        output = K.local_conv(inputs, self.kernel, self.kernel_size,
                              self.strides, output_size, self.data_format)
    elif mode == 2:
        target_shape = self.compute_output_shape(inputs.shape)
        output = local_conv_matmul(inputs, self.kernel, self.kernel_mask,
                                   target_shape)
    else:
        raise ValueError('Unrecognized implementation mode: %d.' %
                         self.implementation)

    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format=self.data_format)

    # The activation is applied unconditionally here.
    return self.activation(output)
def step(cell_inputs, cell_states):
    """Compute one LSTM step using only the second half of ``bias``.

    ``kernel``, ``recurrent_kernel``, ``bias``, ``units``,
    ``recurrent_activation`` and ``activation`` come from the enclosing
    scope.

    Args:
        cell_inputs: Input tensor for the current timestep.
        cell_states: Sequence ``(h_tm1, c_tm1)``: previous memory and carry
            state.

    Returns:
        A pair ``(h, [h, c])``.
    """
    prev_h, prev_c = cell_states[0], cell_states[1]

    # Only the second half of the bias weights is applied.
    _, real_bias = array_ops.split(bias, 2)

    fused = K.dot(cell_inputs, kernel) + K.dot(prev_h, recurrent_kernel)
    fused = K.bias_add(fused, real_bias)

    # Column blocks of width `units`: input, forget, candidate, output.
    pre_i = fused[:, :units]
    pre_f = fused[:, units:2 * units]
    pre_c = fused[:, 2 * units:3 * units]
    pre_o = fused[:, 3 * units:]

    input_gate = recurrent_activation(pre_i)
    forget_gate = recurrent_activation(pre_f)
    new_c = forget_gate * prev_c + input_gate * activation(pre_c)
    output_gate = recurrent_activation(pre_o)

    new_h = output_gate * activation(new_c)
    return new_h, [new_h, new_c]