def dot_product(x, kernel):
    """Backend-agnostic dot product of ``x`` with ``kernel``.

    On the TensorFlow backend the kernel receives an extra trailing axis
    before ``K.dot`` is applied, and that trailing axis is squeezed out of
    the result. On every other backend ``K.dot`` is called directly.

    Args:
        x: input tensor.
        kernel: weights tensor.

    Returns:
        The backend dot product of ``x`` and ``kernel``.
    """
    if K.backend() != 'tensorflow':
        return K.dot(x, kernel)

    # TensorFlow path: add a trailing axis to the kernel, then drop it again.
    product = K.dot(x, K.expand_dims(kernel))
    return K.squeeze(product, axis=-1)
def _call_multiplicative_emission(self, inputs):
    """Compute multiplicative attention scores.

    Implements e_{t, t'} = x_t^T W_a x_{t'} + b_a. The bias term
    ``self.ba[0]`` is added only when ``self.use_attention_bias`` is truthy.

    Args:
        inputs: 3-D tensor (its last two axes are swapped for the product).

    Returns:
        The tensor of emission scores ``e``.
    """
    projected = K.dot(inputs, self.Wa)
    # Swap the last two axes so the batched product pairs each t with each t'.
    swapped = K.permute_dimensions(inputs, (0, 2, 1))
    e = K.batch_dot(projected, swapped)
    if self.use_attention_bias:
        e += self.ba[0]
    return e
def customLoss(yTrue, yPred):
    """Weighted cross-entropy between ``yTrue`` and ``yPred``.

    The predictions are rescaled to sum to one along the last axis, clipped
    to ``[epsilon, 1 - epsilon]`` and log-transformed. Each column of the
    log-predictions is then multiplied (via ``K.dot``) with the matching
    entry of ``weights``, which is read from the enclosing scope.
    NOTE(review): presumably ``weights`` is a 1-D per-class vector - confirm
    against the caller.

    Args:
        yTrue: target tensor.
        yPred: prediction tensor.

    Returns:
        Negative sum over the last axis of ``yTrue * weighted_log_predictions``.
    """
    # Rescale the predictions so they sum to 1 along the last axis.
    yPred /= tf.reduce_sum(
        yPred,
        reduction_indices=len(yPred.get_shape()) - 1,
        keep_dims=True,
    )

    # Manual cross-entropy: clip before taking the log.
    eps = K._to_tensor(tf.keras.backend.epsilon(), yPred.dtype.base_dtype)
    log_pred = tf.log(tf.clip_by_value(yPred, eps, 1. - eps))

    # Apply the weights, one prediction column at a time.
    mask = K.cast(K.expand_dims(weights, axis=-1), dtype='float32')
    pred_shape = log_pred.get_shape()
    weighted_columns = []
    for col in range(pred_shape[1]):
        col_weight = K.cast(K.expand_dims(mask[col], axis=-1), dtype='float32')
        col_log_pred = K.cast(
            K.expand_dims(log_pred[:, col], axis=-1), dtype='float32')
        weighted_columns.append(K.dot(col_log_pred, col_weight))

    stacked = tf.stack(weighted_columns, axis=1, name='stack')
    output = tf.reshape(stacked, [-1, pred_shape[1]])
    return -tf.reduce_sum(
        yTrue * output, reduction_indices=len(output.get_shape()) - 1)
def get_initial_state(self, x):
    """Build the list of all-zero initial states for the recurrent layer.

    A zeros tensor shaped like ``x`` is summed down to
    ``(samples, nb_channels)`` and multiplied by an all-zero "reducer"
    matrix, giving a zero tensor of the required flattened size per state
    while keeping the leading samples dimension tied to ``x``.

    States are emitted for 'r', 'c' and 'e' at every layer
    (``self.nb_layers`` each). When ``self.extrap_start_time`` is not None,
    one 'ahat' state (layer 0 only) and a trailing scalar integer variable
    holding the current timestep are appended.

    Args:
        x: input tensor, (samples, timesteps) + image_shape.

    Returns:
        list of initial state tensors, each reshaped to
        ``(-1, stack_size, nb_row, nb_col)`` for 'channels_first' or
        ``(-1, nb_row, nb_col, stack_size)`` otherwise, optionally followed
        by the timestep variable.
    """
    input_shape = self.input_spec[0].shape
    init_nb_row = input_shape[self.row_axis]
    init_nb_col = input_shape[self.column_axis]

    base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
    non_channel_axis = -1 if self.data_format == 'channels_first' else -2
    # Sum away the two spatial axes, then the time axis.
    for _ in range(2):
        base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
    base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)

    initial_states = []
    states_to_pass = ['r', 'c', 'e']
    nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
    if self.extrap_start_time is not None:
        # pass prediction in states so can use as actual for t+1 when extrapolating
        states_to_pass.append('ahat')
        nlayers_to_pass['ahat'] = 1

    for u in states_to_pass:
        for layer in range(nlayers_to_pass[u]):
            # Each successive layer halves the spatial resolution.
            ds_factor = 2 ** layer
            nb_row = init_nb_row // ds_factor
            nb_col = init_nb_col // ds_factor
            if u in ['r', 'c']:
                stack_size = self.R_stack_sizes[layer]
            elif u == 'e':
                stack_size = 2 * self.stack_sizes[layer]
            elif u == 'ahat':
                stack_size = self.stack_sizes[layer]
            output_size = stack_size * nb_row * nb_col  # flattened size

            reducer = K.zeros(
                (input_shape[self.channel_axis], output_size))  # (nb_channels, output_size)
            initial_state = K.dot(base_initial_state, reducer)  # (samples, output_size)
            if self.data_format == 'channels_first':
                output_shp = (-1, stack_size, nb_row, nb_col)
            else:
                output_shp = (-1, nb_row, nb_col, stack_size)
            initial_state = K.reshape(initial_state, output_shp)
            initial_states += [initial_state]

    # Use the public backend accessor, consistent with the check further down,
    # instead of reaching into the private ``K._BACKEND`` attribute.
    if K.backend() == 'theano':
        from theano import tensor as T
        # There is a known issue in the Theano scan op when dealing with
        # inputs whose shape is 1 along a dimension. In our case, this is a
        # problem when training on grayscale images, and the below line
        # fixes it.
        initial_states = [
            T.unbroadcast(init_state, 0, 1) for init_state in initial_states
        ]

    if self.extrap_start_time is not None:
        # the last state will correspond to the current timestep
        initial_states += [
            K.variable(0, int if K.backend() != 'tensorflow' else 'int32')
        ]
    return initial_states
def gram_matrix(x):
    """Return the Gram matrix of a 3-D tensor, with features shifted by -1.

    When ``K.image_dim_ordering()`` is not ``'th'`` the axes are first
    permuted with ``(2, 0, 1)``. The tensor is then batch-flattened, 1 is
    subtracted from every feature, and the result is multiplied by its own
    transpose.

    Args:
        x: tensor with exactly three dimensions (asserted).

    Returns:
        The dot product of the shifted feature matrix with its transpose.
    """
    assert K.ndim(x) == 3
    theano_ordering = K.image_dim_ordering() == 'th'
    if not theano_ordering:
        x = K.permute_dimensions(x, (2, 0, 1))
    features = K.batch_flatten(x)
    return K.dot(features - 1, K.transpose(features - 1))
def _call_additive_emission(self, inputs):
    """Compute additive attention scores.

    Implements
        h_{t, t'} = tanh(x_t^T W_t + x_{t'}^T W_x + b_h)
        e_{t, t'} = W_a h_{t, t'} + b_a
    where ``b_h`` is used only if ``self.use_additive_bias`` is truthy and
    ``b_a`` only if ``self.use_attention_bias`` is truthy.

    Args:
        inputs: input tensor; its first two dynamic dimensions are taken as
            batch size and input length.

    Returns:
        Scores reshaped to ``(batch_size, input_len, input_len)``.
    """
    dynamic_shape = K.shape(inputs)
    batch_size = dynamic_shape[0]
    input_len = dynamic_shape[1]

    # Expanding on different axes lets the sum broadcast over every (t, t') pair.
    q = K.expand_dims(K.dot(inputs, self.Wt), 2)
    k = K.expand_dims(K.dot(inputs, self.Wx), 1)
    pre_activation = q + k
    if self.use_additive_bias:
        pre_activation = pre_activation + self.bh
    h = K.tanh(pre_activation)

    e = K.dot(h, self.Wa)
    if self.use_attention_bias:
        e = e + self.ba
    return K.reshape(e, (batch_size, input_len, input_len))
def call(self, inputs):
    """Apply the layer to ``inputs``.

    Steps, in order: dot product with ``self.kernel``; element-wise
    multiplication by ``self.scaler`` when it is not None; bias addition
    when ``self.use_bias`` is truthy; ``self.activation`` when it is not
    None.

    Args:
        inputs: input tensor.

    Returns:
        The transformed tensor.
    """
    result = K.dot(inputs, self.kernel)
    if self.scaler is not None:
        result = tf.multiply(result, self.scaler)
    if self.use_bias:
        result = K.bias_add(result, self.bias)
    if self.activation is None:
        return result
    return self.activation(result)
def eval_loss(y_true, y_pred):
    """Return the element at row 0, column 7 of ``y_pred``.

    Before returning, each column of ``y_pred`` is multiplied (via
    ``K.dot``) with the matching entry of ``weights`` (read from the
    enclosing scope) and the columns are stacked and reshaped.
    NOTE(review): that weighted tensor is not used in the return value and
    ``y_true`` is never read - confirm whether this is intentional.

    Args:
        y_true: target tensor (unused in the result).
        y_pred: prediction tensor.

    Returns:
        ``y_pred[0, 7]``.
    """
    # Multiply the predictions by the weights, one column at a time.
    mask = K.cast(K.expand_dims(weights, axis=-1), dtype='float32')
    pred_shape = y_pred.get_shape()
    weighted_columns = []
    for col in range(pred_shape[1]):
        col_weight = K.cast(K.expand_dims(mask[col], axis=-1), dtype='float32')
        col_pred = K.cast(
            K.expand_dims(y_pred[:, col], axis=-1), dtype='float32')
        weighted_columns.append(K.dot(col_pred, col_weight))

    stacked = tf.stack(weighted_columns, axis=1, name='stack')
    # Built for parity with the original code; not part of the return value.
    weighted = tf.reshape(stacked, [-1, pred_shape[1]])
    return y_pred[0, 7]
def call(self, inputs, states):
    """Run one recurrent step.

    The new output is ``inputs . kernel + previous_output . recurrent_kernel``
    where the previous output is ``states[0]``. The projected input is
    printed with the label ``'hidden'`` as a side effect.

    Args:
        inputs: input tensor for this step.
        states: sequence whose first element is the previous output.

    Returns:
        Tuple ``(output, [output])``: the step output and the new state list.
    """
    previous = states[0]
    h = K.dot(inputs, self.kernel)
    print('hidden', h)
    recurrent_term = K.dot(previous, self.recurrent_kernel)
    new_output = h + recurrent_term
    return new_output, [new_output]
def gram_matrix(x):
    """Return the Gram matrix of ``x``.

    The axes of ``x`` are permuted with ``(2, 0, 1)``, the result is
    batch-flattened, and the flattened features are multiplied by their own
    transpose.

    Args:
        x: 3-D tensor (the permutation pattern has three entries).

    Returns:
        The dot product of the feature matrix with its transpose.
    """
    reordered = KTF.permute_dimensions(x, (2, 0, 1))
    features = KTF.batch_flatten(reordered)
    return KTF.dot(features, KTF.transpose(features))
def call(self, x):
    """Project ``x`` through ``self.kernel`` using the backend dot product.

    Args:
        x: input tensor.

    Returns:
        ``K.dot(x, self.kernel)``.
    """
    projected = K.dot(x, self.kernel)
    return projected