def call(self, inputs):
    encoder_outputs, decoder_outputs = inputs
    U_a_dot_h = K.dot(encoder_outputs, self.U_a)

    def _energy_step(inputs, states):
        """Compute softmax(va . tanh(S . Wa + hj . Ua)) for one decoder step."""
        # Computing S . Wa where S = [s0, s1, ..., si]
        W_a_dot_s = K.dot(inputs, self.W_a)
        W_a_dot_s = K.expand_dims(W_a_dot_s, 1)
        # tanh(S . Wa + hj . Ua)
        Ws_plus_Uh = K.tanh(U_a_dot_h + W_a_dot_s)
        # softmax(va . tanh(S . Wa + hj . Ua))
        e_i = K.dot(Ws_plus_Uh, self.V_a)
        e_i = K.squeeze(e_i, axis=-1)
        e_i = K.softmax(e_i)
        return e_i, [e_i]

    def _context_step(inputs, states):
        c_i = K.sum(encoder_outputs * K.expand_dims(inputs, -1), axis=1)
        return c_i, [c_i]

    def _initial_state(inputs, size):
        return K.zeros((K.shape(inputs)[0], size))

    state_e = _initial_state(encoder_outputs, self.time_steps)
    last_out, e_outputs, _ = K.rnn(_energy_step, decoder_outputs, [state_e])
    state_c = _initial_state(encoder_outputs, self.encoder_dim)
    last_out, c_outputs, _ = K.rnn(_context_step, e_outputs, [state_c])
    return [c_outputs, e_outputs]

def call(self, inputs):
    s = K.zeros((K.shape(inputs)[0], self.units))
    init_states = [s, s, s, s, s, s, s, s]
    outputs = K.rnn(self.step_do, inputs, init_states)[1]

    init_states2 = [s, s, s, s, s, s, s, s]
    input2 = K.reverse(inputs, axes=1)
    outputs2 = K.rnn(self.step_do, input2, init_states2)[1]
    outputs2 = K.reverse(outputs2, axes=1)
    outputs = K.concatenate([outputs, outputs2])

    if self.intra_attention:
        self.attention1_1 = self.attention1[:2 * self.units, :]
        self.attention1_2 = self.attention1[2 * self.units:, :]
        for i in range(inputs.shape[1]):
            step_in = inputs[:, i, :]
            h = outputs[:, i, :]
            h_atten = K.relu(K.dot(h, self.attention1_1) + 0.0 * self.biase1)
            h_atten = K.dot(h_atten, self.attention2)
            h_b = K.relu(K.dot(step_in, self.attention1_2) + 0.0 * self.biase2)
            h_b = K.dot(h_b, self.attention2_2)
            h_atten = K.tanh(1 * h_atten * h + 1 * h_b)
            if i == 0:
                output_atten = h_atten
            else:
                output_atten = K.concatenate([output_atten, h_atten])
        outputs = Reshape((inputs.shape[1], 2 * self.units))(output_atten)
    return outputs

def call(self, x, mask=None):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    input_shape = self.input_spec[0].shape
    if K._BACKEND == 'tensorflow':
        if not input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences.\n'
                            'If your first layer is an Embedding, '
                            'make sure to pass it an "input_length" '
                            'argument. Otherwise, make sure '
                            'the first layer has '
                            'an "input_shape" or "batch_input_shape" '
                            'argument, including the time axis. '
                            'Found input shape at layer ' + self.name +
                            ': ' + str(input_shape))
    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.preprocess_input(x)

    last_output, outputs_0, states = K.rnn(self.step, preprocessed_input,
                                           initial_states,
                                           go_backwards=self.go_backwards,
                                           mask=mask,
                                           constants=constants,
                                           unroll=self.unroll,
                                           input_length=input_shape[1])

    timer = K.zeros((2, self.output_length, 2))
    last_output, outputs, states = K.rnn(self.dream, timer, states,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         input_length=self.output_length,
                                         unroll=self.unroll)

    last_output = K.dot(last_output, self.V) + self.ext_b
    outputs = K.concatenate([outputs_0, outputs], axis=1)
    outputs = K.dot(K.reshape(outputs, (-1, self.output_dim)), self.V) + self.ext_b

    ishape = K.shape(x)
    if K._BACKEND == "tensorflow":
        ishape = x.get_shape().as_list()
    outputs = K.reshape(outputs, (-1, ishape[1] + self.output_length, ishape[2]))

    if self.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.states[i], states[i]))

    if self.return_sequences:
        return outputs
    else:
        return last_output

def call(self, inputs):
    s = K.zeros((K.shape(inputs)[0], self.units))
    init_states = [s, s]
    outputs = K.rnn(self.step_do, inputs, init_states)[1]

    init_states2 = [s, s]
    input2 = K.reverse(inputs, axes=1)
    outputs2 = K.rnn(self.step_do2, input2, init_states2)[1]
    outputs2 = K.reverse(outputs2, axes=1)

    outputs = K.concatenate([outputs, outputs2])
    return outputs

def call(self, x, mask=None):
    X = K.repeat(x, self.output_length)
    input_shape = list(self.input_spec[0].shape)
    input_shape = input_shape[:1] + [self.output_length] + input_shape[1:]
    self.input_spec = [InputSpec(shape=tuple(input_shape))]

    if self.stateful or self.state_input or len(self.state_outputs) > 0:
        initial_states = self.states[:]
    else:
        initial_states = self.get_initial_states(X)

    constants = self.get_constants(X)
    y_0 = K.permute_dimensions(X, (1, 0, 2))[0, :, :]
    initial_states += [y_0]

    last_output, outputs, states = K.rnn(self.step, X, initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask, constants=constants,
                                         unroll=self.unroll,
                                         input_length=self.output_length)
    if self.stateful and not self.state_input:
        self.updates = []
        for i in range(2):
            self.updates.append((self.states[i], states[i]))
    self.states_to_transfer = states

    input_shape.pop(1)
    self.input_spec = [InputSpec(shape=input_shape)]
    return outputs

def call(self, x, mask=None):
    # Much of this is copied from the Keras 1.0(ish) version of TimeDistributed, though we've
    # modified it quite a bit, to fix the problems mentioned in the docstring and to use better
    # names.
    if not isinstance(x, list):
        x = [x]
        mask = [mask]
    timesteps = K.int_shape(x[0])[1]
    input_shape = [K.int_shape(x_i) for x_i in x]
    if len(x) == 1:
        input_shape = input_shape[0]
    first_input_shape = self.input_spec[0].shape
    if len(x) == 1 and first_input_shape[0]:
        # The batch size is passed when defining the layer in some cases (for example if it is
        # stateful). We respect the input shape in that case and don't reshape the input. This
        # is slower. K.rnn also expects only a single tensor, so we can't do this if we have
        # multiple inputs.
        def step(x_i, states):  # pylint: disable=unused-argument
            output = self.layer.call(x_i)
            return output, []
        _, outputs, _ = K.rnn(step, x, mask=mask, input_states=[])
    else:
        reshaped_xs, reshaped_masks = self.reshape_inputs_and_masks(x, mask)
        outputs = self.layer.call(reshaped_xs, mask=reshaped_masks)
    output_shape = self.get_output_shape_for(input_shape)
    reshaped_shape = (-1, timesteps) + output_shape[2:]
    if reshaped_shape[-1] == 1:
        reshaped_shape = reshaped_shape[:-1]
    outputs = K.reshape(outputs, reshaped_shape)
    return outputs

def call(self, inputs):
    # The function that actually runs the layer.
    # Initial state (all zeros).
    init_states = [K.zeros((K.shape(inputs)[0], K.shape(inputs)[-1]))]
    # init_states = [inputs[:, 0], inputs[:, 0]]
    # print('inputs', K.shape(inputs)[0])

    # Run step_do over the time dimension.
    outputs = K.rnn(self.step_do, inputs, init_states, unroll=False)
    # print('outputs[1]', outputs.shape)
    print('outputs[0].shape', outputs[0].shape)

    # Four self-attention heads over the recurrent outputs.
    query1 = K.dot(outputs[1], self.query_kernel1)
    key1 = K.dot(outputs[1], self.key_kernel1)
    value1 = K.dot(outputs[1], self.value_kernel1)
    attention_prob1 = K.batch_dot(query1, key1, axes=[2, 2]) / np.sqrt(self.units)
    attention_prob1 = K.softmax(attention_prob1)
    att_out1 = K.batch_dot(attention_prob1, value1, axes=[2, 1])

    query2 = K.dot(outputs[1], self.query_kernel2)
    key2 = K.dot(outputs[1], self.key_kernel2)
    value2 = K.dot(outputs[1], self.value_kernel2)
    attention_prob2 = K.batch_dot(query2, key2, axes=[2, 2]) / np.sqrt(self.units)
    attention_prob2 = K.softmax(attention_prob2)
    att_out2 = K.batch_dot(attention_prob2, value2, axes=[2, 1])

    query3 = K.dot(outputs[1], self.query_kernel3)
    key3 = K.dot(outputs[1], self.key_kernel3)
    value3 = K.dot(outputs[1], self.value_kernel3)
    attention_prob3 = K.batch_dot(query3, key3, axes=[2, 2]) / np.sqrt(self.units)
    attention_prob3 = K.softmax(attention_prob3)
    att_out3 = K.batch_dot(attention_prob3, value3, axes=[2, 1])

    query4 = K.dot(outputs[1], self.query_kernel4)
    key4 = K.dot(outputs[1], self.key_kernel4)
    value4 = K.dot(outputs[1], self.value_kernel4)
    attention_prob4 = K.batch_dot(query4, key4, axes=[2, 2]) / np.sqrt(self.units)
    attention_prob4 = K.softmax(attention_prob4)
    att_out4 = K.batch_dot(attention_prob4, value4, axes=[2, 1])

    # Concatenate the heads, mix them, and return the last timestep.
    att_out = K.concatenate([att_out1, att_out2, att_out3, att_out4], axis=-1)
    out = K.dot(att_out, self.switch_kernel)
    return out[:, -1]

def call(self, inputs):
    # The function that actually runs the layer.
    # Initial states (all zeros).
    init_states = [
        K.zeros((K.shape(inputs)[0], K.shape(inputs)[-1])),
        K.zeros((K.shape(inputs)[0], K.shape(inputs)[-1]))
    ]
    # init_states = [inputs[:, 0], inputs[:, 0]]
    # print('inputs', K.shape(inputs)[0])

    # Run step_do over the time dimension.
    outputs = K.rnn(self.step_do, inputs, init_states, unroll=False)
    # print('outputs[1]', outputs.shape)
    print('outputs[0].shape', outputs[0].shape)

    # Single self-attention head over the recurrent outputs.
    query = K.dot(outputs[1], self.query_kernel)
    print('query.shape', query.shape)
    key = K.dot(outputs[1], self.key_kernel)
    value = K.dot(outputs[1], self.value_kernel)
    attention_prob = K.batch_dot(query, key, axes=[2, 2]) / np.sqrt(self.units)
    attention_prob = K.softmax(attention_prob)
    print(attention_prob.shape)
    att_out = K.batch_dot(attention_prob, value, axes=[2, 1])
    return att_out[:, -1]

def call(self, x):
    # add the controller's weights here
    self.trainable_weights += self.controller.trainable_weights
    last_output, list_outputs, states = K.rnn(self.main_step_func, x,
                                              self.get_initial_states(x),
                                              unroll=False)
    # save the states (e.g. for plotting)
    self.save_states(states)
    return last_output if not self.return_sequences else list_outputs

def call(self, x, mask=None): print("AttentionDecoder.call") H = x x = K.permute_dimensions(H, (1, 0, 2))[-1, :, :] if self.stateful or self.state_input or len(self.state_outputs) > 0: initial_states = self.states[:] else: initial_states = self.get_initial_states(H) constants = self.get_constants(H) + [H] y_0 = x x = K.repeat(x, self.output_length) initial_states += [y_0] last_output, outputs, states = K.rnn( self.step, x, initial_states, go_backwards=self.go_backwards, mask=mask, constants=constants, unroll=self.unroll, input_length=self.output_length) if self.stateful and not self.state_input: self.updates = zip(self.states, states) self.states_to_transfer = states return outputs
def call(self, x, mask=None):
    input_shape = self.input_spec[0].shape
    # state format: [h(t-1), c(t-1), y(t-1)]
    # h_0 = K.zeros_like(x[:, 0, :])
    # c_0 = K.zeros_like(x[:, 0, :])
    h_0 = K.reshape(x, (-1, self.input_dim))
    c_0 = K.reshape(x, (-1, self.input_dim))
    initial_states = [h_0, c_0]
    # self.states = [None, None]
    # initial_states = self.get_initial_states(x)

    last_output, outputs, states = K.rnn(step_function=self.step,
                                         inputs=x,
                                         initial_states=initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         constants=None,
                                         unroll=self.unroll,
                                         input_length=input_shape[1])
    if self.return_sequences:
        return outputs
    else:
        return last_output

def call(self, x, mask=None):
    constants = self.get_constants(x)
    assert K.ndim(x) == 5
    if K._BACKEND == 'tensorflow':
        if not self.input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences. Make sure the first layer '
                            'has a "batch_input_shape" argument '
                            'including the samples axis.')
    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(x)

    last_output, outputs, states = K.rnn(self.step, x, initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask, constants=constants)
    if self.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.states[i], states[i]))

    if self.return_sequences:
        return outputs
    else:
        return last_output

def call(self, x, use_teacher_forcing=True, training=None):
    # TODO: check that the model loads correctly from .h5
    # TODO: for now this cannot be a shared layer
    # (it can only be shared if we use (or don't use) teacher forcing in all cases simultaneously)

    # This sequence is used only to extract the number of timesteps
    # (the same as in the output sequence).
    fake_input = x
    if isinstance(x, list):
        # teacher forcing for training
        self.x_seq, self.y_true = x
        self.use_teacher_forcing = use_teacher_forcing
        fake_input = K.expand_dims(self.y_true)
    else:
        # inference
        self.x_seq = x
        self.use_teacher_forcing = False

    # Apply a dense layer over the time dimension of the sequence.
    # Do it here because it doesn't depend on any previous steps,
    # therefore we can save computation time:
    self._uxpb = _time_distributed_dense(self.x_seq, self.U_a, b=self.b_a,
                                         dropout=self.dropout,
                                         input_dim=self.input_dim,
                                         timesteps=self.timesteps,
                                         output_dim=self.units,
                                         training=training)

    last_output, outputs, states = K.rnn(
        self.step,
        inputs=fake_input,
        initial_states=self.get_initial_state(self.x_seq))
    return outputs

def call(self, inputs, mask=None, initial_state=None, training=None):
    # input shape: `(samples, time (padded with zeros), input_dim)`
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if initial_state is not None:
        if not isinstance(initial_state, (list, tuple)):
            initial_states = [initial_state]
        else:
            initial_states = list(initial_state)
    if isinstance(inputs, list):
        initial_states = inputs[1:]
        inputs = inputs[0]
    elif self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(inputs)

    if len(initial_states) != len(self.states):
        raise ValueError('Layer has ' + str(len(self.states)) +
                         ' states but was passed ' +
                         str(len(initial_states)) + ' initial states.')
    input_shape = K.int_shape(inputs)
    if self.unroll and input_shape[1] is None:
        raise ValueError('Cannot unroll a RNN if the '
                         'time dimension is undefined. \n'
                         '- If using a Sequential model, '
                         'specify the time dimension by passing '
                         'an `input_shape` or `batch_input_shape` '
                         'argument to your first layer. If your '
                         'first layer is an Embedding, you can '
                         'also use the `input_length` argument.\n'
                         '- If using the functional API, specify '
                         'the time dimension by passing a `shape` '
                         'or `batch_shape` argument to your Input layer.')
    constants = self.get_constants(inputs, training=None)
    preprocessed_input = self.preprocess_input(inputs, training=None)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=input_shape[1])
    if self.stateful:
        updates = []
        for i in range(len(states)):
            updates.append((self.states[i], states[i]))
        self.add_update(updates, inputs)

    # Properly set learning phase
    if 0 < self.dropout < 1:
        last_output._uses_learning_phase = True
        outputs._uses_learning_phase = True

    if self.return_sequences:
        return outputs
    else:
        return last_output

def call(self, x, mask=None):
    H = x
    x = K.permute_dimensions(H, (1, 0, 2))[-1, :, :]
    if self.stateful or self.state_input or len(self.state_outputs) > 0:
        initial_states = self.states[:]
    else:
        initial_states = self.get_initial_states(H)
    constants = self.get_constants(H) + [H]
    y_0 = x
    x = K.repeat(x, self.output_length)
    initial_states += [y_0]

    last_output, outputs, states = K.rnn(self.step, x, initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask, constants=constants,
                                         unroll=self.unroll,
                                         input_length=self.output_length)
    if self.stateful and not self.state_input:
        self.updates = []
        for i in range(2):
            self.updates.append((self.states[i], states[i]))
    self.states_to_transfer = states
    return outputs

def _step(self, x_tm1, h_tm1, c_tm1, H,
          u_i, u_f, u_o, u_c, w_i, w_f, w_c, w_o, w_x, w_a,
          v_i, v_f, v_c, v_o, b_i, b_f, b_c, b_o, b_x, b_a):
    s_tm1 = K.repeat(c_tm1, self.input_length)
    e = H + s_tm1

    def a(x, states):
        output = K.dot(x, w_a) + b_a
        return output, []

    _, energy, _ = K.rnn(a, e, [], mask=None)
    energy = activations.get('linear')(energy)
    energy = K.permute_dimensions(energy, (2, 0, 1))
    energy = energy[0]
    alpha = K.softmax(energy)
    alpha = K.repeat(alpha, self.hidden_dim)
    alpha = K.permute_dimensions(alpha, (0, 2, 1))
    weighted_H = H * alpha
    v = K.sum(weighted_H, axis=1)

    xi_t = K.dot(x_tm1, w_i) + K.dot(v, v_i) + b_i
    xf_t = K.dot(x_tm1, w_f) + K.dot(v, v_f) + b_f
    xc_t = K.dot(x_tm1, w_c) + K.dot(v, v_c) + b_c
    xo_t = K.dot(x_tm1, w_o) + K.dot(v, v_o) + b_o

    i_t = self.inner_activation(xi_t + K.dot(h_tm1, u_i))
    f_t = self.inner_activation(xf_t + K.dot(h_tm1, u_f))
    c_t = f_t * c_tm1 + i_t * self.activation(xc_t + K.dot(h_tm1, u_c))
    o_t = self.inner_activation(xo_t + K.dot(h_tm1, u_o))
    h_t = o_t * self.activation(c_t)
    x_t = K.dot(h_t, w_x) + b_x
    return x_t, h_t, c_t

def call(self, x, training=None, mask=None, states=None):
    """
    :param Tensor x: Should be the output of the decoder
    :param Tensor states: last state of the decoder
    :param Tensor mask: The mask to apply
    :return: Pointers probabilities
    """
    input_shape = self.input_spec[0].shape
    en_seq = x
    x_input = x[:, input_shape[1] - 1, :]
    x_input = K.repeat(x_input, input_shape[1])

    if states:
        initial_states = states
    else:
        initial_states = self.decoder.get_initial_state(x_input)

    constants = []
    preprocessed_input, _, constants = self.decoder.process_inputs(
        x_input, initial_states, constants)
    constants.append(en_seq)

    last_output, outputs, states = K.rnn(
        self.step, preprocessed_input, initial_states,
        go_backwards=self.decoder.lstm.go_backwards,
        constants=constants,
        input_length=input_shape[1])
    return outputs

def call(self, inputs):
    init_states = [
        tf.zeros((K.shape(inputs)[0], self.units)),
        tf.zeros((K.shape(inputs)[0], self.units))
    ]
    outputs = K.rnn(self.step_do, inputs, init_states)
    return outputs[1]

def call(self, x, mask=None):
    l0 = self.layers[0]
    enc_output, dec_input = x
    if l0.stateful:
        initial_states = l0.states
    else:
        initial_states = l0.get_initial_states(dec_input)
    constants = l0.get_constants(dec_input)
    constants = self.get_constants(enc_output, constants)
    preprocessed_input = l0.preprocess_input(dec_input)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=l0.go_backwards,
                                         mask=mask[1],
                                         constants=constants,
                                         unroll=l0.unroll,
                                         input_length=self.dec_shape[1])
    if l0.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((l0.states[i], states[i]))
    return outputs if l0.return_sequences else last_output

def call(self, inputs):
    # Load the input vector
    prcp = inputs[:, :, 0:1]
    tmean = inputs[:, :, 1:2]
    dayl = inputs[:, :, 2:3]

    # Calculate PET using Hamon's formulation
    pet = 29.8 * (dayl * 24) * 0.611 * K.exp(17.3 * tmean / (tmean + 237.3)) / (tmean + 273.2)

    # Concatenate prcp, tmean, and pet into a new input
    new_inputs = K.concatenate((prcp, tmean, pet), axis=-1)

    # Define 2 initial state variables at the beginning
    init_states = [K.zeros((K.shape(new_inputs)[0], 2))]

    # Recursively calculate the state variables by using K.rnn
    _, outputs, _ = K.rnn(self.step_do, new_inputs, init_states)
    s0 = outputs[:, :, 0:1]
    s1 = outputs[:, :, 1:2]

    # Calculate final process variables
    m = self.snowbucket(s0, tmean, self.ddf, self.tmax)
    [et, qsub, qsurf] = self.soilbucket(s1, pet, self.f, self.smax, self.qmax)

    if self.mode == "normal":
        return qsub + qsurf
    elif self.mode == "analysis":
        return K.concatenate([s0, s1, m, et, qsub, qsurf], axis=-1)

def call(self, x, mask=None):
    # input_shape = (batch_size, input_length, input_dim). This needs to be defined in build.
    read_output, initial_memory_states, output_mask = self.read(x, mask)
    initial_write_states = self.writer.get_initial_states(read_output)  # h_0 and c_0 of the writer LSTM
    initial_states = initial_memory_states + initial_write_states
    # last_output: (batch_size, output_dim)
    # all_outputs: (batch_size, input_length, output_dim)
    # last_states:
    #   last_memory_state: (batch_size, input_length, output_dim)
    #   last_output
    #   last_writer_ct
    last_output, all_outputs, last_states = K.rnn(self.compose_and_write_step,
                                                  read_output, initial_states,
                                                  mask=output_mask)
    last_memory = last_states[0]
    if self.return_mode == "last_output":
        return last_output
    elif self.return_mode == "all_outputs":
        return all_outputs
    else:
        # return mode is output_and_memory
        expanded_last_output = K.expand_dims(last_output, dim=1)  # (batch_size, 1, output_dim)
        # (batch_size, 1 + input_length, output_dim)
        return K.concatenate([expanded_last_output, last_memory], axis=1)

def call(self, inputs):
    # initial condition
    init_states = [inputs]
    zeros = K.zeros((K.shape(inputs)[0], self.steps, K.shape(inputs)[1]))
    outputs = K.rnn(self.step_do, zeros, init_states)
    return outputs[1]

def diff(a, n=1, axis=-1):
    if axis == -1:
        axis = K.ndim(a) - 1
    a_aug = K.tile(K.expand_dims(a, axis=1), [1, n] + [1] * (K.ndim(a) - 1))
    pattern = (axis,) + tuple(set(range(K.ndim(a))) - {axis})
    inv_pattern = tuple(range(1, axis + 1)) + (0,) + tuple(range(axis + 1, K.ndim(a)))

    def step(inputs, states):
        prev_output = states[0]
        t = states[1]
        t_int = K.cast(t[0], 'int32')
        prev_output_aug = K.permute_dimensions(prev_output, pattern)
        inputs_aug = K.permute_dimensions(inputs, pattern)
        output_aug = K.switch(
            K.all(t_int > 0),
            K.concatenate([inputs_aug[:t_int],
                           prev_output_aug[1:] - prev_output_aug[:-1]], axis=0),
            K.concatenate([inputs_aug[:1],
                           inputs_aug[1:] - inputs_aug[:-1]], axis=0))
        output = K.permute_dimensions(output_aug, inv_pattern)
        return output, [output, t + 1]

    d_aug = K.permute_dimensions(
        K.rnn(step, a_aug, [K.zeros_like(a), K.zeros((1,))])[0], pattern)
    d = K.permute_dimensions(d_aug[-(K.shape(a)[axis] - n):], inv_pattern)
    return d

def call(self, x, mask=None):
    # This is copied from the current implementation of call in TimeDistributed,
    # except that this actually uses the mask (and has better variable names).
    input_shape = self.input_spec[0].shape
    if input_shape[0]:
        # The batch size is passed when defining the layer in some cases (for example if it is
        # stateful). We respect the input shape in that case and don't reshape the input.
        # This is slower.
        # pylint: disable=unused-argument
        def step(x_i, states):
            output = self.layer.call(x_i)
            return output, []
        _, outputs, _ = K.rnn(step, x, mask=mask, input_states=[])
    else:
        input_length = input_shape[1] if input_shape[1] else K.shape(x)[1]
        reshaped_x = K.reshape(x, (-1,) + input_shape[2:])  # (batch_size * timesteps, ...)
        if mask is not None:
            mask_ndim = K.ndim(mask)
            input_ndim = K.ndim(x)
            if mask_ndim == input_ndim:
                mask_shape = input_shape
            elif mask_ndim == input_ndim - 1:
                mask_shape = input_shape[:-1]
            else:
                raise Exception("Mask is of an unexpected shape. Mask's ndim: %s, input's ndim %s" %
                                (mask_ndim, input_ndim))
            mask = K.reshape(mask, (-1,) + mask_shape[2:])  # (batch_size * timesteps, ...)
        outputs = self.layer.call(reshaped_x, mask=mask)
        output_shape = self.get_output_shape_for(input_shape)
        outputs = K.reshape(outputs, (-1, input_length) + output_shape[2:])
    return outputs

def call(self, inputs, mask=None, training=None, initial_state=None):
    # We need to rewrite this `call` method by combining `RNN`'s and `GRU`'s.
    self.cell._dropout_mask = None
    self.cell._recurrent_dropout_mask = None
    self.cell._masking_dropout_mask = None

    inputs = inputs[:3]

    if initial_state is not None:
        pass
    elif self.stateful:
        initial_state = self.states
    else:
        initial_state = self.get_initial_state(inputs)

    if len(initial_state) != len(self.states):
        raise ValueError('Layer has ' + str(len(self.states)) +
                         ' states but was passed ' +
                         str(len(initial_state)) + ' initial states.')

    timesteps = K.int_shape(inputs[0])[1]

    kwargs = {}
    if has_arg(self.cell.call, 'training'):
        kwargs['training'] = training

    def step(inputs, states):
        return self.cell.call(inputs, states, **kwargs)

    # concatenate the inputs and get the mask
    concatenated_inputs = K.concatenate(inputs, axis=-1)
    mask = mask[0]
    last_output, outputs, states = K.rnn(step, concatenated_inputs,
                                         initial_state,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         unroll=self.unroll,
                                         input_length=timesteps)
    if self.stateful:
        updates = []
        for i, state in enumerate(states):
            updates.append((self.states[i], state))
        self.add_update(updates, inputs)

    if self.return_sequences:
        output = outputs
    else:
        output = last_output

    # Properly set learning phase
    if getattr(last_output, '_uses_learning_phase', False):
        output._uses_learning_phase = True
        for state in states:
            state._uses_learning_phase = True

    if self.return_state:
        states = list(states)[:-2]  # remove x_keep and ss
        return [output] + states
    return output

def get_output(self, train=False):
    H = self.get_input(train)
    X = K.permute_dimensions(H, (1, 0, 2))[-1]

    def reshape(x, states):
        h = K.dot(x, self.W_h) + self.b_h
        return h, []

    _, H, _ = K.rnn(reshape, H, [], mask=None)

    if self.stateful or self.state_input or len(self.state_outputs) > 0:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(X)

    [outputs, hidden_states, cell_states], updates = theano.scan(
        self._step,
        n_steps=self.output_length,
        outputs_info=[X] + initial_states,
        non_sequences=[H, self.U_i, self.U_f, self.U_o, self.U_c,
                       self.W_i, self.W_f, self.W_c, self.W_o,
                       self.W_x, self.W_a, self.V_i, self.V_f,
                       self.V_c, self.V_o, self.b_i, self.b_f,
                       self.b_c, self.b_o, self.b_x, self.b_a])
    states = [hidden_states[-1], cell_states[-1]]
    if self.stateful and not self.state_input:
        self.updates = []
        for i in range(2):
            self.updates.append((self.states[i], states[i]))
    for o in self.state_outputs:
        o.updates = []
        for i in range(2):
            o.updates.append((o.states[i], states[i]))
    return K.permute_dimensions(outputs, (1, 0, 2))

def call(self, x, mask=None, constants=None, **kwargs):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    input_shape = self.input_spec[0].shape
    if isinstance(x, (tuple, list)):
        x, *custom_initial = x
    else:
        custom_initial = None

    if custom_initial:
        initial_states = custom_initial
    elif self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(x)

    preprocessed_input = x  # self.preprocess_input(x)

    # only use the main input mask
    if isinstance(mask, list):
        mask = mask[0]

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=input_shape[1])
    if self.return_sequences:
        return [outputs, *states]
    else:
        return [last_output, *states]

def _backward(gamma, mask):
    """Backward recurrence of the linear chain CRF."""
    gamma = K.cast(gamma, "int32")

    def _backward_step(gamma_t, states):
        y_tm1 = K.squeeze(states[0], 0)
        y_t = batch_gather(gamma_t, y_tm1)
        return y_t, [K.expand_dims(y_t, 0)]

    initial_states = [K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)]
    _, y_rev, _ = K.rnn(_backward_step, gamma, initial_states, go_backwards=True)
    y = K.reverse(y_rev, 1)

    if mask is not None:
        mask = K.cast(mask, dtype="int32")
        # mask output
        y *= mask
        # set masked values to -1
        y += -(1 - mask)
    return y

def call(self, x, mask=None):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    input_shape = self.input_spec[0].shape
    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         constants=constants)
    if self.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.states[i], states[i]))

    if self.return_sequences:
        return outputs
    else:
        return last_output

def call(self, inputs, mask=None, initial_state=None, training=None):
    # input shape: `(samples, time (padded with zeros), input_dim)`
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if initial_state is not None:
        if not isinstance(initial_state, (list, tuple)):
            initial_states = [initial_state]
        else:
            initial_states = list(initial_state)
    if isinstance(inputs, list):
        initial_states = inputs[1:]
        inputs = inputs[0]
    else:
        initial_states = self.get_initial_states(inputs)

    if len(initial_states) != len(self.states):
        raise ValueError('Layer has ' + str(len(self.states)) +
                         ' states but was passed ' +
                         str(len(initial_states)) + ' initial states.')
    input_shape = K.int_shape(inputs)
    constants = self.get_constants(inputs, training=None)
    preprocessed_input = self.preprocess_input(inputs, training=None)

    h = initial_states[0]
    h += self.recurrent_activation(self.initial_attention)
    initial_states[0] = h

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=input_shape[1])
    return last_output

def viterbi_decoding(self, X, mask=None):
    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(input_energy, mask,
                                                self.left_boundary,
                                                self.right_boundary)

    argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
    argmin_tables = K.cast(argmin_tables, 'int32')

    # backward to find best path, `initial_best_idx` can be any,
    # as all elements in the last argmin_table are the same
    argmin_tables = K.reverse(argmin_tables, 1)
    # matrix instead of vector is required by tf `K.rnn`
    initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
    if K.backend() == 'theano':
        initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)]

    def gather_each_row(params, indices):
        n = K.shape(indices)[0]
        if K.backend() == 'theano':
            return params[K.T.arange(n), indices]
        else:
            indices = K.transpose(K.stack([K.tf.range(n), indices]))
            return K.tf.gather_nd(params, indices)

    def find_path(argmin_table, best_idx):
        next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
        next_best_idx = K.expand_dims(next_best_idx)
        if K.backend() == 'theano':
            next_best_idx = K.T.unbroadcast(next_best_idx, 1)
        return next_best_idx, [next_best_idx]

    _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                             input_length=K.int_shape(X)[1],
                             unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)
    return K.one_hot(best_paths, self.units)

def call(self, x):
    initial_states = self.get_initial_states(x)
    last_output, outputs, states = K.rnn(self.step, x, initial_states)
    if self.return_sequences:
        return outputs
    else:
        return last_output

def call(self, X):
    xShape = K.shape(X)
    X = K.reshape(X, (-1, 3, 3))
    X = X.dimshuffle(1, 0, 2)
    lastOut, outputs, states = K.rnn(self.step, X, initial_states=[])
    outputs = outputs.dimshuffle(1, 0, 2)
    outputs = K.reshape(outputs, xShape)
    return outputs

def out_step(X_i, states):
    def in_step(x, in_states):
        output = K.dot(x, self.W) + self.b
        return output, []
    _, in_outputs, _ = K.rnn(in_step, X_i, initial_states=[], mask=None)
    return in_outputs, []

def call(self, x, mask=None):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    if isinstance(x, (tuple, list)):
        x, custom_initial = x
    else:
        custom_initial = None
    input_shape = self.input_spec[0].shape
    if K._BACKEND == 'tensorflow':
        if not input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences.\n'
                            'If your first layer is an Embedding, '
                            'make sure to pass it an "input_length" '
                            'argument. Otherwise, make sure '
                            'the first layer has '
                            'an "input_shape" or "batch_input_shape" '
                            'argument, including the time axis. '
                            'Found input shape at layer ' + self.name +
                            ': ' + str(input_shape))
    if self.stateful:
        initial_states = self.states
    elif custom_initial:
        initial_states = custom_initial
    else:
        initial_states = self.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.preprocess_input(x)
    print("call input shape", input_shape)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask[0],
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=input_shape[1])
    if self.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.states[i], states[i]))

    if self.return_sequences:
        return outputs
    else:
        return last_output

def get_output_shape_for(self, input_shape):
    print("get output shape", input_shape)
    return (input_shape[0], self.output_dim)

def call(self, x, mask=None):
    last_output, outputs, states = K.rnn(
        self.step,
        self.preprocess_input(x),
        self.states or self.get_initial_states(x),
        go_backwards=self.go_backwards,
        mask=mask,
        constants=self.get_constants(x),
        unroll=self.unroll,
        input_length=self.input_spec[0].shape[1])
    self.updates = zip(self.states, states)
    self.states_to_transfer = states
    return outputs if self.return_sequences else last_output

def call(self, x, mask=None):
    input_shape = self.input_spec[0].shape
    initial_states = self.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=input_shape[1])
    if self.return_sequences:
        return outputs
    else:
        return last_output

def get_output(self, train=False):
    X = self.get_input(train)

    def out_step(X_i, states):
        def in_step(x, in_states):
            output = K.dot(x, self.W) + self.b
            return output, []
        _, in_outputs, _ = K.rnn(in_step, X_i, initial_states=[], mask=None)
        return in_outputs, []

    _, outputs, _ = K.rnn(out_step, X, initial_states=[], mask=None)
    outputs = self.activation(outputs)
    return outputs

def get_output(self, train=False):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    X = self.get_input(train)
    if K._BACKEND == "tensorflow":
        if not self.input_shape[1]:
            raise Exception("When using TensorFlow, you should define "
                            "explicitly the number of timesteps of "
                            "your sequences. Make sure the first layer "
                            'has a "batch_input_shape" argument '
                            "including the samples axis.")

    mask = self.get_output_mask(train)
    if mask:
        # apply mask
        X *= K.cast(K.expand_dims(mask), X.dtype)
        masking = True
    else:
        masking = False

    if self.stateful or self.state_input or len(self.state_outputs) > 0:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(X)

    last_output, outputs, states = K.rnn(self.step, X, initial_states,
                                         go_backwards=self.go_backwards,
                                         masking=masking)
    n = len(states)
    if self.stateful and not self.state_input:
        self.updates = []
        for i in range(n):
            self.updates.append((self.states[i], states[i]))
    for o in self.state_outputs:
        o.updates = []
        for i in range(n):
            o.updates.append((o.states[i], states[i]))

    if self.return_sequences:
        return outputs
    else:
        return last_output

def call(self, x, mask=None):
    input_shape = self.input_spec[0].shape
    en_seq = x
    x_input = x[:, input_shape[1] - 1, :]
    x_input = K.repeat(x_input, input_shape[1])
    initial_states = self.get_initial_states(x_input)

    constants = super(PointerLSTM, self).get_constants(x_input)
    constants.append(en_seq)
    preprocessed_input = self.preprocess_input(x_input)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         constants=constants,
                                         input_length=input_shape[1])
    print('outputs')
    print(outputs)
    return outputs

def call(self, x, mask=None):
    input_shape = self.input_spec[0].shape
    if K._BACKEND == 'tensorflow':
        if not input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences.\n'
                            'If your first layer is an Embedding, '
                            'make sure to pass it an "input_length" '
                            'argument. Otherwise, make sure '
                            'the first layer has '
                            'an "input_shape" or "batch_input_shape" '
                            'argument, including the time axis. '
                            'Found input shape at layer ' + self.name +
                            ': ' + str(input_shape))
    if self.stateful or self.state_input or len(self.state_outputs) > 0:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=input_shape[1])
    n = len(states)
    if self.stateful and not self.state_input:
        self.updates = []
        for i in range(n):
            self.updates.append((self.states[i], states[i]))
    self.states_to_transfer = states

    if self.return_sequences:
        return outputs
    else:
        return last_output

def get_full_output(self, train=False):
    """
    This method is for research and visualization purposes. Use it as:

        X = model.get_input()  # full model
        Y = ntm.get_output()   # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

    If inner_rnn == "lstm", use it as:

        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)
    """
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    X = self.get_input(train)
    assert K.ndim(X) == 3
    if K._BACKEND == 'tensorflow':
        if not self.input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences. Make sure the first layer '
                            'has a "batch_input_shape" argument '
                            'including the samples axis.')
    mask = self.get_output_mask(train)
    if mask:
        # apply mask
        X *= K.cast(K.expand_dims(mask), X.dtype)
        masking = True
    else:
        masking = False

    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(X)

    last_output, outputs, states = K.rnn(self.step, X, initial_states,
                                         go_backwards=self.go_backwards,
                                         masking=masking)
    return states

def call(self, x, mask=None):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    input_shape = self.input_spec[0].shape
    inputs = K.repeat_elements(x * 0, self.num_timesteps, 1)
    initial_states = [x[:, 0, :]]
    constants = self.get_constants(x)
    # preprocessed_input = self.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step, inputs, initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask,
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=self.num_timesteps)
    if self.return_sequences:
        return outputs
    else:
        return last_output

def get_output(self, train=False):
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    X = self.get_input(train)
    if K._BACKEND == 'tensorflow':
        if not self.input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences. Make sure the first layer '
                            'has a "batch_input_shape" argument '
                            'including the samples axis.')
    mask = self.get_output_mask(train)

    if self.stateful or self.state_input or len(self.state_outputs) > 0:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(X)

    last_output, outputs, states = K.rnn(self.step, X, self.output_dim,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask)
    n = len(states)
    if self.stateful and not self.state_input:
        self.updates = []
        for i in range(n):
            self.updates.append((self.states[i], states[i]))
    for o in self.state_outputs:
        o.updates = []
        for i in range(n):
            o.updates.append((o.states[i], states[i]))

    if self.return_sequences:
        return outputs
    else:
        return last_output

def call(self, x, mask=None):
    H = x
    x = K.permute_dimensions(H, (1, 0, 2))[-1]
    if self.stateful or self.state_input or len(self.state_outputs) > 0:
        initial_states = self.states[:]
    else:
        initial_states = self.get_initial_states(H)
    constants = self.get_constants(H) + [H]
    y_0 = x
    x = K.repeat(x, self.output_length)
    initial_states += [y_0]

    last_output, outputs, states = K.rnn(self.step, x, initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask, constants=constants,
                                         unroll=self.unroll,
                                         input_length=self.output_length)
    if self.stateful and not self.state_input:
        self.updates = []
        for i in range(2):
            self.updates.append((self.states[i], states[i]))
    self.states_to_transfer = states
    return outputs

def call(self, x, constants=None, mask=None, initial_state=None):
    # input shape: (n_samples, time (padded with zeros), input_dim)
    input_shape = self.input_spec.shape

    if self.layer.stateful:
        initial_states = self.layer.states
    elif initial_state is not None:
        initial_states = initial_state
        if not isinstance(initial_states, (list, tuple)):
            initial_states = [initial_states]

        base_initial_state = self.layer.get_initial_state(x)
        if len(base_initial_state) != len(initial_states):
            raise ValueError(
                "initial_state does not have the correct length. "
                "Received length {0} but expected {1}".format(
                    len(initial_states), len(base_initial_state)))
        else:
            # check the states' shapes
            for i in range(len(initial_states)):
                if not initial_states[i].shape.is_compatible_with(base_initial_state[i].shape):
                    raise ValueError(
                        "initial_state does not match the default base state of the layer. "
                        "Received {0} but expected {1}".format(
                            [x.shape for x in initial_states],
                            [x.shape for x in base_initial_state]))
    else:
        initial_states = self.layer.get_initial_state(x)

    if not constants:
        constants = []
    constants += self.get_constants(x)

    last_output, outputs, states = K.rnn(
        self.step, x, initial_states,
        go_backwards=self.layer.go_backwards,
        mask=mask,
        constants=constants,
        unroll=self.layer.unroll,
        input_length=input_shape[1])

    if self.layer.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.layer.states[i], states[i]))

    if self.layer.return_sequences:
        output = outputs
    else:
        output = last_output

    # Properly set learning phase
    if getattr(last_output, '_uses_learning_phase', False):
        output._uses_learning_phase = True
        for state in states:
            state._uses_learning_phase = True

    if self.layer.return_state:
        if not isinstance(states, (list, tuple)):
            states = [states]
        else:
            states = list(states)
        return [output] + states
    else:
        return output

def dream(self, length=140):
    def _dream_step(x, states):
        # input + states
        assert len(states) == 2 * self.depth + 1
        x = states[-1]
        x = K.switch(K.equal(x, K.max(x, axis=-1, keepdims=True)), 1., 0.)
        states = states[:-1]
        h = []
        for i, (h_tm1, c_tm1) in enumerate(zip(states[:-1:2], states[1::2])):
            x, new_states = self.lstms[i].step(x, [h_tm1, c_tm1])
            h.extend(new_states)
        if self.readout:
            h += [self.readout_layer(h[-2])]
            final = h[-1]
        else:
            h += [h[-2]]
            final = h[-2]
        return final, h

    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # Only the very first time point of the input is used; the others only
    # serve to count the length of the output sequence.
    X = self.get_input(train=False)
    mask = self.get_input_mask(train=False)
    assert K.ndim(X) == 3
    if K._BACKEND == 'tensorflow':
        if not self.input_shape[1]:
            raise Exception('When using TensorFlow, you should define '
                            'explicitly the number of timesteps of '
                            'your sequences.\n'
                            'If your first layer is an Embedding, '
                            'make sure to pass it an "input_length" '
                            'argument. Otherwise, make sure '
                            'the first layer has '
                            'an "input_shape" or "batch_input_shape" '
                            'argument, including the time axis.')

    # if self.stateful:
    #     initial_states = self.states
    # else:
    #     initial_states = self.get_initial_states(X)
    s = self.get_output(train=False)[:, -1]
    idx = [0, ] + list(np.cumsum([self.output_dim] * 2 * self.depth + [self.readout, ]))
    initial_states = [s[:, idx[i]:idx[i + 1]] for i in range(len(idx) - 1)]
    # if self.readout:
    #     initial_states.pop(-1)
    #     initial_states.append(X[:, 0])

    last_output, outputs, states = K.rnn(_dream_step, K.zeros((1, length, 1)),
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         mask=mask)
    if self.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.states[i], states[i]))
    return outputs

def call(self, x, constants=None, mask=None, initial_state=None):
    # input shape: (n_samples, time (padded with zeros), input_dim)
    input_shape = self.input_spec[0].shape

    if len(x) > 2:
        initial_state = x[2:]
        x = x[:2]
        assert len(initial_state) >= 1

    static_x = x[1]
    x = x[0]

    if self.layer.stateful:
        initial_states = self.layer.states
    elif initial_state is not None:
        initial_states = initial_state
        if not isinstance(initial_states, (list, tuple)):
            initial_states = [initial_states]
    else:
        initial_states = self.layer.get_initial_state(x)

    if not constants:
        constants = []
    constants += self.get_constants(static_x)

    last_output, outputs, states = K.rnn(
        self.step, x, initial_states,
        go_backwards=self.layer.go_backwards,
        mask=mask,
        constants=constants,
        unroll=self.layer.unroll,
        input_length=input_shape[1])

    # The output currently has the form (real_output, attention);
    # split it up now.
    output_dim = self.layer.compute_output_shape(input_shape)[0][-1]
    last_output = last_output[:output_dim]
    attentions = outputs[:, :, output_dim:]
    outputs = outputs[:, :, :output_dim]

    if self.layer.stateful:
        self.updates = []
        for i in range(len(states)):
            self.updates.append((self.layer.states[i], states[i]))

    if self.layer.return_sequences:
        output = outputs
    else:
        output = last_output

    # Properly set learning phase
    if getattr(last_output, '_uses_learning_phase', False):
        output._uses_learning_phase = True
        for state in states:
            state._uses_learning_phase = True

    if self.layer.return_state:
        if not isinstance(states, (list, tuple)):
            states = [states]
        else:
            states = list(states)
        output = [output] + states

    if self.return_attention:
        if not isinstance(output, list):
            output = [output]
        output = output + [attentions]

    return output