def infer(self, keys, key_mask, values, initial_state, target_embedding,
          target_bias, keep_prob):
    def infer_step(y_prev, mask, state, keys, values, key_mask, embedding,
                   embedding_bias):
        return self._infer_step(y_prev, mask, state, keys, values, key_mask,
                                embedding, embedding_bias, keep_prob)

    n_steps, batch_size = key_mask.shape
    seq = None
    initial_inputs = T.zeros((batch_size, target_embedding.shape[1]), "float32")
    initial_mask = T.ones((batch_size, 1), "float32")
    outputs_info = [initial_inputs, initial_mask, initial_state, None, None]
    non_seq = [keys, values, key_mask, target_embedding, target_bias]
    # decode for at most 2 * len_src steps
    inputs, mask, states, contexts, probs = ops.scan(
        infer_step, seq, outputs_info, non_seq, n_steps=n_steps * 2)
    # drop the trailing singleton dimension, then shift the per-step mask down
    # by one position and force the first step to be unmasked
    mask = T.reshape(mask, mask.shape[:-1])
    mask = T.roll(mask, 1, 0)
    mask = T.set_subtensor(mask[0, :], initial_mask[:, 0])
    # (step, batch, n_voc) -> (step * batch, n_voc)
    probs = T.reshape(probs, (probs.shape[0] * probs.shape[1], probs.shape[2]))
    return states, contexts, probs, mask

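# Illustration, not part of the original code: the reshape/roll/set_subtensor
# sequence in infer() shifts the per-step mask down by one row so that the
# first step is always unmasked. A minimal NumPy sketch of the same shift,
# assuming a mask that turns to zero once a sequence has finished
# (the example values are made up):
#
#   import numpy as np
#   mask = np.array([[1.], [1.], [0.], [0.]])   # (step, batch=1)
#   shifted = np.roll(mask, 1, axis=0)          # [[0.], [1.], [1.], [0.]]
#   shifted[0, :] = 1.0                         # [[1.], [1.], [1.], [0.]]
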
def decoder(cell, inputs, mask, initial_state, attention_states, attention_mask,
            attn_size, dtype=None, scope=None):
    input_size, states_size = cell.input_size
    output_size = cell.output_size
    dtype = dtype or inputs.dtype

    # non-sequences must be passed to scan explicitly; do NOT capture them
    # through the closure
    def loop_fn(inputs, mask, state, attn_states, attn_mask, m_states):
        mask = mask[:, None]
        alpha = attention(state, m_states, output_size, attn_size, attn_mask)
        context = theano.tensor.sum(alpha[:, :, None] * attn_states, 0)
        output, next_state = cell([inputs, context], state)
        next_state = (1.0 - mask) * state + mask * next_state
        return [next_state, context]

    with ops.variable_scope(scope or "decoder"):
        mapped_states = map_attention_states(attention_states, states_size,
                                             attn_size)
        seq = [inputs, mask]
        outputs_info = [initial_state, None]
        non_seq = [attention_states, attention_mask, mapped_states]
        (states, contexts) = ops.scan(loop_fn, seq, outputs_info, non_seq)

    return states, contexts

def decoder(cell, inputs, mask, initial_state, attention_states, attention_mask,
            attn_size, mapped_states=None, dtype=None, scope=None):
    input_size, states_size = cell.input_size
    output_size = cell.output_size
    dtype = dtype or inputs.dtype
    att_size = [output_size, states_size, attn_size]

    def loop_fn(inputs, mask, state, attn_states, attn_mask, mapped_states):
        mask = mask[:, None]
        alpha = attention(state, None, mapped_states, attn_mask, att_size)
        context = theano.tensor.sum(alpha[:, :, None] * attn_states, 0)
        output, next_state = cell([inputs, context], state)
        next_state = (1.0 - mask) * state + mask * next_state
        return [next_state, context]

    with ops.variable_scope(scope or "decoder"):
        if mapped_states is None:
            mapped_states = attention(None, attention_states, None, None,
                                      att_size)
        seq = [inputs, mask]
        outputs_info = [initial_state, None]
        non_seq = [attention_states, attention_mask, mapped_states]
        (states, contexts) = ops.scan(loop_fn, seq, outputs_info, non_seq)

    return states, contexts

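# Hypothetical usage sketch, not part of the original code. The cell
# constructor, tensor names and sizes below are assumptions about the
# surrounding nn/ops modules; they only illustrate that decoder() expects
# time-major tensors of shape (steps, batch, dim):
#
#   cell = nn.rnn_cell.gru_cell([emb_size + ann_size, hid_size])  # assumed API
#   states, contexts = decoder(cell,
#                              y_emb,        # (tgt_len, batch, emb_size)
#                              y_mask,       # (tgt_len, batch)
#                              init_state,   # (batch, hid_size)
#                              annotations,  # (src_len, batch, ann_size)
#                              src_mask,     # (src_len, batch)
#                              attn_size=hid_size)
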
def scan(self, y_emb, mask, cwkeys, cpkeys, fwkeys, fpkeys, key_mask, values,
         initial_state):
    """
    Build the model graph by scanning ``self.step`` over the target sequence.

    :return: states, contexts, y_pos_states, posscores
    """
    seq = [y_emb, mask]
    outputs_info = [initial_state, None, None, None]
    non_seq = [cwkeys, cpkeys, fwkeys, fpkeys, values, key_mask]
    (states, contexts, y_pos_states, posscores) = ops.scan(self.step, seq,
                                                           outputs_info,
                                                           non_seq)
    return states, contexts, y_pos_states, posscores

def scan(self, y_emb, mask, keys, key_mask, values, initial_state):
    """
    Build the model graph by scanning ``self.step`` over the target sequence.

    :return: states, contexts
    """
    seq = [y_emb, mask]
    outputs_info = [initial_state, None]
    non_seq = keys + values + key_mask
    (states, contexts) = ops.scan(self.step, seq, outputs_info, non_seq)
    return states, contexts

def gru_encoder(cell, inputs, mask, initial_state=None, dtype=None):
    # if not isinstance(cell, nn.rnn_cell.gru_cell):
    #     raise ValueError("only gru_cell is supported")

    if isinstance(inputs, (list, tuple)):
        raise ValueError("inputs must be a tensor, not list or tuple")

    def loop_fn(inputs, mask, state):
        mask = mask[:, None]
        output, next_state = cell(inputs, state)
        next_state = (1.0 - mask) * state + mask * next_state
        return next_state

    if initial_state is None:
        batch = inputs.shape[1]
        state_size = cell.state_size
        initial_state = theano.tensor.zeros([batch, state_size], dtype=dtype)

    seq = [inputs, mask]
    states = ops.scan(loop_fn, seq, [initial_state])

    return states

def gru_encoder(cell, inputs, mask, initial_state=None, dtype=None):
    if not isinstance(cell, nn.rnn_cell.rnn_cell):
        raise ValueError("cell is not an instance of rnn_cell")
    if isinstance(inputs, (list, tuple)):
        raise ValueError("inputs must be a tensor, not list or tuple")

    def loop_fn(inputs, mask, state):
        mask = mask[:, None]
        output, next_state = cell(inputs, state)
        next_state = (1.0 - mask) * state + mask * next_state
        return next_state

    if initial_state is None:
        batch = inputs.shape[1]
        state_size = cell.state_size
        initial_state = theano.tensor.zeros([batch, state_size], dtype=dtype)

    seq = [inputs, mask]
    # ops.scan is a wrapper around theano.scan that automatically registers the
    # scan updates with the optimizer; set return_updates=True to also get the
    # updates back, as Theano's scan does
    states = ops.scan(loop_fn, seq, [initial_state])

    return states

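# Illustration, not part of the original code: the masked update used in
# loop_fn keeps the previous state for finished or padded positions. A minimal,
# self-contained NumPy sketch of the same rule (the example values are made up):

def _masked_update_example():
    import numpy as np

    state = np.array([[0.5, 0.5],
                      [0.2, 0.2]])      # previous states, (batch=2, state_size=2)
    candidate = np.array([[0.9, 0.9],
                          [0.7, 0.7]])  # states proposed by the cell
    mask = np.array([[1.0],
                     [0.0]])            # second sequence is padding at this step

    next_state = (1.0 - mask) * state + mask * candidate
    # row 0 takes the candidate state, row 1 keeps its old state
    return next_state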