    def infer(self, keys, key_mask, values, initial_state, target_embedding,
              target_bias, keep_prob):
        def infer_step(y_prev, mask, state, keys, values, key_mask, embedding,
                       embedding_bias):
            return self._infer_step(y_prev, mask, state, keys, values,
                                    key_mask, embedding, embedding_bias,
                                    keep_prob)

        n_steps, batch_size = key_mask.shape
        seq = None
        initial_inputs = T.zeros((batch_size, target_embedding.shape[1]),
                                 "float32")
        initial_mask = T.ones((batch_size, 1), "float32")
        outputs_info = [
            initial_inputs, initial_mask, initial_state, None, None
        ]
        non_seq = [keys, values, key_mask, target_embedding, target_bias]

        # maximum output length is twice the source length (n_steps * 2)
        inputs, mask, states, contexts, probs = ops.scan(infer_step,
                                                         seq,
                                                         outputs_info,
                                                         non_seq,
                                                         n_steps=n_steps * 2)
        mask = T.reshape(mask, mask.shape[:-1])
        mask = T.roll(mask, 1, 0)
        mask = T.set_subtensor(mask[0, :], initial_mask[:, 0])
        # (step, batch, n_voc)->(step*batch, n_voc)
        probs = T.reshape(probs,
                          (probs.shape[0] * probs.shape[1], probs.shape[2]))
        return states, contexts, probs, mask
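A hypothetical readout sketch for the values returned above (the helper name is not part of the project; it assumes the graph is compiled with theano.function and evaluated to numpy arrays): probs comes back flattened to (step * batch, n_voc) and mask as (step, batch), so greedy decoding is a reshape, an argmax and a mask-based truncation.

import numpy as np

def greedy_readout(probs, mask):
    # probs: (step * batch, n_voc) as returned by infer(); mask: (step, batch)
    n_steps, batch_size = mask.shape
    tokens = probs.reshape(n_steps, batch_size, -1).argmax(axis=-1)
    # mask is 1.0 for valid steps, so its column sums give per-sentence lengths
    lengths = mask.sum(axis=0).astype(int)
    return [tokens[:lengths[i], i].tolist() for i in range(batch_size)]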
Example #2
File: rnnsearch.py  Project: Playinf/nmt
def decoder(cell, inputs, mask, initial_state, attention_states,
            attention_mask, attn_size, dtype=None, scope=None):
    input_size, states_size = cell.input_size
    output_size = cell.output_size
    dtype = dtype or inputs.dtype

    # non-sequences must be passed to scan explicitly, DO NOT capture them in
    # a closure (see the sketch after this example)
    def loop_fn(inputs, mask, state, attn_states, attn_mask, m_states):
        mask = mask[:, None]
        alpha = attention(state, m_states, output_size, attn_size, attn_mask)
        context = theano.tensor.sum(alpha[:, :, None] * attn_states, 0)
        output, next_state = cell([inputs, context], state)
        next_state = (1.0 - mask) * state + mask * next_state

        return [next_state, context]

    with ops.variable_scope(scope or "decoder"):
        mapped_states = map_attention_states(attention_states, states_size,
                                             attn_size)
        seq = [inputs, mask]
        outputs_info = [initial_state, None]
        non_seq = [attention_states, attention_mask, mapped_states]
        (states, contexts) = ops.scan(loop_fn, seq, outputs_info, non_seq)

    return states, contexts
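The "DO NOT use closure" comment refers to theano.scan's non_sequences mechanism: values that stay constant across steps are handed to the step function as explicit trailing arguments instead of being captured from the enclosing scope. A minimal sketch with plain theano.scan (ops.scan in this project is assumed to keep the same argument order: sequences, then recurrent outputs, then non-sequences):

import theano
import theano.tensor as T

x = T.tensor3("x")      # (steps, batch, dim)
W = T.matrix("W")       # (dim, dim), constant across steps

def step(x_t, h_prev, W):
    # W arrives as an explicit non-sequence argument rather than via closure,
    # so it is a declared input of the inner scan graph
    return T.tanh(T.dot(x_t, W) + h_prev)

h0 = T.zeros((x.shape[1], W.shape[1]))
states, updates = theano.scan(step,
                              sequences=[x],
                              outputs_info=[h0],
                              non_sequences=[W])
f = theano.function([x, W], states, updates=updates)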
Example #3
def decoder(cell,
            inputs,
            mask,
            initial_state,
            attention_states,
            attention_mask,
            attn_size,
            mapped_states=None,
            dtype=None,
            scope=None):
    input_size, states_size = cell.input_size

    output_size = cell.output_size
    dtype = dtype or inputs.dtype
    att_size = [output_size, states_size, attn_size]

    def loop_fn(inputs, mask, state, attn_states, attn_mask, mapped_states):
        mask = mask[:, None]
        alpha = attention(state, None, mapped_states, attn_mask, att_size)
        context = theano.tensor.sum(alpha[:, :, None] * attn_states, 0)
        output, next_state = cell([inputs, context], state)
        next_state = (1.0 - mask) * state + mask * next_state

        return [next_state, context]

    with ops.variable_scope(scope or "decoder"):
        if mapped_states is None:
            mapped_states = attention(None, attention_states, None, None,
                                      att_size)
        seq = [inputs, mask]
        outputs_info = [initial_state, None]
        non_seq = [attention_states, attention_mask, mapped_states]
        (states, contexts) = ops.scan(loop_fn, seq, outputs_info, non_seq)

    return states, contexts
Example #4
    def scan(self, y_emb, mask, cwkeys, cpkeys, fwkeys, fpkeys, key_mask,
             values, initial_state):
        """Scan self.step over the target embedding sequence.

        :return: states, contexts, y_pos_states, posscores
        """
        seq = [y_emb, mask]
        outputs_info = [initial_state, None, None, None]
        non_seq = [cwkeys, cpkeys, fwkeys, fpkeys, values, key_mask]
        (states, contexts, y_pos_states, posscores) = ops.scan(
            self.step, seq, outputs_info, non_seq)

        return states, contexts, y_pos_states, posscores
    def scan(self, y_emb, mask, keys, key_mask, values, initial_state):
        """
        build model
        :return: 
        """
        seq = [y_emb, mask]
        outputs_info = [initial_state, None]
        non_seq = keys + values + key_mask
        (states, contexts) = ops.scan(self.step, seq, outputs_info, non_seq)

        return states, contexts
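In both scan wrappers above, outputs_info mirrors the step function's return values: entries that carry an initial value (here initial_state) are fed back as recurrent state, while None entries are only collected into the output sequence. A minimal sketch of that convention with plain theano.scan (ops.scan is assumed to follow it):

import theano
import theano.tensor as T

x = T.matrix("x")       # (steps, batch)

def step(x_t, acc):
    new_acc = acc + x_t     # recurrent: has an initial value in outputs_info
    squared = x_t ** 2      # collected only: its outputs_info entry is None
    return new_acc, squared

acc0 = T.zeros_like(x[0])
(acc_seq, sq_seq), updates = theano.scan(step,
                                         sequences=[x],
                                         outputs_info=[acc0, None])
f = theano.function([x], [acc_seq, sq_seq], updates=updates)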
Example #6
File: rnnsearch.py  Project: Playinf/nmt
def gru_encoder(cell, inputs, mask, initial_state=None, dtype=None):
    #if not isinstance(cell, nn.rnn_cell.gru_cell):
    #    raise ValueError("only gru_cell is supported")

    if isinstance(inputs, (list, tuple)):
        raise ValueError("inputs must be a tensor, not list or tuple")

    def loop_fn(inputs, mask, state):
        mask = mask[:, None]
        output, next_state = cell(inputs, state)
        next_state = (1.0 - mask) * state + mask * next_state
        return next_state

    if initial_state is None:
        batch = inputs.shape[1]
        state_size = cell.state_size
        initial_state = theano.tensor.zeros([batch, state_size], dtype=dtype)

    seq = [inputs, mask]
    states = ops.scan(loop_fn, seq, [initial_state])

    return states
Example #7
def gru_encoder(cell, inputs, mask, initial_state=None, dtype=None):
    if not isinstance(cell, nn.rnn_cell.rnn_cell):
        raise ValueError("cell is not an instance of rnn_cell")

    if isinstance(inputs, (list, tuple)):
        raise ValueError("inputs must be a tensor, not list or tuple")

    def loop_fn(inputs, mask, state):
        mask = mask[:, None]
        output, next_state = cell(inputs, state)
        next_state = (1.0 - mask) * state + mask * next_state
        return next_state

    if initial_state is None:
        batch = inputs.shape[1]
        state_size = cell.state_size
        initial_state = theano.tensor.zeros([batch, state_size], dtype=dtype)

    seq = [inputs, mask]
    # ops.scan is a wrapper around theano.scan that automatically adds the
    # scan updates to the optimizer; set return_updates=True to get the
    # Theano-style (outputs, updates) behavior instead
    states = ops.scan(loop_fn, seq, [initial_state])

    return states
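Following the comment above, a hedged illustration of the difference: plain theano.scan returns an (outputs, updates) pair and the updates must be passed to theano.function by hand, whereas ops.scan as described here registers them automatically unless return_updates=True is requested.

import theano
import theano.tensor as T

x = T.tensor3("x")      # (steps, batch, dim)
m = T.matrix("m")       # (steps, batch)
h0 = T.zeros((x.shape[1], x.shape[2]))

def loop_fn(x_t, m_t, h_prev):
    # stand-in for cell(inputs, state): a plain tanh recurrence
    h_next = T.tanh(x_t + h_prev)
    m_t = m_t[:, None]
    return (1.0 - m_t) * h_prev + m_t * h_next

# plain theano.scan: thread the updates through yourself
states, updates = theano.scan(loop_fn, sequences=[x, m], outputs_info=[h0])
encode = theano.function([x, m], states, updates=updates)

# with ops.scan (per the comment above) updates are collected automatically;
# request the Theano-style pair explicitly if needed:
# states, updates = ops.scan(loop_fn, [x, m], [h0], return_updates=True)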