Example #1
def _beam_search_one_step(_step_score, _state, output_score, number_of_samples, beam_size, state_dim, output_score_list, prev_output_index_list, output_label_id_list, embedding, _tensors_to_debug=None):
    output_dim = K.shape(_step_score)[1]    # nb_samples*beam_size, output_dim
    # accumulate score
    _score = K.expand_dims(output_score) + K.log(_step_score)    # nb_samples*beam_size, output_dim
    # select top output labels for each sample
    _score = K.reshape(_score, shape=K.pack([number_of_samples, beam_size * output_dim]))    # nb_samples, beam_size * output_dim
    _top_score, _top_indice = top_k(_score, beam_size)    # -1, beam_size
    # update accumulated output score
    output_score_list.append(_top_score)
    output_score = K.reshape(_top_score, shape=(-1,))    # nb_samples * beam_size

    # update output label and previous output index
    # _top_indice = beam_id * output_dim + output_label_id
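    # e.g. (hypothetical numbers): with output_dim = 10, a flat index of 23 decomposes into beam_id = 2 and output_label_id = 3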
    prev_output_index = _top_indice // output_dim
    prev_output_index_list.append(prev_output_index)
    output_label_id = _top_indice - prev_output_index * output_dim
    output_label_id_list.append(output_label_id)
    # update current input and current_state
    current_input = embedding(K.reshape(output_label_id, shape=(-1,)))    # nb_samples*beam_size, input_dim
    # _state : nb_samples*beam_size, state_dim
    # first reshape _state to nb_samples, beam_size, state_dim
    # then gather by sample to get a tensor with the shape: nb_samples, beam_size, state_dim
    # finally reshape to nb_samples*beam_size, state_dim
    # note that prev_output_index has a shape of (-1, beam_size), so it should be reshaped to (nb_samples, beam_size) before calling gather_by_sample
    current_state = K.reshape(
        gather_by_sample(
            K.reshape(_state, shape=K.pack([number_of_samples, beam_size, state_dim])),
            K.reshape(prev_output_index, shape=K.pack([number_of_samples, beam_size]))),
        shape=K.pack([number_of_samples * beam_size, state_dim]))
    if _tensors_to_debug is not None:
        _tensors_to_debug += [_score, _top_score, _top_indice]
    return output_score, current_input, current_state
Example #2
def accumulate(attend_function, inputs, input_length,
                                mask=None, return_probabilities=False):
    '''Get the running attention over a sequence.

    Given a 3-dim tensor whose 1st dim is time (or not; whatever), calculate the running attended sum.
    In other words, at the first time step, you attend over only that item;
                    at the second time step, attend over the first two items;
                    at the third, the first three; and so on.

    This is basically a mod on Keras' RNN implementation.
    author: bcm
    '''

    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'

    axes = [1,0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    indices = list(range(input_length))

    if mask is not None:
        if mask.ndim == ndim - 1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)

    successive_outputs = []
    successive_pvecs = []
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim-1):
        uncover_indices = K.expand_dims(uncover_indices)
    make_subset = lambda i, X: K.switch(uncover_indices <= i, X, uncover_mask)
    for i in indices:
        inputs_i = make_subset(i, inputs)
        if mask is not None:
            mask_i = make_subset(i, mask)
            output = attend_function(inputs_i, mask_i)    # this should not output the time dimension; it should be marginalized over.
        else:
            output = attend_function(inputs_i)    # this should not output the time dimension; it should be marginalized over.
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)
    outputs = K.pack(successive_outputs)
    K.squeeze(outputs, -1)
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        K.squeeze(out_pvecs, -1)
        out_pvecs = out_pvecs.dimshuffle(axes)
        outputs = [outputs, out_pvecs]

    return outputs
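# Hypothetical usage sketch for `accumulate` above (a sketch only, assuming the
# Theano backend of Keras 1.x, since the function relies on `.ndim`/`.dimshuffle`;
# `mean_attend` is a stand-in attention function, not part of the original code).
import numpy as np
from keras import backend as K

def mean_attend(x_subset, mask=None):
    # x_subset arrives time-major: (time, nb_samples, dim); reduce over time -> (nb_samples, dim)
    return K.mean(x_subset, axis=0)

inputs = K.placeholder(shape=(4, 5, 8))    # (nb_samples, time, dim)
running = accumulate(mean_attend, inputs, input_length=5)
f = K.function([inputs], [running])
print(f([np.random.rand(4, 5, 8).astype('float32')])[0].shape)    # (4, 5, 8)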
Example #3
        def A_network_output(x):
            # The input of this layer is [L, mu, a] in concatenated form. We first split
            # those up.
            idx = 0
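            # (n*n + n)/2 is the number of entries in the lower triangle of an n x n matrix, e.g. 3 when nb_actions = 2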
            L_flat = x[:, idx:idx + (self.nb_actions * self.nb_actions + self.nb_actions) / 2]
            idx += (self.nb_actions * self.nb_actions + self.nb_actions) / 2
            mu = x[:, idx:idx + self.nb_actions]
            idx += self.nb_actions
            a = x[:, idx:idx + self.nb_actions]
            idx += self.nb_actions

            # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
            Ls = []
            LTs = []
            for i in xrange(self.batch_size):
                L = K.zeros((self.nb_actions, self.nb_actions))
                L = T.set_subtensor(L[np.tril_indices(self.nb_actions)], L_flat[i, :])
                diag = K.exp(T.diag(L))
                L = T.set_subtensor(L[np.diag_indices(self.nb_actions)], diag)
                Ls.append(L)
                LTs.append(K.transpose(L))
                # TODO: diagonal elements exp
            L = K.pack(Ls)
            LT = K.pack(LTs)
            P = K.batch_dot(L, LT, axes=(1, 2))
            assert K.ndim(P) == 3

            # Combine a, mu and P into a scalar (over the batches).
            A = -.5 * K.batch_dot(K.batch_dot(a - mu, P, axes=(1, 2)), a - mu, axes=1)
            assert K.ndim(A) == 2
            return A
Example #4
def beam_search(initial_input, initial_state, constant_context, embedding, step_func, beam_size=1, max_length=20):
    '''Returns a lattice with time steps = max_length and beam size = beam_size; each node of the lattice at time step t has a parent node at time step t-1, an accumulated score, and a label as its output.

    # Parameters
    ----------
    initial_input : a tensor with a shape of (nb_samples, input_dim), representing the initial (already embedded) input used by the step function
    initial_state: a tensor with a shape of (nb_samples, state_dim), representing the initial state used by the step function
    constant_context: a tensor with a shape of (nb_samples, context_dim), representing the context tensor used by the step function
    embedding: an embedding layer that maps input/output labels to their embedding
    step_func: in a form like step_func(current_input, current_state, constant_context), which returns a score tensor and a tensor representing the updated state
    beam_size: beam size
    max_length: max time steps to expand

    # Returns
    ------
    output_label_id_tensor: a tensor with a shape of (max_length, nb_samples, beam_size) of type int32, representing the labels of the nodes
    prev_output_index_tensor: a tensor with a shape of (max_length, nb_samples, beam_size) of type int32, representing the parents' indices (in the range 0..beam_size-1) of the nodes
    output_score_tensor: a tensor with a shape of (max_length, nb_samples, beam_size) of type float32, representing the accumulated scores of the nodes
    '''
    number_of_samples = K.shape(initial_input)[0]
    state_dim = K.shape(initial_state)[K.ndim(initial_state) - 1]
    current_input = repeat(initial_input, beam_size)    # shape: nb_samples*beam_size, input_dim
    current_state = repeat(initial_state, beam_size)    # shape: nb_samples*beam_size, state_dim
    constant_context = repeat(constant_context, beam_size)    # shape: nb_samples*beam_size,context_input_dim
    output_score = K.sum(K.zeros_like(current_state), -1)    # shape: nb_samples*beam_size

    output_score_list = []    # nb_samples, beam_size
    output_label_id_list = []
    prev_output_index_list = []    # the index of candidate from which current label id is generated

    for _ in xrange(max_length):
        _step_score, _state = step_func(current_input, current_state, constant_context)    # nb_samples*beam_size , output_dim
        output_score, current_input, current_state = _beam_search_one_step(_step_score, _state, output_score, number_of_samples, beam_size, state_dim, output_score_list, prev_output_index_list, output_label_id_list, embedding)
    # returning a list instead of a tuple of tensors so that keras will know multiple output tensors are generated
    return [K.pack(output_label_id_list), K.pack(prev_output_index_list), K.pack(output_score_list)]
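# Hypothetical wiring for `beam_search` (a sketch only; it assumes the helpers in
# this collection -- repeat, _beam_search_one_step, top_k, gather_by_sample -- are
# in scope and that the backend provides K.pack). The toy weights and step function
# are illustrative; `initial_input` is treated as the already embedded start token,
# matching the shape comments in the function body.
import numpy as np
from keras import backend as K

nb_samples, input_dim, state_dim, context_dim, vocab = 2, 7, 6, 5, 10

W_emb = K.variable(np.random.rand(vocab, input_dim))
embedding = lambda label_ids: K.gather(W_emb, label_ids)    # label id -> embedding vector

W_in = K.variable(np.random.rand(input_dim, state_dim))
W_ctx = K.variable(np.random.rand(context_dim, state_dim))
W_out = K.variable(np.random.rand(state_dim, vocab))

def step_func(current_input, current_state, constant_context):
    # toy decoder step: mix input, context and state, then score every output label
    new_state = K.tanh(K.dot(current_input, W_in) + K.dot(constant_context, W_ctx) + current_state)
    return K.softmax(K.dot(new_state, W_out)), new_state

initial_input = K.placeholder(shape=(nb_samples, input_dim))
initial_state = K.placeholder(shape=(nb_samples, state_dim))
constant_context = K.placeholder(shape=(nb_samples, context_dim))

labels, parents, scores = beam_search(initial_input, initial_state, constant_context,
                                      embedding, step_func, beam_size=3, max_length=4)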
Example #5
def repeat(x, n):
    x_shape = K.shape(x)
    x_ndim = K.ndim(x)
    # to 1D tensor
    x_tiled = K.tile(K.reshape(x, (-1,)), n)
    # re-shape to (n,...)
    x_tiled_shape = K.pack([n] + [x_shape[i] for i in range(x_ndim)])
    output = K.reshape(x_tiled, x_tiled_shape)
    pattern = [1, 0] + [i + 1 for i in range(1, x_ndim)]
    output = K.permute_dimensions(output, pattern)
    output_shape = K.pack([n * x_shape[0]] + [x_shape[i] for i in range(1, x_ndim)])
    return K.reshape(output, output_shape)
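# Hypothetical behavior sketch for `repeat` above (assumes a Keras 1.x backend
# providing K.pack/K.tile, as the snippet itself does): each sample is repeated
# n times along the first axis, with the copies kept adjacent.
import numpy as np
from keras import backend as K

x = K.variable(np.array([[1., 2., 3.],
                         [4., 5., 6.]]))    # shape (2, 3)
y = repeat(x, 2)                            # shape (4, 3)
print(K.eval(y))    # rows come out as x[0], x[0], x[1], x[1]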
Example #6
	def get_initial_states(self, x):
		M = K.zeros_like(x[:, 0, 0])  # (nb_samples,)
		M = K.pack([M] * self.nb_slots)  # (nb_slots, nb_samples)
		M = K.pack([M] * self.memory_size)  # (memory_size, nb_slots, nb_samples)
		M = K.permute_dimensions(M, (2, 1, 0))  # (nb_samples, nb_slots, memory_size)
		h = K.zeros_like(x[:, 0, 0])  # (nb_samples,)
		h = K.pack([h] * self.memory_size)  # (memory_size, nb_samples)
		h = K.permute_dimensions(h, (1, 0))  # (nb_samples, memory_size)
		w = K.zeros_like(x[:, 0, 0])  # (nb_samples,)
		w = K.pack([w] * self.nb_slots)  # (nb_slots, nb_samples)
		w = K.permute_dimensions(w, (1, 0))  # (nb_samples, nb_slots)
		states = [M, h, w]
		return states
Example #7
	def step(self, x, states):
		states = list(states)
		state_index = 0
		if self.decode:
			x = states[0]
			_x = x
			states = states[1:]
		for i in range(len(self.model.layers)):
			layer = self.model.layers[i]
			if self.readout and i == 0:
				if self.readout in ['add', True]:
					x += states[-1]
				elif self.readout == 'mul':
					x *= states[-1]
				elif self.readout == 'pack':
					x = K.pack([x, states[-1]])
				elif self.readout == 'readout_only':
					x = states[-1]
			if _isRNN(layer):
				if self.state_sync:
					x, new_states = layer._step(x, states[:len(layer.states)])
					states[:len(layer.states)] = new_states
				else:
					x, new_states = layer._step(x, states[state_index : state_index + len(layer.states)])
					states[state_index : state_index + len(layer.states)] = new_states
					state_index += len(layer.states)
			else:
				x = layer.call(x)
		if self.decode:
			states = [_x] + states
		if self.readout:
			states[-1] = x
		return x, states
Example #8
 def test_get_k_best_from_lattice(self):
     nb_samples = 2
     beam_size = 3
     time_steps = 2
     _tensors_to_debug = []
     output_label_id_list = [
         K.placeholder(shape=(nb_samples, beam_size), dtype='int32')
         for _ in range(time_steps)
     ]
     prev_output_index_list = [
         K.placeholder(shape=(nb_samples, beam_size), dtype='int32')
         for _ in range(time_steps)
     ]
     output_score_list = [
         K.placeholder(shape=(nb_samples, beam_size))
         for _ in range(time_steps)
     ]
     lattice = (K.pack(output_label_id_list),
                K.pack(prev_output_index_list), K.pack(output_score_list))
     output, output_score = get_k_best_from_lattice(
         lattice, k=2, eos=-1, _tensors_to_debug=_tensors_to_debug)
     f = K.function(inputs=output_label_id_list + prev_output_index_list +
                    output_score_list,
                    outputs=[output, output_score] + _tensors_to_debug)
     output_label_id_list_val = [[[3, 2, 1], [1, 3, -1]],
                                 [[2, 1, 3], [3, -1, -1]]]
     prev_output_index_list_val = [[[0, 0, 0], [0, 0, 0]],
                                   [[0, 1, 2], [2, 2, 1]]]
     output_score_list_val = [[[-0.1, -0.2, -0.3], [-0.25, -0.36, -0.45]],
                              [[-0.6, -0.5, -0.7], [-0.9, -1.2, -0.75]]]
     output_0 = [[2, 1], [3, 2]]
     output_1 = [[-1, 3], [3, -1]]
     output_val_ref = [output_0, output_1]  # nb_samples, k, time_steps
     output_score_val_ref = [[-0.5, -0.6], [-0.45, -0.75]]
     outputs_val = f(output_label_id_list_val + prev_output_index_list_val +
                     output_score_list_val)
     output_val, output_score_val = outputs_val[:2]
     self.assertTrue(
         np.sum(np.abs(output_score_val - output_score_val_ref)) < 0.001,
         "output_score_val")
     self.assertTrue(np.array_equal(output_val, output_val_ref),
                     "output_val")
Example #9
    def dot(x, y):
        '''Multiplies 2 tensors.
        When attempting to multiply an ND tensor
        with an ND tensor, reproduces the Theano behavior
        (e.g. (2, 3) . (4, 3, 5) = (2, 4, 5))
        '''
        ndim_x = K.ndim(x)
        ndim_y = K.ndim(y)

        if ndim_x is not None and (ndim_x > 2 or ndim_y > 2):
            x_shape = tf.shape(x)
            y_shape = tf.shape(y)
            y_permute_dim = list(range(ndim_y))
            y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim
            xt = tf.reshape(x, K.pack([-1, x_shape[ndim_x - 1]]))
            yt = tf.reshape(tf.transpose(y, perm=y_permute_dim), K.pack([y_shape[ndim_y - 2], -1]))
            target_shape = [x_shape[i] for i in range(ndim_x - 1)] + [y_shape[i] for i in range(ndim_y - 2)] + [y_shape[ndim_y - 1]]
            return tf.reshape(tf.matmul(xt, yt), K.pack(target_shape))
        out = tf.matmul(x, y)
        return out
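# Hypothetical usage sketch for `dot` above (assumes the TensorFlow backend, since
# the function calls tf directly, and that `dot` is reachable at module scope). It
# checks the shape behavior stated in the docstring: (2, 3) . (4, 3, 5) -> (2, 4, 5).
import numpy as np
from keras import backend as K

x = K.variable(np.random.rand(2, 3))
y = K.variable(np.random.rand(4, 3, 5))
z = dot(x, y)
print(K.eval(K.shape(z)))    # [2 4 5]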
Example #10
def accumulate(attend_function,
               inputs,
               input_length,
               go_backwards=False,
               mask=None):
    '''Get the running attention over a sequence.

    Given a 3-dim tensor whose 1st dim is time (or not; whatever), calculate the running attended sum.
    In other words, at the first time step, you attend over only that item;
                    at the second time step, attend over the first two items;
                    at the third, the first three; and so on.

    This is basically a mod on Keras' RNN implementation.
    author: bcm
    '''

    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'

    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    indices = list(range(input_length))
    if go_backwards:
        indices = indices[::-1]

    if mask is not None:
        if mask.ndim == ndim - 1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)

    successive_outputs = []
    for i in indices:
        if mask is not None:
            output = attend_function(
                inputs[:i + 1], mask[:i + 1]
            )  # this should not output the time dimension; it should be marginalized over.
        else:
            output = attend_function(
                inputs[:i + 1]
            )  # this should not output the time dimension; it should be marginalized over.
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)
    outputs = K.pack(successive_outputs)
    K.squeeze(outputs, -1)
    # current assumption. modify if that changes.
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)
    return outputs
Example #11
 def step(self, x, states):
     states = list(states)
     state_index = 0
     if self.decode:
         x = states[0]
         _x = x
         states = states[1:]
     for i in range(len(self.model.layers)):
         layer = self.model.layers[i]
         if self.readout and (
             (i == 0 and self.readout != 'call') or
             (self.readout == 'call' and hasattr(layer, 'receive_readout')
              and layer.receive_readout)):
             readout = states[-1]
             if self._truth_tensor is not None:
                 slices = [
                     slice(None),
                     states[-2][0] - K.switch(states[-2][0], 1, 0)
                 ] + [slice(None)] * (K.ndim(self._truth_tensor) - 2)
                 readout = K.in_train_phase(
                     K.switch(states[-2][0], self._truth_tensor[slices],
                              readout), readout)
             if self.readout in ['add', True]:
                 x += readout
             elif self.readout == 'mul':
                 x *= readout
             elif self.readout == 'pack':
                 x = K.pack([x, readout])
             elif self.readout == 'readout_only':
                 x = readout
             elif self.readout == 'call':
                 x = [x, readout]
         if _isRNN(layer):
             if self.state_sync:
                 x, new_states = layer._step(x, states[:len(layer.states)])
                 states[:len(layer.states)] = new_states
             else:
                 x, new_states = layer._step(
                     x, states[state_index:state_index + len(layer.states)])
                 states[state_index:state_index +
                        len(layer.states)] = new_states
                 state_index += len(layer.states)
         else:
             x = layer.call(x)
     if self.decode:
         states = [_x] + states
     if self.readout:
         if self._truth_tensor is not None:
             states[-2] += 1
         states[-1] = x
     return x, states
Example #12
 def call(self, x, mask=None):
     input_vector = x[0]
     target_classes = x[1]
     nb_req_classes = self.input_spec[1].shape[1]
     if nb_req_classes is None:
         nb_req_classes = K.shape(target_classes)
     if K.dtype(target_classes) != 'int32':
         target_classes = K.cast(target_classes, 'int32')
     if self.mode == 0:
         # One giant matrix mul
         input_dim = self.input_spec[0].shape[1]
         nb_req_classes = self.input_spec[1].shape[1]
         path_lengths = map(len, self.paths)
         huffman_codes = K.variable(np.array(self.huffman_codes))
         req_nodes = K.gather(self.class_path_map, target_classes)
         req_W = K.gather(self.W, req_nodes)
         y = K.batch_dot(input_vector, req_W, axes=(1, 3))
         if self.bias:
             req_b = K.gather(self.b, req_nodes)
             y += req_b
         y = K.sigmoid(y[:, :, :, 0])
         req_huffman_codes = K.gather(huffman_codes, target_classes)
         return K.prod(req_huffman_codes + y - 2 * req_huffman_codes * y,
                       axis=-1)  # Thug life
     elif self.mode == 1:
         # Many tiny matrix muls
         probs = []
         for i in range(len(self.paths)):
             huffman_code = self.huffman_codes[i]
             path = self.paths[i]
             prob = 1.
             for j in range(len(path)):
                 node = path[j]
                 node_index = self.node_indices[node]
                 p = K.dot(input_vector, self.W[node_index, :, :])[:, 0]
                 if self.bias:
                     p += self.b[node_index, :][0]
                 h = huffman_code[j]
                 p = K.sigmoid(p)
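                 # with h in {0, 1}: h + p - 2*p*h equals p when h == 0 and 1 - p when h == 1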
                 prob *= h + p - 2 * p * h
             probs += [prob]
         probs = K.pack(probs)
         req_probs = K.gather(probs, target_classes)
         req_probs = K.permute_dimensions(req_probs, (0, 2, 1))
         req_probs = K.reshape(req_probs, (-1, nb_req_classes))
         batch_size = K.shape(input_vector)[0]
         indices = arange(batch_size * batch_size, batch_size + 1)
         req_probs = K.gather(req_probs, indices)
         return req_probs
Example #13
def get_k_best_from_lattice(lattice, k=1, eos=None, _tensors_to_debug=None):
    '''Selects the k best paths from a lattice, in descending order of their scores

    # Parameters
    ----------
    lattice : a triple consisting of output_label_id_tensor, prev_output_index_tensor and output_score_tensor. This lattice is generated by calling beam_search.
    k: the number of paths to select from the lattice
    eos: if not None, it is the id of the label that represents the end of sequence

    # Returns
    ------
    sequence: a tensor of type int32 with a shape of (nb_samples, k, time_steps), representing the top-k best sequences
    sequence_score: a tensor of type float32 with a shape of (nb_samples, k), representing the scores of the top-k best sequences
    '''
    lattice = [unpack(_) for _ in lattice]
    for l in lattice:
        l.reverse()
    output_label_id_list, prev_output_index_list, output_score_list = lattice
    sequence_score, output_indice = top_k(output_score_list[0], k)    # shape: nb_samples, k
    if _tensors_to_debug is not None:
        _tensors_to_debug.append(sequence_score)
        _tensors_to_debug.append(output_indice)

    nb_samples = K.shape(sequence_score)[0]
    # fill sequence and update sequence_score
    sequence = []
    for cur_output_score, output_label_id, prev_output_index in zip(output_score_list, output_label_id_list, prev_output_index_list):
        sequence_score_candidate = K.reshape(gather_by_sample(cur_output_score, output_indice), shape=K.pack([nb_samples, k]))
        sequence.append(K.reshape(gather_by_sample(output_label_id, output_indice), shape=K.pack([nb_samples, k])))    # shape: (nb_samples, k); nb_samples may be dynamic (-1)
        if eos is not None and len(sequence) > 1:
            cond = K.equal(sequence[-1], eos)
            sequence_score = choose_by_cond(cond, sequence_score_candidate, sequence_score)
            if _tensors_to_debug is not None:
                _tensors_to_debug.append(cond)
                _tensors_to_debug.append(sequence_score_candidate)
                _tensors_to_debug.append(sequence_score)
        output_indice = gather_by_sample(prev_output_index, output_indice)
        if _tensors_to_debug is not None:
            _tensors_to_debug.append(output_indice)

    if eos is not None and len(sequence) > 1:
        sequence_score, output_indice = top_k(sequence_score, k)
        sequence = [gather_by_sample(_, output_indice) for _ in sequence]

    # reverse the sequence so that it runs from time step 0, 1, ...
    sequence.reverse()
    sequence = K.permute_dimensions(K.pack(sequence), (1, 2, 0))    # time_steps, nb_samples, k -> nb_samples, k, time_steps
    return sequence, sequence_score
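# Hypothetical end-to-end sketch (not from the original source): decode with
# `beam_search` and then extract the 2 best sequences per sample. It reuses the
# placeholder setup from the beam_search sketch above; `eos=0` is an arbitrary
# end-of-sequence label id chosen for illustration.
lattice = beam_search(initial_input, initial_state, constant_context,
                      embedding, step_func, beam_size=4, max_length=10)
best_sequences, best_scores = get_k_best_from_lattice(lattice, k=2, eos=0)
# best_sequences: (nb_samples, 2, 10) int32; best_scores: (nb_samples, 2) float32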
Example #14
    def gather_by_sample(x, indices):
        '''Performs a gather operation along the first dimension, i.e., ret[i] = gather(x[i], indices[i]).
        For example, when x is a matrix and indices is a vector, it selects one element from each row of x.
        Note that this is different from gather, which selects |indices| sub-tensors of rank ndim-1 (i.e., x[i], where i = indices[:::]) from x

        # Parameters
        ----------
        x : a tensor with a shape nb_samples, ...; its number of dimensions >= 2
        indices : a tensor of type int with a shape nb_samples, ...; its number of dimensions <= # of dimensions of x - 1

        # Returns
        ------
        a tensor with the shape of nb_samples, ..., where ret[i,:::,:::] = x[i, indices[i,:::], :::]; its number of dimensions = # dimensions of x + # dimensions of indices - 2
        '''
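        # Hypothetical illustration (not from the original source): with
        # x = [[10, 20, 30], [40, 50, 60]] and indices = [2, 0],
        # gather_by_sample(x, indices) picks x[0, 2] and x[1, 0] -> [30, 40].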
        y_list = []
        for x_i, i in zip(unpack(x), unpack(indices)):
            y_i = K.gather(x_i, i)
            y_list.append(y_i)
        return K.pack(y_list)
Example #15
    def gather_by_sample(x, indices):
        '''Performs a gather operation along the first dimension, i.e., ret[i] = gather(x[i], indices[i]).
        For example, when x is a matrix and indices is a vector, it selects one element from each row of x.
        Note that this is different from gather, which selects |indices| sub-tensors of rank ndim-1 (i.e., x[i], where i = indices[:::]) from x

        # Parameters
        ----------
        x : a tensor with a shape nb_samples, ...; its number of dimensions >= 2
        indices : a tensor of type int with a shape nb_samples, ...; its number of dimensions <= # of dimensions of x - 1

        # Returns
        ------
        a tensor with the shape of nb_samples, ..., where ret[i,:::,:::] = x[i, indices[i,:::], :::]; its number of dimensions = # dimensions of x + # dimensions of indices - 2
        '''
        x_shape = K.shape(x)
        nb_samples = x_shape[0]
        ones = tf.ones(shape=K.pack([nb_samples]), dtype='int32')
        elems = tf.scan(lambda prev, one: prev + one, ones, initializer=tf.constant(-1, dtype='int32'))
        def _step(prev, i):
            x_i = K.gather(x, i)
            indices_i = K.gather(indices, i)
            return K.gather(x_i, indices_i)
        return tf.scan(_step, elems, initializer=tf.zeros(shape=x_shape[1:], dtype=x.dtype))
Example #16
 def reverse(x):
     x_list = tf.unpack(x)
     x_list.reverse()
     return K.pack(x_list)
Example #17
def unroll_scan(fn,
                sequences=None,
                initial_values=None,
                non_sequences=None,
                n_steps=None,
                batch=False):
    """Limited reimplementation of theano.scan() by unrolling.

    Based on unroll_scan() from Lasagne.
    """
    sequences = _to_list(sequences)
    initial_values = _to_list(initial_values)
    non_sequences = _to_list(non_sequences)

    sequential_outputs = []
    previous = initial_values
    for i in range(n_steps):
        if not batch:
            args = [s[i] for s in sequences]
        else:
            args = [s[:, i] for s in sequences]
        args += previous + non_sequences
        outputs = _to_list(fn(*args))
        sequential_outputs.append(outputs)
        previous = outputs

    # Output formatting. sequential_outputs is now a list of lists, the
    # outer containing an item for each of the steps (n_steps in
    # total) and each of the inner containing the outputs of the step
    # function fn, i.e.
    #
    # [ [ step_1_out_1 step_1_out_2 ... step_1_out_o ]
    #   [ step_2_out_1 step_2_out_2 ... step_2_out_o ]
    #   ...
    #   [ step_n_out_1 step_n_out_2 ... step_n_out_o ] ]
    #
    # these must be reorganized into the theano.scan() order
    #
    # [ [ step_1_out_1 step_2_out_1 ... step_n_out_1 ]
    #   [ step_1_out_2 step_2_out_2 ... step_n_out_2 ]
    #   ...
    #   [ step_1_out_o step_2_out_o ... step_n_out_o ] ]
    #
    # i.e. from (n_steps, n_outputs) to (n_outputs, n_steps). Also,
    # the various step values for each output should be packed into
    # a tensor (instead of a list), giving [ out_1_steps, out_2_steps,
    # ... out_o_steps ].
    #
    # Then, if run in batch mode, each of the output tensors will have
    # shape (n_steps, batch_size, ...), which should be permuted into
    # (batch_size, n_steps, ...).
    #
    # Finally, following the model of theano.scan(), if there is only
    # a single output, return the corresponding tensor t instead of
    # a list [t] with a single element, and if there are no outputs,
    # return None instead of an empty list.

    # Reorganize and pack
    output_sequences = []
    n_outputs = len(sequential_outputs[0])
    for o in range(n_outputs):
        outs = [s[o] for s in sequential_outputs]
        output_sequences.append(K.pack(outs))

    # Permute if batchwise
    if batch:
        for o, s in enumerate(output_sequences):
            dim_indices = range(K.ndim(s))  # [0, 1, ...]
            pattern = [1, 0] + dim_indices[2:]  # [1, 0, ...]
            output_sequences[o] = K.permute_dimensions(s, pattern)

    # Remove list wrapping
    if len(output_sequences) == 0:
        output = None
    elif len(output_sequences) == 1:
        output = output_sequences[0]
    else:
        output = output_sequences

    return output, None  # None for updates dummy
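# Hypothetical usage sketch for `unroll_scan` (a sketch only; it assumes a
# Keras 1.x backend and that the module's `_to_list` helper wraps a single value
# in a list and turns None into an empty list).
import numpy as np
from keras import backend as K

def running_sum(x_t, acc):
    # one unrolled step: add the current sequence element to the accumulator
    return acc + x_t

seq = K.variable(np.arange(6, dtype='float32').reshape(3, 2))    # (n_steps, dim)
init = K.variable(np.zeros(2, dtype='float32'))
out, _ = unroll_scan(running_sum, sequences=seq, initial_values=init, n_steps=3)
print(K.eval(out))    # the three partial sums, stacked into shape (3, 2)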