예제 #1
0
def _mask_sequences_tensor(sequence,
                           sequence_length,
                           dtype=None,
                           time_major=False,
                           tensor_rank=2):
    """Masks out sequence entries that are beyond the respective sequence
    lengths. Masks along the time dimension.

    Args:
        sequence: A Tensor of sequence values.

            If `time_major=False` (default), this must be a Tensor of shape:
                `[batch_size, max_time, d_2, ..., d_rank]`, where the rank of
                the Tensor is specified with :attr:`tensor_rank`.

            If `time_major=True`, this must be a Tensor of shape:
                `[max_time, batch_size, d_2, ..., d_rank].`
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond
            the respective sequence lengths will be made zero.
        dtype (dtype): Type of :attr:`sequence`. If `None`, infer from
            :attr:`sequence` automatically.
        time_major (bool): The shape format of the inputs. If `True`,
            :attr:`sequence` must have shape
            `[max_time, batch_size, d_2, ..., d_rank]`.
            If `False` (default), :attr:`sequence` must have
            shape `[batch_size, max_time, d_2, ..., d_rank]`.
        tensor_rank (int): The number of dimensions of :attr:`sequence`.
            Must be >= 2. Default is 2, i.e., :attr:`sequence` is a 2D Tensor
            consisting of batch and time dimensions. `None` is treated as 2.

    Returns:
        The masked sequence, i.e., a Tensor of the same shape as
        :attr:`sequence` but with masked-out entries (set to zero).

    Raises:
        ValueError: If :attr:`tensor_rank` is smaller than 2.
    """
    if tensor_rank is None:
        tensor_rank = 2
    if tensor_rank < 2:
        # Rank 2 (batch + time) is the smallest valid rank, so the message
        # must say ">= 2" to agree with the check above.
        raise ValueError(
            "tensor_rank must be >= 2. Got tensor_rank = {}".format(
                tensor_rank))
    if time_major:
        # Work in batch-major layout; transposed back before returning.
        sequence = rnn._transpose_batch_time(sequence)
    max_time = tf.to_int32(tf.shape(sequence)[1])
    if dtype is None:
        dtype = sequence.dtype
    mask = tf.sequence_mask(tf.to_int32(sequence_length),
                            max_time,
                            dtype=dtype)
    # Append one trailing axis per extra dimension so the [batch, time] mask
    # broadcasts against the remaining dimensions of `sequence`.
    for _ in range(2, tensor_rank):
        mask = tf.expand_dims(mask, axis=-1)
    sequence = sequence * mask
    if time_major:
        sequence = rnn._transpose_batch_time(sequence)
    return sequence
예제 #2
0
    def decoder_p3(self, inputs, reuse, max_time, char_sequence_length):
        """Run the character decoder as a hand-unrolled LSTM via ``tf.nn.raw_rnn``.

        At every step the cell is fed the previous step's logits (zeros on the
        first step) concatenated with the context read from ``inputs`` at that
        step.

        Args:
            inputs: Object exposing ``read(time)``; only that method is used
                here (presumably a ``tf.TensorArray`` of per-step context
                vectors -- confirm against the caller).
            reuse: Forwarded to ``tf.variable_scope('decoder_p3', ...)``.
            max_time: Size of the TensorArray that collects the logits.
            char_sequence_length: Lengths compared against the step counter;
                an element is flagged finished once
                ``time >= char_sequence_length - 1``.

        Returns:
            The stacked per-step logits after ``_transpose_batch_time``.
        """
        context_ta = inputs
        logits_ta = tf.TensorArray(dtype=tf.float32, size=max_time, name='pred_char_array')

        lstm = tf.contrib.rnn.LSTMCell(self.decoder_p3_units)

        def step_fn(step, rnn_out, rnn_state, carried):
            # raw_rnn makes its first call with rnn_out == None (step 0).
            if rnn_out is None:
                state_out = lstm.zero_state(self.batch_size, tf.float32)
                blank_logits = tf.zeros(shape=[self.batch_size, self.dict_length], dtype=tf.float32)
                fed = tf.concat([blank_logits, context_ta.read(step)], axis=-1)
                # The loop state starts out as the empty logits array.
                carried_out = logits_ta
            else:
                state_out = rnn_state
                logits = tf.layers.dense(inputs=rnn_out, activation=None, units=self.dict_length)
                # The output seen at call `step` belongs to step - 1.
                carried_out = carried.write(step - 1, logits)
                fed = tf.concat([logits, context_ta.read(step)], axis=-1)
            done = (step >= char_sequence_length - 1)

            return (done, fed, state_out, rnn_out, carried_out)

        with tf.variable_scope('decoder_p3', reuse=reuse):
            _, _, final_ta = tf.nn.raw_rnn(lstm, step_fn)
            batch_major = _transpose_batch_time(final_ta.stack())
        return batch_major
예제 #3
0
    def vanilla_decoder(self, inputs, reuse):
        """Decode with a hand-unrolled LSTM whose extra input is constant.

        The first step feeds an all-zero vector of width
        ``dict_length + lat_word_dim``; every later step feeds the previous
        logits concatenated with ``inputs``.

        NOTE(review): ``max_time`` and ``sequence_length`` are neither
        parameters nor locals of this method; they must resolve from an
        enclosing or module scope -- confirm they exist where this is used.

        Args:
            inputs: Tensor concatenated (last axis) after the logits at every
                step past the first.
            reuse: Forwarded to ``tf.variable_scope('vanilla_decoder', ...)``.

        Returns:
            The stacked per-step logits after ``_transpose_batch_time``.
        """
        logits_ta = tf.TensorArray(dtype=tf.float32, size=max_time, name='pred_char_array')

        lstm = tf.contrib.rnn.LSTMCell(self.decoder_p3_units)

        def step_fn(step, rnn_out, rnn_state, carried):
            # raw_rnn makes its first call with rnn_out == None (step 0).
            if rnn_out is None:
                state_out = lstm.zero_state(self.batch_size, tf.float32)
                fed = tf.concat(
                    [tf.zeros(shape=[self.batch_size, self.dict_length + self.lat_word_dim],
                              dtype=tf.float32)],
                    axis=-1)
                # The loop state starts out as the empty logits array.
                carried_out = logits_ta
            else:
                state_out = rnn_state
                logits = tf.layers.dense(inputs=rnn_out, activation=None, units=self.dict_length)
                # The output seen at call `step` belongs to step - 1.
                carried_out = carried.write(step - 1, logits)
                fed = tf.concat([logits, inputs], axis=-1)
            done = (step >= sequence_length - 1)

            return (done, fed, state_out, rnn_out, carried_out)

        with tf.variable_scope('vanilla_decoder', reuse=reuse):
            _, _, final_ta = tf.nn.raw_rnn(lstm, step_fn)
            batch_major = _transpose_batch_time(final_ta.stack())
        return batch_major
예제 #4
0
    def __init__(
        self,
        inputs,
        sequence_length,
        time_major=False,
        is_training=False,
        name=None
        ):
        """Store the inputs and unstack them into a time-indexed TensorArray.

        Args:
            inputs: Input tensor. Treated as batch-major unless
                ``time_major`` is true, in which case it is used as-is.
            sequence_length: Anything convertible to a rank-1 tensor of
                per-example lengths.
            time_major: Whether ``inputs`` already has time as its leading
                dimension.
            is_training: Stored on the instance as ``is_training``.
            name: Accepted but not used by the code visible here.

        Raises:
            ValueError: If ``sequence_length`` is not a vector.
        """
        self._inputs = inputs
        self._sequence_length = tf.convert_to_tensor(sequence_length, name="sequence_length")
        if self._sequence_length.get_shape().ndims != 1:
            raise ValueError(
                "Expected sequence_length to be a vector, but received shape: %s" %
                self._sequence_length.get_shape()
                )
        self.time_major = time_major
        self.is_training = is_training
        # Take the batch size from the validated length vector rather than
        # from dimension 0 of `inputs`: with time_major=True that dimension
        # is max_time, not the batch size.
        self._batch_size = tf.size(self._sequence_length)

        if not time_major:
            inputs = rnn._transpose_batch_time(inputs)
        # From here on `inputs` is time-major; one time step of zeros is kept
        # as the padding input.
        self._zero_inputs = tf.zeros_like(inputs[0, :])

        self._input_tas = tf.TensorArray(
            dtype=inputs.dtype,
            size=tf.shape(inputs)[0],
            element_shape=inputs.get_shape()[1:]
            ).unstack(inputs)
예제 #5
0
def assemble_mem_view(final_loop_state, series_list, vocab_size):
    """Build one image-like tensor showing the series next to the memory views.

    Args:
        final_loop_state: Iterable of objects exposing ``stack()``; at least
            three are indexed here (0 and 1 as read/write weightings, 2 as
            usage vectors).
        series_list: Iterable of integer tensors, each one-hot encoded with
            depth ``vocab_size`` and tinted red, green or blue in turn.
        vocab_size: Depth of the one-hot encoding.

    Returns:
        The series images, usage vectors and memory images concatenated
        along axis 2, then transposed with permutation ``[0, 2, 1, 3]``.
    """
    # One single-channel mask per colour; each broadcasts over a
    # [..., 3] image.
    channel_masks = [
        [[[[1.0, 0.0, 0.0]]]],  # R
        [[[[0.0, 1.0, 0.0]]]],  # G
        [[[[0.0, 0.0, 1.0]]]],  # B
    ]

    views = tuple(_transpose_batch_time(ta.stack()) for ta in final_loop_state)

    # Inputs, outputs and targets: one-hot first, then colour each series.
    one_hot_series = [tf.one_hot(series, depth=vocab_size) for series in series_list]
    tinted = [
        tf.tile(tf.expand_dims(encoded, -1), [1, 1, 1, 3]) * channel_masks[idx % 3]
        for idx, encoded in enumerate(one_hot_series)
    ]
    series_imgs = tf.concat(tinted, 2)

    # Memory views: only index 0 of the third axis is shown; reads go to the
    # first channel, writes to the second, and the third channel stays zero.
    read_channel = tf.expand_dims(views[0][:, :, 0, :], -1)
    write_channel = tf.expand_dims(views[1][:, :, 0, :], -1)
    mem_imgs = tf.concat([read_channel, write_channel, tf.zeros_like(write_channel)], -1)
    usage_imgs = tf.tile(tf.expand_dims(views[2], -1), [1, 1, 1, 3])

    combined = tf.concat([series_imgs, usage_imgs, mem_imgs], 2)
    return tf.transpose(combined, [0, 2, 1, 3])
    
예제 #6
0
    def decoder2(self, inputs, reuse, hap_lens, units_lstm):
        """Decode one scalar prediction per step with a hand-unrolled LSTM.

        The first step feeds zeros of width ``dim_ancs + 1``; later steps
        feed ``inputs`` concatenated with the previous scalar logit.

        Args:
            inputs: Tensor concatenated (last axis) before the previous logit
                at every step past the first.
            reuse: Forwarded to ``tf.variable_scope('decoder_p2', ...)``.
            hap_lens: Lengths compared against the step counter to decide
                which elements are finished.
            units_lstm: Number of units of the LSTM cell.

        Returns:
            Sigmoid of the collected logits, reshaped to
            ``[batch_size, max_hap_len]``.
        """
        logits_ta = tf.TensorArray(dtype=tf.float32, size=self.max_hap_len)

        lstm = tf.contrib.rnn.LSTMCell(units_lstm)

        def step_fn(step, rnn_out, rnn_state, carried):
            # raw_rnn makes its first call with rnn_out == None (step 0).
            if rnn_out is None:
                state_out = lstm.zero_state(self.batch_size, tf.float32)
                carried_out = logits_ta
                fed = tf.zeros(shape=[self.batch_size, self.dim_ancs + 1],
                               dtype=tf.float32)
            else:
                state_out = rnn_state
                logit = tf.layers.dense(inputs=rnn_out, activation=None, units=1)
                fed = tf.concat([inputs, logit], axis=-1)
                # The output seen at call `step` belongs to step - 1.
                carried_out = carried.write(step - 1, logit)
            done = (step >= hap_lens)

            return (done, fed, state_out, rnn_out, carried_out)

        with tf.variable_scope('decoder_p2', reuse=reuse):
            _, _, final_ta = tf.nn.raw_rnn(lstm, step_fn)
            batch_major = _transpose_batch_time(final_ta.stack())
            flat_logits = tf.reshape(batch_major,
                                     [self.batch_size, self.max_hap_len])
            predictions = tf.nn.sigmoid(flat_logits)
        return predictions
예제 #7
0
    def unrolled_prior(self, values, num_units, global_latent, word_lens, reuse):
        """Unroll the prior LSTM and collect a mean / log-sigma pair per step.

        Each step projects the cell output to ``2 * lat_word_dim`` values,
        splits them into ``mu`` and ``logsig``, and feeds the next step with
        the given latent word for that position plus ``global_latent``.

        Args:
            values: Tensor transposed with ``[1, 0, 2]`` and then indexed by
                ``time - 1`` (presumably ``[batch, words, lat_word_dim]`` --
                confirm against the caller).
            num_units: Number of units of the LSTM cell.
            global_latent: Tensor concatenated (last axis) to every input.
            word_lens: Lengths compared against the step counter to decide
                which elements are finished.
            reuse: Forwarded to both the ``'prior'`` and ``'prior_pred'``
                variable scopes.

        Returns:
            ``[means, logsigmas]``, each stacked over time and passed through
            ``_transpose_batch_time``.
        """
        time_major_values = tf.transpose(values, [1, 0, 2])
        mu_ta = tf.TensorArray(dtype=tf.float32, size=self.max_num_lat_words)
        logsig_ta = tf.TensorArray(dtype=tf.float32, size=self.max_num_lat_words)
        lstm = tf.contrib.rnn.LSTMCell(num_units)

        def step_fn(step, rnn_out, rnn_state, carried):
            # raw_rnn makes its first call with rnn_out == None (step 0).
            if rnn_out is None:
                state_out = lstm.zero_state(self.batch_size, tf.float32)
                carried_out = (mu_ta, logsig_ta)

                # The first input has no previous latent word, so a zero
                # block of width lat_word_dim stands in for it.
                blank_word = tf.zeros(shape=[self.batch_size, self.lat_word_dim], dtype=tf.float32)
                fed = tf.concat([blank_word, global_latent], axis=-1)
            else:
                state_out = rnn_state
                with tf.variable_scope('prior_pred', reuse=reuse):
                    weight = tf.get_variable(name='prior_dense_w',
                                             shape=[self.lat_word_dim, self.lat_word_dim * 2],
                                             dtype=tf.float32)
                    bias = tf.get_variable(name='prior_dense_b', shape=self.lat_word_dim * 2,
                                           dtype=tf.float32)

                    projected = tf.reshape(tf.matmul(rnn_out, weight) + bias,
                                           [self.batch_size, self.lat_word_dim * 2])

                mu, logsig = tf.split(projected, axis=-1, num_or_size_splits=2)

                # Feed the provided latent word for the previous position
                # rather than a sample from (mu, logsig).
                fed = tf.concat([time_major_values[step - 1], global_latent], axis=-1)

                mu_written = carried[0].write(step - 1, mu)
                logsig_written = carried[1].write(step - 1, logsig)
                carried_out = (mu_written, logsig_written)

            done = (step >= word_lens)

            return (done, fed, state_out, rnn_out, carried_out)

        with tf.variable_scope('prior', reuse=reuse):
            _, _, final_tas = tf.nn.raw_rnn(lstm, step_fn)
            means = _transpose_batch_time(final_tas[0].stack())
            logsigmas = _transpose_batch_time(final_tas[1].stack())
        return [means, logsigmas]
def dynamic_raw_rnn(cell, input_, batch_size, seq_length, horizon, output_dim, rate, policy_number):
    """Unroll ``cell`` with ``tf.nn.raw_rnn``, feeding back its own outputs.

    Every ``horizon + 1`` steps the recorded input is fed unchanged; on the
    other steps the model output replaces ``output_dim``-wide columns of the
    recorded input, starting at column ``policy_number * 4``.

    Args:
        cell: RNN cell providing ``zero_state``.
        input_: Batch-major input tensor; its last static dimension is the
            per-step input width.
        batch_size: Batch size for the zero state and the padding input.
        seq_length: Size of the input TensorArray and the bound compared
            against the step counter.
        horizon: Number of consecutive steps that use fed-back outputs
            between two steps that use the recorded input.
        output_dim: Width of the per-step model output.
        rate: Dropout rate passed to ``tf.layers.dropout``.
            NOTE(review): no ``training`` argument is passed, so the layer's
            default applies -- confirm dropout is meant to be active here.
        policy_number: Selects which group of columns gets overwritten.

    Returns:
        ``(outputs, final_state)`` with ``outputs`` batch-major.
    """
    # raw_rnn expects time-major inputs held in a TensorArray; entries are
    # read more than once, hence clear_after_read=False.
    recorded = tf.TensorArray(dtype=tf.float32, size=seq_length, clear_after_read=False)
    recorded = recorded.unstack(_transpose_batch_time(input_))

    input_dim = input_.get_shape()[-1].value  # width of each step's input

    player_fts = 4

    def step_fn(step, rnn_out, rnn_state, carried):
        done = (step >= seq_length)
        all_done = tf.reduce_all(done)
        if rnn_out is None:
            # First call: declare the emit structure and read the first input.
            state_out = cell.zero_state(batch_size, tf.float32)
            emitted = tf.zeros([output_dim])
            fed = recorded.read(step)
        else:
            state_out = rnn_state
            # Project the cell output down to the output width.
            dense = tf.contrib.layers.fully_connected(inputs=rnn_out, num_outputs=output_dim)
            emitted = tf.layers.dropout(inputs=dense, rate=rate)

            def _padding():
                return tf.zeros([batch_size, input_dim], dtype=tf.float32)

            def _recorded_step():
                return recorded.read(step)

            def _spliced_step():
                # Keep the recorded columns around the overwritten slot.
                return tf.concat((recorded.read(step)[:, :policy_number*player_fts],
                                  emitted,
                                  recorded.read(step)[:, policy_number*player_fts+2:]), axis=1)

            def _live_step():
                return tf.cond(tf.equal(tf.mod(step, horizon+1), tf.constant(0)),
                               _recorded_step,
                               _spliced_step)

            fed = tf.cond(all_done, _padding, _live_step)
        return (done, fed, state_out, emitted, None)

    outputs_ta, last_state, _ = tf.nn.raw_rnn(cell, step_fn)
    outputs = _transpose_batch_time(outputs_ta.stack())

    return outputs, last_state
예제 #9
0
def dynamic_rnn(input_data, cell, loop_state_fn, initial_loop_state):
    """Run ``cell`` over ``input_data`` with ``tf.nn.raw_rnn`` and a custom loop state.

    Args:
        input_data: Batch-major float tensor; dimension 1 is the number of
            steps to run.
        cell: RNN cell providing ``zero_state``.
        loop_state_fn: Called as ``loop_state_fn(time, loop_state, state)``
            on every step after the first; its result becomes the next loop
            state.
        initial_loop_state: Loop state returned by the first call.

    Returns:
        ``(output, final_state, final_loop_state)`` with ``output``
        batch-major.
    """
    dyn_shape = tf.shape(input_data)
    static_shape = input_data.get_shape().as_list()

    # All-zero input for one step, fed once the loop has run out of data.
    zero_step = tf.zeros([dyn_shape[0]] + static_shape[2:])

    total_steps = dyn_shape[1]

    # raw_rnn reads its inputs from a TensorArray, whose elements must be
    # laid out as [time, batch_size, input_depth].
    step_inputs = tf.TensorArray(size=dyn_shape[1], dtype=tf.float32)
    step_inputs = step_inputs.unstack(_transpose_batch_time(input_data), 'TBD_Input')

    start_state = cell.zero_state(dyn_shape[0], None)

    def step_fn(step, prev_output, prev_state, prev_loop_state):
        # Scalar flag: every sequence has the same length (dimension 1 of
        # the input), so it is False on the first call.
        done = step >= total_steps
        if prev_state is None:  # first call, step 0
            return (done, step_inputs.read(step), start_state,
                    prev_output, initial_loop_state)

        fed = tf.cond(tf.reduce_all(done),
                      lambda: zero_step,
                      lambda: step_inputs.read(step))
        new_loop_state = loop_state_fn(step, prev_loop_state, prev_state)
        return (done, fed, prev_state, prev_output, new_loop_state)

    outputs_ta, final_state, final_loop_state = tf.nn.raw_rnn(cell, step_fn)

    batch_major = _transpose_batch_time(outputs_ta.stack())

    return batch_major, final_state, final_loop_state
예제 #10
0
    def generation(self, samples):
        """Sample latent words from the prior and decode them to predictions.

        The prior LSTM is unrolled for ``max_num_lat_words`` steps; each step
        draws a latent word from the predicted (mu, logsig) and feeds it back.
        The sampled words then go through ``decoder_p2`` and ``decoder_p3``.

        Args:
            samples: Tensor concatenated (last axis) to every prior input and
                passed to ``decoder_p2`` as ``global_latent``.

        Returns:
            Whatever ``decoder_p3`` returns for the generated context.
        """
        words_ta = tf.TensorArray(dtype=tf.float32, size=self.max_num_lat_words)
        lstm = tf.contrib.rnn.LSTMCell(self.decoder_units)
        print('GENER samples {}'.format(np.shape(samples)))

        def step_fn(step, rnn_out, rnn_state, carried):
            # raw_rnn makes its first call with rnn_out == None (step 0).
            if rnn_out is None:
                state_out = lstm.zero_state(self.batch_size, tf.float32)
                carried_out = words_ta

                # No latent word exists yet, so a zero block of width
                # lat_word_dim stands in for it.
                blank_word = tf.zeros(shape=[self.batch_size, self.lat_word_dim], dtype=tf.float32)
                fed = tf.concat([blank_word, samples], axis=-1)
            else:
                state_out = rnn_state
                # Reuse the projection variables created under 'prior_pred'.
                with tf.variable_scope('prior_pred', reuse=True):
                    weight = tf.get_variable(name='prior_dense_w')
                    bias = tf.get_variable(name='prior_dense_b')

                    projected = tf.reshape(tf.matmul(rnn_out, weight) + bias,
                                           [self.batch_size, self.lat_word_dim * 2])

                mu, logsig = tf.split(projected, axis=-1, num_or_size_splits=2)
                noise = tf.random_normal(shape=[self.batch_size, self.lat_word_dim], dtype=tf.float32)
                # Reparameterised draw: noise scaled by exp(logsig), shifted by mu.
                sampled_word = noise * tf.exp(logsig) + mu

                fed = tf.concat([sampled_word, samples], axis=-1)

                carried_out = carried.write(step - 1, sampled_word)

            done = (step >= self.max_num_lat_words)

            return (done, fed, state_out, rnn_out, carried_out)

        with tf.variable_scope('prior', reuse=True):
            _, _, final_ta = tf.nn.raw_rnn(lstm, step_fn)
            sampled_words = _transpose_batch_time(final_ta.stack())

        # Every generated sentence uses the full character length.
        context = self.decoder_p2(
            num_hidden_word_units=self.lat_word_dim,
            inputs=sampled_words,
            char_sequence_length=np.repeat(self.num_sentence_characters, self.batch_size, axis=-1),
            global_latent=samples,
            reuse=True,
            context_dim=self.decoder_units,
            max_time=self.num_sentence_characters)
        return self.decoder_p3(
            inputs=context,
            reuse=True,
            char_sequence_length=np.repeat(self.num_sentence_characters, self.batch_size, axis=-1),
            max_time=self.num_sentence_characters)
예제 #11
0
def transpose_batch_time(inputs):
    """Swaps the batch and time dimensions of every element of ``inputs``.

    Args:
        inputs: A Tensor of shape `[batch_size, max_time, ...]` (batch-major)
            or `[max_time, batch_size, ...]` (time-major), or a (possibly
            nested) tuple of such elements.

    Returns:
        The same structure as ``inputs`` with each element converted to a
        Tensor and its batch and time dimensions exchanged.
    """
    # Convert every leaf first, then transpose, then rebuild the nesting.
    leaves = list(map(ops.convert_to_tensor, nest.flatten(inputs)))
    # pylint: disable=protected-access
    swapped = list(map(rnn._transpose_batch_time, leaves))
    return nest.pack_sequence_as(structure=inputs, flat_sequence=swapped)
예제 #12
0
    def alt_encoder2_rnn(self, dense_intmdt_pred_units, input_encoder, temperature, units_lstm, train, hap_lens, reuse):
        """Unroll the two-haplotype ancestor encoder with ``tf.nn.raw_rnn``.

        The cell output is split in two halves (one per haplotype). Each half
        is projected, turned into a distribution over the ancestors via
        ``self.att_dot`` and sampled. The two samples plus the next encoder
        input form the next cell input. Per step, six values are recorded in
        the loop state: both samples and, per haplotype, a "current" and a
        "next" scalar prediction.

        Args:
            dense_intmdt_pred_units: Width of the intermediate dense layer in
                front of the scalar predictions.
            input_encoder: Batch-major encoder input; cast to float32.
            temperature: Temperature of the relaxed one-hot distribution
                used when ``train`` is true.
            units_lstm: Per-haplotype LSTM width; the cell has twice that.
            train: If true, sample from ``ExpRelaxedOneHotCategorical``;
                otherwise sample hard one-hot vectors from ``Categorical``.
            hap_lens: Lengths compared against the step counter to decide
                which elements are finished.
            reuse: Forwarded to the ``'enc_p2'`` and ``'state'`` scopes.

        Returns:
            Tuple ``(X_sampled_h1, X_sampled_h2, reconstruction_h1,
            pred_next_rec_h1, reconstruction_h2, pred_next_rec_h2)``, all
            batch-major. The reconstructions and next-step predictions are
            passed through a sigmoid, and the next-step predictions drop
            their last time step.
        """
        input_encoder = tf.cast(input_encoder, dtype=tf.float32)

        with tf.variable_scope('enc_p2', reuse=reuse):
            # Ancestors
            Anc = tf.get_variable(name='Ancs', shape=[self.len_ancs, self.dim_ancs])

            w_proj = tf.get_variable(shape=[units_lstm, self.dim_ancs], dtype=tf.float32, name='w_proj')
            b_proj = tf.get_variable(shape=[self.dim_ancs], dtype=tf.float32, name='b_proj')

        cell = tf.contrib.rnn.LSTMCell(units_lstm*2)
        inputs = tf.transpose(input_encoder, perm=[1, 0, 2])
        # One extra all-zero time step is appended -- presumably because
        # loop_fn still indexes `inputs` at the final step; TODO confirm.
        inputs = tf.concat([inputs, tf.zeros([1, self.batch_size, tf.shape(inputs)[-1]], dtype=tf.float32)], axis=0)
        # Slots: 0/1 = samples h1/h2, 2/3 = current/next prediction h1,
        # 4/5 = current/next prediction h2.
        output_ta = (tf.TensorArray(size=self.max_hap_len, dtype=tf.float32),
                     tf.TensorArray(size=self.max_hap_len, dtype=tf.float32),
                     tf.TensorArray(size=self.max_hap_len, dtype=tf.float32),
                     tf.TensorArray(size=self.max_hap_len, dtype=tf.float32),
                     tf.TensorArray(size=self.max_hap_len, dtype=tf.float32),
                     tf.TensorArray(size=self.max_hap_len, dtype=tf.float32))

        print(input_encoder)
        print(output_ta)
        print(tf.transpose(input_encoder, perm=[1, 0, 2]))
        # take out when using placeholders
        print('here')

        def loop_fn(time, cell_output, cell_state, loop_state):
            print('cell_output {}'.format(cell_output))
            print('cell_state {}'.format(cell_state))

            # The emitted output is not used by the caller; only the loop
            # state matters, because it need not match the cell output shape.
            emit_output = cell_output
            if cell_output is None:  # time == 0
                print('here1')
                next_cell_state = cell.zero_state(self.batch_size, tf.float32)
                print('here2')
                print(time)
                # No samples exist yet: zeros stand in for both of them.
                next_anc = tf.concat(
                    [tf.zeros(shape=[self.batch_size, self.len_ancs * 2], dtype=tf.float32), inputs[[time]]], axis=-1)
                print('here2.5')
                print('here3')
                next_loop_state = output_ta
            else:
                print('here4')
                next_cell_state = cell_state
                hap_1, hap_2 = tf.split(cell_output, num_or_size_splits=2, axis=-1)
                with tf.variable_scope('enc_p2', reuse=True):
                    pre_next_anc1 = tf.nn.relu(tf.matmul(hap_1, w_proj) + b_proj)
                    pre_next_anc2 = tf.nn.relu(tf.matmul(hap_2, w_proj) + b_proj)
                    print('here5')
                    anc_distribution_h1 = self.att_dot(query=pre_next_anc1, values=Anc)
                    anc_distribution_h2 = self.att_dot(query=pre_next_anc2, values=Anc)
                if train:
                    # Relaxed samples keep the graph differentiable.
                    dist_h1 = tf.contrib.distributions.ExpRelaxedOneHotCategorical(temperature=temperature,
                                                                                   probs=anc_distribution_h1)
                    next_anc_sample_h1 = dist_h1.sample()
                    dist_h2 = tf.contrib.distributions.ExpRelaxedOneHotCategorical(temperature=temperature,
                                                                                   probs=anc_distribution_h2)
                    next_anc_sample_h2 = dist_h2.sample()
                else:
                    # Hard one-hot samples outside training.
                    dist_h1 = tf.contrib.distributions.Categorical(probs=anc_distribution_h1)
                    next_anc_sample_h1 = tf.cast(tf.one_hot(dist_h1.sample(), depth=self.len_ancs, axis=-1), dtype=tf.float32)
                    dist_h2 = tf.contrib.distributions.Categorical(probs=anc_distribution_h2)
                    next_anc_sample_h2 = tf.cast(tf.one_hot(dist_h2.sample(), depth=self.len_ancs, axis=-1), dtype=tf.float32)

                # This is sent as input to the next iteration of the cell.
                next_anc = tf.concat([tf.concat([next_anc_sample_h1, next_anc_sample_h2], axis=-1), inputs[[time]]],
                                     axis=-1)

                # Sample-weighted sum of the ancestor embeddings, per haplotype.
                anc_h1 = tf.reduce_sum(tf.reshape(tf.matmul(tf.reshape(tf.matrix_diag(next_anc_sample_h1), [-1, self.len_ancs]), Anc), [self.batch_size, self.len_ancs, self.dim_ancs]), 1)
                anc_h2 = tf.reduce_sum(tf.reshape(tf.matmul(tf.reshape(tf.matrix_diag(next_anc_sample_h2), [-1, self.len_ancs]), Anc), [self.batch_size, self.len_ancs, self.dim_ancs]), 1)

                anc_h1_2 = tf.layers.dense(anc_h1, units=dense_intmdt_pred_units, activation=tf.nn.relu)
                anc_h2_2 = tf.layers.dense(anc_h2, units=dense_intmdt_pred_units, activation=tf.nn.relu)

                pred_current_h1 = tf.layers.dense(anc_h1_2, units=1, activation=None)
                pred_next_h1 = tf.layers.dense(anc_h1_2, units=1, activation=None)
                pred_current_h2 = tf.layers.dense(anc_h2_2, units=1, activation=None)
                pred_next_h2 = tf.layers.dense(anc_h2_2, units=1, activation=None)

                print('here7')
                # Output to store for the iteration. Slot 1 holds the second
                # haplotype's sample (it is returned as X_sampled_h2).
                next_loop_state = (
                    loop_state[0].write(time - 1, next_anc_sample_h1),
                    loop_state[1].write(time - 1, next_anc_sample_h2), loop_state[2].write(time - 1, pred_current_h1),
                    loop_state[3].write(time - 1, pred_next_h1), loop_state[4].write(time - 1, pred_current_h2),
                    loop_state[5].write(time - 1, pred_next_h2))

            print('out_loop')
            # A batch-sized vector telling which elements have finished.
            elements_finished = time >= hap_lens
            print(elements_finished)

            return (elements_finished, next_anc, next_cell_state, emit_output, next_loop_state)

        with tf.variable_scope('state', reuse=reuse):
            _, _, loop_state_ta = tf.nn.raw_rnn(cell, loop_fn)
        print('Anc_O {}'.format(_transpose_batch_time(loop_state_ta[0].stack())))
        print('params_O {}'.format(_transpose_batch_time(loop_state_ta[1].stack())))

        X_sampled_h1 = _transpose_batch_time(loop_state_ta[0].stack())
        X_sampled_h2 = _transpose_batch_time(loop_state_ta[1].stack())
        reconstruction_h1 = tf.nn.sigmoid(_transpose_batch_time(loop_state_ta[2].stack()))
        # The last "next allele" prediction has no target, so it is cut off.
        pred_next_rec_h1 = tf.nn.sigmoid(_transpose_batch_time(loop_state_ta[3].stack())[:, 0:-1])
        reconstruction_h2 = tf.nn.sigmoid(_transpose_batch_time(loop_state_ta[4].stack()))
        pred_next_rec_h2 = tf.nn.sigmoid(_transpose_batch_time(loop_state_ta[5].stack())[:, 0:-1])

        return X_sampled_h1, X_sampled_h2, reconstruction_h1, pred_next_rec_h1, reconstruction_h2, pred_next_rec_h2
예제 #13
0
    epoch_loss_avg(loss_value)  #add current batch loss

    #end epoch
    train_loss_results.append(epoch_loss_avg.result())

    if epoch % 5 == 0:
        print('Epoch: {},      Loss: {}'.format(epoch,
                                                train_loss_results[epoch]))

# Persist the trained model weights (hard-coded Windows path).
model.save_weights('C:\\deep_SSM\\model_rank0.h5')
'''
creating latent variables used in the second phase
'''

# Produce latent variables to serve as features for the next phase
# (the environment state).
rep = 100  # number of replications to average over, to reduce variance
# One slot per replication; each slot has shape
# (seq_length, num_seq, latent_dim).
latents_to_average = np.zeros(shape=(rep, seq_length, num_seq,
                                     model.latent_dim))

# Element [1] of SSM_model's result is stored as the latents of each run
# (presumably time-major, matching the buffer shape -- TODO confirm).
for i in range(rep):
    latents_to_average[i] = SSM_model(model, train_data)[1]

latents_to_average = tf.convert_to_tensor(latents_to_average)
# Average over the replication axis.
env_state = tf.reduce_mean(latents_to_average, axis=0)
env_state = _transpose_batch_time(env_state)  # swap the first two axes (batch-major)
env_state = tf.reshape(env_state, (-1, model.latent_dim))  # flatten to 2D: one row per (sequence, step)

# Save the result as CSV (hard-coded Windows path).
# NOTE(review): env_state is a TF tensor here; np.savetxt needs it to be
# convertible to an array (e.g. eager execution) -- confirm.
np.savetxt('C:\\deep_SSM\\envstate_rank0.csv', env_state, delimiter=',')
예제 #14
0
def mask_and_reduce(sequence,
                    sequence_length,
                    rank=2,
                    average_across_batch=True,
                    average_across_timesteps=False,
                    average_across_remaining=False,
                    sum_over_batch=False,
                    sum_over_timesteps=True,
                    sum_over_remaining=True,
                    dtype=None,
                    time_major=False):
    """Zeroes out sequence entries past each sequence's length, then reduces
    (averages or sums) dimensions away.

    Combines :func:`~texar.tf.utils.shapes.mask_sequences` with
    :func:`~texar.tf.losses.losses_utils.reduce_batch_time`.

    Args:
        sequence: A Tensor of sequence values.
            With `time_major=False` (default) its shape must be
            `[batch_size, max_time, d_2, ..., d_rank]`, where the rank of
            the Tensor is given by :attr:`rank`.
            With `time_major=True` the batch and time dimensions are
            exchanged.
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond
            the respective sequence lengths are set to zero. If `None`,
            no masking is performed.
        rank (int): The rank of :attr:`sequence`. Must be >= 2. Default is 2,
            i.e., `sequence` is a 2D Tensor of batch and time dimensions.
        average_across_batch (bool): If set, average over the batch
            dimension. Mutually exclusive with `sum_over_batch`.
        average_across_timesteps (bool): If set, average over the time
            dimension. Mutually exclusive with `sum_over_timesteps`.
        average_across_remaining (bool): If set, average over the remaining
            dimensions (2 .. rank-1). Mutually exclusive with
            `sum_over_remaining`; only consulted when `rank > 2`.
        sum_over_batch (bool): If set, sum over the batch dimension.
            Mutually exclusive with `average_across_batch`.
        sum_over_timesteps (bool): If set, sum over the time dimension.
            Mutually exclusive with `average_across_timesteps`.
        sum_over_remaining (bool): If set, sum over the remaining
            dimensions (2 .. rank-1). Mutually exclusive with
            `average_across_remaining`; only consulted when `rank > 2`.
        dtype (dtype): Type of :attr:`sequence`. If `None`, it is inferred
            from :attr:`sequence`.
        time_major (bool): Layout of the input. If `True`,
            :attr:`sequence` has shape `[max_time, batch_size, ...]`;
            if `False` (default), `[batch_size, max_time, ...]`.

    Returns:
        A Tensor containing the masked and reduced sequence.

    Raises:
        ValueError: If `rank < 2`, or if `rank > 2` and both
            `average_across_remaining` and `sum_over_remaining` are set.
    """
    if rank < 2:
        raise ValueError('`rank` must be >= 2.')

    # Everything below operates on the batch-major layout.
    if time_major:
        sequence = rnn._transpose_batch_time(sequence)

    if sequence_length is not None:
        sequence = mask_sequences(sequence,
                                  sequence_length,
                                  dtype=dtype,
                                  time_major=False,
                                  tensor_rank=rank)

    if rank > 2:
        if average_across_remaining and sum_over_remaining:
            raise ValueError("Only one of `average_across_remaining` and "
                             "`sum_over_remaining` can be set.")
        # Choose the reduction (if any) for the trailing dimensions.
        if average_across_remaining:
            reduce_remaining = tf.reduce_mean
        elif sum_over_remaining:
            reduce_remaining = tf.reduce_sum
        else:
            reduce_remaining = None
        if reduce_remaining is not None:
            sequence = reduce_remaining(sequence, axis=np.arange(2, rank))

    sequence = reduce_batch_time(sequence, sequence_length,
                                 average_across_batch,
                                 average_across_timesteps, sum_over_batch,
                                 sum_over_timesteps)

    # When both the batch and the time dimension survive the reduction,
    # hand the result back in the caller's original (time-major) layout.
    any_batch_time_reduction = (average_across_timesteps
                                or sum_over_timesteps
                                or average_across_batch
                                or sum_over_batch)
    if time_major and not any_batch_time_reduction:
        sequence = rnn._transpose_batch_time(sequence)

    return sequence
예제 #15
0
    def __init__(self,
                 max_seq_len,
                 input_size,
                 rnn_size,
                 batch_size,
                 lr,
                 train_keep_prob,
                 decay_rate=0.95,
                 lambda_a=0.1,
                 lambda_z=0.1,
                 df_size=200,
                 num_class=60,
                 class_lr=1e-3,
                 dtype=tf.float32):
        """Build the TF1 graph: encoder, self-feeding decoder and classifier.

        The graph consists of a 3-layer bidirectional GRU encoder, a GRU
        decoder driven through `tf.nn.raw_rnn` that feeds its own output back
        as the next input, an L1 prediction loss against the input shifted by
        one step, and a classifier applied to the encoder's final state.

        Args:
            max_seq_len: Time dimension of the input placeholders.
            input_size: Feature dimension of every time step.
            rnn_size: Units of the decoder GRU; each encoder direction uses
                `rnn_size // 2` units.
            batch_size: Default value of the scalar batch-size placeholder
                (used to build the zero input once all sequences finished).
            lr: Initial learning rate of the prediction optimizer.
            train_keep_prob: Not used by this constructor.
            decay_rate: Factor applied by `learning_rate_decay_op` and
                `cls_lr_decay` each time they are run.
            lambda_a: Not used by this constructor.
            lambda_z: Not used by this constructor.
            df_size: Stored as `self.df_size`; not otherwise used here.
            num_class: Number of classes (width of the label placeholder).
            class_lr: Initial learning rate of the classifier optimizer.
            dtype: dtype of the learning-rate variables and of the
                `enc_in` / `dec_in` / `dec_rel` placeholders.
        """
        self.max_seq_len = max_seq_len
        self.rnn_size = rnn_size
        self.df_size = df_size
        self.batch_size = tf.placeholder_with_default(batch_size, shape=())
        self.input_size = input_size
        self.class_lr = tf.Variable(float(class_lr),
                                    trainable=False,
                                    dtype=dtype)
        self.lr = tf.Variable(float(lr), trainable=False, dtype=dtype)
        self.learning_rate_decay_op = self.lr.assign(self.lr * decay_rate)
        # Decay the classifier learning rate in place. This used to assign
        # into `self.lr`, which clobbered the prediction learning rate.
        self.cls_lr_decay = self.class_lr.assign(self.class_lr * decay_rate)

        self.keep_prob = tf.placeholder_with_default(1.0, shape=())

        self.global_step = tf.Variable(0, trainable=False)

        with tf.variable_scope("prediction"):
            with tf.variable_scope("inputs"):
                self.enc_in = tf.placeholder(
                    dtype,
                    shape=[None, self.max_seq_len, input_size],
                    name='enc_in')
                self.dec_in = tf.placeholder(
                    dtype,
                    shape=[None, self.max_seq_len, input_size],
                    name='dec_in')
                # Named 'dec_rel' (was a copy-pasted duplicate 'dec_in').
                self.dec_rel = tf.placeholder(
                    dtype,
                    shape=[None, self.max_seq_len, input_size],
                    name='dec_rel')
                self.seq_len = tf.placeholder(tf.int32, [None])
                self.label = tf.placeholder(tf.float32,
                                            shape=[None, num_class],
                                            name='labels')
                # 1.0 where a target frame (steps 1..end) has any non-zero
                # feature, 0.0 where it is all zeros.
                mask = tf.sign(tf.reduce_max(tf.abs(self.enc_in[:, 1:, :]), 2))

            with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
                cell_fw = [
                    tf.nn.rnn_cell.GRUCell(self.rnn_size // 2)
                    for _ in range(3)
                ]
                cell_bw = [
                    tf.nn.rnn_cell.GRUCell(self.rnn_size // 2)
                    for _ in range(3)
                ]
                ref_outputs, ref_fw_state, ref_bw_state = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    self.enc_in,
                    dtype=tf.float32,
                    sequence_length=self.seq_len)
                self.encoder_all_states = ref_outputs
                # Final states of the top forward/backward layers, joined
                # along the feature axis.
                self.ref_concat = tf.keras.layers.concatenate(
                    [ref_fw_state[-1], ref_bw_state[-1]], axis=1)
                self.ref_final_state = self.ref_concat

            pred_cell = tf.nn.rnn_cell.GRUCell(self.rnn_size)
            cell_ = LinearSpaceDecoderWrapper(pred_cell, self.input_size)
            cell = ResidualWrapper(cell_)
            with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE):

                def loop_fn(time, cell_output, cell_state, loop_state):
                    """Step callback for `tf.nn.raw_rnn`.

                    :param time: current time step
                    :param cell_output: output from the previous time step,
                      or None on the initial call (time == 0)
                    :param cell_state: cell state from the previous step
                    :param loop_state: unused
                    :return: tuple consisting of
                      elements_finished: bool tensor of size [batch_size],
                        True for sequences that have reached their end
                      next_input: input to the next time step
                      next_cell_state: cell state for the next time step
                      emit_output: value raw_rnn records for this step
                      next_loop_state: always None
                    """
                    first_call = cell_output is None
                    if first_call:
                        # On the initial call the emit only tells TF the
                        # shape/dtype of all future emits.
                        emit_output = tf.zeros([self.input_size])
                    else:
                        # cell_output is the output for time t-1; it is
                        # emitted unchanged.
                        emit_output = cell_output

                    # NOTE(review): the state handed to the cell is the
                    # encoder summary at every step; the `cell_state`
                    # argument is ignored. This mirrors the original code and
                    # looks deliberate -- confirm.
                    next_cell_state = self.ref_final_state

                    # One step fewer than the sequence length is decoded.
                    elements_finished = (time >= self.seq_len - 1)
                    finished = tf.reduce_all(elements_finished)

                    if first_call:
                        # No previous output yet: start from the first input
                        # frame, a tensor of shape (None, input_dim).
                        next_in = self.enc_in[:, 0, :]
                    else:
                        # Feed the previous output back as the next input.
                        next_in = emit_output

                    next_input = tf.cond(
                        finished,
                        # every sequence is done: feed zeros
                        lambda: tf.zeros([self.batch_size, self.input_size],
                                         dtype=tf.float32),
                        lambda: next_in)

                    # set shape manually, otherwise it is not defined for the
                    # last dimension
                    next_input.set_shape([None, self.input_size])

                    # loop state is not used
                    next_loop_state = None
                    return (elements_finished, next_input, next_cell_state,
                            emit_output, next_loop_state)

                outputs_ta, dec_final_state, _ = tf.nn.raw_rnn(cell, loop_fn)
                dec_outputs = _transpose_batch_time(outputs_ta.stack())

        with tf.variable_scope("pred_fc", reuse=tf.AUTO_REUSE):
            # The decoder output is used directly as the predicted sequence.
            self.pred_skel = dec_outputs

        # Masked L1 loss between the prediction and the input shifted by one
        # step, averaged over the valid steps of each sequence.
        # NOTE(review): the subtraction assumes the decoder emitted
        # max_seq_len - 1 steps, i.e. the longest sequence in the batch spans
        # max_seq_len -- TODO confirm. A sequence whose mask is all zeros
        # would divide by zero here.
        loss_l2 = tf.reduce_sum(tf.abs(self.pred_skel - self.enc_in[:, 1:, :]),
                                2) * mask
        loss_l2 = tf.reduce_sum(loss_l2, axis=1)
        loss_l2 /= tf.reduce_sum(mask, 1)
        self.loss_pred = tf.reduce_mean(loss_l2)

        self.loss = self.loss_pred

        self.pred_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           "prediction")
        opt = tf.train.AdamOptimizer(self.lr)
        # `self.pred_vars` is replaced here by the variables that
        # compute_gradients actually returned.
        gradients, self.pred_vars = zip(*opt.compute_gradients(self.loss))
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, 25)
        self.gradient_norms = norm
        self.updates = opt.apply_gradients(zip(clipped_gradients,
                                               self.pred_vars),
                                           global_step=self.global_step)

        with tf.variable_scope("classifier") as scope:
            logits = self.Classifier(self.ref_final_state)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                           labels=self.label),
                name='cost')

        # Classification optimizer. It reads the `self.class_lr` variable
        # (not the initial Python float) so that `cls_lr_decay` takes effect.
        optimizer = tf.train.AdamOptimizer(self.class_lr)
        self.encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                              "prediction/encoder")
        self.classifier_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "classifier")
        self.classification_vars = self.encoder_vars + self.classifier_vars
        # Fine-tuning updates encoder + classifier; the "fixed" op updates
        # the classifier variables only.
        self.train_finetune = optimizer.minimize(
            self.cost, var_list=self.classification_vars)
        self.train_fixed = optimizer.minimize(self.cost,
                                              var_list=self.classifier_vars)

        correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(self.label, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32),
                                       name='accuracy')
        self.pred_label = tf.argmax(logits, 1)

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
예제 #16
0
def build_generator(z_prior,
                    embeddings,
                    num_classes,
                    hidden_layer_size,
                    embedding_size,
                    z_prior_size,
                    max_sentence_length,
                    real_sentences=None,
                    after_sentence_id=None):
    """Build an LSTM sentence generator driven by `tf.nn.raw_rnn`.

    The initial hidden state is a tanh dense projection of `z_prior`. At each
    step a dense layer maps the hidden state to `num_classes` logits; the
    arg-max gives the word id and a "soft-argmax" (softmax over scaled logits
    times `embeddings`) gives a differentiable word embedding that is fed
    back as the next input.

    Args:
        z_prior: Prior Tensor; its first dimension is the batch size.
        embeddings: Embedding matrix multiplied by the per-class softmax
            (presumably `[num_classes, embedding_size]` -- TODO confirm).
        num_classes: Number of output classes (vocabulary size).
        hidden_layer_size: Units of the LSTM cell and of the projection of
            `z_prior`.
        embedding_size: Size of one emitted word embedding.
        z_prior_size: Not used by this function.
        max_sentence_length: Sequences are marked finished once
            `time >= max_sentence_length`.
        real_sentences: if not None, each sentence in real_sentences is the
            sentence which generated the corresponding entry in z_prior.
            Used for pretraining. Shape: [batch_size, sentence_length];
            each entry is a word id, not a word embedding.
        after_sentence_id: must not be None if real_sentences is not None.
            Positions of `real_sentences` equal to this id contribute a
            probability of 1 (i.e. nothing) to the log-probability.

    Returns:
        A tuple `(word_ids, words, neg_log_probability)`, each transposed
        from time-major to batch-major. `neg_log_probability` is all zeros
        when `real_sentences` is None.

    Raises:
        ValueError: If `real_sentences` is given without `after_sentence_id`.
    """
    # Fail early with a clear message instead of deep inside graph building.
    if real_sentences is not None and after_sentence_id is None:
        raise ValueError(
            "`after_sentence_id` must not be None when `real_sentences` "
            "is given.")

    with tf.variable_scope('generator'):

        batch_size = tf.shape(z_prior)[0]

        cell = tf.nn.rnn_cell.LSTMCell(hidden_layer_size, state_is_tuple=True)
        init_state = cell.zero_state(batch_size, tf.float32)

        if real_sentences is not None:
            # See Gan 2016 section 2.1 (LSTM decoder) for an explanation.
            # Batch indices 0..batch_size-1, paired below with the true
            # word ids to gather their softmax probabilities.
            increasing = tf.range(
                start=0,
                limit=tf.cast(batch_size, tf.int64),
                delta=1,
                dtype=tf.int64)

        def loop_fn(time, cell_output, cell_state, loop_state):
            if cell_output is None:
                # time=0, everything here is used for initialization only.
                # The hidden state is tanh(dense(z_prior)); the cell state
                # starts from the cell's zero state.
                with tf.variable_scope('C', reuse=tf.AUTO_REUSE):
                    h1 = tf.layers.dense(
                        z_prior,
                        hidden_layer_size,
                        activation=tf.tanh,
                        kernel_regularizer=None,  # TODO
                        bias_regularizer=None)
                next_cell_state = tf.contrib.rnn.LSTMStateTuple(
                    c=init_state.c, h=h1)

                h = h1

                # This tells raw_rnn what the rest of our emits will look
                # like, per INDIVIDUAL batch item:
                # first item: the id of the word that was generated
                # second item: the embedding of that word, calculated via
                #   soft-argmax
                # third item: negative log probability
                emit_output = (
                    tf.zeros([], dtype=tf.int64),
                    tf.zeros([embedding_size], dtype=tf.float32),
                    tf.zeros([], dtype=tf.float32))

            else:
                # Emit what the previous call computed and stored in the
                # loop state.
                emit_output = loop_state
                next_cell_state = cell_state
                h = next_cell_state.h

            with tf.variable_scope('V', reuse=tf.AUTO_REUSE):
                mul = tf.layers.dense(
                    h,
                    num_classes,
                    activation=None,
                    kernel_regularizer=None,  # TODO
                    bias_regularizer=None)
            next_word_id = tf.argmax(mul, axis=1)
            # Section 2.5 of Zhang discusses this "soft-argmax": argmax has
            # no gradient and would break the path between the loss and the
            # variables, so a sharpened softmax is used instead.
            # NOTE(review): `L` is not defined in this function; it is
            # presumably a module-level sharpness constant -- confirm.
            next_word = tf.matmul(tf.nn.softmax(L * mul, axis=1), embeddings)

            # TODO this should be improved
            elements_finished = (time >= max_sentence_length)

            if real_sentences is not None:
                # For each sentence, we get the negative log probability of
                # the ACTUAL word that should have been generated.
                # The sum of all of these probabilities forms the objective
                # function. See Gan 2016.
                # https://stackoverflow.com/questions/36824580

                # Pair each batch index with its true word id. The cond only
                # avoids an out-of-range column once the sentence is
                # finished; the value computed in that case is not used.
                mask = tf.stack(
                    [
                        increasing,
                        real_sentences[:,
                                       tf.cond(time < max_sentence_length,
                                               lambda: time, lambda: 0)]
                    ],
                    axis=1)

                # Extract the probability of each true word.
                sm = tf.nn.softmax(mul)

                masked = tf.gather_nd(params=sm, indices=mask)

                # only take the softmax values that correspond to valid
                # words. otherwise, use 1, so that the sum of logs will not
                # be affected.
                masked = tf.where(
                    tf.not_equal(mask[:, 1], after_sentence_id), masked,
                    tf.ones([batch_size], dtype=tf.float32))

                neg_log_probability = -tf.log(masked)

                # TODO not sure what to do here. Zeros after the softmax lead
                # to infinities after the log; they are replaced by 1e2.
                replace = tf.ones_like(neg_log_probability) * tf.constant(1e2)
                neg_log_probability = tf.where(
                    tf.is_inf(neg_log_probability), replace,
                    neg_log_probability)

            # Determine what should be emitted next time.
            if real_sentences is not None:
                next_loop_state = (next_word_id, next_word,
                                   neg_log_probability)
            else:
                next_loop_state = (next_word_id, next_word,
                                   tf.zeros([batch_size], dtype=tf.float32))

            return (elements_finished, next_word, next_cell_state, emit_output,
                    next_loop_state)

        emit_ta, final_state, final_loop_state = tf.nn.raw_rnn(cell, loop_fn)

        word_ids, words, neg_log_probability_ta = emit_ta

        out_log_prob = _transpose_batch_time(neg_log_probability_ta.stack())

        # must transpose first two dimensions from [sentence_length, batch_size]
        # to [batch_size, sentence_length]
        return _transpose_batch_time(word_ids.stack()), _transpose_batch_time(
            words.stack()), out_log_prob
예제 #17
0
    def forward(self, x, keep_prob):
        """Encode `x` with an RNN and reconstruct it with an un-conditioned
        RNN decoder.

        Nothing is returned; the results are stored on the instance:
        `self.hidden` (last-step encoder output) and `self.x_recon`
        (reconstruction reshaped to `[-1, n_seg, n_input]`).

        x -- input features, [batch_size, n_seg, n_input]
        keep_prob -- input keep probability of the encoder dropout
        """
        # Optionally reverse the time axis (claimed to be useful for NMT).
        if self.reverse:
            x = x[:, ::-1, :]

        batch_size = tf.shape(x)[0]
        # Every sequence has the same length, n_seg.
        seq_len = tf.ones((batch_size, ), dtype='int32') * self.n_seg

        def zero_input():
            # The decoder is un-conditioned: it is always fed zeros.
            return tf.zeros([batch_size, self.n_input], dtype=tf.float32)

        # ---------------------------- Encoder ----------------------------

        # Per-step embedding of the input features.
        embedded = tf.nn.relu(
            tf.nn.xw_plus_b(tf.reshape(x, [-1, self.n_input]),
                            self.W_encode, self.b_encode))
        embedded = tf.reshape(embedded, [-1, self.n_seg, self.emb_dim])

        # Only input dropout is applied to the encoder cell.
        encoder_cell = tf.contrib.rnn.DropoutWrapper(
            self.encoder_cell, input_keep_prob=keep_prob)
        encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
            encoder_cell,
            embedded,
            sequence_length=seq_len,
            dtype=tf.float32,
            scope="Seq2seqTSN/encoder")
        self.hidden = encoder_outputs[:, -1]

        # ---------------------------- Decoder ----------------------------

        def loop_fn(time, cell_output, cell_state, loop_state):
            # On the initial call there is no state yet: start the decoder
            # from the encoder's final state.
            next_cell_state = (encoder_final_state
                               if cell_state is None else cell_state)

            elements_finished = (time >= seq_len)
            all_done = tf.reduce_all(elements_finished)
            # Both branches feed zeros (a conditioned variant would project
            # cell_output with W_ho / b_o in the not-finished branch).
            next_input = tf.cond(all_done, zero_input, zero_input)

            return (elements_finished, next_input, next_cell_state,
                    cell_output, None)

        outputs_ta, final_state, _ = tf.nn.raw_rnn(
            self.decoder_cell, loop_fn, scope="Seq2seqTSN/decoder")
        # Time-major stack -> [batch_size, time, output_dim].
        decoded = _transpose_batch_time(outputs_ta.stack())

        decoded = tf.reshape(decoded, [-1, self.emb_dim])
        h_decode = tf.nn.relu(
            tf.nn.xw_plus_b(decoded, self.W_decode1, self.b_decode1))

        # The output projection reuses the transposed encoder weights.
        flat_recon = tf.nn.xw_plus_b(h_decode, tf.transpose(self.W_encode),
                                     self.b_decode2)
        self.x_recon = tf.reshape(flat_recon, [-1, self.n_seg, self.n_input])
예제 #18
0
# Turn the action vector into a single-column 2-D array, e.g. (30000, 1).
act_direct = act_direct.reshape((act_direct.shape[0], 1))
act_direct.shape  # notebook-style shape check

# Load the true_latent data (from the Phase 1 model with rank 2) as float32.
masterlatent = np.array(
    np.genfromtxt('C:\\deep_SSM\\envstate_rank2.csv', delimiter=','),
    dtype='float32')
masterlatent.shape  # notebook-style shape check

# Global settings
pre_seq_length = 20  # original trajectory length of the train data
# Effective length used for modelling phase 2 is 20-1 (the first opponent
# action is assumed given).
seq_length = 19
num_seq = 1500  # number of sequences/trajectories in the training data

# Shape the training data: split into trajectories, swap the first two axes,
# then keep the last `seq_length` steps of the actions ...
temp_action = _transpose_batch_time(
    tf.reshape(act_direct, shape=(num_seq, pre_seq_length, -1)))
action = temp_action[-seq_length:]

# ... and the first `seq_length` steps of the latents.
temp_latent = _transpose_batch_time(
    tf.reshape(masterlatent, shape=(num_seq, pre_seq_length, -1)))
latent = temp_latent[:seq_length]

# Actions and latents are joined along the feature axis.
train_data = tf.concat([action, latent], -1)
train_data.shape  # notebook-style shape check


#define a class of the model (simple: rank 0), in fact it is also an RNN cell
class SSM_phase2(tf.keras.Model):
    def __init__(self, latent_dim=2, emission_dim=1, phase1latent_dim=4):
        """Initialise the Keras model base class and record the latent size.

        Args:
            latent_dim: Stored on the instance as ``self.latent_dim``.
            emission_dim: Accepted, but not read by the statements visible
                here.
            phase1latent_dim: Accepted, but not read by the statements
                visible here.

        NOTE(review): only ``latent_dim`` is stored; the constructor looks
        truncated -- confirm against the full source.
        """
        super(SSM_phase2, self).__init__()
        self.latent_dim = latent_dim
    def _build_net(self):
        """Build the whole TF1 graph of the model under ``self.name``.

        In order, this method:
          1. declares the input/hyper-parameter placeholders,
          2. splits every sequence into its last measurement and its history,
          3. runs ``tf.nn.raw_rnn`` over the history while recording, in the
             loop state, an attention score and the hidden state per step,
          4. normalises the scores into attention weights and forms a context
             vector as the weighted sum of the recorded hidden states,
          5. feeds ``[x_last, context_vec]`` through a shared layer and
             ``self.num_Event`` cause-specific sub-networks to produce
             ``self.out`` of shape ``[-1, num_Event, num_Category]``,
          6. calls the three loss builders and creates two Adam solvers
             (total loss and burn-in loss).

        Nothing is returned; all results are stored as attributes on ``self``.

        NOTE(review): ``get_seq_length``, ``utils.*``, ``FC_Net``, ``div`` and
        ``_EPSILON`` are defined outside this method; their exact semantics
        are assumed from how they are used here.
        """
        with tf.variable_scope(self.name):
            #### PLACEHOLDER DECLARATION
            self.mb_size = tf.placeholder(tf.int32, [], name='batch_size')

            self.lr_rate = tf.placeholder(tf.float32)
            self.keep_prob = tf.placeholder(tf.float32)  #keeping rate
            # a, b, c are the weights of LOSS_1, LOSS_2, LOSS_3 in LOSS_TOTAL
            self.a = tf.placeholder(tf.float32)
            self.b = tf.placeholder(tf.float32)
            self.c = tf.placeholder(tf.float32)

            self.x = tf.placeholder(tf.float32,
                                    shape=[None, self.max_length, self.x_dim])
            self.x_mi = tf.placeholder(
                tf.float32, shape=[None, self.max_length, self.x_dim]
            )  #this is the missing indicator (including for cont. & binary) (includes delta)
            self.k = tf.placeholder(
                tf.float32, shape=[None,
                                   1])  #event/censoring label (censoring:0)
            self.t = tf.placeholder(tf.float32, shape=[None, 1])

            self.fc_mask1 = tf.placeholder(
                tf.float32, shape=[None, self.num_Event,
                                   self.num_Category])  #for denominator
            self.fc_mask2 = tf.placeholder(
                tf.float32, shape=[None, self.num_Event,
                                   self.num_Category])  #for Loss 1
            self.fc_mask3 = tf.placeholder(tf.float32,
                                           shape=[None, self.num_Category
                                                  ])  #for Loss 2

            seq_length = get_seq_length(self.x)
            tmp_range = tf.expand_dims(tf.range(0, self.max_length, 1), axis=0)

            # rnn_mask1: 1.0 for every time index <= seq_length - 1, else 0.0
            self.rnn_mask1 = tf.cast(
                tf.less_equal(tmp_range, tf.expand_dims(seq_length - 1,
                                                        axis=1)), tf.float32)
            # rnn_mask2: 1.0 only at time index == seq_length - 1 (the last step)
            self.rnn_mask2 = tf.cast(
                tf.equal(tmp_range, tf.expand_dims(seq_length - 1, axis=1)),
                tf.float32)

            ### DEFINE LOOP FUNCTION FOR RAW_RNN w/ TEMPORAL ATTENTION
            # The closure below reads `cell`, `loop_state_ta`, `inputs_ta` and
            # `all_last`, which are assigned further down in this method; they
            # exist by the time tf.nn.raw_rnn actually invokes the function.
            def loop_fn_att(time, cell_output, cell_state, loop_state):

                emit_output = cell_output

                if cell_output is None:  # time == 0
                    next_cell_state = cell.zero_state(self.mb_size, tf.float32)
                    next_loop_state = loop_state_ta
                else:
                    next_cell_state = cell_state
                    tmp_h = utils.create_concat_state(next_cell_state,
                                                      self.num_layers_RNN,
                                                      self.RNN_type)

                    # unnormalised attention score from [hidden state, last measurement]
                    e = utils.create_FCNet(tf.concat([tmp_h, all_last],
                                                     axis=1),
                                           self.num_layers_ATT,
                                           self.h_dim2,
                                           tf.nn.tanh,
                                           1,
                                           None,
                                           self.initial_W,
                                           keep_prob=self.keep_prob)
                    e = tf.exp(e)

                    next_loop_state = (
                        loop_state[0].write(time - 1,
                                            e),  # save att power (e_{j})
                        loop_state[1].write(time - 1, tmp_h)
                    )  # save all the hidden states

                # elements_finished = (time >= seq_length)
                # A single scalar condition: every sequence in the batch runs
                # for the same max_length - 1 steps.
                elements_finished = (time >= self.max_length - 1)

                #this gives the break-point (no more recurrence after the max_length)
                finished = tf.reduce_all(elements_finished)
                next_input = tf.cond(
                    finished,
                    lambda: tf.zeros([self.mb_size, 2 * self.x_dim],
                                     dtype=tf.float32),  # [x_hist, mi_hist]
                    lambda: inputs_ta.read(time))

                return (elements_finished, next_input, next_cell_state,
                        emit_output, next_loop_state)

            # divide into the last x and previous x's
            # NOTE(review): the x_last produced by this slice/reshape is
            # overwritten by the masked reduce_sum a few lines below and never
            # read in between, so it does not feed the rest of the graph.
            x_last = tf.slice(self.x, [0, (self.max_length - 1), 1],
                              [-1, -1, -1])  #current measurement
            x_last = tf.reshape(x_last,
                                [-1, (self.x_dim_cont + self.x_dim_bin)
                                 ])  #remove the delta of the last measurement

            x_last = tf.reduce_sum(
                tf.tile(tf.expand_dims(self.rnn_mask2, axis=2),
                        [1, 1, self.x_dim]) * self.x,
                reduction_indices=1
            )  #sum over time since all other time stamps are 0
            x_last = tf.slice(
                x_last, [0, 1],
                [-1, -1])  #remove the delta of the last measurement
            x_hist = self.x * (
                1. - tf.tile(tf.expand_dims(self.rnn_mask2, axis=2),
                             [1, 1, self.x_dim])
            )  #since all other time stamps are 0 and measurements are 0-padded
            x_hist = tf.slice(x_hist, [0, 0, 0],
                              [-1, (self.max_length - 1), -1])

            # do same thing for missing indicator
            # NOTE(review): as with x_last above, this first mi_last
            # assignment is overwritten before it is used.
            mi_last = tf.slice(self.x_mi, [0, (self.max_length - 1), 1],
                               [-1, -1, -1])  #current measurement
            mi_last = tf.reshape(mi_last,
                                 [-1, (self.x_dim_cont + self.x_dim_bin)
                                  ])  #remove the delta of the last measurement

            mi_last = tf.reduce_sum(
                tf.tile(tf.expand_dims(self.rnn_mask2, axis=2),
                        [1, 1, self.x_dim]) * self.x_mi,
                reduction_indices=1
            )  #sum over time since all other time stamps are 0
            mi_last = tf.slice(
                mi_last, [0, 1],
                [-1, -1])  #remove the delta of the last measurement
            mi_hist = self.x_mi * (
                1. - tf.tile(tf.expand_dims(self.rnn_mask2, axis=2),
                             [1, 1, self.x_dim])
            )  #since all other time stamps are 0 and measurements are 0-padded
            mi_hist = tf.slice(mi_hist, [0, 0, 0],
                               [-1, (self.max_length - 1), -1])

            all_hist = tf.concat([x_hist, mi_hist], axis=2)
            all_last = tf.concat([x_last, mi_last], axis=1)

            #extract inputs for the temporal attention: mask (to incorporate only the measured time) and x_{M}
            # NOTE(review): this re-assigned seq_length is not read again in
            # this method (its only consumer in loop_fn_att is commented out).
            seq_length = get_seq_length(x_hist)
            rnn_mask_att = tf.cast(
                tf.not_equal(tf.reduce_sum(x_hist, reduction_indices=2), 0),
                dtype=tf.float32
            )  #[mb_size, max_length-1], 1:measurements 0:no measurements

            ##### SHARED SUBNETWORK: RNN w/ TEMPORAL ATTENTION
            #change the input tensor to TensorArray format with [max_length, mb_size, x_dim]
            inputs_ta = tf.TensorArray(dtype=tf.float32,
                                       size=self.max_length - 1).unstack(
                                           _transpose_batch_time(all_hist),
                                           name='Shared_Input')

            #create a cell with RNN hyper-parameters (RNN types, #layers, #nodes, activation functions, keep probability)
            cell = utils.create_rnn_cell(self.h_dim1, self.num_layers_RNN,
                                         self.keep_prob, self.RNN_type,
                                         self.RNN_active_fn)

            #define the loop_state TensorArray for information from rnn time steps
            loop_state_ta = (
                tf.TensorArray(size=self.max_length - 1,
                               dtype=tf.float32),  #e values (e_{j})
                tf.TensorArray(size=self.max_length - 1,
                               dtype=tf.float32))  #hidden states (h_{j})

            rnn_outputs_ta, self.rnn_final_state, loop_state_ta = tf.nn.raw_rnn(
                cell, loop_fn_att)
            #rnn_outputs_ta  : TensorArray
            #rnn_final_state : Tensor
            #rnn_states_ta   : (TensorArray, TensorArray)

            rnn_outputs = _transpose_batch_time(rnn_outputs_ta.stack())
            # rnn_outputs =  tf.reshape(rnn_outputs, [-1, self.max_length-1, self.h_dim1])

            rnn_states = _transpose_batch_time(loop_state_ta[1].stack())

            att_weight = _transpose_batch_time(
                loop_state_ta[0].stack())  #e_{j}
            att_weight = tf.reshape(att_weight, [
                -1, self.max_length - 1
            ]) * rnn_mask_att  # masking to set 0 for the unmeasured e_{j}

            #get a_{j} = e_{j}/sum_{l=1}^{M-1}e_{l}
            self.att_weight = div(
                att_weight, (tf.reduce_sum(att_weight, axis=1, keepdims=True) +
                             _EPSILON))  #softmax (tf.exp is done, previously)

            # 1) expand att_weight to hidden state dimension, 2) c = \sum_{j=1}^{M} a_{j} x h_{j}
            self.context_vec = tf.reduce_sum(tf.tile(
                tf.reshape(self.att_weight, [-1, self.max_length - 1, 1]),
                [1, 1, self.num_layers_RNN * self.h_dim1]) * rnn_states,
                                             axis=1)

            # Per-step mean and (exponentiated, hence positive) std predicted
            # from the RNN outputs.
            self.z_mean = FC_Net(rnn_outputs,
                                 self.x_dim,
                                 activation_fn=None,
                                 weights_initializer=self.initial_W,
                                 scope="RNN_out_mean1")
            self.z_std = tf.exp(
                FC_Net(rnn_outputs,
                       self.x_dim,
                       activation_fn=None,
                       weights_initializer=self.initial_W,
                       scope="RNN_out_std1"))

            # Standard-normal noise; its leading dimension is the fed mb_size,
            # which therefore has to match the batch dimension of self.x.
            epsilon = tf.random_normal(
                [self.mb_size, self.max_length - 1, self.x_dim],
                mean=0.0,
                stddev=1.0,
                dtype=tf.float32)
            self.z = self.z_mean + self.z_std * epsilon

            ##### CS-SPECIFIC SUBNETWORK w/ FCNETS
            inputs = tf.concat([x_last, self.context_vec], axis=1)

            #1 layer for combining inputs
            h = FC_Net(inputs,
                       self.h_dim2,
                       activation_fn=self.FC_active_fn,
                       weights_initializer=self.initial_W,
                       scope="Layer1")
            h = tf.nn.dropout(h, keep_prob=self.keep_prob)

            # (num_layers_CS-1) layers for cause-specific (num_Event subNets)
            out = []
            for _ in range(self.num_Event):
                cs_out = utils.create_FCNet(h, (self.num_layers_CS),
                                            self.h_dim2, self.FC_active_fn,
                                            self.h_dim2, self.FC_active_fn,
                                            self.initial_W, self.reg_W,
                                            self.keep_prob)
                out.append(cs_out)
            out = tf.stack(out, axis=1)  # stack referenced on subject
            out = tf.reshape(out, [-1, self.num_Event * self.h_dim2])
            out = tf.nn.dropout(out, keep_prob=self.keep_prob)

            # The softmax is applied to the flattened
            # num_Event * num_Category vector before it is reshaped below.
            out = FC_Net(out,
                         self.num_Event * self.num_Category,
                         activation_fn=tf.nn.softmax,
                         weights_initializer=self.initial_W,
                         weights_regularizer=self.reg_W_out,
                         scope="Output")
            self.out = tf.reshape(out, [-1, self.num_Event, self.num_Category])

            ##### GET LOSS FUNCTIONS
            self.loss_Log_Likelihood()  #get loss1: Log-Likelihood loss
            self.loss_Ranking()  #get loss2: Ranking loss
            self.loss_RNN_Prediction()  #get loss3: RNN prediction loss

            self.LOSS_TOTAL = self.a * self.LOSS_1 + self.b * self.LOSS_2 + self.c * self.LOSS_3 + tf.losses.get_regularization_loss(
            )
            # Burn-in objective: only the RNN prediction loss plus regularisation.
            self.LOSS_BURNIN = self.LOSS_3 + tf.losses.get_regularization_loss(
            )

            self.solver = tf.train.AdamOptimizer(
                learning_rate=self.lr_rate).minimize(self.LOSS_TOTAL)
            self.solver_burn_in = tf.train.AdamOptimizer(
                learning_rate=self.lr_rate).minimize(self.LOSS_BURNIN)
예제 #20
0
    def __init__(
            self,
            architecture,
            max_seq_len,
            human_size,
            rnn_size,  # hidden recurrent layer size
            num_layers,
            max_gradient_norm,
            stddev,
            batch_size,
            learning_rate,
            learning_rate_decay_factor,
            summaries_dir,
            loss_to_use,
            number_of_actions,
            one_hot=True,
            residual_velocities=False,
            dtype=tf.float32):
        """Create the model graph, its losses and its training op.

        Args:
          architecture: only "basic" is implemented by this constructor.
          max_seq_len: number of decoded time steps; `inputs` carries
            max_seq_len + 1 steps and `gts` carries max_seq_len steps.
          human_size: dimensionality of one pose vector.
          rnn_size: number of units in the rnn.
          num_layers: number of rnns to stack.
          max_gradient_norm: gradients will be clipped to maximally this norm.
          stddev: standard deviation of the Gaussian noise added to the
            inputs while `self.is_training` is fed as True.
          batch_size: the size of the batches used during training; only
            stored on the instance, the graph itself uses the dynamic batch
            dimension of the inputs.
          learning_rate: learning rate to start with.
          learning_rate_decay_factor: decay learning rate by this much when
            `self.learning_rate_decay_op` is run.
          summaries_dir: where to log progress for tensorboard.
          loss_to_use: accepted for interface compatibility; not read by this
            constructor.
          number_of_actions: number of classes we have.
          one_hot: whether the one-hot action encoding is part of the input
            (adds `number_of_actions` to the input size).
          residual_velocities: whether to use a residual connection that
            models velocities.
          dtype: the data type of the placeholders and the learning-rate
            variable.

        Raises:
          ValueError: if `architecture` is not "basic". Previously such a
            value left `outputs` empty and failed later with an unrelated
            shape error while the losses were being built.
        """
        if architecture != "basic":
            raise ValueError(
                "Unsupported architecture %r: only 'basic' is implemented" %
                (architecture, ))

        self.HUMAN_SIZE = human_size
        self.input_size = self.HUMAN_SIZE + number_of_actions if one_hot else self.HUMAN_SIZE

        print("One hot is ", one_hot)
        print("Input size is %d" % self.input_size)

        # Summary writers for train and test runs
        self.train_writer = tf.summary.FileWriter(
            os.path.normpath(os.path.join(summaries_dir, 'train')))
        self.test_writer = tf.summary.FileWriter(
            os.path.normpath(os.path.join(summaries_dir, 'test')))

        self.max_seq_len = max_seq_len
        self.rnn_size = rnn_size
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=dtype)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # === Create the RNN that will keep the state ===
        print('rnn_size = {0}'.format(rnn_size))
        cell = tf.contrib.rnn.GRUCell(self.rnn_size)

        if num_layers > 1:
            cell = tf.contrib.rnn.MultiRNNCell([
                tf.contrib.rnn.GRUCell(self.rnn_size)
                for _ in range(num_layers)
            ])

        # === Transform the inputs ===
        with tf.name_scope("inputs_gts"):

            inputs = tf.placeholder(
                dtype,
                shape=[None, self.max_seq_len + 1, self.input_size],
                name="inputs")
            gts = tf.placeholder(
                dtype,
                shape=[None, self.max_seq_len, self.input_size],
                name="gts")
            seq_len = tf.placeholder(tf.int32, shape=[None], name="seq_len")

            self.inputs = inputs
            self.gts = gts
            self.seq_len = seq_len

            # From here on `inputs` / `gts` are time-major: [time, batch, dim].
            inputs = _transpose_batch_time(inputs)
            gts = _transpose_batch_time(gts)

        # === Add space decoder ===
        cell = rnn_cell_extensions.LinearSpaceDecoderWrapper(
            cell, self.input_size)

        # Finally, wrap everything in a residual layer if we want to model velocities
        if residual_velocities:
            cell = rnn_cell_extensions.ResidualWrapper(cell)

        self.stddev = stddev

        def addGN(inputs):
            """Return `inputs` plus zero-mean Gaussian noise of std `self.stddev`."""
            noise = tf.random_normal(shape=tf.shape(inputs),
                                     mean=0.0,
                                     stddev=self.stddev,
                                     dtype=tf.float32)
            return inputs + noise

        self.is_training = tf.placeholder(dtype=tf.bool)

        # === Build the RNN ===
        # Trainable initial cell state and initial "previous output".
        # NOTE(review): np.zeros([1, cell.state_size]) presumes an integer
        # state_size; with num_layers > 1 state_size is a tuple -- confirm.
        cell_init_state = tf.Variable(np.zeros([1, cell.state_size]),
                                      trainable=True,
                                      dtype=tf.float32)
        # NOTE(review): the hard-coded 63 presumably has to equal the width
        # of the cell output that replaces it after step 0 -- confirm.
        init_input = tf.Variable(np.zeros([63]),
                                 trainable=True,
                                 dtype=tf.float32)
        output_ta = tf.TensorArray(size=self.max_seq_len, dtype=tf.float32)

        def loop_fn(time, cell_output, cell_state, loop_state):
            """raw_rnn loop function: feed [previous output, current input]."""
            emit_output = cell_output
            if cell_output is None:
                # time == 0: start from the learned state / learned output.
                batch = tf.shape(inputs[0])[0]
                next_cell_state = tf.tile(cell_init_state, [batch, 1])
                prev_output = tf.tile(tf.expand_dims(init_input, 0),
                                      [batch, 1])
                next_loop_state = output_ta
            else:
                next_cell_state = cell_state
                prev_output = cell_output
                # The output produced at step `time - 1` is recorded in the
                # loop state, which is what the losses consume.
                next_loop_state = loop_state.write(time - 1, cell_output)

            # Noise is injected into the external input only while training.
            next_input = tf.cond(
                self.is_training,
                lambda: tf.concat([prev_output, addGN(inputs[time])], axis=1),
                lambda: tf.concat([prev_output, inputs[time]], axis=1))

            finished = (time > self.max_seq_len - 1)
            return (finished, next_input, next_cell_state, emit_output,
                    next_loop_state)

        # Basic RNN does not have a loop function in its API, so copying here.
        with vs.variable_scope("raw_rnn"):
            _, _, loop_state_ta = tf.nn.raw_rnn(cell, loop_fn)
            # Kept time-major to line up with the time-major `gts`.
            outputs = loop_state_ta.stack()

        self.outputs = outputs

        # Time-major validity masks, tiled over the feature dimension.
        mask1 = tf.tile(
            tf.expand_dims(
                tf.transpose(
                    tf.sequence_mask(self.seq_len,
                                     dtype=tf.float32,
                                     maxlen=self.max_seq_len)), -1),
            [1, 1, self.input_size])
        mask2 = tf.tile(
            tf.expand_dims(
                tf.transpose(
                    tf.sequence_mask(self.seq_len - 1,
                                     dtype=tf.float32,
                                     maxlen=self.max_seq_len - 1)), -1),
            [1, 1, self.input_size])
        with tf.name_scope("loss_pos"):
            loss_pos = tf.reduce_mean(
                tf.square(
                    tf.subtract(tf.multiply(outputs, mask1),
                                tf.multiply(gts, mask1))))
        with tf.name_scope("loss_smooth"):
            # Built for inspection only; it is not part of `self.loss`.
            loss_smooth = tf.reduce_mean(
                tf.square(
                    tf.multiply(tf.subtract(outputs[1:], outputs[:-1]),
                                mask2)))
        #self.loss         = tf.add(loss_pos, loss_smooth*1000)
        self.loss = loss_pos
        self.loss_summary = tf.summary.scalar('loss/loss', self.loss)

        # Per-sample loss (position term + smoothness term), reduced over the
        # time and feature axes.
        self.loss_each_data = tf.reduce_mean(
            tf.square(
                tf.subtract(tf.multiply(gts, mask1),
                            tf.multiply(outputs, mask1))),
            axis=[0, 2]) + tf.reduce_mean(
                tf.square(
                    tf.multiply(tf.subtract(outputs[1:], outputs[:-1]),
                                mask2)),
                axis=[0, 2])

        # Gradients and update operation for training the model.
        params = tf.trainable_variables()

        # Use the learning-rate *variable* (not the Python float) so that
        # running `learning_rate_decay_op` actually changes the step size and
        # the logged learning-rate summary matches what the optimizer uses.
        opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)

        # Update all the trainable parameters
        gradients = tf.gradients(self.loss, params)

        clipped_gradients, norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.gradient_norms = norm
        self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                           global_step=self.global_step)

        self.learning_rate_summary = tf.summary.scalar(
            'learning_rate/learning_rate', self.learning_rate)

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
예제 #21
0
    def __call__(self, input, conditioned_lst, reuse=False):
        """
        Assemble the conditioned multi-layer LSTM graph with ``tf.nn.raw_rnn``.

        At every step the network is fed either the ground-truth frame or
        its own previous (projected) output, as selected by
        ``conditioned_lst``.

        :param input: tf.Placeholder
                tensor of shape (batch_size, time_steps, feature_size)
        :param conditioned_lst: tf.Placeholder
                boolean tensor of shape (time_steps); True means the
                ground-truth input is used at that step
        :param reuse: Bool
                whether the variables of ``self.name`` are reused
        :return: network output tensor
                of shape [batch_size, time_steps, feature_size]
        """
        with tf.variable_scope(self.name) as scope:
            if reuse:
                scope.reuse_variables()

            cells = []
            for layer_idx in range(self.nb_lstm_layers):
                cells.append(
                    tf.nn.rnn_cell.BasicLSTMCell(
                        self.nb_lstm_units,
                        name='lstm_{}'.format(layer_idx)))
            stacked_cell = tf.nn.rnn_cell.MultiRNNCell(cells)

            batch_size = self.batch_size
            initial_state = stacked_cell.zero_state(batch_size=batch_size,
                                                    dtype=tf.float32)

            # raw_rnn consumes time-major inputs through TensorArrays.
            time_steps = self.nb_time_steps
            inputs_ta = tf.TensorArray(dtype=tf.float32,
                                       size=time_steps,
                                       clear_after_read=False,
                                       name='Inputs')
            inputs_ta = inputs_ta.unstack(_transpose_batch_time(input))

            # Per-step input width; the model emits vectors of the same width.
            input_dim = input.get_shape()[-1].value
            output_dim = input_dim

            conditioned_ta = tf.TensorArray(dtype=tf.bool,
                                            size=time_steps,
                                            clear_after_read=False,
                                            name='Conditioned')
            conditioned_ta = conditioned_ta.unstack(conditioned_lst)

            def loop_fn(time, cell_output, cell_state, loop_state):
                elements_finished = (time >= time_steps)
                finished = tf.reduce_all(elements_finished)

                # First invocation: nothing has been computed yet, so hand
                # raw_rnn the initial state, the first input and a template
                # for the emitted value.
                if cell_output is None:
                    emit_template = tf.zeros([output_dim])
                    first_input = inputs_ta.read(time)
                    return (elements_finished, first_input, initial_state,
                            emit_template, None)

                # Later invocations: project the cell output back to the
                # input width.
                projected = tf.layers.dense(cell_output,
                                            output_dim,
                                            reuse=tf.AUTO_REUSE)
                if self.layer_norm:
                    projected = layers.layer_norm(projected,
                                                  center=True,
                                                  scale=True)
                projected = tf.nn.relu(projected)

                def _padding_input():
                    # Past the last step only a correctly shaped dummy is needed.
                    return tf.zeros([batch_size, input_dim], dtype=tf.float32)

                def _scheduled_input():
                    # Conditioned step -> ground truth; otherwise feed back
                    # the network's own output.
                    return tf.cond(conditioned_ta.read(time),
                                   lambda: inputs_ta.read(time),
                                   lambda: projected)

                next_input = tf.cond(finished, _padding_input,
                                     _scheduled_input)

                # The loop state is not used by this model.
                return (elements_finished, next_input, cell_state, projected,
                        None)

            emitted_ta, _, _ = tf.nn.raw_rnn(stacked_cell, loop_fn)
            out = _transpose_batch_time(emitted_ta.stack())
        return out
예제 #22
0
    def p2_encoder2_rnn(self, input_encoder, temperature, units_lstm, train,
                        hap_lens, reuse):
        """Run the phase-2 encoder LSTM that picks one ancestor per step.

        The cell output of every step is projected (``w_proj``/``b_proj``),
        scored against the ancestor table ``Ancs`` via ``self.att_dot`` and a
        sample is drawn from the resulting distribution (relaxed one-hot when
        ``train`` is truthy, hard one-hot otherwise). The sample is
        concatenated with the next input frame and fed back into the cell.

        Returns:
            Tuple ``(X_sampled, dist_params, query_vecs)``: the per-step
            samples, distributions and projected queries collected in the
            raw_rnn loop state, each passed through ``_transpose_batch_time``.
        """
        with tf.variable_scope('enc_p2', reuse=reuse):
            # Ancestors
            Anc = tf.get_variable(name='Ancs',
                                  shape=[self.len_ancs, self.dim_ancs])

            w_proj = tf.get_variable(shape=[units_lstm, self.dim_ancs],
                                     dtype=tf.float32,
                                     name='w_proj')
            b_proj = tf.get_variable(shape=[self.dim_ancs],
                                     dtype=tf.float32,
                                     name='b_proj')

        cell = tf.contrib.rnn.LSTMCell(units_lstm)

        # Time-major inputs with one extra all-zero step appended at the end.
        inputs = tf.transpose(input_encoder, perm=[1, 0, 2])
        zero_step = tf.zeros(
            [1, self.batch_size, tf.shape(inputs)[-1]], dtype=tf.float32)
        inputs = tf.concat([inputs, zero_step], axis=0)

        # One TensorArray each for: samples, distributions, projected queries.
        output_ta = tuple(
            tf.TensorArray(size=self.max_hap_len, dtype=tf.float32)
            for _ in range(3))

        print(input_encoder)
        print(output_ta)
        print(tf.transpose(input_encoder, perm=[1, 0, 2]))
        #take out when using placeholders
        print('here')

        def loop_fn(time, cell_output, cell_state, loop_state):
            print('cell_output {}'.format(cell_output))
            print('cell_state {}'.format(cell_state))

            # Only the loop state matters here; it does not have to share the
            # shape of the rnn output.
            emit_output = cell_output
            if cell_output is None:  # time == 0
                print('here1')
                next_cell_state = cell.zero_state(self.batch_size, tf.float32)
                print('here2')
                print(time)
                blank_anc = tf.zeros(shape=[self.batch_size, self.len_ancs],
                                     dtype=tf.float32)
                next_anc = tf.concat([blank_anc, inputs[[time]]], axis=-1)
                print('here2.5')
                print('here3')
                next_loop_state = output_ta
            else:
                print('here4')
                next_cell_state = cell_state
                with tf.variable_scope('enc_p2', reuse=True):
                    pre_next_anc1 = tf.nn.relu(
                        tf.matmul(cell_output, w_proj) + b_proj)
                    print('here5')
                    anc_distribution = self.att_dot(query=pre_next_anc1,
                                                    values=Anc)

                distributions = tf.contrib.distributions
                if train:
                    next_anc_sample = distributions.ExpRelaxedOneHotCategorical(
                        temperature=temperature,
                        probs=anc_distribution).sample()
                else:
                    hard_index = distributions.Categorical(
                        probs=anc_distribution).sample()
                    next_anc_sample = tf.cast(tf.one_hot(hard_index,
                                                         depth=self.len_ancs,
                                                         axis=-1),
                                              dtype=tf.float32)
                # this is sent as input to the next iteration of the cell
                next_anc = tf.concat([next_anc_sample, inputs[[time]]],
                                     axis=-1)

                print('ANC DIST {}'.format(anc_distribution))
                print('here6')

                print(next_anc_sample)

                print('here7')
                # values stored for the step that has just been computed
                recorded = (next_anc_sample, anc_distribution, pre_next_anc1)
                next_loop_state = tuple(
                    slot.write(time - 1, value)
                    for slot, value in zip(loop_state, recorded))

            print('out_loop')
            # batch-sized vector telling which elements have finished
            elements_finished = time >= hap_lens
            print(elements_finished)
            print('next_anc {}'.format(next_anc))
            print('next_cell {}'.format(next_cell_state))
            return (elements_finished, next_anc, next_cell_state, emit_output,
                    next_loop_state)

        with tf.variable_scope('state', reuse=reuse):
            _, _, loop_state_ta = tf.nn.raw_rnn(cell, loop_fn)

        anc_preview = _transpose_batch_time(loop_state_ta[0].stack())
        print('Anc_O {}'.format(anc_preview))
        params_preview = _transpose_batch_time(loop_state_ta[1].stack())
        print('params_O {}'.format(params_preview))

        X_sampled, dist_params, query_vecs = [
            _transpose_batch_time(loop_state_ta[slot_idx].stack())
            for slot_idx in range(3)
        ]

        return X_sampled, dist_params, query_vecs
예제 #23
0
def dynamic_crnn(cell,
                 inputs,
                 gate_vector,
                 sequence_length=None,
                 initial_state=None,
                 dtype=None,
                 parallel_iterations=None,
                 swap_memory=False,
                 time_major=False,
                 scope=None):
    """Build a dynamically unrolled RNN whose loop also receives `gate_vector`.

    Validates `cell`, brings `inputs` into time-major layout, prepares the
    initial state and the optional `sequence_length`, and then delegates the
    actual unrolling to `_dynamic_crnn_loop`.

    Args:
        cell: Object accepted by `_like_rnncell`.
        inputs: Tensor or nested structure of tensors. Treated as batch-major
            unless `time_major` is true.
        gate_vector: Forwarded unchanged to `_dynamic_crnn_loop`.
        sequence_length: Optional per-example lengths. Cast to int32 and
            required to be rank 1 whenever the rank is statically known.
        initial_state: Optional starting state. When omitted the state is
            `cell.zero_state(batch_size, dtype)`.
        dtype: Required when `initial_state` is omitted; also forwarded to
            the loop.
        parallel_iterations: Forwarded to the loop; falsy values become 32.
        swap_memory: Forwarded to the loop.
        time_major: If false (default), each flattened input is transposed
            with `rnn._transpose_batch_time` before the loop and the outputs
            are transposed back afterwards.
        scope: Variable scope for the created subgraph; defaults to "rnn".

    Returns:
        The pair `(outputs, final_state)` produced by `_dynamic_crnn_loop`,
        with `outputs` transposed back when `time_major` is false.

    Raises:
        TypeError: If `cell` does not pass `_like_rnncell`.
        ValueError: If `sequence_length` has a known rank other than 1, or if
            neither `initial_state` nor `dtype` is supplied.
    """
    if not _like_rnncell(cell):
        raise TypeError("cell must be an instance of RNNCell")

    flat_input = nest.flatten(inputs)
    if not time_major:
        # (B,T,D) => (T,B,D): convert everything first, then transpose.
        as_tensors = [ops.convert_to_tensor(item) for item in flat_input]
        flat_input = tuple(map(rnn._transpose_batch_time, as_tensors))

    if not parallel_iterations:
        parallel_iterations = 32

    has_lengths = sequence_length is not None
    if has_lengths:
        sequence_length = math_ops.to_int32(sequence_length)
        static_rank = sequence_length.get_shape().ndims
        if static_rank not in (None, 1):
            raise ValueError(
                "sequence_length must be a vector of length batch_size, "
                "but saw shape: %s" % sequence_length.get_shape())
        # Named identity so the tensor is easy to locate in the graph.
        sequence_length = array_ops.identity(sequence_length,
                                             name="sequence_length")

    with vs.variable_scope(scope or "rnn") as varscope:
        if varscope.caching_device is None:
            varscope.set_caching_device(lambda op: op.device)

        batch_size = rnn._best_effort_input_batch_size(flat_input)

        if initial_state is None:
            if not dtype:
                raise ValueError(
                    "If there is no initial_state, you must give a dtype.")
            state = cell.zero_state(batch_size, dtype)
        else:
            state = initial_state

        if has_lengths:
            # Runtime check that sequence_length has shape [batch_size].
            actual_shape = array_ops.shape(sequence_length)
            expected_shape = array_ops.stack([batch_size])
            shape_check = rnn.control_flow_ops.Assert(
                math_ops.reduce_all(
                    math_ops.equal(actual_shape, expected_shape)),
                [
                    "Expected shape for Tensor %s is " % sequence_length.name,
                    expected_shape, " but saw shape: ", actual_shape
                ])
            with ops.control_dependencies([shape_check]):
                sequence_length = array_ops.identity(sequence_length,
                                                     name="CheckSeqLen")

        inputs = nest.pack_sequence_as(structure=inputs,
                                       flat_sequence=flat_input)
        outputs, final_state = _dynamic_crnn_loop(
            cell,
            inputs,
            state,
            parallel_iterations=parallel_iterations,
            gate_vector=gate_vector,
            swap_memory=swap_memory,
            sequence_length=sequence_length,
            dtype=dtype)

        if not time_major:
            # (T,B,D) => (B,T,D)
            outputs = nest.map_structure(rnn._transpose_batch_time, outputs)
        return outputs, final_state
예제 #24
0
    def sampling_rnn(self, cell, initial_state, input_, seq_lengths):
        """Unroll `cell` with `tf.nn.raw_rnn`, feeding each output back in.

        At every step the cell input is the concatenation (axis 1) of the
        external input for that step and the previous cell output; on the
        very first step zeros of width `self.k` stand in for the missing
        previous output.

        :param cell: RNN cell handed to `tf.nn.raw_rnn`
        :param initial_state: cell state used for the first step
        :param input_: batch-major input tensor; it is transposed and unstacked
          into a TensorArray (NOTE(review): assumes a static last dimension,
          since `.value` is read from it)
        :param seq_lengths: compared against `time` to decide which elements
          are finished; `seq_lengths + 1` is used as the TensorArray size
          (NOTE(review): scalar vs. per-example vector is not visible here)
        :return: tuple `(outputs, final_state)` where `outputs` is the stacked
          emit TensorArray transposed back to batch-major
        """
        # raw_rnn reads its inputs step by step from a time-major TensorArray.
        max_time = seq_lengths + 1
        inputs_ta = tf.TensorArray(
            dtype=tf.float32, size=max_time,
            clear_after_read=False).unstack(_transpose_batch_time(input_))

        # Width of the cell output and of the concatenated cell input.
        output_dim = self.k
        input_dim = input_.get_shape()[-1].value + output_dim

        def loop_fn(time, cell_output, cell_state, loop_state):
            """
            Per-step callback for `tf.nn.raw_rnn`.
            :param time: current time step
            :param cell_output: cell output of the previous step, None on the initial call
            :param cell_state: cell state of the previous step
            :param loop_state: unused; always returned as None
            :return: (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)
            """
            first_call = cell_output is None

            if first_call:
                # Initial call: seed the state; the emit value only tells
                # raw_rnn what later emits look like.
                next_cell_state = initial_state
                emit_output = tf.zeros([output_dim])
            else:
                # Later calls: pass state and output through untouched.
                next_cell_state = cell_state
                emit_output = cell_output

            elements_finished = time >= seq_lengths
            all_done = tf.reduce_all(elements_finished)

            # External input for this step, joined with the fed-back output.
            step_input = inputs_ta.read(time)
            if first_call:
                feedback = tf.zeros([self.batch_size, output_dim])
            else:
                feedback = emit_output
            candidate_input = tf.concat([step_input, feedback], axis=1)

            # Once every element is finished, feed zeros instead.
            next_input = tf.cond(
                all_done,
                lambda: tf.zeros([self.batch_size, input_dim],
                                 dtype=tf.float32),
                lambda: candidate_input)

            # tf.cond loses the static shape of the last dimension.
            next_input.set_shape([None, input_dim])

            return (elements_finished, next_input, next_cell_state,
                    emit_output, None)

        outputs_ta, last_state, _ = tf.nn.raw_rnn(cell, loop_fn)
        outputs = _transpose_batch_time(outputs_ta.stack())

        return outputs, last_state
예제 #25
0
        def seq2seq_f(encoder_inputs,
                      decoder_inputs,
                      targets,
                      last_input,
                      track_padding_vec=None):
            """Run the encoder, then decode step by step with `tf.nn.raw_rnn`.

            The encoder consumes `encoder_inputs` through `tf.nn.dynamic_rnn`;
            its final state seeds the decoder. Each decoder step projects the
            cell output to MDN parameters, samples from them, derives the next
            input from that sample and records sample, loss and resolved
            mixtures in the loop-state TensorArrays.

            Args:
                encoder_inputs: Sequence of per-timestep tensors; stacked along
                    axis 1 before being fed to the encoder.
                decoder_inputs: Sequence of per-timestep tensors. Only element
                    0 is fed to the decoder directly; the length of this
                    sequence also decides how many entries of `targets` and
                    `track_padding_vec` are copied into TensorArrays.
                targets: Sequence of per-timestep target tensors for the loss.
                last_input: Value stored in loop-state slot 2 on the initial
                    call, read back by `MDN.compute_derivates`.
                track_padding_vec: Optional sequence of per-timestep boolean
                    padding tensors. When given, the mixture loss is masked
                    with it and a padding-logit loss may be added. When None,
                    all padding handling is skipped.

            Returns:
                Tuple `(output_sampled, losses, final_state, MDN_output,
                track_padding_output)`. `track_padding_output` is None when
                `track_padding_vec` is None, because nothing is written to the
                corresponding loop-state slot in that case.
            """
            use_padding = track_padding_vec is not None

            target_input_ta = tf.TensorArray(dtype=tf.float32,
                                             size=len(targets))
            # NOTE(review): the loop bound is len(decoder_inputs) while the
            # array size is len(targets) - confirm both always match.
            for j in range(len(decoder_inputs)):
                target_input_ta = target_input_ta.write(j, targets[j])

            track_padding_ta = None
            if use_padding:
                track_padding_ta = tf.TensorArray(dtype=tf.bool,
                                                  size=len(track_padding_vec))
                for j in range(len(decoder_inputs)):
                    track_padding_ta = track_padding_ta.write(
                        j, track_padding_vec[j])

            # First run the encoder and keep its last internal state; that
            # state is the decoder's initial condition.
            with tf.variable_scope('seq2seq_encoder'):
                # A list of len(time) tensors of shape (batch, RNN dim).
                reordered_encoder_inputs = tf.stack(encoder_inputs, axis=1)
                _, last_enc_state = tf.nn.dynamic_rnn(
                    self._RNN_layers,
                    inputs=reordered_encoder_inputs,
                    dtype=tf.float32)

            def loop_fn(time, cell_output, cell_state, loop_state):
                """raw_rnn step function; the heart of this network."""
                emit_output = cell_output

                if cell_output is None:
                    # Initial call: start from the encoder state.
                    next_cell_state = last_enc_state
                    # The last 'encoder input' is defined as the first decoder
                    # input (data for time T_0); it already has the input
                    # layer applied.
                    next_input = decoder_inputs[0]
                    next_loop_state = (output_ta[0], output_ta[1],
                                       output_ta[2].write(time, last_input),
                                       output_ta[3], output_ta[4])
                else:
                    next_cell_state = cell_state
                    projected_output = MDN_output_function(cell_output)

                    # Take a single sample of the MDN. This may be ignored
                    # later, depending on the use-case.
                    sampled = MDN.sample(
                        projected_output,
                        temperature=self.parameters['sample_temperature'])
                    upscale_sampled = _upscale_sampled_output(sampled)

                    if self.parameters['no_feedforward']:
                        # Zero-feed variant (RNN-ZF in the paper): the next
                        # step only ever sees zeros. Size: batch, input width.
                        next_sampled_input = tf.zeros(
                            [
                                upscale_sampled.shape[0],
                                scaling_layer[0].shape[0]
                            ],
                            dtype=tf.float32)
                    elif self.parameters['input_mask'][2:4] == [0, 0]:
                        next_sampled_input = _pad_missing_output_with_zeros(
                            upscale_sampled)
                    else:
                        # Feed the sample back as the next input. Doing this
                        # inside TensorFlow keeps the whole loop in the graph
                        # instead of resolving the MDN outside with NumPy.
                        next_sampled_input = MDN.compute_derivates(
                            loop_state[2].read(time - 1),
                            upscale_sampled,
                            self.parameters['input_columns'],
                            self.parameters['velocity_threshold'],
                            subsample_rate=self.parameters['subsample'])

                    # The TensorArray uses the default clear_after_read, so
                    # each index may be read only once.
                    target_ta = target_input_ta.read(time - 1)
                    next_datapoint = next_sampled_input
                    # The fed-back connection (dotted loop in the diagram).
                    next_input = _apply_scaling_and_input_layer(next_datapoint)

                    loss = MDN.lossfunc_wrapper(target_ta, projected_output)

                    if use_padding:
                        # Only touch the padding array when it exists; it is
                        # not created when track_padding_vec is None.
                        timewise_track_padding = track_padding_ta.read(
                            time - 1)
                        timewise_track_padding_logits = (
                            _padding_bool_to_logits(timewise_track_padding))

                        # Use padding as a mask on the mixture loss: where the
                        # ground truth marks a timestep as padding, the
                        # multiplier is min(weight, 0).
                        mixture_weight = tf.to_float(
                            parameters['padding_loss_mixture_weight'])
                        # Hyperparam search sometimes makes this a float64,
                        # hence the explicit to_float.
                        not_padding = tf.expand_dims(
                            tf.to_float(
                                tf.logical_not(timewise_track_padding)),
                            axis=-1)
                        loss = tf.multiply(
                            loss,
                            tf.minimum(mixture_weight,
                                       not_padding,
                                       name='mixture_loss'))

                        # What the network itself predicts about padding.
                        padding_output = pad_output_function(cell_output)

                        # If the weight is zero, don't bother computing.
                        if abs(parameters['padding_loss_logit_weight']
                               ) > 1e-12:
                            # Softmax loss normalized by prediction_steps.
                            normalized_xent = tf.divide(
                                tf.nn.softmax_cross_entropy_with_logits(
                                    logits=padding_output,
                                    labels=timewise_track_padding_logits),
                                self.prediction_steps)
                            weighted_xent = tf.multiply(
                                normalized_xent,
                                tf.to_float(
                                    parameters['padding_loss_logit_weight']))
                            # expand_dims keeps tf.add from broadcasting
                            # (100,) + (100, 1) into (100, 100).
                            loss = tf.add(
                                loss,
                                tf.expand_dims(weighted_xent,
                                               axis=-1,
                                               name="padding_logit_loss"))
                    else:
                        padding_output = None

                    # Slot 2 is written at `time` while the others use
                    # `time - 1`: it holds inputs, offset by one step.
                    # Slot 4 is passed through untouched without padding,
                    # because a TensorArray cannot store None.
                    next_loop_state = (
                        loop_state[0].write(time - 1, next_sampled_input),
                        loop_state[1].write(time - 1, loss),
                        loop_state[2].write(time, next_datapoint),
                        loop_state[3].write(
                            time - 1,
                            MDN.upscale_and_resolve_mixtures(
                                projected_output, scaling_layer)),
                        loop_state[4].write(time - 1, padding_output)
                        if use_padding else loop_state[4])

                # Whether each sequence in the batch is done.
                elements_finished = (time >= self.prediction_steps)

                return (elements_finished, next_input, next_cell_state,
                        emit_output, next_loop_state)

            with tf.variable_scope('seq2seq_decoder'):
                from tensorflow.python.ops.rnn import _transpose_batch_time
                _, final_state, loop_state_ta = tf.nn.raw_rnn(
                    self._RNN_layers, loop_fn)
                output_sampled = _transpose_batch_time(
                    loop_state_ta[0].stack())
                losses = _transpose_batch_time(loop_state_ta[1].stack())
                MDN_output = _transpose_batch_time(loop_state_ta[3].stack())
                # Slot 4 is only populated when padding is tracked.
                if use_padding:
                    track_padding_output = _transpose_batch_time(
                        loop_state_ta[4].stack())
                else:
                    track_padding_output = None

            return (output_sampled, losses, final_state, MDN_output,
                    track_padding_output)
예제 #26
0
def dynamic_rnn(cell,
                inputs,
                sequence_length=None,
                initial_state=None,
                dtype=None,
                parallel_iterations=None,
                swap_memory=False,
                time_major=True,
                scope=None):
    """Creates an OpHint-wrapped recurrent neural network for RNNCell `cell`.

    Performs fully dynamic unrolling of `inputs` and tags the input and output
    with a "TfLiteDynamicRnn" `OpHint`. Only the time-major layout is
    supported: `inputs` and `outputs` are shaped `[max_time, batch_size, ...]`.

    Example:

    ```python
    # create a BasicRNNCell
    rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)

    # defining initial state
    initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)

    # 'input_data' is time-major: [max_time, batch_size, depth]
    # 'outputs' is time-major:    [max_time, batch_size, ...]
    outputs, state = dynamic_rnn(rnn_cell, input_data,
                                 initial_state=initial_state,
                                 dtype=tf.float32)
    ```

    Args:
        cell: An instance of RNNCell.
        inputs: The RNN inputs: a `Tensor` of shape
            `[max_time, batch_size, ...]`, or a (possibly nested) tuple of
            such elements. The first two dimensions must match across all the
            inputs, but otherwise the ranks and other shape components may
            differ. The input to `cell` at each time step replicates the
            structure of these tuples without the time dimension.
        sequence_length: (optional) An int32/int64 vector sized
            `[batch_size]`. It is cast to int32. Used to copy-through state
            and zero-out outputs when past a batch element's sequence length.
        initial_state: (optional) An initial state for the RNN. If omitted,
            the state comes from `cell.get_initial_state` when the cell has
            that attribute, otherwise from `cell.zero_state`.
        dtype: (optional) The data type for the initial state and expected
            output. Required if `initial_state` is not provided.
        parallel_iterations: (Default: 32). The number of iterations to run
            in parallel. Trades off time for space: values >> 1 use more
            memory but take less time.
        swap_memory: Transparently swap the tensors produced in forward
            inference but needed for back prop from GPU to CPU.
        time_major: Must be true. Kept in the signature for parity with
            `tf.nn.dynamic_rnn`; batch-major input is not supported here.
        scope: VariableScope for the created subgraph; defaults to "rnn".

    Returns:
        A pair (outputs, state) where:

        outputs: The RNN output, shaped `[max_time, batch_size, ...]` and
            passed through the OpHint wrapper's `add_output`. If
            `cell.output_size` is a (possibly nested) tuple, `outputs` has the
            same structure.
        state: The final state as returned by `_dynamic_rnn_loop`.

    Raises:
        TypeError: If `cell` is not an instance of RNNCell.
        ValueError: If `time_major` is false, if `sequence_length` has a
            known rank other than 1, or if neither `initial_state` nor
            `dtype` is given.
        RuntimeError: If not using control flow v2.
    """
    # Only the time-major layout is implemented. This is an explicit check
    # rather than an `assert`, because asserts are stripped under `python -O`
    # and unsupported batch-major input would then be silently accepted.
    if not time_major:
        raise ValueError(
            "OpHint dynamic rnn only supports time_major == True.")

    # TODO(b/123051275): We need to check if the cells are TfLiteLSTMCells or
    # TfLiteRNNCells.
    rnn_cell_impl.assert_like_rnncell("cell", cell)

    if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
        raise RuntimeError("OpHint dynamic rnn only supports control flow v2.")

    # Index mappings between this (parent) hint and the hints of the cells
    # it contains.
    parent_first_child_input = [{
        "parent_ophint_input_index": 0,
        "first_child_ophint_input_index": 0
    }]
    parent_last_child_output = [{
        "parent_output_index": 0,
        # For LstmCell, the index is 2.
        # For RnnCell, the index is 1.
        # So we use -1 meaning it's the last one.
        "child_output_index": -1
    }]
    internal_children_input_output = [{
        "child_input_index": 0,
        # For LstmCell, the index is 2.
        # For RnnCell, the index is 1.
        # So we use -1 meaning it's the last one.
        "child_output_index": -1
    }]
    inputs_outputs_mappings = {
        "parent_first_child_input": parent_first_child_input,
        "parent_last_child_output": parent_last_child_output,
        "internal_children_input_output": internal_children_input_output
    }
    tflite_wrapper = op_hint.OpHint(
        "TfLiteDynamicRnn",
        level=2,
        children_inputs_mappings=inputs_outputs_mappings)
    with vs.variable_scope(scope or "rnn") as varscope:
        # Create a new scope in which the caching device is either
        # determined by the parent scope, or is set to place the cached
        # Variable using the same placement as for the rest of the RNN.
        if _should_cache():
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        inputs = tflite_wrapper.add_input(inputs,
                                          name="input",
                                          index_override=0)

        # Inputs are already time-major ([time, batch, depth]), so no
        # transpose is needed before the loop.
        flat_input = nest.flatten(inputs)

        parallel_iterations = parallel_iterations or 32
        if sequence_length is not None:
            sequence_length = math_ops.cast(sequence_length, dtypes.int32)
            if sequence_length.shape.rank not in (None, 1):
                raise ValueError(
                    "sequence_length must be a vector of length batch_size, "
                    "but saw shape: %s" % sequence_length.shape)
            sequence_length = array_ops.identity(  # Just to find it in the graph.
                sequence_length,
                name="sequence_length")

        batch_size = _best_effort_input_batch_size(flat_input)

        if initial_state is not None:
            state = initial_state
        else:
            if not dtype:
                raise ValueError(
                    "If there is no initial_state, you must give a dtype.")
            if getattr(cell, "get_initial_state", None) is not None:
                state = cell.get_initial_state(inputs=None,
                                               batch_size=batch_size,
                                               dtype=dtype)
            else:
                state = cell.zero_state(batch_size, dtype)

        def _assert_has_shape(x, shape):
            """Return an Assert op that checks `x` has runtime shape `shape`."""
            x_shape = array_ops.shape(x)
            packed_shape = array_ops.stack(shape)
            return control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), [
                    "Expected shape for Tensor %s is " % x.name, packed_shape,
                    " but saw shape: ", x_shape
                ])

        if not context.executing_eagerly() and sequence_length is not None:
            # Perform some shape validation
            with ops.control_dependencies(
                [_assert_has_shape(sequence_length, [batch_size])]):
                sequence_length = array_ops.identity(sequence_length,
                                                     name="CheckSeqLen")

        inputs = nest.pack_sequence_as(structure=inputs,
                                       flat_sequence=flat_input)

        outputs, final_state = _dynamic_rnn_loop(
            cell,
            inputs,
            state,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            sequence_length=sequence_length,
            dtype=dtype)

        # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth],
        # which is the layout this function returns.
        outputs = tflite_wrapper.add_output(outputs, name="outputs")

        return outputs, final_state
    def _build_net(self):
        with tf.variable_scope(self.name):

            self.mb_size = tf.placeholder(tf.int32, [], name='batch_size')
            self.lr_rate1 = tf.placeholder(tf.float32, name='learning_rate1')
            self.lr_rate2 = tf.placeholder(tf.float32, name='learning_rate2')
            self.keep_prob = tf.placeholder(tf.float32,
                                            name='keep_probability')

            # Input and Output
            self.x = tf.placeholder(tf.float32,
                                    [None, self.max_length, self.x_dim],
                                    name='inputs')
            self.y = tf.placeholder(tf.float32,
                                    [None, self.max_length, self.y_dim],
                                    name='labels_onehot')

            # Embedding
            self.E = tf.placeholder(tf.float32, [self.K, self.z_dim],
                                    name='embeddings_input')
            self.EE = tf.Variable(self.E, name='embeddings_var')
            self.embeddings = tf.nn.tanh(self.EE)

            # self.embde         = tf.nn.tanh(self.EE)
            # self.EE         = tf.Variable(self.E, name='embeddings_var')

            self.s = tf.placeholder(tf.int32, [None], name='cluster_label')
            self.s_onehot = tf.one_hot(self.s, self.K)

            # LOSS PARAMETERS
            self.alpha = tf.placeholder(tf.float32,
                                        name='alpha')  #For sample-wise entropy
            self.beta = tf.placeholder(
                tf.float32, name='beta')  #For prediction loss (i.e., mle)
            self.gamma = tf.placeholder(tf.float32,
                                        name='gamma')  #For batch-wise entropy
            self.delta = tf.placeholder(tf.float32,
                                        name='delta')  #For embedding
            '''
                ### CREATE RNN MASK
                    - This is to flexibly handle sequences with different length
                    - rnn_mask1: last observation; [mb_size, max_length]
                    - rnn_mask2: all available observations; [mb_size, max_length]
            '''
            # CREATE RNN MASK:
            seq_length = get_seq_length(self.x)
            tmp_range = tf.expand_dims(tf.range(0, self.max_length, 1), axis=0)
            self.rnn_mask1 = tf.cast(
                tf.equal(tmp_range, tf.expand_dims(seq_length - 1, axis=1)),
                tf.float32)  #last observation
            self.rnn_mask2 = tf.cast(
                tf.less_equal(tmp_range, tf.expand_dims(seq_length - 1,
                                                        axis=1)),
                tf.float32)  #all available observation

            ### DEFINE SELECTOR
            def selector(x_,
                         o_dim_=self.K,
                         num_layers_=2,
                         h_dim_=self.h_dim_h,
                         activation_fn=self.fc_activate_fn,
                         reuse=tf.AUTO_REUSE):
                """Fully connected stack ending in a softmax over `o_dim_` units.

                Builds `num_layers_ - 1` hidden layers (each followed by
                dropout with `self.keep_prob`) and one softmax output layer,
                all inside the 'selector' variable scope.

                Args:
                    x_: Input tensor fed to the first layer.
                    o_dim_: Number of output units.
                    num_layers_: Total number of layers including the output
                        layer; must be at least 1.
                    h_dim_: Number of units in each hidden layer.
                    activation_fn: Activation of the hidden layers.
                    reuse: `reuse` argument for the variable scope.

                Returns:
                    The output of the final softmax layer.

                Raises:
                    ValueError: If `num_layers_` is smaller than 1.
                """
                # Previously a value below 1 surfaced as an UnboundLocalError
                # on the hidden tensor; fail with a clear message instead.
                if num_layers_ < 1:
                    raise ValueError(
                        'num_layers_ must be >= 1, got {}'.format(num_layers_))

                with tf.variable_scope('selector', reuse=reuse):
                    net = x_
                    # With num_layers_ == 1 this loop is empty and the input
                    # goes straight to the output layer.
                    for tmp_layer in range(num_layers_ - 1):
                        net = tf.contrib.layers.fully_connected(
                            inputs=net,
                            num_outputs=h_dim_,
                            activation_fn=activation_fn,
                            scope='selector_' + str(tmp_layer))
                        net = tf.nn.dropout(net, keep_prob=self.keep_prob)
                    out = tf.contrib.layers.fully_connected(
                        inputs=net,
                        num_outputs=o_dim_,
                        activation_fn=tf.nn.softmax,
                        scope='selector_out')
                return out

            ### DEFINE PREDICTOR
            def predictor(x_,
                          o_dim_=self.y_dim,
                          o_type_=self.y_type,
                          num_layers_=1,
                          h_dim_=self.h_dim_g,
                          activation_fn=self.fc_activate_fn,
                          reuse=tf.AUTO_REUSE):
                """Map `x_` to an `o_dim_`-dimensional prediction.

                Stacks `num_layers_ - 1` hidden fully-connected layers of width
                `h_dim_` (each followed by dropout with `self.keep_prob`) and a
                final output layer, all under the 'predictor' variable scope.
                The output activation depends on `o_type_`: none for
                'continuous', softmax for 'categorical', sigmoid for 'binary'.

                Args:
                    x_: input tensor fed to the first layer.
                    o_dim_: number of units of the output layer.
                    o_type_: 'continuous', 'categorical' or 'binary'.
                    num_layers_: total number of fully-connected layers (>= 1).
                    h_dim_: number of units of every hidden layer.
                    activation_fn: activation of the hidden layers.
                    reuse: `reuse` argument of the 'predictor' variable scope.

                Returns:
                    The output of the final layer.

                Raises:
                    Exception: if `o_type_` is not one of the supported types.
                    ValueError: if `num_layers_` is smaller than 1 (previously
                        this surfaced as an unbound-local error on `net`).
                """
                if o_type_ == 'continuous':
                    out_fn = None  # linear output
                elif o_type_ == 'categorical':
                    out_fn = tf.nn.softmax  # for classification task
                elif o_type_ == 'binary':
                    out_fn = tf.nn.sigmoid
                else:
                    raise Exception(
                        'Wrong output type. The value {}!!'.format(o_type_))

                if num_layers_ < 1:
                    raise ValueError(
                        'num_layers_ must be >= 1. Got {}'.format(num_layers_))

                with tf.variable_scope('predictor', reuse=reuse):
                    # With num_layers_ == 1 the loop body never runs and the
                    # output layer is applied directly to x_.
                    net = x_
                    for layer_idx in range(num_layers_ - 1):
                        net = tf.contrib.layers.fully_connected(
                            inputs=net,
                            num_outputs=h_dim_,
                            activation_fn=activation_fn,
                            scope='predictor_' + str(layer_idx))
                        net = tf.nn.dropout(net, keep_prob=self.keep_prob)
                    out = tf.contrib.layers.fully_connected(
                        inputs=net,
                        num_outputs=o_dim_,
                        activation_fn=out_fn,
                        scope='predictor_out')
                return out

            ### DEFINE LOOP FUNCTION FOR ENCODER (f-g, f-h relations are created here)
            def loop_fn(time, cell_output, cell_state, loop_state):
                """Step callback passed to `tf.nn.raw_rnn`.

                On the initial call (`cell_output is None`) it supplies the
                zero cell state and the `loop_state_ta` TensorArrays. On every
                later call it derives `tmp_z` from the cell state, feeds it
                through `predictor` and `selector`, and writes the three
                results at index `time - 1` of the loop-state TensorArrays.

                `cell`, `loop_state_ta` and `inputs_ta` are closure variables
                assigned further down in the enclosing function; they are
                resolved when `raw_rnn` invokes this callback.

                Returns:
                    The tuple `(elements_finished, next_input, next_cell_state,
                    emit_output, next_loop_state)`.
                """

                emit_output = cell_output

                if cell_output is None:  # time == 0
                    next_cell_state = cell.zero_state(self.mb_size, tf.float32)
                    next_loop_state = loop_state_ta
                else:
                    next_cell_state = cell_state
                    # presumably concatenates the per-layer hidden states of
                    # the cell state -- confirm against utils.create_concat_state_h
                    tmp_z = utils.create_concat_state_h(
                        next_cell_state, self.num_layers_f, self.rnn_type)
                    tmp_y = predictor(tmp_z, self.y_dim, self.y_type,
                                      self.num_layers_g, self.h_dim_g,
                                      self.fc_activate_fn)
                    tmp_pi = selector(tmp_z, self.K, self.num_layers_h,
                                      self.h_dim_h, self.fc_activate_fn)

                    # The state seen at call `time` belongs to step `time - 1`,
                    # hence the shifted write index.
                    next_loop_state = (
                        loop_state[0].write(
                            time - 1, tmp_z),  # save all the hidden states
                        loop_state[1].write(time - 1,
                                            tmp_y),  # save all the output
                        loop_state[2].write(time - 1, tmp_pi)
                    )  # save all the selector_net output (i.e., pi)

                # The step counter is compared with the fixed self.max_length,
                # not with per-sample sequence lengths.
                elements_finished = (time >= self.max_length)

                # this gives the break-point (no more recurrence after the max_length)
                finished = tf.reduce_all(elements_finished)
                # Once finished, feed zeros instead of reading inputs_ta at
                # index `time`.
                next_input = tf.cond(
                    finished, lambda: tf.zeros([self.mb_size, self.x_dim],
                                               dtype=tf.float32),
                    lambda: inputs_ta.read(time))
                return (elements_finished, next_input, next_cell_state,
                        emit_output, next_loop_state)

            '''
                ##### CREATE RNN NETWORK
                    - (INPUT)  inputs_ta: TensorArray with [max_length, mb_size, x_dim] #x_dim included delta
                    - (OUTPUT) 
                        . zs     = rnn states (h) in LSTM/GRU             ; [mb_size, max_length z_dim]
                        . y_hats = output of predictor taking zs as inputs; [mb_size, max_length, y_dim]
                        . pis    = output of selector                     ; [mb_size, max_length, K]

            '''
            inputs = self.x
            inputs_ta = tf.TensorArray(dtype=tf.float32,
                                       size=self.max_length).unstack(
                                           _transpose_batch_time(inputs),
                                           name='rnn_input')

            cell = utils.create_rnn_cell(self.h_dim_f, self.num_layers_f,
                                         self.keep_prob, self.rnn_type,
                                         self.rnn_activate_fn)

            #define the loop_state TensorArray for information from rnn time steps
            loop_state_ta = (
                tf.TensorArray(size=self.max_length,
                               dtype=tf.float32,
                               clear_after_read=False),  #zs (j=1,...,J)
                tf.TensorArray(size=self.max_length,
                               dtype=tf.float32,
                               clear_after_read=False),  #y_hats (j=1,...,J)
                tf.TensorArray(size=self.max_length,
                               dtype=tf.float32,
                               clear_after_read=False)  #pis (j=1,...,J)
            )

            _, _, loop_state_ta = tf.nn.raw_rnn(
                cell, loop_fn)  #, parallel_iterations=1)

            self.zs = _transpose_batch_time(loop_state_ta[0].stack())
            self.y_hats = _transpose_batch_time(loop_state_ta[1].stack())
            self.pis = _transpose_batch_time(loop_state_ta[2].stack())

            ### SAMPLING PROCESS
            s_dist = tf.distributions.Categorical(probs=tf.reshape(
                self.pis, [-1, self.K]))  #define the categorical dist.
            s_sample = s_dist.sample()

            mask_e = tf.cast(
                tf.equal(tf.expand_dims(tf.range(0, self.K, 1), axis=0),
                         tf.expand_dims(s_sample, axis=1)), tf.float32)
            z_bars = tf.matmul(mask_e, self.embeddings)
            pi_sample = tf.reduce_sum(mask_e *
                                      tf.reshape(log(self.pis), [-1, self.K]),
                                      axis=1)

            with tf.variable_scope('rnn', reuse=True):
                y_bars = predictor(z_bars, self.y_dim, self.y_type,
                                   self.num_layers_g, self.h_dim_g,
                                   self.fc_activate_fn)

            self.z_bars = tf.reshape(z_bars, [-1, self.max_length, self.z_dim])
            self.y_bars = tf.reshape(y_bars, [-1, self.max_length, self.y_dim])
            self.pi_sample = tf.reshape(pi_sample, [-1, self.max_length])
            self.s_sample = tf.reshape(s_sample, [-1, self.max_length])

            ### DEFINE LOSS FUNCTIONS
            #\ell_{1}: KL divergence loss for regression and binary/categorical-classification task
            def loss_1(y_true_, y_pred_, y_type_=self.y_type):
                """Per-sample loss between `y_true_` and `y_pred_`, reduced over the last axis.

                - 'continuous':  sum of squared differences.
                - 'categorical': -sum(y_true * log(y_pred)).
                - 'binary':      -sum(y_true * log(y_pred)
                                      + (1 - y_true) * log(1 - y_pred)).

                Raises:
                    Exception: if `y_type_` is none of the three types above.
                """
                if y_type_ == 'continuous':
                    squared_err = (y_true_ - y_pred_)**2
                    return tf.reduce_sum(squared_err, axis=-1)

                if y_type_ == 'categorical':
                    return -tf.reduce_sum(y_true_ * log(y_pred_), axis=-1)

                if y_type_ == 'binary':
                    pos_term = y_true_ * log(y_pred_)
                    neg_term = (1. - y_true_) * log(1. - y_pred_)
                    return -tf.reduce_sum(pos_term + neg_term, axis=-1)

                raise Exception(
                    'Wrong output type. The value {}!!'.format(y_type_))

            #batch-wise entropy
            tmp_pis = tf.tile(tf.expand_dims(self.rnn_mask2, axis=2),
                              [1, 1, self.K]) * self.pis
            mean_pis = tf.reduce_sum(tf.reduce_sum(tmp_pis, axis=1),
                                     axis=0) / tf.reduce_sum(tf.reduce_sum(
                                         self.rnn_mask2, axis=1),
                                                             axis=0,
                                                             keepdims=True)

            ## LOSS_MLE: MLE prediction loss (for initalization)
            self.LOSS_MLE = tf.reduce_mean(
                tf.reduce_sum(self.rnn_mask2 *
                              loss_1(self.y, self.y_hats, self.y_type),
                              axis=1))

            ## LOSS1: predictive clustering loss
            self.LOSS_1 = tf.reduce_mean(
                tf.reduce_sum(self.rnn_mask2 *
                              loss_1(self.y, self.y_bars, self.y_type),
                              axis=1))
            self.LOSS_1_AC = tf.reduce_mean(
                tf.reduce_sum(self.rnn_mask2 * self.pi_sample *
                              loss_1(self.y, self.y_bars, self.y_type),
                              axis=1))

            ## LOSS2: sample-wise entropy loss
            self.LOSS_2 = tf.reduce_mean(
                -tf.reduce_sum(self.rnn_mask2 *
                               tf.reduce_sum(self.pis * log(self.pis), axis=2),
                               axis=1))

            predictor_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                               scope=self.name +
                                               '/rnn/predictor')
            selecter_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                              scope=self.name +
                                              '/rnn/selector')
            embedding_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                               scope=self.name +
                                               '/embeddings_var')
            encoder_vars = [
                vars_
                for vars_ in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
                if vars_ not in predictor_vars + selecter_vars + embedding_vars
            ]

            ### EMBEDDING TRAINING
            with tf.variable_scope('rnn', reuse=True):
                Ey = predictor(self.embeddings, self.y_dim, self.y_type,
                               self.num_layers_g, self.h_dim_g,
                               self.fc_activate_fn)
                # Ey   = predictor(self.EE, self.y_dim, self.y_type, self.num_layers_g, self.h_dim_g, self.fc_activate_fn)

            ## LOSS3: embedding separation loss (prevents embedding from collapsing)
            self.LOSS_3 = 0
            for i in range(self.K):
                for j in range(i + 1, self.K):
                    self.LOSS_3 += -loss_1(
                        Ey[i, :], Ey[j, :], y_type_=self.y_type) / (
                            (self.K - 1) * (self.K - 2)
                        )  # negative because we want to increase this;

            ### DEFINE OPTIMIZATION SOLVERS
            self.solver_MLE = tf.train.AdamOptimizer(self.lr_rate1).minimize(
                self.LOSS_MLE, var_list=encoder_vars + predictor_vars)
            self.solver_L1_critic = tf.train.AdamOptimizer(
                self.lr_rate1).minimize(self.LOSS_1,
                                        var_list=encoder_vars + predictor_vars)
            self.solver_L1_actor = tf.train.AdamOptimizer(
                self.lr_rate2).minimize(self.LOSS_1_AC +
                                        self.alpha * self.LOSS_2,
                                        var_list=encoder_vars + selecter_vars)
            self.solver_E = tf.train.AdamOptimizer(self.lr_rate1).minimize(
                self.LOSS_1 + self.beta * self.LOSS_3, var_list=embedding_vars)

            ### INITIALIZE SELECTOR
            self.zz = tf.placeholder(tf.float32, [None, self.z_dim])
            with tf.variable_scope('rnn', reuse=True):
                self.yy = predictor(self.zz, self.y_dim, self.y_type,
                                    self.num_layers_g, self.h_dim_g,
                                    self.fc_activate_fn
                                    )  #to check the predictor output given z
                self.s_out = selector(self.zz, self.K, self.num_layers_h,
                                      self.h_dim_h, self.fc_activate_fn)

            ## LOSS_S: selector initialization (cross-entropy wrt initialized class)
            self.LOSS_S = tf.reduce_mean(
                -tf.reduce_sum(self.s_onehot * log(self.s_out), axis=1))
            self.solver_S = tf.train.AdamOptimizer(self.lr_rate1).minimize(
                self.LOSS_S, var_list=selecter_vars)
    def _build_net(self):
        """Build the TF1 graph: placeholders, raw_rnn encoder, losses, solvers.

        The method
          1. declares the input / hyper-parameter placeholders and two helper
             closures inside ``tf.variable_scope(self.name)``;
          2. unrolls the recurrent cell with ``tf.nn.raw_rnn`` driven by
             ``loop_fn_MPP``, collecting per-step intensities and hidden
             states;
          3. defines the likelihood loss (``self.loss_MLE``) and the
             latent-space / output-space cluster distances;
          4. creates three Adam solvers (``solver_MLE``, ``solver_CLUSTER``,
             ``solver_TOTAL``).

        Everything from "INPUTS" onwards is built *outside* the ``self.name``
        variable scope; accordingly the prediction-network variables are
        looked up under ``'rnn/prediction_net'`` without a ``self.name``
        prefix.

        Returns:
            None. All results are stored as attributes on ``self``.
        """
        with tf.variable_scope(self.name):
            # Scalars fed at run time.
            self.mb_size     = tf.placeholder(tf.int32, [], name='batch_size')
            self.lr_rate     = tf.placeholder(tf.float32, name='learning_rate')
            self.keep_prob   = tf.placeholder(tf.float32, name='keep_probability')

            self.K           = tf.placeholder(tf.int32, [], name='num_Cluster')

            # Sequence inputs (one-hot marks M, deltas D), static features X
            # and the per-step mask used to pool the hidden states into Z.
            self.M           = tf.placeholder(tf.float32, shape=[None, self.max_length, self.num_Event], name='M_onehot')
            self.D           = tf.placeholder(tf.float32, shape=[None, self.max_length, 1], name='delta')
            self.X           = tf.placeholder(tf.float32, shape=[None, self.num_Feature], name='X')
            self.Mask        = tf.placeholder(tf.float32, shape=[None, self.max_length], name='rnn_mask')

            self.MU          = tf.placeholder(tf.float32, [None, self.z_dim], name='MU') #this will become [K, z_dim]
            self.S           = tf.placeholder(tf.int64, [None], name='S')
            S_one_hot   = tf.one_hot(self.S, self.K, name='S_one_hot')

            # Grid of L delta values on which the intensity is evaluated.
            self.delta_range = tf.placeholder(tf.float32, [self.L], name='delta_range')

            # LOSS PARAMETERS
            self.alpha      = tf.placeholder(tf.float32, name = 'alpha')
            self.beta       = tf.placeholder(tf.float32, name = 'beta')
            self.beta_ms    = tf.placeholder(tf.float32, name = 'beta_ms', shape=[self.num_Event - 1]) #(set [1, ..., 1] as a default)
            self.gamma      = tf.placeholder(tf.float32, name = 'gamma')


            # DECLARE FUNCTIONS FOR NETWORK CONSTRUCTION
            def prediction_network_softplus(h, delta, reuse=tf.AUTO_REUSE): #version 0
                """Three dense layers on concat([h, delta]) followed by softplus.

                Returns a non-negative tensor with `self.num_Event` columns.
                The layers use default scope names inside 'prediction_net',
                so their order must not change.
                """
                with tf.variable_scope('prediction_net', reuse=reuse):
                    tmp = tf.contrib.layers.fully_connected(inputs=tf.concat([h, delta], axis=1), num_outputs=self.h_dim2, activation_fn=None) #layer1
                    tmp = tf.nn.dropout(tmp, keep_prob=self.keep_prob)
                    tmp = tf.contrib.layers.fully_connected(inputs=tmp, num_outputs=self.h_dim2, activation_fn=tf.nn.relu) #layer2
                    tmp = tf.nn.dropout(tmp, keep_prob=self.keep_prob)
                    tmp = tf.contrib.layers.fully_connected(inputs=tmp, num_outputs=self.num_Event, activation_fn=None) #layer3 (output)
                    out = tf.nn.softplus(tmp)
                return out

            ### DEFINE LOOP FUNCTION FOR RAW_RNN w/ TEMPORAL ATTENTION
            def loop_fn_MPP(time, cell_output, cell_state, loop_state):
                """Step callback passed to `tf.nn.raw_rnn`.

                `cell`, `inputs_ta` and `seq_length` are closure variables
                assigned later in `_build_net`; they are resolved when
                `raw_rnn` invokes this callback.
                """
                emit_output = cell_output

                if cell_output is None:  # time == 0
                    next_cell_state = cell.zero_state(self.mb_size, tf.float32)
                    next_loop_state = (tf.TensorArray(size=self.max_length, dtype=tf.float32),  #lambda(t_{j})
                                       tf.TensorArray(size=self.max_length, dtype=tf.float32),  #lambda(t_{j-1})
                                       tf.TensorArray(size=self.max_length, dtype=tf.float32))  #hidden states (h_{j})

                else:
                    next_cell_state = cell_state
                    # presumably concatenates the per-layer states of the
                    # cell -- confirm against utils.create_concat_state
                    tmp_h = utils.create_concat_state(next_cell_state, self.num_layers_RNN, self.RNN_type, BiRNN=None)

                    def fn_time_last(): #at time == max_length there is no input at index `time`; read time-1 instead to prevent an indexing error (this last value is not used in the loss)
                        d_next = tf.reshape(inputs_ta.read(time-1)[:,0], shape=[-1, 1]) #column 0 of the input is the delta channel
                        l_next  = prediction_network_softplus(tmp_h, d_next)
                        l_curr  = prediction_network_softplus(tmp_h, tf.zeros_like(d_next))
                        return l_curr, l_next

                    def fn_time_others(): #regular step: use the delta of the upcoming input at index `time`
                        d_next = tf.reshape(inputs_ta.read(time)[:,0], shape=[-1, 1]) #column 0 of the input is the delta channel
                        l_next  = prediction_network_softplus(tmp_h, d_next)
                        l_curr  = prediction_network_softplus(tmp_h, tf.zeros_like(d_next))
                        return l_curr, l_next

                    l_curr, l_next = tf.cond(
                        tf.equal(time, self.max_length), lambda: fn_time_last(), lambda: fn_time_others()
                    )

                    # The state seen at call `time` belongs to step `time - 1`.
                    next_loop_state = (loop_state[0].write(time-1, l_next),   # save lambda(t_{j})
                                       loop_state[1].write(time-1, l_curr),   # save lambda(t_{j-1})
                                       loop_state[2].write(time-1, tmp_h))    # save all the h_ins

                elements_finished = (time >= seq_length)

                #this gives the break-point (no more recurrence once every sequence is finished)
                finished = tf.reduce_all(elements_finished)


                def fn_input_embedding():
                    # Current input step concatenated with the static features X,
                    # projected to h_dim2 with a ReLU layer.
                    embedding = tf.concat([inputs_ta.read(time), self.X], axis=1)
                    embedding = tf.contrib.layers.fully_connected(inputs=embedding, num_outputs=self.h_dim2, activation_fn=tf.nn.relu)

                    return embedding

                next_input = tf.cond(
                    finished, lambda: tf.zeros([self.mb_size, self.h_dim2], dtype=tf.float32), lambda: fn_input_embedding()
                )

                return (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)


        ### INPUTS
        # Delta is placed first, so column 0 of every step is the delta value.
        inputs = tf.concat([self.D, self.M], axis=2, name='inputs')

        inputs_ta = tf.TensorArray(
            dtype=tf.float32,
            size=self.max_length,
            clear_after_read=False
        ).unstack(_transpose_batch_time(inputs), name='inputs_ta')

        seq_length = get_seq_length(inputs)


        ### RNNS
        cell = utils.create_rnn_cell(self.h_dim1, self.num_layers_RNN, self.keep_prob, self.RNN_type, self.RNN_active_fn)
        _, rnn_final_state, loop_state_ta = tf.nn.raw_rnn(cell, loop_fn_MPP)


        # Stack the per-step TensorArrays and move the batch axis to the front.
        next_lambdas    = _transpose_batch_time(loop_state_ta[0].stack())
        curr_lambdas    = _transpose_batch_time(loop_state_ta[1].stack())
        rnn_states      = _transpose_batch_time(loop_state_ta[2].stack())


        # Mask-weighted sum of the hidden states over time.
        # assumes the last axis of rnn_states has size z_dim -- TODO confirm
        self.Z = tf.reduce_sum(rnn_states * tf.tile(tf.expand_dims(self.Mask, axis=2), [1,1, self.z_dim]), axis=1)


        # AFTER PUTTING (m_{0}, t_{0})
        #     - m_{0} = [1,0,0,...] (auxiliary event type)
        #     - t_{0} = 0
        #     - Thus, no need to consider the first event issue, i.e., putting
        #       an additional loss function for t=1
        #     - Instead, m=0, t=0 (i.e., the first index of event and timing)
        #       is not considered.
        tmp_MLE1 = tf.reduce_sum(tf.reduce_sum(tf.log(next_lambdas[:, :-1, 1:] + 1e-8) * self.M[:,1:,1:], axis=2), axis=1)

        #do not consider m=0 (this is indicator for BOS)
        # Two-point average of the intensities at both ends of each interval,
        # times the interval length D. `0.5` (not `1/2`) keeps the factor
        # correct under Python 2 integer division, and starting from 0. keeps
        # tmp_MLE2 defined even when num_Event == 1.
        tmp_MLE2 = 0.
        for m in range(1, self.num_Event):
            tmp_MLE2 += tf.reduce_sum(0.5 * (next_lambdas[:, :-1, m] + curr_lambdas[:, :-1, m]) * self.D[:, 1:, 0], axis=1)

        self.loss_MLE = - tf.reduce_mean(tmp_MLE1 - tmp_MLE2)


        ### LOSS - CLUSTERING

        # DISTANCE IN THE LATENT SPACE
        Z_expanded      = tf.tile(tf.expand_dims(self.Z, axis=1), [1, self.K, 1])     #[None, num_Cluster, z_dim]
        MU_expanded     = tf.tile(tf.expand_dims(self.MU, axis=0), [self.mb_size, 1, 1])        #[None, num_Cluster, z_dim]
        dist_z_expanded = tf.reduce_sum((Z_expanded - MU_expanded)**2, axis=2) #[None, num_Cluster]


        # homo: distance to the assigned cluster; hetero: summed distance to all others
        dist_z_homo     = tf.reduce_sum(dist_z_expanded * S_one_hot, axis=1) #[None]
        dist_z_hetero   = tf.reduce_sum(dist_z_expanded * (1. - S_one_hot), axis=1) #[None]

        self.dist_z_homo     = tf.reduce_mean(dist_z_homo, axis=0)
        self.dist_z_hetero   = tf.reduce_mean(dist_z_hetero, axis=0)


        # DISTANCE IN THE OUTPUT SPACE (LAMBDA)
        # Evaluate the prediction network on every delta of the grid, once for
        # the sample embeddings Z and once for the centroids MU.
        Y    = []
        PSI  = []
        for l in range(self.L):
            tmp_d1 = self.delta_range[l] * tf.ones_like(tf.reshape(self.Z[:,0], shape=[-1, 1]))
            tmp_d2 = self.delta_range[l] * tf.ones_like(tf.reshape(self.MU[:,0], shape=[-1, 1]))

            with tf.variable_scope('rnn', reuse=True):
                Y.append(prediction_network_softplus(self.Z, tmp_d1))
                PSI.append(prediction_network_softplus(self.MU, tmp_d2))


        self.Y_stacked = tf.stack(Y, axis=2)
        self.PSI_stacked = tf.stack(PSI, axis=2)

        Y_stacked_expanded     = tf.tile(tf.expand_dims(self.Y_stacked, axis=1), [1, self.K, 1, 1])     #[None, num_Cluster, num_Event, L]
        PSI_stacked_expanded   = tf.tile(tf.expand_dims(self.PSI_stacked, axis=0), [self.mb_size, 1, 1, 1])       #[None, num_Cluster, num_Event, L]

        tmp = ( Y_stacked_expanded - PSI_stacked_expanded )**2

        # trapezoidal approximation
        # NOTE(review): the step is delta_range[-1]/(L-1), which presumes a
        # uniform grid starting at 0, and both end points are subtracted in
        # full, whereas the textbook trapezoid rule subtracts only half of
        # each. Left unchanged because it alters the loss value -- confirm.
        dist_y_expanded_ms = self.delta_range[-1]/(self.L-1) * (tf.reduce_sum(tmp, axis=3) - tmp[:, :, :, 0] - tmp[:, :, :, -1])
        # Drop event 0 and weight the remaining events with beta_ms.
        dist_y_expanded    = tf.reduce_sum(dist_y_expanded_ms[:, :, 1:] * self.beta_ms, axis=2)

        dist_y_homo     = tf.reduce_sum(dist_y_expanded * S_one_hot, axis=1) #[None]
        dist_y_hetero   = tf.reduce_sum(dist_y_expanded * (1. - S_one_hot), axis=1) #[None]

        self.dist_y_homo     = tf.reduce_mean(dist_y_homo, axis=0)
        self.dist_y_hetero   = tf.reduce_mean(dist_y_hetero, axis=0)


        ### FOR USER-DEFINED DISTANCE MEASURE
        # Same grid evaluation as above, for externally supplied embeddings ZZ.
        self.ZZ = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        YY    = []
        for l in range(self.L):
            tmp_d1 = self.delta_range[l] * tf.ones_like(tf.reshape(self.ZZ[:,0], shape=[-1, 1]))

            with tf.variable_scope('rnn', reuse=True):
                YY.append(prediction_network_softplus(self.ZZ, tmp_d1))
        self.YY_stacked = tf.stack(YY, axis=2)


        ### FOR THINNING-ALGORITHM
        # Intensity for a fed embedding Z_IN at a fed delta D_IN.
        self.D_IN = tf.placeholder(tf.float32, shape=[None], name='delta_in')
        self.Z_IN = tf.placeholder(tf.float32, shape=[None, self.z_dim])

        tmp_d_in = tf.reshape(self.D_IN * tf.ones_like(self.Z_IN[:,0]), shape=[-1, 1])

        with tf.variable_scope('rnn', reuse=True):
            self.Y_pred = prediction_network_softplus(self.Z_IN, tmp_d_in)


        # Split the variables: prediction network vs. everything else.
        global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        pred_vars   = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='rnn/prediction_net')
        enc_vars    = [tmp_var for tmp_var in global_vars if tmp_var not in pred_vars]

        self.loss_CLU = self.alpha*self.dist_z_homo
        self.loss_COM = self.beta*self.dist_y_homo - self.gamma*self.dist_y_hetero

        self.loss_CLU_COM = self.loss_CLU+self.loss_COM

        self.loss_TOTAL = self.loss_MLE + self.loss_CLU + self.loss_COM

        # The clustering solver updates only the non-prediction-network
        # variables; the other two solvers update every global variable.
        self.solver_MLE     = tf.train.AdamOptimizer(self.lr_rate, beta1=0.9, beta2=0.999).minimize(self.loss_MLE, var_list=global_vars)
        self.solver_CLUSTER = tf.train.AdamOptimizer(self.lr_rate, beta1=0.9, beta2=0.999).minimize(self.loss_CLU_COM, var_list=enc_vars)
        self.solver_TOTAL   = tf.train.AdamOptimizer(self.lr_rate, beta1=0.9, beta2=0.999).minimize(self.loss_TOTAL, var_list=global_vars)
예제 #29
0
파일: rnn.py 프로젝트: kylin9872/tensorflow
def dynamic_rnn(cell,
                inputs,
                sequence_length=None,
                initial_state=None,
                dtype=None,
                parallel_iterations=None,
                swap_memory=False,
                time_major=True,
                scope=None):
  """Creates a recurrent neural network specified by RNNCell `cell`.

  Performs fully dynamic unrolling of `inputs`.

  Example:

  ```python
  # create a BasicRNNCell
  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)

  # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]

  # defining initial state
  initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)

  # 'state' is a tensor of shape [batch_size, cell_state_size]
  outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data,
                                     initial_state=initial_state,
                                     dtype=tf.float32)
  ```

  ```python
  # create 2 LSTMCells
  rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]

  # create a RNN cell composed sequentially of a number of RNNCells
  multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)

  # 'outputs' is a tensor of shape [batch_size, max_time, 256]
  # 'state' is a N-tuple where N is the number of LSTMCells containing a
  # tf.contrib.rnn.LSTMStateTuple for each cell
  outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                     inputs=data,
                                     dtype=tf.float32)
  ```


  Args:
    cell: An instance of RNNCell.
    inputs: The RNN inputs.
      If `time_major == False` (default), this must be a `Tensor` of shape:
        `[batch_size, max_time, ...]`, or a nested tuple of such elements.
      If `time_major == True`, this must be a `Tensor` of shape: `[max_time,
        batch_size, ...]`, or a nested tuple of such elements. This may also be
        a (possibly nested) tuple of Tensors satisfying this property.  The
        first two dimensions must match across all the inputs, but otherwise the
        ranks and other shape components may differ. In this case, input to
        `cell` at each time-step will replicate the structure of these tuples,
        except for the time dimension (from which the time is taken). The input
        to `cell` at each time step will be a `Tensor` or (possibly nested)
        tuple of Tensors each with dimensions `[batch_size, ...]`.
    sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. Used
      to copy-through state and zero-out outputs when past a batch element's
      sequence length.  So it's more for performance than correctness.
    initial_state: (optional) An initial state for the RNN. If `cell.state_size`
      is an integer, this must be a `Tensor` of appropriate type and shape
      `[batch_size, cell.state_size]`. If `cell.state_size` is a tuple, this
      should be a tuple of tensors having shapes `[batch_size, s] for s in
      cell.state_size`.
    dtype: (optional) The data type for the initial state and expected output.
      Required if initial_state is not provided or RNN state has a heterogeneous
      dtype.
    parallel_iterations: (Default: 32).  The number of iterations to run in
      parallel.  Those operations which do not have any temporal dependency and
      can be run in parallel, will be.  This parameter trades off time for
      space.  Values >> 1 use more memory but take less time, while smaller
      values use less memory but computations take longer.
    swap_memory: Transparently swap the tensors produced in forward inference
      but needed for back prop from GPU to CPU.  This allows training RNNs which
      would typically not fit on a single GPU, with very minimal (or no)
      performance penalty.
    time_major: The shape format of the `inputs` and `outputs` Tensors. If true,
      these `Tensors` must be shaped `[max_time, batch_size, depth]`. If false,
      these `Tensors` must be shaped `[batch_size, max_time, depth]`. Using
      `time_major = True` is a bit more efficient because it avoids transposes
      at the beginning and end of the RNN calculation.  However, most TensorFlow
      data is batch-major, so by default this function accepts input and emits
      output in batch-major form.
    scope: VariableScope for the created subgraph; defaults to "rnn".

  Returns:
    A pair (outputs, state) where:

    outputs: The RNN output `Tensor`.

      If time_major == False (default), this will be a `Tensor` shaped:
        `[batch_size, max_time, cell.output_size]`.

      If time_major == True, this will be a `Tensor` shaped:
        `[max_time, batch_size, cell.output_size]`.

      Note, if `cell.output_size` is a (possibly nested) tuple of integers
      or `TensorShape` objects, then `outputs` will be a tuple having the
      same structure as `cell.output_size`, containing Tensors having shapes
      corresponding to the shape data in `cell.output_size`.

    state: The final state.  If `cell.state_size` is an int, this
      will be shaped `[batch_size, cell.state_size]`.  If it is a
      `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
      If it is a (possibly nested) tuple of ints or `TensorShape`, this will
      be a tuple having the corresponding shapes. If cells are `LSTMCells`
      `state` will be a tuple containing a `LSTMStateTuple` for each cell.

  Raises:
    TypeError: If `cell` is not an instance of RNNCell.
    ValueError: If inputs is None or an empty list.
    RuntimeError: If not using control flow v2.
  """

  # Currently only support time_major == True case.
  assert time_major

  # TODO(b/123051275): We need to check if the cells are TfLiteLSTMCells or
  # TfLiteRNNCells.
  rnn_cell_impl.assert_like_rnncell("cell", cell)

  if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
    raise RuntimeError("OpHint dynamic rnn only supports control flow v2.")

  parent_first_child_input = [{
      "parent_ophint_input_index": 0,
      "first_child_ophint_input_index": 0
  }]
  parent_last_child_output = [{
      "parent_output_index": 0,
      # For LstmCell, the index is 2.
      # For RnnCell, the index is 1.
      # So we use -1 meaning it's the last one.
      "child_output_index": -1
  }]
  internal_children_input_output = [{
      "child_input_index": 0,
      # For LstmCell, the index is 2.
      # For RnnCell, the index is 1.
      # So we use -1 meaning it's the last one.
      "child_output_index": -1
  }]
  inputs_outputs_mappings = {
      "parent_first_child_input": parent_first_child_input,
      "parent_last_child_output": parent_last_child_output,
      "internal_children_input_output": internal_children_input_output
  }
  tflite_wrapper = op_hint.OpHint(
      "TfLiteDynamicRnn",
      level=2,
      children_inputs_mappings=inputs_outputs_mappings)
  with vs.variable_scope(scope or "rnn") as varscope:
    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    if _should_cache():
      if varscope.caching_device is None:
        varscope.set_caching_device(lambda op: op.device)

    inputs = tflite_wrapper.add_input(inputs, name="input", index_override=0)

    # By default, time_major==False and inputs are batch-major: shaped
    #   [batch, time, depth]
    # For internal calculations, we transpose to [time, batch, depth]
    flat_input = nest.flatten(inputs)

    if not time_major:
      # (batch, time, depth) => (time, batch, depth)
      flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
      flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input)

    parallel_iterations = parallel_iterations or 32
    if sequence_length is not None:
      sequence_length = math_ops.to_int32(sequence_length)
      if sequence_length.get_shape().rank not in (None, 1):
        raise ValueError(
            "sequence_length must be a vector of length batch_size, "
            "but saw shape: %s" % sequence_length.get_shape())
      sequence_length = array_ops.identity(  # Just to find it in the graph.
          sequence_length,
          name="sequence_length")

    batch_size = _best_effort_input_batch_size(flat_input)

    if initial_state is not None:
      state = initial_state
    else:
      if not dtype:
        raise ValueError("If there is no initial_state, you must give a dtype.")
      if getattr(cell, "get_initial_state", None) is not None:
        state = cell.get_initial_state(
            inputs=None, batch_size=batch_size, dtype=dtype)
      else:
        state = cell.zero_state(batch_size, dtype)

    def _assert_has_shape(x, shape):
      x_shape = array_ops.shape(x)
      packed_shape = array_ops.stack(shape)
      return control_flow_ops.Assert(
          math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), [
              "Expected shape for Tensor %s is " % x.name, packed_shape,
              " but saw shape: ", x_shape
          ])

    if not context.executing_eagerly() and sequence_length is not None:
      # Perform some shape validation
      with ops.control_dependencies(
          [_assert_has_shape(sequence_length, [batch_size])]):
        sequence_length = array_ops.identity(
            sequence_length, name="CheckSeqLen")

    inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input)

    outputs, final_state = _dynamic_rnn_loop(
        cell,
        inputs,
        state,
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory,
        sequence_length=sequence_length,
        dtype=dtype)

    # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].
    # If we are performing batch-major calculations, transpose output back
    # to shape [batch, time, depth]
    if not time_major:
      # (time, batch, depth) => (batch, time, depth)
      outputs = nest.map_structure(_transpose_batch_time, outputs)
    outputs = tflite_wrapper.add_output(outputs, name="outputs")

    return outputs, final_state
예제 #30
0
    def _build_forward(self):
        """Construct the forward pass of the model graph.

        Stages, in the order they are built:

        1. Embed the context (`self.x` / `self.cx`) and the question
           (`self.q` / `self.cq`) with a character CNN and/or a word-embedding
           lookup, optionally followed by a highway network whose variables
           are shared between context and question.
        2. Encode both with bidirectional LSTMs (scope "prepro").
        3. Combine them with `attention_layer` (or `AttentionCell`s when
           `config.dynamic_att` is set) and run two more bidirectional layers
           (scope "main").
        4. Compute two sets of span logits with `get_logits`.
        5. Run an attention decoder with `tf.nn.raw_rnn`, seeded with the
           final state of the "g1" layer.

        Nothing is returned; results are stored on `self`:
        `tensor_dict["xx"]`, `["qq"]`, `["u"]`, `["h"]` (plus `["x"]` and
        `["q"]` when `config.use_word_emb`), `flat_yp`, `flat_yp2`,
        `decoder_logits_train`, `index_start` and `index_end`.

        Attributes read from `self`: `config`, `x`, `q`, `cx`, `cq`,
        `x_mask`, `q_mask`, `is_train`, `pointer_gen`, `decoder_inputs`,
        `target_sequence_length`, `tensor_dict`.

        NOTE(review): `word_emb_mat` is only bound inside
        `if config.use_word_emb:` but the decoder uses it unconditionally, and
        `xx` / `qq` are only bound when `use_char_emb` or `use_word_emb` is
        set. Building with `use_word_emb` disabled therefore fails with an
        unbound-name error in the visible code -- confirm this configuration
        is never used.
        """
        config = self.config
        # Static sizes taken from the config. EW and WOW are only referenced
        # by the commented-out entity-embedding code further down.
        N, M, JX, JQ, VW, VC, d, W ,EW, WOW= \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.len_new_emb_mat, config.char_vocab_size, config.hidden_size, \
            config.max_word_size,config.word_vocab_size-config.vw_wo_entity_size,config.vw_wo_entity_size
        # Override the configured maxima with the runtime shape of the
        # current batch; from here on JX, JQ and M are tensors, not ints.
        JX = tf.shape(self.x)[2]  # words
        JQ = tf.shape(self.q)[1]  # words
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                # Character embedding table, placed on the CPU.
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    # Fold the leading dimensions together so the CNN sees a
                    # rank-4 input.
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    # Comma-separated config strings -> lists of ints.
                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    # The per-filter output channels must add up to the
                    # configured character output size.
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            # Same scope name ("xx") with reuse enabled, so the
                            # question CNN shares the context CNN's variables.
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            # Separate scope ("qq") -> separate variables.
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        # NOTE(review): the positional arguments of
                        # tf.random_normal_initializer are (mean, stddev), so
                        # this is a normal with mean -0.5 and stddev 0.5, not
                        # a [-0.5, 0.5] range -- confirm this is intended.
                        init_word_emb = tf.random_normal_initializer(-0.5, 0.5)
                        #entity_emb_mat = tf.get_variable("entity_emb_mat", dtype='float', shape=[EW, EW], initializer=get_initializer(config.onehot_encoded))
                        #entity_emb_out = _linear(entity_emb_mat, dw, True, bias_initializer=tf.constant_initializer(0.0))
                        #word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, dw], initializer=get_initializer(config.emb_mat))
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=init_word_emb)
                        #word_emb_mat = tf.concat(axis=0,values=[word_emb_mat, entity_emb_out])
                    else:
                        # No initializer is given outside training mode;
                        # presumably the values are restored from a
                        # checkpoint -- TODO confirm.
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')

                    #if config.use_glove_for_unk:
                    #    word_emb_mat = tf.concat(axis=0, values=[word_emb_mat, self.new_emb_mat])
                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(
                        word_emb_mat, self.x
                    )  # [N, M, JX, d] i.e. [batch size, max sentences, max words, embedding size]
                    Aq = tf.nn.embedding_lookup(
                        word_emb_mat, self.q
                    )  # [N, JQ, d] i.e. [batch size, max words, embedding size]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    # Append the word embeddings to the char-CNN features
                    # along the last axis.
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                # Reuse is switched on between the two calls so the question
                # goes through the same highway variables as the context.
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq
        #xx = tf.Print(xx,[tf.shape(xx),xx],message="DHRUV xx=",summarize=20)
        # Four forward/backward LSTM cell pairs of size d, each also wrapped
        # in a SwitchableDropoutWrapper driven by self.is_train:
        #   cell   -> "prepro" encoders
        #   cell2  -> first modeling layer  ("g0")
        #   cell3  -> second modeling layer ("g1")
        #   cell4  -> span-end layer        ("g2")
        cell_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw = SwitchableDropoutWrapper(
            cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(
            cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell2_fw = SwitchableDropoutWrapper(
            cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(
            cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell3_fw = SwitchableDropoutWrapper(
            cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(
            cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell4_fw = SwitchableDropoutWrapper(
            cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(
            cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        # Sequence lengths are obtained by summing the masks over the word
        # axis.
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N,M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            # Question encoder. fw_u_f / bw_u_f are not used again in this
            # method.
            (fw_u, bw_u), ((_, fw_u_f), (_,
                                         bw_u_f)) = bidirectional_dynamic_rnn(
                                             d_cell_fw,
                                             d_cell_bw,
                                             qq,
                                             q_len,
                                             dtype='float',
                                             scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])
            # Context encoder. Both branches pass the unwrapped cells
            # (cell_fw / cell_bw), whereas the question encoder above uses the
            # dropout-wrapped ones.
            if config.share_lstm_weights:
                # Same scope ('u1') with reuse enabled -> shares the question
                # encoder's variables. fw_s / bw_s are not used again.
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), (fw_s, bw_s) = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]

            else:
                # Separate scope ('h1') -> separate variables.
                (fw_h, bw_h), _ = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, xx, x_len, dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:  # not true
                # Attention is computed inside the recurrent cells: tile the
                # question encoding and mask once per sentence and wrap the
                # raw cell2/cell3 LSTMs in AttentionCell.
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell_fw = AttentionCell(
                    cell2_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                first_cell_bw = AttentionCell(
                    cell2_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_fw = AttentionCell(
                    cell3_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_bw = AttentionCell(
                    cell3_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                # Attention is computed up front; the modeling layers then use
                # the plain dropout-wrapped cells.
                p0 = attention_layer(
                    config,
                    self.is_train,
                    h,
                    u,
                    h_mask=self.x_mask,
                    u_mask=self.q_mask,
                    scope="p0",
                    tensor_dict=self.tensor_dict)  # p0 seems to be G in paper
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            #p1 = tf.reshape(p0,[N , M*JX, 8*d])
            # First modeling layer.
            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell_fw,
                first_cell_bw,
                p0,
                x_len,
                dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            # Second modeling layer; its final states seed the decoder below.
            (fw_g1, bw_g1), (my_fw_final_state,
                             my_bw_final_state) = bidirectional_dynamic_rnn(
                                 second_cell_fw,
                                 second_cell_bw,
                                 g0,
                                 x_len,
                                 dtype='float',
                                 scope='g1')  # [N, M, JX, 2d]

            g1 = tf.concat(axis=3, values=[fw_g1,
                                           bw_g1])  # g1 seems to be M in paper
            #g1= tf.reshape(g1,[N, M , JX, 2*d]) #reshaping here again, since g1 is used ahead

            # Debug: pass-through ops that log the tensor shapes (first_n=5).
            g1 = tf.Print(g1, [tf.shape(g1)],
                          message="g1 shape",
                          first_n=5,
                          summarize=200)
            p0 = tf.Print(p0, [tf.shape(p0)],
                          message="p0 shape",
                          first_n=5,
                          summarize=200)

            # Flatten the sentence and word axes into one; this is what the
            # decoder attends over.
            g11 = tf.reshape(g1, [N, -1, 2 * d])
            # Join the forward and backward final states along axis 1 into a
            # single LSTMStateTuple used as the decoder's initial state.
            my_encoder_final_state_c = tf.concat(
                values=(my_fw_final_state.c, my_bw_final_state.c),
                axis=1,
                name="my_encoder_final_state_c")
            my_encoder_final_state_h = tf.concat(
                values=(my_fw_final_state.h, my_bw_final_state.h),
                axis=1,
                name="my_encoder_final_state_h")
            my_encoder_final_state = tf.contrib.rnn.LSTMStateTuple(
                c=my_encoder_final_state_c, h=my_encoder_final_state_h)

            #compute indices for finding span as the second task in multi task learning
            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            logits = tf.Print(logits, [tf.shape(logits)],
                              message="logits shape",
                              first_n=5,
                              summarize=200)
            # Summarise g1 with the first logits via softsel, then tile the
            # summary so it can be concatenated with every word position.
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            # Third layer, conditioned on the first-logits summary; its
            # outputs feed the second set of logits.
            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell4_fw,
                d_cell4_bw,
                tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            # Flatten both logit tensors over (sentence, word) and turn them
            # into probability distributions.
            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_logits = tf.Print(flat_logits,
                                   [tf.shape(flat_logits), flat_logits],
                                   message="flat_logits shape and contents",
                                   first_n=5,
                                   summarize=200)
            self.flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            self.flat_yp2 = tf.nn.softmax(flat_logits2)

            tgt_vocab_size = config.len_new_emb_mat  # hparam # FIXME: Obtain embeddings differently?
            # Plain Python print: runs once when the graph is built, not on
            # every session run.
            print("length is", config.len_new_emb_mat)
            nodes = d

            # Look up embedding
            decoder_emb_inp = tf.nn.embedding_lookup(
                word_emb_mat,
                self.decoder_inputs)  # [batch_size, max words, embedding_size]

            with tf.variable_scope("rnn_decoder", reuse=tf.AUTO_REUSE):
                init = tf.random_normal_initializer(0.0, 0.5)
                # Output projection used by the non-pointer decoder.
                W_dense = tf.get_variable(name="W_dense",
                                          shape=[2 * nodes, tgt_vocab_size],
                                          dtype=tf.float32,
                                          initializer=init)
                b_dense = tf.get_variable(name="b_dense",
                                          shape=[tgt_vocab_size],
                                          dtype=tf.float32,
                                          initializer=tf.zeros_initializer)

                # Attention parameters: decoder-state projection, encoder
                # projection (applied as a 1x1 convolution) and the blending
                # vector. Note the encoder variable is named "W_att_enc1" in
                # the graph.
                W_att_dec = tf.get_variable(name="W_att_dec",
                                            shape=[2 * nodes, 2 * nodes],
                                            dtype=tf.float32,
                                            initializer=init)
                W_att_enc = tf.get_variable(name="W_att_enc1",
                                            shape=[1, 1, 2 * nodes, 2 * nodes],
                                            dtype=tf.float32,
                                            initializer=init)
                v_blend = tf.get_variable(name="v_blend",
                                          shape=[1, 2 * nodes],
                                          dtype=tf.float32,
                                          initializer=init)

                # Embedding of token id 0 for every batch element; fed as the
                # decoder input once all sequences have finished.
                pad_time_slice = tf.fill([N], 0, name='PAD')
                pad_step_embedded = tf.nn.embedding_lookup(
                    word_emb_mat, pad_time_slice)

                # The cell is 2 * nodes wide; presumably this is required
                # because the initial state is the forward/backward
                # concatenation built above -- TODO confirm.
                decoder_cell = tf.contrib.rnn.BasicLSTMCell(
                    2 * nodes, state_is_tuple=True
                )  # doesnt work without the factor of 2??
                '''Loop transition function is a mapping (time, previous_cell_output, previous_cell_state, previous_loop_state) -> 
                (elements_finished, input, cell_state, output, loop_state).
                 It is called before RNNCell to prepare its inputs and state. Everything is a Tensor except for initial call at time=0 
                 when everything is None (except time).'''
                def execute_pointer_network(attn_dist):
                    # Find the source position that received the highest
                    # attention probability and return the word id stored
                    # there, so it can be fed to the next decoder step.
                    index_pos = tf.argmax(attn_dist, axis=1)
                    index_pos = tf.expand_dims(index_pos, 1)
                    # Build gather indices (batch index, 0, argmax position)
                    # into self.x.
                    # NOTE(review): the middle index is always 0, so only the
                    # first sentence of self.x is addressed, while attn_dist
                    # spans the flattened sentence*word axis of g11 -- confirm
                    # this is only used with a single sentence.
                    index_pos = tf.concat([
                        tf.reshape(tf.range(start=0, limit=N, dtype=tf.int64),
                                   [N, 1]),
                        tf.zeros([N, 1], tf.int64), index_pos
                    ],
                                          axis=1)
                    index_pos = tf.cast(tf.gather_nd(params=self.x,
                                                     indices=index_pos),
                                        dtype=tf.int64)
                    return index_pos

                def execute_normal_decoder(previous_output, W_dense, b_dense):
                    # Project the previous cell output with the dense layer
                    # and take the highest-scoring index.
                    output_logits = tf.add(tf.matmul(previous_output, W_dense),
                                           b_dense)
                    return tf.argmax(output_logits, axis=1)

                def loop_fn_initial():
                    # raw_rnn's first call: supplies the initial input and
                    # state, and a zero tensor that only serves to fix the
                    # structure of the emitted outputs.
                    initial_elements_finished = (
                        0 >= self.target_sequence_length
                    )  # all False at the initial step
                    #initial_input = tf.concat([decoder_emb_inp[:,0], my_encoder_final_state_h], 1)
                    initial_input = decoder_emb_inp[:, 0]
                    initial_cell_state = my_encoder_final_state
                    #setting the correct shapes , as it is used to determine the emit structure
                    # Pointer mode emits an attention distribution (M * JX
                    # wide); otherwise the cell output (2 * nodes wide).
                    initial_cell_output = tf.cond(
                        self.pointer_gen,
                        lambda: tf.zeros([M * JX], tf.float32),
                        lambda: tf.zeros([2 * nodes], tf.float32))
                    initial_loop_state = None  # we don't need to pass any additional information
                    return (initial_elements_finished, initial_input,
                            initial_cell_state, initial_cell_output,
                            initial_loop_state)

                # Insert a singleton axis so the encoder outputs can go
                # through conv2d with the 1x1 filter W_att_enc.
                encoder_output = tf.expand_dims(g11, axis=2)

                def loop_fn_transition(time, previous_output, previous_state,
                                       previous_loop_state):
                    # Called by raw_rnn for every step after the first.
                    def get_next_input():
                        # compute Bahdanau style attention
                        #performing convolution or reshaping input to (-1,2*d) and then doing matmul, is essentially the same operation
                        #see matrix_mult.py...conv2d might be faster??
                        #https://stackoverflow.com/questions/38235555/tensorflow-matmul-of-input-matrix-with-batch-data
                        encoder_features = tf.nn.conv2d(
                            encoder_output, W_att_enc, [1, 1, 1, 1], "SAME"
                        )  # shape (batch_size,max_enc_steps,1,attention_vec_size)
                        dec_portion = tf.matmul(previous_state.h, W_att_dec)
                        decoder_features = tf.expand_dims(
                            tf.expand_dims(dec_portion, 1), 1
                        )  # reshape to (batch_size, 1, 1, attention_vec_size)
                        # broadcasting allows the two feature tensors to be added
                        # NOTE(review): no mask is applied to these scores
                        # before the softmax, so padded encoder positions also
                        # receive attention weight.
                        e_not_masked = tf.reduce_sum(
                            v_blend *
                            tf.nn.tanh(encoder_features + decoder_features),
                            [2, 3])  # calculate e, (batch_size, max_enc_steps)
                        #The shape of output of a softmax is the same as the input: it just normalizes the values.
                        attn_dist = tf.nn.softmax(
                            e_not_masked)  # (batch_size, max_enc_steps)
                        attn_dist = tf.Print(attn_dist, [tf.shape(attn_dist)],
                                             message="attn_dist",
                                             first_n=5,
                                             summarize=200)

                        # Weight every encoder vector by its attention
                        # probability and sum over the encoder steps, leaving
                        # one 2d-wide context vector per batch example.
                        context_vector = tf.reduce_sum(
                            tf.reshape(attn_dist, [N, -1, 1, 1]) *
                            encoder_output,
                            [1, 2])  # shape (batch_size, attn_size).
                        context_vector = tf.reshape(context_vector,
                                                    [-1, 2 * nodes])
                        #next_input = tf.cond(self.is_train, lambda: tf.concat(
                        #    [tf.reshape(decoder_emb_inp[:, time], (N, dw)), context_vector], 1),
                        #                     lambda: tf.concat([tf.nn.embedding_lookup(word_emb_mat, prediction), context_vector], 1))
                        #output_logits = tf.add(tf.matmul(previous_output, W_dense), b_dense)
                        # Token predicted at the previous step: copied from
                        # the source in pointer mode, otherwise the argmax of
                        # the dense projection.
                        prediction = tf.cond(
                            self.pointer_gen,
                            lambda: execute_pointer_network(attn_dist),
                            lambda: execute_normal_decoder(
                                previous_output, W_dense, b_dense))

                        # Next input = linear map of [context vector, token
                        # embedding] down to dw. During training the token is
                        # the ground-truth decoder input at `time`; otherwise
                        # it is the model's own prediction. AUTO_REUSE lets
                        # the scope be entered on repeated calls.
                        with tf.variable_scope("modified_dec_inputs",
                                               reuse=tf.AUTO_REUSE):
                            next_input = tf.cond(
                                self.is_train,
                                lambda: _linear(args=[context_vector] + [
                                    tf.reshape(decoder_emb_inp[:, time],
                                               (N, dw))
                                ],
                                                output_size=dw,
                                                bias=True),
                                lambda: _linear([context_vector] + [
                                    tf.nn.embedding_lookup(
                                        word_emb_mat, prediction)
                                ], dw, True))

                        return next_input, attn_dist

                    elements_finished = (
                        time >= self.target_sequence_length
                    )  # this operation produces boolean tensor of [batch_size]
                    # defining if corresponding sequence has ended
                    finished = tf.reduce_all(
                        elements_finished)  # -> boolean scalar
                    #input = tf.cond(finished, lambda: tf.concat([pad_step_embedded, my_encoder_final_state_h], 1),get_next_input)
                    # Once every sequence is done, feed the PAD embedding and
                    # an all-zero attention distribution instead of computing
                    # attention.
                    # NOTE(review): `input` shadows the Python builtin within
                    # this function.
                    input, attn_distribution = tf.cond(
                        finished, lambda:
                        (pad_step_embedded, tf.zeros([N, M * JX], tf.float32)),
                        get_next_input)
                    attn_distribution = tf.Print(attn_distribution,
                                                 [tf.shape(attn_distribution)],
                                                 message="attn_distribution",
                                                 first_n=5,
                                                 summarize=200)
                    # The cell state is passed through unchanged.
                    state = previous_state
                    # Emit the attention distribution in pointer mode,
                    # otherwise the previous cell output.
                    output = tf.cond(self.pointer_gen,
                                     lambda: attn_distribution,
                                     lambda: previous_output)
                    output = tf.Print(output, [tf.shape(output)],
                                      message="OUTPUT",
                                      first_n=5,
                                      summarize=200)

                    loop_state = None

                    return (elements_finished, input, state, output,
                            loop_state)

                def loop_fn(time, previous_output, previous_state,
                            previous_loop_state):
                    # Entry point handed to raw_rnn: a None state marks the
                    # initial call, every other call is a transition.
                    if previous_state is None:  # time == 0
                        assert previous_output is None and previous_state is None
                        return loop_fn_initial()
                    else:
                        return loop_fn_transition(time, previous_output,
                                                  previous_state,
                                                  previous_loop_state)

                # Run the decoder loop; outputs come back as a TensorArray
                # that is stacked into a single time-major tensor.
                decoder_outputs_ta, decoder_final_state, _ = tf.nn.raw_rnn(
                    decoder_cell, loop_fn)
                decoder_outputs = decoder_outputs_ta.stack()
                decoder_outputs = tf.Print(decoder_outputs,
                                           [tf.shape(decoder_outputs)],
                                           message="decoder_outputs",
                                           first_n=5,
                                           summarize=200)

                # To do output projection, we have to temporarily flatten decoder_outputs from [max_steps, batch_size, hidden_dim] to
                #  [max_steps*batch_size, hidden_dim], as tf.matmul needs rank-2 tensors at most.
                decoder_max_steps, decoder_batch_size, decoder_dim = tf.unstack(
                    tf.shape(decoder_outputs))
                decoder_outputs_flat = tf.reshape(decoder_outputs,
                                                  (-1, decoder_dim))
                #if pointer networks, no need to pass through dense layer
                decoder_logits_flat = tf.cond(
                    self.pointer_gen,
                    lambda: decoder_outputs_flat, lambda: tf.add(
                        tf.matmul(decoder_outputs_flat, W_dense), b_dense))
                # Restore the [steps, batch, ...] layout; the last dimension
                # is decoder_dim in pointer mode and tgt_vocab_size otherwise.
                decoder_logits = tf.cond(
                    self.pointer_gen, lambda: tf.reshape(
                        decoder_logits_flat,
                        (decoder_max_steps, decoder_batch_size, decoder_dim)),
                    lambda: tf.reshape(decoder_logits_flat,
                                       (decoder_max_steps, decoder_batch_size,
                                        tgt_vocab_size)))
                # Swap the time and batch axes of the stacked outputs.
                decoder_logits = _transpose_batch_time(decoder_logits)
                #decoder_prediction = tf.argmax(decoder_logits, -1)

            #self.decoder_logits_train = final_outputs.rnn_output
            # Expose the decoder logits and the two flattened span logits.
            self.decoder_logits_train = decoder_logits
            self.index_start = flat_logits
            self.index_end = flat_logits2
예제 #31
0
def model_fn(features, labels, mode, params, config):
    """Estimator ``model_fn`` for an autoregressive character-level RNN.

    The recurrence is driven by ``tf.nn.raw_rnn``. In TRAIN/EVAL mode the
    input at step ``t`` is the one-hot encoding of the observed character at
    step ``t - 1`` (an all-zero vector at step 0). In any other mode the
    input at step ``t`` is a one-hot sample drawn from the categorical
    distribution produced from the previous cell output.

    Args:
        features: In TRAIN/EVAL mode, a 4-tuple ``(X_s, X_l, X_r, X_u)``.
            Only ``X_s`` and ``X_l`` are used. ``X_s`` is read as a
            ``[batch, time]`` tensor of integer character ids (it is one-hot
            encoded with depth ``params.num_char``). ``X_l`` is compared
            against the loop's time counter to mark sequences as finished --
            presumably an int32 ``[batch]`` vector of sequence lengths;
            verify against the input pipeline. Not read in other modes.
        labels: Unused.
        mode: A ``ModeKeys`` value selecting which graph to build.
        params: Hyper-parameter object. The attributes read here are
            ``num_char``, ``num_hidden``, ``cell`` (``'lstm'`` or ``'sru'``),
            ``learning_rate``, and, outside TRAIN/EVAL, ``infer_batch_size``
            and ``infer_seq_length``.
        config: Unused.

    Returns:
        A ``tf.estimator.EstimatorSpec``. In TRAIN/EVAL mode it carries the
        negative mean log-probability loss (averaged over non-padded steps
        only), an RMSProp ``train_op`` and a chief-only logging hook.
        Otherwise it carries ``predictions``: a ``[batch, time]`` tensor of
        sampled character ids.

    Raises:
        KeyError: If ``params.cell`` is neither ``'lstm'`` nor ``'sru'``.
    """
    cur_batch_D = params.num_char
    # TRAIN and EVAL build the same teacher-forced graph; every other mode
    # builds the sampling graph.
    supervised = mode in (ModeKeys.TRAIN, ModeKeys.EVAL)

    if supervised:
        # The last two entries of the feature tuple are not used here.
        X_s, X_l, _X_r, _X_u = features
        cur_batch_B = tf.shape(X_s)[0]
        cur_batch_T = tf.shape(X_s)[1]

        Xs_embd = tf.one_hot(X_s, cur_batch_D)
        # Time-major TensorArray so loop_fn can read one step at a time.
        X_ta = tf.TensorArray(size=cur_batch_T, dtype=tf.float32).unstack(
            _transpose_batch_time(Xs_embd), 'TBD_Formatted_X')
    else:
        cur_batch_B = params.infer_batch_size
        cur_batch_T = params.infer_seq_length

    acell = {
        'lstm': lambda: LSTMCell(params.num_hidden),
        'sru': lambda: SRUCell(params.num_hidden)
    }[params.cell]()

    output_layer_info = {
        'units': cur_batch_D,  # this is the size of vocabulary
        'name': 'out_to_character',
        # linear 'activation': tf.nn.softmax
    }

    with tf.variable_scope('Shared_Dense', reuse=False) as dense_layer_scope:
        # Create the projection variables once on a dummy input so that
        # get_logits() can re-enter the scope with reuse=True.
        zeros_placeholder = tf.zeros([1, acell.output_size])
        tf.layers.dense(zeros_placeholder, **output_layer_info)

    def get_logits(cell_out):
        """Project cell outputs to vocabulary logits with the shared layer."""
        with tf.variable_scope(dense_layer_scope, reuse=True):
            return tf.layers.dense(cell_out, **output_layer_info)

    def get_dist(cell_out):
        """Categorical distribution over characters for the given outputs."""
        return Categorical(logits=get_logits(cell_out),
                           name='categorical_dist',
                           allow_nan_stats=False,
                           dtype=tf.int32)

    def get_sample(cell_out):
        """Draw one character per row and return it one-hot encoded."""
        return tf.one_hot(get_dist(cell_out).sample(), cur_batch_D)

    def get_prob(cell_out, obs):
        """Probability of the observed character ids ``obs``."""
        return get_dist(cell_out).prob(obs)

    with tf.variable_scope('Initial_State'):
        # One [1, num_hidden] variable per state component, tiled over batch.
        h_init = tf.tile(
            tf.get_variable('init_state_h', [1, params.num_hidden],
                            initializer=tf.random_uniform_initializer(0)),
            [cur_batch_B, 1])
        c_init = tf.tile(
            tf.get_variable('init_state_c', [1, params.num_hidden],
                            initializer=tf.random_uniform_initializer(0)),
            [cur_batch_B, 1])
        # NOTE(review): the initial state is always an LSTMStateTuple, also
        # when params.cell == 'sru' -- confirm SRUCell accepts that structure.
        cell_init_state = LSTMStateTuple(c_init, h_init)

        first_step = tf.zeros(shape=[cur_batch_B, cur_batch_D],
                              dtype=tf.float32,
                              name='first_character')

    with tf.name_scope('NADE'):
        output_ta = tf.TensorArray(size=cur_batch_T, dtype=tf.float32)

        def loop_fn(time, cell_output, cell_state, loop_state):
            """raw_rnn callback: pick the next input and the finished flags.

            The loop state is a TensorArray that records, at index
            ``time - 1``, the input fed to the cell at step ``time``.
            """
            emit_output = cell_output  # == None for time == 0

            if cell_output is None:
                # Initial call: provide the start state and first input.
                next_cell_state = cell_init_state
                next_step = first_step
                next_loop_state = output_ta
            else:  # pass the last state to the next
                next_cell_state = cell_state
                if supervised:
                    # Teacher forcing: feed the previous observed character.
                    next_step = X_ta.read(time - 1)
                else:
                    # Free running: feed a sample from the model itself.
                    next_step = get_sample(cell_output)
                next_loop_state = loop_state.write(time - 1, next_step)

            if supervised:
                elements_finished = (time >= X_l)
            else:
                elements_finished = (time >= cur_batch_T)

            return elements_finished, next_step, next_cell_state, emit_output, next_loop_state

        output_ta, _, loop_state_ta = tf.nn.raw_rnn(acell, loop_fn)

    with tf.name_scope('Output'):
        outputs = _transpose_batch_time(output_ta.stack())
        logits = get_logits(outputs)

    if supervised:
        # raw_rnn emits zeros for sequences that are already finished, so
        # steps at or beyond X_l carry no real prediction. Exclude them from
        # the loss instead of averaging over the padding as well.
        step_mask = tf.sequence_mask(X_l, maxlen=cur_batch_T,
                                     dtype=tf.float32)
        step_logp = tf.log(1e-6 + get_prob(outputs, X_s))
        logp_loss = -(tf.reduce_sum(step_mask * step_logp) /
                      tf.maximum(tf.reduce_sum(step_mask), 1.0))

        # Monitoring-only metric; it is not part of the optimised loss.
        # NOTE(review): this is an element-wise sigmoid cross-entropy against
        # one-hot labels, not a softmax cross-entropy -- confirm intended.
        xentropy_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            labels=Xs_embd, logits=logits),
                                       name='xtropy_loss')

        train_op = tf.train.RMSPropOptimizer(
            learning_rate=params.learning_rate).minimize(
                loss=logp_loss, global_step=tf.train.get_global_step())

        # Hand the tensor itself to the hook rather than looking it up by
        # name, so logging keeps working if the op name gets scoped.
        logging_hook = tf.train.LoggingTensorHook(
            tensors={"xtropy_loss": xentropy_loss}, every_n_iter=100)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=logp_loss,
                                          train_op=train_op,
                                          training_chief_hooks=[logging_hook])
    else:
        # The loop state holds the sampled one-hot inputs in time-major
        # order; convert to batch-major and back to integer character ids.
        X_sampled = tf.argmax(_transpose_batch_time(loop_state_ta.stack()),
                              axis=2)

        return tf.estimator.EstimatorSpec(mode=mode, predictions=X_sampled)