def stacked_rnn(inputs,
                hidden_sizes,
                cell_fn,
                scope=None,
                dtype=dtypes.float32,
                reuse=False):
    with variable_scope.variable_scope(scope or "stacked_rnn",
                                       reuse=reuse) as varscope:
        # Create a new scope in which the caching device is either
        # determined by the parent scope, or is set to place the cached
        # Variable using the same placement as for the rest of the RNN.
        if not context.executing_eagerly():
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        layers = []
        fixed_hidden_sizes = hidden_sizes + [hidden_sizes[-1]]
        for idx, hidden_size in enumerate(fixed_hidden_sizes[:-1]):
            cell = cell_fn(hidden_size)
            if hidden_size != fixed_hidden_sizes[idx + 1]:
                cell = rnn.OutputProjectionWrapper(cell,
                                                   fixed_hidden_sizes[idx + 1])
            layers.append(cell)
        initial_states = tuple([
            create_initial_state_placeholder(cell.state_size, dtype)
            for cell in layers
        ])
        layers = rnn.MultiRNNCell(layers)
        outputs, states = rnn_ops.dynamic_rnn(layers,
                                              inputs,
                                              initial_state=initial_states,
                                              dtype=dtype,
                                              time_major=False)
        return outputs, states, initial_states, layers.zero_state
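
# A minimal usage sketch for stacked_rnn (an assumption, not from the original
# source). It presumes the snippet's own helpers and imports are available:
# `rnn` from tf.contrib, plus the undefined `rnn_ops.dynamic_rnn` and
# `create_initial_state_placeholder` referenced above.
inputs_ph = tf.placeholder(tf.float32, [None, 20, 8])  # [batch, time, features]
# hidden_sizes=[64, 32]: layer 0 (64 units) is projected to 32 via
# OutputProjectionWrapper because the next layer expects 32 units.
outputs, states, initial_states, zero_state_fn = stacked_rnn(
    inputs_ph, hidden_sizes=[64, 32], cell_fn=rnn.GRUCell)
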
    def rnn_model(self):
        # Build one BasicLSTMCell per layer; reusing a single cell object via
        # [cell] * n_layers would share weights across layers (and raises an
        # error in later TF 1.x releases).
        cells = [rnn.BasicLSTMCell(num_units=self.n_units)
                 for _ in range(self.n_layers)]
        multi_cell = rnn.MultiRNNCell(cells)
        # We only need one output per step, so project the cell output down to
        # a single value: the index of the next word.
        cell_wrapped = rnn.OutputProjectionWrapper(multi_cell, output_size=1)

        # get input embed
        embedding = tf.Variable(initial_value=tf.random_uniform(
            [self.vocab_size, self.n_units], -1.0, 1.0))
        inputs = tf.nn.embedding_lookup(embedding, self.inputs)
        # inputs: [batch_size, time_steps, n_units] after the embedding lookup

        outputs, states = tf.nn.dynamic_rnn(cell_wrapped,
                                            inputs=inputs,
                                            dtype=tf.float32)
        outputs = tf.reshape(
            outputs, [int(outputs.get_shape()[0]),
                      int(inputs.get_shape()[1])])

        w = tf.Variable(
            tf.truncated_normal([int(inputs.get_shape()[1]), self.vocab_size]))
        b = tf.Variable(tf.zeros([self.vocab_size]))

        logits = tf.nn.bias_add(tf.matmul(outputs, w), b)
        return logits
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
    with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
        if dtype is not None:
            scope.set_dtype(dtype)
        else:
            dtype = scope.dtype
        # Encoder
        encoder_cell = rnn.EmbeddingWrapper(cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)
        # Decoder
        if output_projection is None:
            cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)
        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(decoder_inputs,
                encoder_state,
                cell,
                num_decoder_symbols,
                embedding_size,
                output_projection=output_projection,
                feed_previous=feed_previous)
        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_rnn_decoder(decoder_inputs, encoder_state, cell, num_decoder_symbols,
                    embedding_size, output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list
        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)    # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state, flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
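
# Hedged usage sketch (illustrative names and sizes, not from the original
# source): embedding_rnn_seq2seq takes *lists* of per-step 1-D int32 tensors,
# in the classic TF 1.x static-unroll style.
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(10)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(12)]
outputs, state = embedding_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell=rnn.GRUCell(128),
    num_encoder_symbols=10000, num_decoder_symbols=10000, embedding_size=64)
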
def rnn_segment(features, targets, mode, params):
    seq_feature = features['seq_feature']
    seq_length = features['seq_length']
    with tf.variable_scope("emb"):
        embeddings = tf.get_variable(
            "char_emb", shape=[params['num_char'], params['emb_size']])
    seq_emb = tf.nn.embedding_lookup(embeddings, seq_feature)
    batch_size = tf.shape(seq_feature)[0]
    time_step = tf.shape(seq_feature)[1]
    flat_seq_emb = tf.reshape(
        seq_emb,
        shape=[batch_size, time_step, (params['k'] + 1) * params['emb_size']])
    cell = rnn.LSTMCell(params['rnn_units'])
    if mode == ModeKeys.TRAIN:
        cell = rnn.DropoutWrapper(cell, params['input_keep_prob'],
                                  params['output_keep_prob'])
    projection_cell = rnn.OutputProjectionWrapper(cell, params['num_class'])
    logits, _ = tf.nn.dynamic_rnn(projection_cell,
                                  flat_seq_emb,
                                  sequence_length=seq_length,
                                  dtype=tf.float32)
    weight_mask = tf.to_float(tf.sequence_mask(seq_length))
    loss = seq2seq.sequence_loss(logits, targets, weights=weight_mask)
    train_op = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params["learning_rate"],
        optimizer=tf.train.AdamOptimizer,
        clip_gradients=params['grad_clip'],
        summaries=[
            "learning_rate",
            "loss",
            "gradients",
            "gradient_norm",
        ])
    pred_classes = tf.to_int32(tf.argmax(input=logits, axis=2))
    pred_words = tf.logical_or(tf.equal(pred_classes, 0),
                               tf.equal(pred_classes, 3))
    target_words = tf.logical_or(tf.equal(targets, 0), tf.equal(targets, 3))
    precision = metrics.streaming_precision(pred_words,
                                            target_words,
                                            weights=weight_mask)
    recall = metrics.streaming_recall(pred_words,
                                      target_words,
                                      weights=weight_mask)
    predictions = {"classes": pred_classes}
    eval_metric_ops = {"precision": precision, "recall": recall}
    return learn.ModelFnOps(mode,
                            predictions,
                            loss,
                            train_op,
                            eval_metric_ops=eval_metric_ops)
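
# Hedged wiring sketch: rnn_segment follows the tf.contrib.learn model_fn
# contract (features, targets, mode, params), so it can be handed straight to
# learn.Estimator. The params values below are illustrative assumptions.
params = {
    'num_char': 5000, 'emb_size': 64, 'k': 2, 'rnn_units': 128,
    'num_class': 4, 'input_keep_prob': 0.9, 'output_keep_prob': 0.9,
    'learning_rate': 1e-3, 'grad_clip': 5.0,
}
estimator = learn.Estimator(model_fn=rnn_segment, params=params,
                            model_dir='/tmp/rnn_segment')
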
    def rnn_model(self):
        # BasicLSTMCell is the most basic LSTM recurrent unit. It adds
        # forget_bias (default 1.0) to the forget gate's bias to reduce the
        # scale of forgetting at the start of training. The cell has no
        # clipping, no projection layer, and no peephole connections: it is
        # the baseline. The implementation lives under rnn.python.ops in
        # core_rnn_cell_impl.py.
        # MultiRNNCell stacks cells into a multi-layer LSTM, feeding each
        # layer's output to the next. It takes a list of RNN cell instances
        # and a state_is_tuple flag (True is recommended). Build one cell per
        # layer; [cell] * n_layers would share weights across layers.
        cells = [rnn.BasicLSTMCell(num_units=self.n_units)
                 for _ in range(self.n_layers)]
        multi_cell = rnn.MultiRNNCell(cells)

        # We only need one output per step, so wrap the cell to project its
        # output to a single value: the next word index. output_size is the
        # projected size; the result is an RNN cell with an output projection
        # attached.
        cell_wrapped = rnn.OutputProjectionWrapper(multi_cell, output_size=1)

        # Get the input embedding.
        # tf.random_uniform(shape, minval, maxval, dtype, seed, name): returns
        # a tensor of the given shape with values drawn uniformly from
        # [minval, maxval).
        embedding = tf.Variable(initial_value=tf.random_uniform(
            [self.vocab_size, self.n_units], -1.0, 1.0))

        # tf.nn.embedding_lookup(embedding, input_ids): gathers the rows of
        # embedding indexed by input_ids. E.g. for input_ids=[1, 3, 5] it
        # returns the matrix formed by embedding rows 1, 3 and 5.
        inputs = tf.nn.embedding_lookup(embedding, self.inputs)
        # inputs: [batch_size, time_steps, n_units]

        # Feed an explicit initial state into dynamic_rnn; empirically,
        # omitting it degraded results.
        if self.labels is not None:
            # zero_state: an all-zeros initial state for the full batch.
            initial_state = cell_wrapped.zero_state(int(inputs.get_shape()[0]),
                                                    tf.float32)
        else:
            initial_state = cell_wrapped.zero_state(1, tf.float32)

        # dynamic_rnn lets different batches carry different sequence lengths
        # across iterations, though all sequences within a single batch still
        # share one fixed length (static rnn fixes the length graph-wide).
        outputs, states = tf.nn.dynamic_rnn(cell_wrapped,
                                            inputs=inputs,
                                            dtype=tf.float32,
                                            initial_state=initial_state)
        outputs = tf.reshape(
            outputs, [int(outputs.get_shape()[0]),
                      int(inputs.get_shape()[1])])

        # truncated_normal: samples from a truncated normal (Gaussian)
        # distribution.
        w = tf.Variable(
            tf.truncated_normal([int(inputs.get_shape()[1]), self.vocab_size]))
        b = tf.Variable(tf.zeros([self.vocab_size]))

        logits = tf.nn.bias_add(tf.matmul(outputs, w), b)
        return logits, states
Example #6
    def __init__(self, desynth, coding_size, neuron_count):
        cell = rnn.OutputProjectionWrapper(
            rnn.DropoutWrapper(rnn.LSTMCell(
                num_units=neuron_count,
                initializer=tf.variance_scaling_initializer(),
                activation=tf.nn.elu,
            ),
                               input_keep_prob=0.7),
            output_size=coding_size,
        )

        self.outputs, self.states = tf.nn.dynamic_rnn(cell,
                                                      desynth,
                                                      dtype=tf.float32)
Example #7
    def testOutputProjectionWrapper(self):
        with self.test_session() as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 3])
                m = array_ops.zeros([1, 3])
                cell = contrib_rnn.OutputProjectionWrapper(rnn_cell_impl.GRUCell(3), 2)
                g, new_m = cell(x, m)
                sess.run([variables_lib.global_variables_initializer()])
                res = sess.run([g, new_m], {
                    x.name: np.array([[1., 1., 1.]]),
                    m.name: np.array([[0.1, 0.1, 0.1]])
                })
                self.assertEqual(res[1].shape, (1, 3))
                # The numbers in results were not calculated, this is just a smoke test.
                self.assertAllClose(res[0], [[0.231907, 0.231907]])
Example #8
    def embedding_attention_bidirectional_seq2seq(self, encoder_inputs, decoder_inputs, input_cell1, input_cell2,
                                                  output_cell,
                                                  num_encoder_symbols,
                                                  num_decoder_symbols, embedding_size, num_heads=4,
                                                  output_projection=None, feed_previous=False, dtype=None, scope=None,
                                                  initial_state_attention=False):

        with tf.variable_scope(scope or "embedding_attention_bidirectional_seq2seq") as scope:
            # Encoder.
            encoder_cell1 = core_rnn_cell.EmbeddingWrapper(input_cell1, embedding_classes=num_encoder_symbols,
                                                           embedding_size=embedding_size)
            encoder_cell2 = core_rnn_cell.EmbeddingWrapper(input_cell2, embedding_classes=num_encoder_symbols,
                                                           embedding_size=embedding_size)

            encoder_outputs, encoder_state1, encoder_state2 = core_rnn.static_bidirectional_rnn(encoder_cell1,
                                                                                                encoder_cell2,
                                                                                                encoder_inputs,
                                                                                                dtype=tf.float32)

            # First calculate a concatenation of encoder outputs to put attention on.
            top_states = [array_ops.reshape(e, [-1, 1, input_cell1.output_size + input_cell2.output_size]) for e in
                          encoder_outputs]

            attention_states = array_ops.concat(top_states, 1)

            # Concatenate the states of both encoders.
            encoder_state = encoder_state1 + encoder_state2

            # Decoder.
            output_size = None
            if output_projection is None:
                output_cell = rnn.OutputProjectionWrapper(output_cell, num_decoder_symbols)
                output_size = num_decoder_symbols

            assert isinstance(feed_previous, bool)
            return seq2seq.embedding_attention_decoder(decoder_inputs, encoder_state, attention_states,
                                                       output_cell,
                                                       num_decoder_symbols, embedding_size, num_heads=num_heads,
                                                       output_size=output_size,
                                                       output_projection=output_projection,
                                                       feed_previous=feed_previous,
                                                       initial_state_attention=initial_state_attention)
Example #9
    def _decoder(self, keep_prob, encoder_output, encoder_state, batch_size, scope, helper, reuse=None):
        with tf.variable_scope(scope, reuse=reuse):
            attention_states = encoder_output
            cell = rnn.MultiRNNCell([self._cell(keep_prob) for _ in range(self.lstm_dims)])
            attention_mechanism = seq2seq.BahdanauAttention(self.hidden_size, attention_states)  # attention
            decoder_cell = seq2seq.AttentionWrapper(cell, attention_mechanism,
                                                    attention_layer_size=self.hidden_size // 2)
            decoder_cell = rnn.OutputProjectionWrapper(decoder_cell, self.hidden_size, reuse=reuse,
                                                       activation=tf.nn.leaky_relu)
            decoder_initial_state = decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state)

            output_layer = tf.layers.Dense(self.num_words,
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           activation=tf.nn.leaky_relu)
            decoder = seq2seq.BasicDecoder(decoder_cell, helper, decoder_initial_state, output_layer=output_layer)
            output, _, _ = seq2seq.dynamic_decode(decoder, maximum_iterations=self.max_sentence_length,
                                                  impute_finished=True)

            # tf.summary.histogram('decoder', output)
        return output
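
    # self._cell is used above but not shown in this snippet; a plausible
    # sketch under the usual TF 1.x conventions (an assumption, not the
    # original implementation):
    def _cell(self, keep_prob):
        return rnn.DropoutWrapper(rnn.LSTMCell(self.hidden_size),
                                  output_keep_prob=keep_prob)
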
    def decoding_layer(self, rnn_inputs, encoder_output, encoder_state):
        decoder_cell = build_multicell(self.uni_layers, self.cell_size,
                                       self.keep_prob)

        attention_mech = seq2seq.BahdanauAttention(self.cell_size,
                                                   encoder_output,
                                                   self.in_length)
        attention_cell = seq2seq.AttentionWrapper(decoder_cell, attention_mech,
                                                  self.cell_size // 2)
        decoder_cell = rnn.OutputProjectionWrapper(attention_cell,
                                                   self.vocab_length)

        initial_state = decoder_cell.zero_state(self.batch_size, tf.float32)
        # clone() returns a new state; the result must be kept, or the encoder
        # state is silently dropped.
        initial_state = initial_state.clone(cell_state=encoder_state)

        with tf.variable_scope("decode"):
            train_logits = self.train_decoding_layer(rnn_inputs, decoder_cell,
                                                     initial_state)

        with tf.variable_scope("decode", reuse=True):
            inference_logits = self.inference_decoding_layer(
                self.embeddings, decoder_cell, initial_state)

        return train_logits, inference_logits
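
# build_multicell is called above but not defined in this snippet; a plausible
# sketch inferred from the call site build_multicell(num_layers, cell_size,
# keep_prob) -- the cell choice is an assumption:
def build_multicell(num_layers, cell_size, keep_prob):
    cells = [rnn.DropoutWrapper(rnn.LSTMCell(cell_size),
                                output_keep_prob=keep_prob)
             for _ in range(num_layers)]
    return rnn.MultiRNNCell(cells)
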
def embedding_tied_rnn_seq2seq(encoder_inputs,
                               decoder_inputs,
                               cell,
                               num_symbols,
                               embedding_size,
                               num_decoder_symbols=None,
                               output_projection=None,
                               feed_previous=False,
                               dtype=None,
                               scope=None):
    with variable_scope.variable_scope(scope or "embedding_tied_rnn_seq2seq", dtype=dtype) as scope:
        dtype = scope.dtype
        if output_projection is not None:
            proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
            proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
            proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
            proj_biases.get_shape().assert_is_compatible_with([num_symbols])
        embedding = variable_scope.get_variable(
            "embedding", [num_symbols, embedding_size], dtype=dtype)
        emb_encoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                              for x in encoder_inputs]
        emb_decoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                              for x in decoder_inputs]
        output_symbols = num_symbols
        if num_decoder_symbols is not None:
            output_symbols = num_decoder_symbols
        if output_projection is None:
            cell = rnn.OutputProjectionWrapper(cell, output_symbols)
        if isinstance(feed_previous, bool):
            loop_function = _extract_argmax_and_embed(
                embedding, output_projection, True) if feed_previous else None
            return tied_rnn_seq2seq(emb_encoder_inputs, emb_decoder_inputs, cell,
                                    loop_function=loop_function, dtype=dtype)
        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            loop_function = _extract_argmax_and_embed(
                embedding, output_projection, False) if feed_previous_bool else None
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = tied_rnn_seq2seq(
                    emb_encoder_inputs, emb_decoder_inputs, cell,
                    loop_function=loop_function, dtype=dtype)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list
        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)   # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        # Calculate zero-state to know its structure.
        static_batch_size = encoder_inputs[0].get_shape()[0]
        for inp in encoder_inputs[1:]:
            static_batch_size.merge_with(inp.get_shape()[0])
        batch_size = static_batch_size.value
        if batch_size is None:
            batch_size = array_ops.shape(encoder_inputs[0])[0]
        zero_state = cell.zero_state(batch_size, dtype)
        if nest.is_sequence(zero_state):
            state = nest.pack_sequence_as(structure=zero_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
def _lstmnet(
        features,  # This is batch_features from input_fn
        labels,  # This is batch_labels from input_fn
        mode,  # An instance of tf.estimator.ModeKeys
        params,
        is_test):

    with tf.variable_scope('EncoderNet') as scope:
        if is_test:
            scope.reuse_variables()

        if (mode == tf.estimator.ModeKeys.TRAIN and not is_test):
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])
        X = tf.reshape(x,
                       shape=[-1, params['sequence_length'],
                              params['dimension']])
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION]
        encoder_Hin = params['encoder_Hin']
        # encoder_Hin: [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS]

        encoder_cells = [
            rnn.GRUBlockCell(params['encoder_hidden_layer_size'])
            for _ in range(params['encoder_hidden_layer_depth'])
        ]
        # "naive dropout" implementation
        encoder_dropcells = [
            rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
            for cell in encoder_cells
        ]
        encoder_multicell = rnn.MultiRNNCell(encoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        encoder_multicell = rnn.DropoutWrapper(encoder_multicell,
                                               output_keep_prob=pkeep)

        encoder_Yr, encoder_H = tf.nn.dynamic_rnn(
            encoder_multicell,
            X,
            dtype=tf.float32,
            initial_state=encoder_Hin,
            scope='EncoderNet',
            parallel_iterations=params['parallel_iters'])
        encoder_H = tf.identity(encoder_H,
                                name='encoder_H')  # just to give it a name
        # encoder_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, ENCODER_INTERNALSIZE ]
        # encoder_H:  [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS ]  # the last state in the sequence

        # Select last output.
        encoder_output = tf.transpose(encoder_Yr, [1, 0, 2])
        # encoder_output: [ SEQUENCE_LENGTH, BATCH_SIZE, ENCODER_INTERNALSIZE ]
        last = tf.gather(encoder_output,
                         int(encoder_output.get_shape()[0]) - 1)
        # last: [ BATCH_SIZE , ENCODER_INTERNALSIZE ]

        # Last layer to evaluate INTERNALSIZE LSTM output to bottleneck representation
        bottleneck = layers.fully_connected(last,
                                            params['bottleneck_size'],
                                            activation_fn=tf.nn.relu)
        encoded_V = bottleneck
        # bottleneck: [ BATCH_SIZE, BOTTLENECK_SIZE ]

    with tf.variable_scope('NetDecoder') as scope:
        if is_test:
            scope.reuse_variables()

        if (mode == tf.estimator.ModeKeys.TRAIN and not is_test):
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        decoder_Hin = params['decoder_Hin']
        # decoder_Hin: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS]

        # tile bottleneck layer
        tiled_bottleneck = tf.tile(tf.expand_dims(bottleneck, axis=1),
                                   multiples=[1, params['sequence_length'], 1])
        # bottleneck_tiled: [ BATCH_SIZE, SEQUENCE_LENGTH, BOTTLENECK_SIZE ]

        decoder_cells = [
            rnn.GRUBlockCell(params['decoder_hidden_layer_size'])
            for _ in range(params['decoder_hidden_layer_depth'])
        ]
        # "naive dropout" implementation
        decoder_dropcells = [
            rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
            for cell in decoder_cells
        ]
        decoder_multicell = rnn.MultiRNNCell(decoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        decoder_multicell = rnn.DropoutWrapper(decoder_multicell,
                                               output_keep_prob=pkeep)
        # dense layer to adjust dimensions
        decoder_multicell = rnn.OutputProjectionWrapper(
            decoder_multicell, params['dimension'])

        decoded_Yr, decoded_H = tf.nn.dynamic_rnn(
            decoder_multicell,
            tiled_bottleneck,
            dtype=tf.float32,
            initial_state=decoder_Hin,
            scope='NetDecoder',
            parallel_iterations=params['parallel_iters'])
        decoded_H = tf.identity(decoded_H,
                                name='decoded_H')  # just to give it a name
        # decoded_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
        # decoded_H:  [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ]  # the last state in the sequence

    return decoded_Yr, encoded_V  # = bottleneck
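
# Hedged follow-up sketch (not in the original source): _lstmnet is an
# autoencoder, so a natural objective is the reconstruction MSE between the
# decoded sequence and the input sequence it was fed.
def reconstruction_loss(decoded_Yr, X, learning_rate):
    # X: the same [BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION] tensor fed to the
    # encoder; decoded_Yr: the decoder output of identical shape.
    loss = tf.reduce_mean(tf.square(decoded_Yr - X))
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return loss, train_op
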
Example #13
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                enc_cell,
                                dec_cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
    """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, which can be
  large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    enc_cell: tf.nn.rnn_cell.RNNCell defining the encoder cell function and size.
    dec_cell: tf.nn.rnn_cell.RNNCell defining the decoder cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
    with tf.variable_scope(scope or "embedding_attention_seq2seq",
                           dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.

        encoder_cell = enc_cell

        encoder_cell = rnn.EmbeddingWrapper(
            encoder_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell,
                                                        encoder_inputs,
                                                        dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            tf.reshape(e, [-1, 1, encoder_cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = tf.concat(top_states, 1)

        # Decoder.
        output_size = None
        if output_projection is None:
            dec_cell = rnn.OutputProjectionWrapper(dec_cell,
                                                   num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                dec_cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    dec_cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = tf.cond(feed_previous, lambda: decoder(True),
                                    lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
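
# Hedged usage sketch for the attention variant above (names and sizes are
# illustrative, not from the original source). Note this signature takes
# separate encoder and decoder cells:
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(10)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(12)]
outputs, state = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs,
    enc_cell=rnn.GRUCell(256), dec_cell=rnn.GRUCell(256),
    num_encoder_symbols=8000, num_decoder_symbols=8000,
    embedding_size=128, num_heads=1, feed_previous=False)
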
Example #14
# `mnist` is loaded via the standard TF 1.x tutorial helper (an assumption;
# the original snippet omits the loading code):
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('/tmp/data', one_hot=True)

INPUT_SIZE = 28
OUTPUT_SIZE = 10
NUM_STEPS = 28
BATCH_SIZE = 50
LEARNING_RATE = 0.0003
ITERATIONS = 2000

x = tf.placeholder(dtype=tf.float32, shape=[None, NUM_STEPS * INPUT_SIZE])
# Labels as one-hot rows; the original [None, 1, OUTPUT_SIZE] shape would not
# match the [batch, OUTPUT_SIZE] logits below.
y = tf.placeholder(dtype=tf.float32, shape=[None, OUTPUT_SIZE])

reshape = tf.reshape(x, shape=[-1, NUM_STEPS, INPUT_SIZE])

cell = rnn.GRUCell(num_units=50, activation=tf.nn.relu)

cell = rnn.OutputProjectionWrapper(cell, OUTPUT_SIZE)
outputs, states = tf.nn.dynamic_rnn(cell, reshape, dtype=tf.float32)

# Keep only the last time step's projected output: [batch, OUTPUT_SIZE].
outputs = outputs[:, -1, :]

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
train = optimizer.minimize(loss)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(ITERATIONS):
        batch_x, batch_y = mnist.train.next_batch(BATCH_SIZE)
        sess.run(train, feed_dict={x: batch_x, y: batch_y})
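
    # Hedged follow-up sketch (not in the original): test-set accuracy in the
    # same session; `correct` and `accuracy` are new ops added for illustration.
    correct = tf.equal(tf.argmax(outputs, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    print('test accuracy:',
          sess.run(accuracy, feed_dict={x: mnist.test.images,
                                        y: mnist.test.labels}))
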
Example #15
    def _build_forward(self):
        config = self.config
        N, M, JX, JQ, VW, VC, d, W = \
            config.batch_size, config.max_num_sents, config.max_sent_size, \
            config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
            config.max_word_size
        beam_width = config.beam_width
        GO_TOKEN = 0
        EOS_TOKEN = 1

        JX = tf.shape(self.x)[2]
        JQ = tf.shape(self.q)[1]
        M = tf.shape(self.x)[1]
        dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

        with tf.variable_scope("emb"):
            if config.use_char_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    char_emb_mat = tf.get_variable("char_emb_mat",
                                                   shape=[VC, dc],
                                                   dtype='float')

                with tf.variable_scope("char"):
                    Acx = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cx)  # [N, M, JX, W, dc]
                    Acq = tf.nn.embedding_lookup(char_emb_mat,
                                                 self.cq)  # [N, JQ, W, dc]
                    Acx = tf.reshape(Acx, [-1, JX, W, dc])
                    Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                    filter_sizes = list(
                        map(int, config.out_channel_dims.split(',')))
                    heights = list(map(int, config.filter_heights.split(',')))
                    assert sum(filter_sizes) == dco, (filter_sizes, dco)
                    with tf.variable_scope("conv"):
                        xx = multi_conv1d(Acx,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                        if config.share_cnn_weights:
                            tf.get_variable_scope().reuse_variables()
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="xx")
                        else:
                            qq = multi_conv1d(Acq,
                                              filter_sizes,
                                              heights,
                                              "VALID",
                                              self.is_train,
                                              config.keep_prob,
                                              scope="qq")
                        xx = tf.reshape(xx, [-1, M, JX, dco])
                        qq = tf.reshape(qq, [-1, JQ, dco])

            if config.use_word_emb:
                with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                    if config.mode == 'train':
                        word_emb_mat = tf.get_variable(
                            "word_emb_mat",
                            dtype='float',
                            shape=[VW, dw],
                            initializer=get_initializer(config.emb_mat),
                            trainable=True)
                    else:
                        word_emb_mat = tf.get_variable("word_emb_mat",
                                                       shape=[VW, dw],
                                                       dtype='float')
                    if config.use_glove_for_unk:
                        word_emb_mat = tf.concat(
                            axis=0, values=[word_emb_mat, self.new_emb_mat])
                with tf.name_scope("word"):
                    Ax = tf.nn.embedding_lookup(word_emb_mat,
                                                self.x)  # [N, M, JX, d]
                    Aq = tf.nn.embedding_lookup(word_emb_mat,
                                                self.q)  # [N, JQ, d]
                    self.tensor_dict['x'] = Ax
                    self.tensor_dict['q'] = Aq
                if config.use_char_emb:
                    xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                    qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
                else:
                    xx = Ax
                    qq = Aq

        # highway network
        if config.highway:
            with tf.variable_scope("highway"):
                xx = highway_network(xx,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)
                tf.get_variable_scope().reuse_variables()
                qq = highway_network(qq,
                                     config.highway_num_layers,
                                     True,
                                     wd=config.wd,
                                     is_train=self.is_train)

        self.tensor_dict['xx'] = xx
        self.tensor_dict['qq'] = qq

        cell_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell_fw = SwitchableDropoutWrapper(
            cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell_bw = SwitchableDropoutWrapper(
            cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell2_fw = SwitchableDropoutWrapper(
            cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell2_bw = SwitchableDropoutWrapper(
            cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell3_fw = SwitchableDropoutWrapper(
            cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell3_bw = SwitchableDropoutWrapper(
            cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
        cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
        d_cell4_fw = SwitchableDropoutWrapper(
            cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
        d_cell4_bw = SwitchableDropoutWrapper(
            cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
        x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
        q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

        with tf.variable_scope("prepro"):
            (fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(
                d_cell_fw,
                d_cell_bw,
                qq,
                q_len,
                dtype='float',
                scope='u1')  # [N, J, d], [N, d]
            u = tf.concat(axis=2, values=[fw_u, bw_u])
            if config.share_lstm_weights:
                tf.get_variable_scope().reuse_variables()
                (fw_h, bw_h), ((_, fw_h_f), (_, bw_h_f)) = bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    xx,
                    x_len,
                    dtype='float',
                    scope='u1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            else:
                (fw_h, bw_h), ((_, fw_h_f), (_, bw_h_f)) = bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    xx,
                    x_len,
                    dtype='float',
                    scope='h1')  # [N, M, JX, 2d]
                h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
            self.tensor_dict['u'] = u
            self.tensor_dict['h'] = h

        with tf.variable_scope("main"):
            if config.dynamic_att:
                p0 = h
                u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                               [N * M, JQ, 2 * d])
                q_mask = tf.reshape(
                    tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                    [N * M, JQ])
                first_cell_fw = AttentionCell(
                    cell2_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                first_cell_bw = AttentionCell(
                    cell2_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_fw = AttentionCell(
                    cell3_fw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
                second_cell_bw = AttentionCell(
                    cell3_bw,
                    u,
                    mask=q_mask,
                    mapper='sim',
                    input_keep_prob=self.config.input_keep_prob,
                    is_train=self.is_train)
            else:
                p0 = attention_layer(config,
                                     self.is_train,
                                     h,
                                     u,
                                     h_mask=self.x_mask,
                                     u_mask=self.q_mask,
                                     scope="p0",
                                     tensor_dict=self.tensor_dict)
                first_cell_fw = d_cell2_fw
                second_cell_fw = d_cell3_fw
                first_cell_bw = d_cell2_bw
                second_cell_bw = d_cell3_bw

            (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
                first_cell_fw,
                first_cell_bw,
                p0,
                x_len,
                dtype='float',
                scope='g0')  # [N, M, JX, 2d]
            g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
            (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
                second_cell_fw,
                second_cell_bw,
                g0,
                x_len,
                dtype='float',
                scope='g1')  # [N, M, JX, 2d]
            g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])

            logits = get_logits([g1, p0],
                                d,
                                True,
                                wd=config.wd,
                                input_keep_prob=config.input_keep_prob,
                                mask=self.x_mask,
                                is_train=self.is_train,
                                func=config.answer_func,
                                scope='logits1')
            a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                          tf.reshape(logits, [N, M * JX]))
            a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                          [1, M, JX, 1])

            (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
                d_cell4_fw,
                d_cell4_bw,
                tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
                x_len,
                dtype='float',
                scope='g2')  # [N, M, JX, 2d]
            g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
            logits2 = get_logits([g2, p0],
                                 d,
                                 True,
                                 wd=config.wd,
                                 input_keep_prob=config.input_keep_prob,
                                 mask=self.x_mask,
                                 is_train=self.is_train,
                                 func=config.answer_func,
                                 scope='logits2')

            flat_logits = tf.reshape(logits, [-1, M * JX])
            flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
            flat_logits2 = tf.reshape(logits2, [-1, M * JX])
            flat_yp2 = tf.nn.softmax(flat_logits2)

            if config.na:
                na_bias = tf.get_variable("na_bias", shape=[], dtype='float')
                na_bias_tiled = tf.tile(tf.reshape(na_bias, [1, 1]),
                                        [N, 1])  # [N, 1]
                concat_flat_logits = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits])
                concat_flat_yp = tf.nn.softmax(concat_flat_logits)
                na_prob = tf.squeeze(tf.slice(concat_flat_yp, [0, 0], [-1, 1]),
                                     [1])
                flat_yp = tf.slice(concat_flat_yp, [0, 1], [-1, -1])

                concat_flat_logits2 = tf.concat(
                    axis=1, values=[na_bias_tiled, flat_logits2])
                concat_flat_yp2 = tf.nn.softmax(concat_flat_logits2)
                na_prob2 = tf.squeeze(
                    tf.slice(concat_flat_yp2, [0, 0], [-1, 1]), [1])  # [N]
                flat_yp2 = tf.slice(concat_flat_yp2, [0, 1], [-1, -1])

                self.concat_logits = concat_flat_logits
                self.concat_logits2 = concat_flat_logits2
                self.na_prob = na_prob * na_prob2

            yp = tf.reshape(flat_yp, [-1, M, JX])
            yp2 = tf.reshape(flat_yp2, [-1, M, JX])
            wyp = tf.nn.sigmoid(logits2)

            self.tensor_dict['g1'] = g1
            self.tensor_dict['g2'] = g2

            self.logits = flat_logits
            self.logits2 = flat_logits2
            self.yp = yp
            self.yp2 = yp2
            self.wyp = wyp

        with tf.variable_scope("q_gen"):
            # Question Generation Using (Paragraph & Predicted Ans Pos)
            NM = config.max_num_sents * config.batch_size

            # Separated encoder
            #ss = tf.reshape(xx, (-1, JX, dw+dco))

            q_worthy = tf.reduce_sum(
                tf.to_int32(self.y), axis=2
            )  # per-sentence count of answer tokens, i.e. how answer-worthy each sentence is: (N, M)
            q_worthy = tf.expand_dims(tf.to_int32(tf.argmax(q_worthy, axis=1)),
                                      axis=1)  # (N) -> (N, 1)
            q_worthy = tf.concat([
                tf.expand_dims(tf.range(0, N, dtype=tf.int32), axis=1),
                q_worthy
            ],
                                 axis=1)
            # example : [0, 9], [1, 11], [2, 8], [3, 5], [4, 0], [5, 1] ...

            ss = tf.gather_nd(xx, q_worthy)
            syp = tf.expand_dims(tf.gather_nd(yp, q_worthy), axis=-1)
            syp2 = tf.expand_dims(tf.gather_nd(yp2, q_worthy), axis=-1)
            ss_with_ans = tf.concat([ss, syp, syp2], axis=2)

            qg_dim = 600
            cell_fw, cell_bw = rnn.DropoutWrapper(rnn.GRUCell(qg_dim), input_keep_prob=config.input_keep_prob), \
                               rnn.DropoutWrapper(rnn.GRUCell(qg_dim), input_keep_prob=config.input_keep_prob)
            s_outputs, s_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, ss_with_ans, dtype=tf.float32)
            s_outputs = tf.concat(s_outputs, axis=2)
            s_states = tf.concat(s_states, axis=1)

            start_tokens = tf.zeros([N], dtype=tf.int32)
            self.inp_q_with_GO = tf.concat(
                [tf.expand_dims(start_tokens, axis=1), self.q], axis=1)
            # supervise if mode is train
            if config.mode == "train":
                emb_q = tf.nn.embedding_lookup(params=word_emb_mat,
                                               ids=self.inp_q_with_GO)
                #emb_q = tf.reshape(tf.tile(tf.expand_dims(emb_q, axis=1), [1, M, 1, 1]), (NM, JQ+1, dw))
                train_helper = seq2seq.TrainingHelper(emb_q, [JQ] * N)
            else:
                s_outputs = seq2seq.tile_batch(s_outputs,
                                               multiplier=beam_width)
                s_states = seq2seq.tile_batch(s_states, multiplier=beam_width)

            cell = rnn.DropoutWrapper(rnn.GRUCell(num_units=qg_dim * 2),
                                      input_keep_prob=config.input_keep_prob)
            attention_mechanism = seq2seq.BahdanauAttention(
                num_units=qg_dim * 2, memory=s_outputs)
            attn_cell = seq2seq.AttentionWrapper(cell,
                                                 attention_mechanism,
                                                 attention_layer_size=qg_dim * 2,
                                                 output_attention=True,
                                                 alignment_history=False)
            total_glove_vocab_size = 78878  #72686
            out_cell = rnn.OutputProjectionWrapper(attn_cell,
                                                   VW + total_glove_vocab_size)
            if config.mode == "train":
                decoder_initial_states = out_cell.zero_state(
                    batch_size=N, dtype=tf.float32).clone(cell_state=s_states)
                decoder = seq2seq.BasicDecoder(
                    cell=out_cell,
                    helper=train_helper,
                    initial_state=decoder_initial_states)
            else:
                decoder_initial_states = out_cell.zero_state(
                    batch_size=N * beam_width,
                    dtype=tf.float32).clone(cell_state=s_states)
                decoder = seq2seq.BeamSearchDecoder(
                    cell=out_cell,
                    embedding=word_emb_mat,
                    start_tokens=start_tokens,
                    end_token=EOS_TOKEN,
                    initial_state=decoder_initial_states,
                    beam_width=beam_width,
                    length_penalty_weight=0.0)
            outputs = seq2seq.dynamic_decode(decoder=decoder,
                                             maximum_iterations=JQ)
            if config.mode == "train":
                gen_q = outputs[0].sample_id
                gen_q_prob = outputs[0].rnn_output
                gen_q_states = outputs[1]
            else:
                gen_q = outputs[0].predicted_ids[:, :, 0]
                gen_q_prob = tf.nn.embedding_lookup(
                    params=word_emb_mat, ids=outputs[0].predicted_ids[:, :, 0])
                gen_q_states = outputs[1]

            self.gen_q = gen_q
            self.gen_q_prob = gen_q_prob
            self.gen_q_states = gen_q_states
def predict_stock():
    def time_series(t):
        return t * np.sin(t) / 3 + 2 * np.sin(t * 5)

    def next_batch(batch_size, n_steps):
        t0 = np.random.rand(batch_size,
                            1) * (t_max - t_min - n_steps * resolution)
        Ts = t0 + np.arange(0., n_steps + 1) * resolution
        ys = time_series(Ts)
        return ys[:, :-1].reshape(-1, n_steps,
                                  1), ys[:, 1:].reshape(-1, n_steps, 1)

    t_min, t_max = 0, 30
    resolution = 0.1
    t = np.linspace(t_min, t_max, int((t_max - t_min) / resolution))
    # t = np.arange(t_min, t_max + resolution, resolution)

    n_steps = 20
    n_outputs = 1
    n_neurons = 100
    n_inputs = 1
    use_projection_wrapper = False

    X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
    y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
    if use_projection_wrapper:
        cell = rnn.OutputProjectionWrapper(rnn.BasicRNNCell(
            num_units=n_neurons, activation=tf.nn.relu),
                                           output_size=n_outputs)
        outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    else:
        cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons,
                                           activation=tf.nn.relu)
        outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
        stacked_rnn_outputs = tf.reshape(outputs, [-1, n_neurons])
        stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
        outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])

    loss = tf.reduce_mean(tf.square(outputs - y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    training_op = optimizer.minimize(loss)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    n_epochs = 1500
    batch_size = 50

    with tf.Session() as sess:
        init.run()
        for epoch in range(n_epochs):
            X_batch, y_batch = next_batch(batch_size, n_steps)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            if epoch % 100 == 0:
                mse_val = loss.eval(feed_dict={X: X_batch, y: y_batch})
                print('Epoch: {}, mse: {}'.format(epoch, mse_val))
        saver.save(sess, './14_stock_predict_model')

    with tf.Session() as sess:
        saver.restore(sess, './14_stock_predict_model')

        is_predict_sequence = True
        if is_predict_sequence:
            seq_len = 300
            seq = np.zeros(n_steps, dtype=np.float32)
            for i in range(seq_len):
                X_batch = seq[-n_steps:].reshape(1, n_steps, 1)
                y_pred = sess.run(outputs, feed_dict={X: X_batch})
                seq = np.append(seq, y_pred[0, -1, 0])
            plt.plot(seq, 'b-')
            plt.xlabel('Time')
        else:
            t_instance = np.linspace(12.2, 12.2 + resolution * (n_steps + 1),
                                     n_steps + 1)
            X_new = time_series(
                np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
            y_pred = sess.run(outputs, feed_dict={X: X_new})
            print(X_new.shape, y_pred.shape)
            plt.plot(t_instance[:-1],
                     time_series(t_instance[:-1]),
                     'bo',
                     markersize=10,
                     label='instance')
            plt.plot(t_instance[1:],
                     time_series(t_instance[1:]),
                     'y*',
                     markersize=10,
                     label='target')
            plt.plot(t_instance[1:],
                     y_pred[0, :, 0],
                     'r.',
                     markersize=10,
                     label='prediction')
            plt.legend()

        plt.show()
Example #17
# `prepare` is an external module from the original project (it supplies
# samples_per_second). `math` and `learning_rate` were missing from this
# snippet; the learning-rate value below is an assumption.
import math

learning_rate = 0.001

slice_size = 612
fft_size = slice_size // 2 + 1
steps_seconds = 2.0
n_steps = math.ceil(steps_seconds * prepare.samples_per_second / slice_size)
n_inputs = 2 * fft_size
n_neurons = 20
n_outputs = 2 * fft_size

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs], name='X')
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs], name='y')

cell = rnn.OutputProjectionWrapper(
    rnn.DropoutWrapper(rnn.LSTMCell(
        num_units=n_neurons,
        initializer=tf.variance_scaling_initializer(),
        activation=tf.nn.elu,
    ),
                       input_keep_prob=0.7),
    output_size=n_outputs,
)

outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

n_epochs = 2000
n_iterations = 150
Example #18
    def build_GRU(self):
        with tf.variable_scope("vae_model", reuse=tf.AUTO_REUSE):
            x = self.input
            acti = None

            cat = tf.layers.dense(self.category, 16)
            big_cat = tf.tile(tf.expand_dims(cat, axis=1), (1, self.leng, 1))
            print("pre", x)  #x_t = tf.tile(x, (1, 1, leng))
            x_t = tf.concat([x, big_cat], axis=-1)
            print("post", x_t)

            cell_fw = tfn.MultiRNNCell(
                [
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.GRUCell(self.enc_size,
                                    name="fow_cell0",
                                    activation=acti,
                                    kernel_initializer=self.initia),
                        output_keep_prob=self.dropout),
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.GRUCell(self.enc_size,
                                    name="fow_cell1",
                                    activation=acti,
                                    kernel_initializer=self.initia),
                        output_keep_prob=self.dropout)
                ]
            )  #(self.enc_size, name="fow_cell",activation=self.act,reuse=tf.AUTO_REUSE,initializer=self.initia,forget_bias=0.9)
            cell_bw = tfn.MultiRNNCell(
                [
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.GRUCell(self.enc_size,
                                    name="bow_cell0",
                                    activation=acti,
                                    kernel_initializer=self.initia),
                        output_keep_prob=self.dropout),
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.GRUCell(self.enc_size,
                                    name="bow_cell1",
                                    activation=acti,
                                    kernel_initializer=self.initia),
                        output_keep_prob=self.dropout)
                ]
            )  #(self.enc_size, name="fow_cell",activation=self.act,reuse=tf.AUTO_REUSE,initializer=self.initia,forget_bias=0.9)
            #cell_fw =tf.nn.rnn_cell.DropoutWrapper(tfn.GRUCell(self.enc_size,name="fow_cell0"),output_keep_prob=self.dropout)
            #cell_bw =tf.nn.rnn_cell.DropoutWrapper(tfn.GRUCell(self.enc_size,name="bow_cell0"),output_keep_prob=self.dropout)

            #cell_fw =tf.nn.rnn_cell.DropoutWrapper(tfn.LSTMCell(self.enc_size,name="fow_cell0",initializer=self.initia),output_keep_prob=self.dropout)
            #cell_fw =self.cell_enc(self.enc_size, name="baw_cell",activation=self.act)
            outputs, state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                inputs=x_t,
                dtype=tf.float32,
                sequence_length=self.input_size,
                time_major=False,
                scope="encoder")
            #outputs,state=tf.nn.dynamic_rnn(cell_fw,inputs=x, dtype=tf.float32,sequence_length=self.input_size, time_major=False, scope="encoder")
            #outputs, state = tf.nn.bidirectional_dynamic_rnn(cell_fw,cell_bw, inputs=x_t, dtype=tf.float32, sequence_length=self.input_size,time_major=False, scope="encoder")

            self.latent_h = tf.layers.dense(tf.concat(
                [state[0][0], state[1][0], state[0][1], state[1][1], cat],
                axis=-1,
                name="latent_concat"),
                                            self.latent_size,
                                            kernel_initializer=self.initia)

            self.z_mean_c = tf.layers.dense(self.latent_h,
                                            self.latent_size,
                                            kernel_initializer=self.initia,
                                            name="MEAN")
            self.z_std_c = tf.layers.dense(self.latent_h,
                                           self.latent_size,
                                           kernel_initializer=self.initia,
                                           name="STD")
            mu_c = self.z_mean_c
            sigma_c = self.z_std_c
            self.samples_c = tf.random_normal(
                [self.batch_size, self.latent_size],
                0.0,
                1.0,
                dtype=tf.float32)
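            # Reparameterization trick: eps ~ N(0, 1) is sampled above, and
            # z_std_c is read as a log-variance, so exp(sigma_c / 2) is the
            # standard deviation; the sample stays differentiable w.r.t.
            # mu_c and sigma_c.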

            self.sampled_z_c = mu_c + tf.exp(sigma_c / 2) * self.samples_c
            self.sampled_z_c = tf.nn.tanh(self.sampled_z_c)
            #self.sampled_z_c = mu_c + sigma_c * self.samples_c
            next_state = tf.concat([self.sampled_z_c, cat], axis=-1)
            latent_state = tf.layers.dense(next_state, 512)
        with tf.variable_scope("dec", reuse=False):
            print("SAMPLED ", self.sampled_z_c)

            res = tf.zeros_like(x)
            second = tfn.OutputProjectionWrapper(tf.nn.rnn_cell.DropoutWrapper(
                tfn.GRUCell(512, kernel_initializer=self.initia),
                output_keep_prob=self.dropout),
                                                 2,
                                                 activation=tf.nn.tanh)

            print("RES", res)
            coord_outs, dec_state = tf.nn.dynamic_rnn(
                second,
                res,
                initial_state=latent_state,
                sequence_length=self.input_size,
                time_major=False,
                dtype=tf.float32,
                scope='RNN_cord')

            print("x", latent_state)
            state_outs, _ = tf.nn.dynamic_rnn(
                #self.cell_dec(self.latent_size, name="dec",initializer=self.initia),
                tfn.OutputProjectionWrapper(
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.GRUCell(512, kernel_initializer=self.initia),
                        output_keep_prob=self.dropout), 3),
                coord_outs,
                initial_state=latent_state,
                sequence_length=self.input_size,
                time_major=False,
                dtype=tf.float32,
                scope='RNN_stat')

            print("OUT : ", coord_outs)
            print("OUT : ", state_outs)
            coord_outs = tf.concat([coord_outs, state_outs], axis=-1)
            self.out_cat = tf.layers.dense(tf.layers.flatten(coord_outs), 7)
            #flat_out=tf.reshape(coord_outs,[self.batch_size,self.leng*self.latent_size])
            #out=tf.layers.dense(coord_outs, self.leng*5)
            return coord_outs
Example #19
    def build(self):
        with tf.variable_scope("vae_model", reuse=tf.AUTO_REUSE):
            x = self.input
            cat = tf.layers.dense(self.category, 16)
            big_cat = tf.tile(tf.expand_dims(self.category, axis=1),
                              (1, self.leng, 1))
            print("pre", x)
            #x_t = tf.tile(x, (1, 1, leng))
            x_t = tf.concat([x, big_cat], axis=-1)
            print("post", x_t)

            cell_fw = tfn.MultiRNNCell(
                [
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.LSTMCell(self.enc_size,
                                     name="fow_cell0",
                                     initializer=self.initia),
                        output_keep_prob=self.dropout),
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.LSTMCell(self.enc_size,
                                     name="fow_cell1",
                                     initializer=self.initia),
                        output_keep_prob=self.dropout),
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.LSTMCell(self.enc_size,
                                     name="fow_cell2",
                                     initializer=self.initia,
                                     activation=tf.nn.tanh),
                        output_keep_prob=self.dropout)
                ]
            )  #(self.enc_size, name="fow_cell",activation=self.act,reuse=tf.AUTO_REUSE,initializer=self.initia,forget_bias=0.9)
            cell_bw = tfn.MultiRNNCell(
                [
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.LSTMCell(self.enc_size,
                                     name="bow_cell0",
                                     initializer=self.initia),
                        output_keep_prob=self.dropout),
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.LSTMCell(self.enc_size,
                                     name="bow_cell1",
                                     initializer=self.initia),
                        output_keep_prob=self.dropout),
                    tf.nn.rnn_cell.DropoutWrapper(
                        tfn.LSTMCell(self.enc_size,
                                     name="bow_cell2",
                                     initializer=self.initia,
                                     activation=tf.nn.tanh),
                        output_keep_prob=self.dropout)
                ]
            )  #(self.enc_size, name="fow_cell",activation=self.act,reuse=tf.AUTO_REUSE,initializer=self.initia,forget_bias=0.9)

            #cell_fw =tf.nn.rnn_cell.DropoutWrapper(tfn.LSTMCell(self.enc_size,name="fow_cell0",initializer=self.initia),output_keep_prob=self.dropout)
            #cell_fw =self.cell_enc(self.enc_size, name="baw_cell",activation=self.act)
            #outputs, state = tf.nn.bidirectional_dynamic_rnn(cell_fw,cell_bw, inputs=x_t, dtype=tf.float32,sequence_length=self.input_size, time_major=False, scope="encoder")
            #outputs,state=tf.nn.dynamic_rnn(cell_fw,inputs=x, dtype=tf.float32,sequence_length=self.input_size, time_major=False, scope="encoder")
            outputs, state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                inputs=x,
                dtype=tf.float32,
                sequence_length=self.input_size,
                time_major=False,
                scope="encoder")

            if (self.cell_enc == tfn.LSTMCell):
                #latent_h=tf.concat([tf.concat([state[1].c,state[2].c],axis=-1)+state[0].c,cat],axis=-1,name="latent_concat")
                ##self.latent_h =tf.layers.dense(tf.concat([state[0].c,state[1].c,state[2].c], axis=-1,name="latent_concat"),self.latent_size)
                self.latent_h = tf.layers.dense(tf.concat(
                    [
                        state[0][0].c + state[1][0].c, state[0][1].c +
                        state[1][1].c, state[0][2].c + state[1][2].c  # fw + bw, as for the other layers
                    ],
                    axis=-1,
                    name="latent_concat"),
                                                self.latent_size,
                                                kernel_initializer=self.initia)

                #self.latent_h =tf.layers.dense(state[0].c+state[1].c+state[2].c,self.latent_size)
                #self.latent_h=tf.layers.dense(state.c,self.latent_size)
                #post=tf.concat([self.latent_h,cat],axis=-1)
                #latent_h=tf.reshape(tf.concat([outputs[0],outputs[1]],axis=-1),shape=[self.batch_size,-1])
                #latent_c = tf.concat([state.c, state.h], axis=-1)
                #print("LATENT",latent_c)
            else:
                # Assign to latent_h so the dense layers below work in this branch too.
                self.latent_h = tf.concat([state[0], state[1]], axis=-1)
                print("ELSE LATENT ", self.latent_h)

            self.z_mean_c = tf.layers.dense(self.latent_h,
                                            self.latent_size,
                                            kernel_initializer=self.initia,
                                            name="MEAN")
            self.z_std_c = tf.layers.dense(self.latent_h,
                                           self.latent_size,
                                           kernel_initializer=self.initia,
                                           name="STD")

            mu_c = self.z_mean_c
            sigma_c = self.z_std_c
            # Sample eps ~ N(0, 1); the mean and std are applied by the
            # reparameterization below, so passing mu_c / sigma_c here would
            # apply them twice (cf. build_GRU above).
            self.samples_c = tf.random_normal(
                [self.batch_size, self.latent_size],
                0.0,
                1.0,
                dtype=tf.float32)

            self.sampled_z_c = mu_c + tf.exp(sigma_c / 2) * self.samples_c
            next_state = tf.concat([self.sampled_z_c, cat], axis=-1)
        with tf.variable_scope("dec", reuse=False):
            print("SAMPLED ", self.sampled_z_c)
            if (self.cell_dec == tfn.LSTMCell):
                latent_state = tfn.LSTMStateTuple(
                    next_state,
                    tf.zeros_like(next_state))  #,tfn.LSTMStateTuple(mu,sigma))
            else:
                latent_state = self.sampled_z_c

            res = tf.zeros_like(x)
            second = tfn.OutputProjectionWrapper(tf.nn.rnn_cell.DropoutWrapper(
                tf.nn.rnn_cell.LSTMCell(self.latent_size + 16,
                                        initializer=self.initia),
                output_keep_prob=self.dropout),
                                                 5,
                                                 activation=tf.nn.tanh)

            print("RES", res)
            coord_outs, dec_state = tf.nn.dynamic_rnn(
                #self.cell_dec(self.latent_size, name="dec",initializer=self.initia),
                second,
                #tfn.OutputProjectionWrapper(tfn.MultiRNNCell([self.cell_enc(self.latent_size, name="decc", use_peepholes=True),self.cell_dec(self.latent_size, name="decc2", use_peepholes=True)]),2),
                res,
                initial_state=latent_state,
                sequence_length=self.input_size,
                time_major=False,
                dtype=tf.float32,
                scope='RNN_cord')

            print("OUT : ", coord_outs)
            self.out_cat = tf.layers.dense(tf.layers.flatten(coord_outs), 17)
            #flat_out=tf.reshape(coord_outs,[self.batch_size,self.leng*self.latent_size])
            #out=tf.layers.dense(coord_outs, self.leng*5)
            return coord_outs
def embedding_attention_seq2seq(encoder_inputs,  # [T, batch_size]
                                decoder_inputs,  # [T, batch_size]
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,     # number of attention heads
                                output_projection=None,  # decoder output projection (W, B)
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                loop_fn_factory=_extract_argmax_and_embed):
    """
    :param encoder_inputs: encoder的输入,int32型 id tensor list
    :param decoder_inputs: decoder的输入,int32型id tensor list
    :param cell: RNN_Cell的实例
    :param num_encoder_symbols: 编码的符号数,即词表大小
    :param num_decoder_symbols: 解码的符号数,即词表大小
    :param embedding_size: 词向量的维度
    :param num_heads: attention的抽头数量,一个抽头算一种加权求和方式
    :param output_projection: decoder的output向量投影到词表空间时,用到的投影矩阵和偏置项(W, B);W的shape是[output_size, num_decoder_symbols],B的shape是[num_decoder_symbols];若此参数存在且feed_previous=True,上一个decoder的输出先乘W再加上B作为下一个decoder的输入
    :param feed_previous: 若为True, 只有第一个decoder的输入(“GO"符号)有用,所有的decoder输入都依赖于上一步的输出;一般在测试时用
    :param dtype:
    :param scope:
    :param initial_state_attention: 默认为False, 初始的attention是零;若为True,将从initial state和attention states开始attention
    :param loop_fn_factory:
    :return:
    """
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        # Create an embedding matrix,
        # compute the encoder's outputs and state,
        # and build the attention states used to compute attention.
        encoder_cell = rnn.EmbeddingWrapper(  # EmbeddingWrapper prepends an embedding layer to the RNNCell, so the encoder can take word ids as input.
            cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)  #  [T,batch_size,size]

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]  # T * [batch_size, 1, size]
        attention_states = array_ops.concat(top_states, 1)  # [batch_size,T,size]

        # Decoder.
        # Build the decoder cell by wrapping the given cell in OutputProjectionWrapper.
        output_size = None
        if output_projection is None:
            cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)  # OutputProjectionWrapper maps the outputs to the desired size
            output_size = num_decoder_symbols
        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                loop_fn_factory=loop_fn_factory)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    loop_fn_factory=loop_fn_factory)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
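# A minimal usage sketch (hypothetical sizes; the GRUCell choice is arbitrary):
#
#   T, batch = 10, 32
#   enc_inputs = [tf.placeholder(tf.int32, [batch]) for _ in range(T)]
#   dec_inputs = [tf.placeholder(tf.int32, [batch]) for _ in range(T)]
#   outputs, state = embedding_attention_seq2seq(
#       enc_inputs, dec_inputs, rnn.GRUCell(128),
#       num_encoder_symbols=10000, num_decoder_symbols=10000,
#       embedding_size=128, feed_previous=True)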
def one2many_rnn_seq2seq(encoder_inputs,
                         decoder_inputs_dict,
                         cell,
                         num_encoder_symbols,
                         num_decoder_symbols_dict,
                         embedding_size,
                         feed_previous=False,
                         dtype=None,
                         scope=None):
    outputs_dict = {}
    state_dict = {}

    with variable_scope.variable_scope(
            scope or "one2many_rnn_seq2seq", dtype=dtype) as scope:
        dtype = scope.dtype

        # Encoder.
        encoder_cell = rnn.EmbeddingWrapper(
            cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

        # Decoder.
        for name, decoder_inputs in decoder_inputs_dict.items():
            num_decoder_symbols = num_decoder_symbols_dict[name]

            with variable_scope.variable_scope("one2many_decoder_" + str(name)) as scope:
                decoder_cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)
                if isinstance(feed_previous, bool):
                    outputs, state = embedding_rnn_decoder(
                        decoder_inputs, encoder_state, decoder_cell, num_decoder_symbols,
                        embedding_size, feed_previous=feed_previous)
                else:
                    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
                    def filled_embedding_rnn_decoder(feed_previous):
                        """The current decoder with a fixed feed_previous parameter."""
                        # pylint: disable=cell-var-from-loop
                        reuse = None if feed_previous else True
                        vs = variable_scope.get_variable_scope()
                        with variable_scope.variable_scope(vs, reuse=reuse):
                            outputs, state = embedding_rnn_decoder(
                                decoder_inputs, encoder_state, decoder_cell,
                                num_decoder_symbols, embedding_size,
                                feed_previous=feed_previous)
                        # pylint: enable=cell-var-from-loop
                        state_list = [state]
                        if nest.is_sequence(state):
                            state_list = nest.flatten(state)
                        return outputs + state_list

                    outputs_and_state = control_flow_ops.cond(
                        feed_previous,
                        lambda: filled_embedding_rnn_decoder(True),
                        lambda: filled_embedding_rnn_decoder(False))
                    # Outputs length is the same as for decoder inputs.
                    outputs_len = len(decoder_inputs)
                    outputs = outputs_and_state[:outputs_len]
                    state_list = outputs_and_state[outputs_len:]
                    state = state_list[0]
                    if nest.is_sequence(encoder_state):
                        state = nest.pack_sequence_as(structure=encoder_state,
                                                      flat_sequence=state_list)
            outputs_dict[name] = outputs
            state_dict[name] = state

    return outputs_dict, state_dict
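# A minimal usage sketch (hypothetical sizes): one encoder state shared by two
# decoders, each with its own output vocabulary.
#
#   enc = [tf.placeholder(tf.int32, [32]) for _ in range(10)]
#   dec = {name: [tf.placeholder(tf.int32, [32]) for _ in range(10)]
#          for name in ('pos', 'chunk')}
#   outs, states = one2many_rnn_seq2seq(
#       enc, dec, rnn.GRUCell(128),
#       num_encoder_symbols=10000,
#       num_decoder_symbols_dict={'pos': 50, 'chunk': 20},
#       embedding_size=128)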
Example #22
def _lstmnet(
        features,  # This is batch_features from input_fn
        labels,  # This is batch_labels from input_fn
        mode,  # An instance of tf.estimator.ModeKeys
        params,
        is_test):

    with tf.variable_scope('EncoderNet') as scope:
        if is_test:
            scope.reuse_variables()

        if (mode == tf.estimator.ModeKeys.TRAIN and not is_test):
            # Train graph
            pkeep = params['pkeep']
        else:
            # Test or inference graph
            pkeep = 1.0

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])
        X = tf.reshape(x,
                       shape=[
                           x.get_shape()[0], params['sequence_length'],
                           params['dimension']
                       ])
        X = tf.identity(X, name='X')
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION]
        if labels is not None:
            Labels = tf.reshape(labels,
                                shape=[
                                    x.get_shape()[0],
                                    params['sequence_length'],
                                    params['dimension']
                                ])
        else:
            Labels = None
        encoder_Hin = params['encoder_Hin']
        # encoder_Hin: [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS]
        seqlen = tf.Variable(params['sequence_length'], name='seqlen')
        seqlen = tf.reshape(seqlen, shape=[1])
        seqdescr = tf.tile(seqlen, multiples=[x.get_shape()[0]])
        # seqdescr: [ BATCHSIZE ]
        inital_time_sample = params['decoder_inital_time_sample']
        # inital_time_sample: [ BATCH_SIZE, DIMENSION ]

        encoder_cells = [
            rnn.GRUBlockCell(params['encoder_hidden_layer_size'])
            for _ in range(params['encoder_hidden_layer_depth'])
        ]
        # "naive dropout" implementation
        encoder_dropcells = [
            rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
            for cell in encoder_cells
        ]
        encoder_multicell = rnn.MultiRNNCell(encoder_dropcells,
                                             state_is_tuple=False)
        # Input wrapper to keep symmetry with decoder
        encoder_multicell = rnn.InputProjectionWrapper(
            encoder_multicell,
            num_proj=params['bottleneck_size'],
            activation=None)
        # dropout for the softmax layer
        # No dropout in bottleneck layer!
        # encoder_multicell = rnn.DropoutWrapper(encoder_multicell, output_keep_prob=pkeep)

        encoded_Yr, encoded_H = tf.nn.dynamic_rnn(
            encoder_multicell,
            X,
            dtype=tf.float32,
            initial_state=encoder_Hin,
            scope='EncoderNet',
            parallel_iterations=params['parallel_iters'])
        encoded_H = tf.identity(encoded_H,
                                name='encoded_H')  # just to give it a name
        encoded_Yr = tf.identity(encoded_Yr, name='encoded_Yr')
        # encoder_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, ENCODER_INTERNALSIZE ]
        # encoder_H:  [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS ] # this is the last state in the sequence

        encoded_V = tf.reshape(encoded_H, [x.get_shape()[0], -1])
        # encoded_V: [ BATCH_SIZE, BOTTLENECK_SIZE ]

    with tf.variable_scope('NetDecoder') as scope:
        if is_test:
            scope.reuse_variables()

        if (mode == tf.estimator.ModeKeys.TRAIN and not is_test):
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        decoder_Hin = encoded_H
        # decoder_Hin: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS]

        decoder_cells = [
            rnn.GRUBlockCell(params['decoder_hidden_layer_size'])
            for _ in range(params['decoder_hidden_layer_depth'])
        ]
        # "naive dropout" implementation
        decoder_dropcells = [
            rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
            for cell in decoder_cells
        ]
        decoder_multicell = rnn.MultiRNNCell(decoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        decoder_multicell = rnn.DropoutWrapper(decoder_multicell,
                                               output_keep_prob=pkeep)
        # dense layer to adjust dimensions
        decoder_multicell = rnn.OutputProjectionWrapper(decoder_multicell,
                                                        params['dimension'],
                                                        activation=None)

        custom_Helper = create_fixed_len_numeric_training_helper(
            inital_time_sample, params['sequence_length'], X.dtype)
        #helper = tf.contrib.seq2seq.TrainingHelper(inputs=Labels,
        #                                           sequence_length=seqdescr,
        #                                           time_major=False)
        decoder = seq2seq.BasicDecoder(cell=decoder_multicell,
                                       helper=custom_Helper,
                                       initial_state=decoder_Hin)
        decoded_Yr, decoded_H, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=False,
            maximum_iterations=None,
            parallel_iterations=params['parallel_iters'])

        decoded_Yr = decoded_Yr.rnn_output
        print('decoded_Yr')
        print(decoded_Yr)
        decoded_Yr.set_shape([
            decoded_Yr.get_shape()[0], params['sequence_length'],
            decoded_Yr.get_shape()[2]
        ])
        print(decoded_Yr)
        decoded_H = tf.identity(decoded_H, name='decoded_H')
        decoded_Yr = tf.identity(decoded_Yr, name='decoded_Yr')
        # decoder_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
        # decoder_H:  [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ] # this is the last state in the sequence

    return decoded_Yr, encoded_V  # = encoded_H reshaped
def lstm_sentence_embedding(features, labels, mode, params):
    '''
    :param features: dict of sentence features, each with shape (batch_size, max_words, dim_of_word)
    features['seq1'] returns the batch of query sentences
    features['seq2'] returns the batch of positive response sentences
    features['seq3'] returns the batch of negative response sentences
    :param labels: unused
    :param mode:
    :param params:
    :return:
    '''
    print('CURRENT MODE: %s' % mode.upper())

    M = params['M']  # margin constant used in the loss
    input_keep_prob = params['input_keep_prob']
    output_keep_prob = params['output_keep_prob']
    n_lstm_units = 100  # number of hidden units

    # create an LSTM cell (a single shared cell trained on query, pos_response, and neg_response)
    with tf.variable_scope("emb_cell"):
        cell = rnn.LSTMCell(num_units=n_lstm_units, activation=tf.nn.softmax)
        if mode == ModeKeys.TRAIN:
            cell = rnn.DropoutWrapper(cell=cell, input_keep_prob=input_keep_prob,
                                      output_keep_prob=output_keep_prob)
        projection_cell = rnn.OutputProjectionWrapper(cell=cell, output_size=lstm_emb_size, activation=tf.nn.softmax)


    def lstm_embed_sentence(x):
        # (outputs, final_state) is returned from tf.nn.dynamic_rnn()
        #     |         └→ final_state = (c_state, h_state) at the final step
        #     └→ outputs is the collection of outputs emitted at every step,
        #        with shape (batch, time_step, n_output_size)
        # in this project we only care about outputs
        outputs, _ = tf.nn.dynamic_rnn(cell=projection_cell, inputs=x, time_major=False, dtype=tf.float32)

        # transpose (batch, time_step, n_output_size) -> (time_step, batch, n_output_size)
        #   └→ unpack to list [(batch, outputs)..] * steps
        outputs = tf.transpose(outputs, [1, 0, 2])

        # keep only the output from the last time_step.
        # shape = (batch, n_output_size)
        outputs = outputs[-1]

        # treat this output as the embedding vector
        return outputs

    def cosine_similarity(vec1, vec2):
        '''
        Calculate cosine_similarity of each sample
        by A•B / (norm(A) * norm(B))
        :param vec1: batch of vector1
        :param vec2: batch of vector2
        :return:
        '''

        # calculate (norm(A) * norm(B))
        # output.shape = [n_sample, ]
        vec_norm = tf.norm(vec1, axis=1) * tf.norm(vec2, axis=1)

        # multiply sub_vec vs sub_vec.
        # output.shape = [n_sample , emb_dim]
        mul = tf.multiply(vec1, vec2)

        # sum values in emb_dim for each sample so output.shape = [n_sample, ]
        reduce_sum = tf.reduce_sum(mul, axis=1)

        # calculate cosine similarity.
        # output.shape = [n_sample, ]
        cosine_sim = reduce_sum / vec_norm

        return cosine_sim

    loss = None
    train_op = None

    # every mode must provide seq1 in the features dict
    seq1 = features[QUERY_KEY]

    # Calculate Loss (for TRAIN, EVAL modes)
    if mode != ModeKeys.INFER:
        seq2 = features[POS_RESP_KEY]  # get a pos_response
        seq3 = features[NEG_RESP_KEY]  # get a neg_response

        # get embedded vector: output.shape = [n_sample , emb_dim]
        vec1 = lstm_embed_sentence(seq1)  # query
        vec2 = lstm_embed_sentence(seq2)  # pos_response
        vec3 = lstm_embed_sentence(seq3)  # neg_response

        # calculate cosine similarity of each vec pairs, output.shape = [n_sample, ]
        cosine_sim_pos = cosine_similarity(vec1, vec2)  # want this to be large
        cosine_sim_neg = cosine_similarity(vec1, vec3)  # want this to be small

        # LOSS
        # triplet hinge loss for each (pos, neg) pair. output.shape = [n_sample,]
        losses = tf.maximum(0., M - cosine_sim_pos + cosine_sim_neg)  # zero once pos beats neg by margin M

        # final loss: mean over all samples, giving a scalar
        loss = tf.reduce_mean(losses)
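        # Worked example (hypothetical values, M = 0.5):
        #   cos_pos = 0.9, cos_neg = 0.2 -> max(0, 0.5 - 0.9 + 0.2) = 0.0, margin met
        #   cos_pos = 0.3, cos_neg = 0.4 -> max(0, 0.5 - 0.3 + 0.4) = 0.6, penalized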

    # Configure the Training Optimizer (for TRAIN modes)
    if mode == ModeKeys.TRAIN:
        # configuration the training Op
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            optimizer=tf.train.AdamOptimizer,
            learning_rate=params['learning_rate'],
            summaries=[
                'learning_rate',
                'loss',
                "gradients",
                "gradient_norm",
            ]
        )

    # Generate predictions: the embedding of the given sentence
    predictions = {}
    if mode == ModeKeys.INFER:
        predictions = {'emb_vec': lstm_embed_sentence(seq1)}

    # Return a ModelFnOps object
    return ModelFnOps(predictions=predictions, loss=loss, train_op=train_op, eval_metric_ops=None, mode=mode)
def _convlstmnet(
        features,  # This is batch_features from input_fn
        labels,  # This is batch_labels from input_fn
        mode,  # An instance of tf.estimator.ModeKeys
        params,
        is_test):

    with tf.variable_scope('EncoderNet') as scope:
        if is_test:
            scope.reuse_variables()

        if (mode == tf.estimator.ModeKeys.TRAIN and not is_test):
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])
        X = tf.reshape(x,
                       shape=[
                           x.get_shape()[0], params['sequence_length'],
                           params['dimension'], 1
                       ])
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION, 1 ]
        print(X)

        # Convolutional Layer 1
        conv1 = tf.layers.conv2d(inputs=X,
                                 filters=6,
                                 kernel_size=[5, 1],
                                 padding="same",
                                 activation=tf.nn.relu)
        # conv1: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION, 6 ]
        print(conv1)

        # Conv Layer 2 with some stride
        conv2 = tf.layers.conv2d(inputs=conv1,
                                 filters=10,
                                 kernel_size=[5, 1],
                                 padding="same",
                                 strides=(2, 1),
                                 activation=tf.nn.relu)
        # conv2: [ BATCH_SIZE, SEQUENCE_LENGTH/2, DIMENSION, 10 ]
        print(conv2)

        # Conv Layer 3 with big filter size and stride
        conv3 = tf.layers.conv2d(inputs=conv2,
                                 filters=15,
                                 kernel_size=[8, 1],
                                 padding="same",
                                 strides=(4, 1),
                                 activation=tf.nn.relu)
        # conv3: [ BATCH_SIZE, SEQUENCE_LENGTH/(2*4), DIMENSION, 15 ]
        print(conv3)
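        # Downsampling check (assuming sequence_length = 56): stride 2 in conv2
        # and stride 4 in conv3 give 56 / (2 * 4) = 7 time steps, matching the
        # hard-coded 7 in the flatten below.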

        # flatten:
        conv3_flat = tf.reshape(
            conv3, [conv3.get_shape()[0], 7 * params['dimension'] * 15])
        dense = tf.layers.dense(inputs=conv3_flat,
                                units=128,
                                activation=tf.nn.relu)
        dropout = tf.layers.dropout(
            inputs=dense,
            rate=params['pkeep'],
            training=mode == tf.estimator.ModeKeys.TRAIN)

        # Last layer to evaluate INTERNALSIZE LSTM output to bottleneck representation
        bottleneck = layers.fully_connected(dropout,
                                            params['bottleneck_size'],
                                            activation_fn=tf.nn.relu)
        encoded_V = bottleneck
        # bottleneck: [ BATCH_SIZE, BOTTLENECK_SIZE ]

    with tf.variable_scope('NetDecoder') as scope:
        if is_test:
            scope.reuse_variables()

        if (mode == tf.estimator.ModeKeys.TRAIN and not is_test):
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        decoder_Hin = params['decoder_Hin']
        # decoder_Hin: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS]

        # tile bottleneck layer
        tiled_bottleneck = tf.tile(tf.expand_dims(bottleneck, axis=1),
                                   multiples=[1, params['sequence_length'], 1])
        # bottleneck_tiled: [ BATCH_SIZE, SEQUENCE_LENGTH, BOTTLENECK_SIZE ]

        decoder_cells = [
            rnn.GRUBlockCell(params['decoder_hidden_layer_size'])
            for _ in range(params['decoder_hidden_layer_depth'])
        ]
        # "naive dropout" implementation
        decoder_dropcells = [
            rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
            for cell in decoder_cells
        ]
        decoder_multicell = rnn.MultiRNNCell(decoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        decoder_multicell = rnn.DropoutWrapper(decoder_multicell,
                                               output_keep_prob=pkeep)
        # dense layer to adjust dimensions
        decoder_multicell = rnn.OutputProjectionWrapper(
            decoder_multicell, params['dimension'])

        decoder_Yr, decoder_H = tf.nn.dynamic_rnn(
            decoder_multicell,
            tiled_bottleneck,
            dtype=tf.float32,
            initial_state=decoder_Hin,
            scope='NetDecoder',
            parallel_iterations=params['parallel_iters'])
        decoder_H = tf.identity(decoder_H,
                                name='decoder_H')  # just to give it a name
        # decoder_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
        # decoder_H:  [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ] # this is the last state in the sequence

    return decoder_Yr, encoded_V
Example #25
    def _model(self):
        graph = tf.Graph()
        with graph.as_default():
            embedding = tf.Variable(np.zeros(
                shape=[self.num_words, self.embedding_size], dtype=np.float32),
                                    trainable=False,
                                    name='embedding')  # word embedding matrix
            lr = tf.placeholder(tf.float32, [], name='learning_rate')
            # Input data
            x_input = tf.placeholder(tf.int32, [None, None],
                                     name='x_input')  # input X
            x_sequence_length = tf.placeholder(tf.int32, [None],
                                               name='x_length')  # length of each X sequence
            x_embedding = tf.nn.embedding_lookup(embedding,
                                                 x_input)  # map the input ids to embedding vectors
            y_input = tf.placeholder(tf.int32, [None, None],
                                     name='y_input')  # input Y
            y_sequence_length = tf.placeholder(tf.int32, [None],
                                               name='y_length')  # length of each Y sequence
            y_embedding = tf.nn.embedding_lookup(embedding, y_input)  # embed Y
            batch_size = tf.placeholder(tf.int32, [], name='batch_size')
            # batch_size = tf.shape(x_input)[0]
            # Use GRU instead of LSTM, with 4 stacked cells
            encoder_cell = rnn.MultiRNNCell(
                [rnn.GRUCell(128, activation=tf.tanh) for _ in range(4)])
            decoder_cell = rnn.MultiRNNCell(
                [rnn.GRUCell(128, activation=tf.tanh) for _ in range(4)])
            # Run the encoder
            output, encoder_state = tf.nn.dynamic_rnn(
                cell=encoder_cell,
                inputs=x_embedding,
                initial_state=encoder_cell.zero_state(batch_size, tf.float32),
                sequence_length=x_sequence_length)

            attention_mechanism = seq2seq.BahdanauAttention(
                128, output, x_sequence_length)
            attention_cell = seq2seq.AttentionWrapper(decoder_cell,
                                                      attention_mechanism)
            decoder_cell = rnn.OutputProjectionWrapper(attention_cell,
                                                       128,
                                                       activation=tf.tanh)
            encoder_state = decoder_cell.zero_state(
                batch_size, tf.float32).clone(cell_state=encoder_state)
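            # AttentionWrapper defines its own state structure, so the
            # encoder's final state is injected via
            # zero_state(...).clone(cell_state=...) instead of being passed to
            # the decoder directly.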

            output_layer = tf.layers.Dense(
                self.num_words,
                kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                   stddev=0.1))

            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                # Define the training decoder
                training_helper = seq2seq.TrainingHelper(
                    inputs=y_embedding, sequence_length=y_sequence_length)
                training_decoder = seq2seq.BasicDecoder(
                    decoder_cell, training_helper, encoder_state, output_layer)
                # When impute_finished is True, a sequence stops computing once it has read <eos>; the state is held constant and the outputs are all zeros
                training_output, _, _ = seq2seq.dynamic_decode(
                    training_decoder,
                    # + 2 to allow for <GO> and <EOS>
                    maximum_iterations=self.max_sentence_length + 2,
                    impute_finished=True)

                # predict decoder
                predict_helper = seq2seq.GreedyEmbeddingHelper(
                    embedding, tf.fill([batch_size], self.word2index['GO']),
                    self.word2index['EOS'])
                predict_decoder = seq2seq.BasicDecoder(decoder_cell,
                                                       predict_helper,
                                                       encoder_state,
                                                       output_layer)
                predict_output, _, _ = seq2seq.dynamic_decode(
                    predict_decoder,
                    maximum_iterations=self.max_sentence_length + 2,
                    impute_finished=True)

            # loss function
            training_logits = tf.identity(training_output.rnn_output,
                                          name='training_logits')
            predicting_logits = tf.identity(predict_output.rnn_output,
                                            name='predicting')

            masks = tf.sequence_mask(y_sequence_length,
                                     dtype=tf.float32,
                                     name='mask')

            with tf.variable_scope('optimization'):
                loss = seq2seq.sequence_loss(training_logits, y_input, masks)
                optimizer = tf.train.AdamOptimizer(lr)
                gradients = optimizer.compute_gradients(loss)
                capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var)
                                    for grad, var in gradients
                                    if grad is not None]
                train_op = optimizer.apply_gradients(capped_gradients)

        return graph, loss, train_op, predicting_logits
Example #26
sess = tf.InteractiveSession()

lstm_size = 300
str_len = 50
batch_size = 200
learning_rate = 0.001

x = tf.placeholder(tf.float32, [None, None, num_chars], name='x')
y = tf.placeholder(tf.float32, [None, None, num_chars], name='y')

num_cells = 2

## lstm:
cells = [rnn.BasicLSTMCell(lstm_size) for _ in range(num_cells)]
multicell = rnn.MultiRNNCell(cells)
projection = rnn.OutputProjectionWrapper(multicell, num_chars)

# outputs for training:
rnn_outputs, final_state = tf.nn.dynamic_rnn(projection, x, dtype=tf.float32)

xe = tf.nn.softmax_cross_entropy_with_logits(logits=rnn_outputs, labels=y)
total_loss = tf.reduce_mean(xe)

train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

# outputs for sequential text generation:
seq_init = projection.zero_state(1, dtype=tf.float32)
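# zero_state(1, ...) builds the initial state for a batch of one, since text is
# generated one sequence at a time.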
seq_len = tf.placeholder(dtype=tf.int32, name='seq_len')
seq_output, seq_state = tf.nn.dynamic_rnn(projection,
                                          x,
                                          initial_state=seq_init,