def RNN(x, weights, biases, dropout):
    """Run a three-layer IndRNN stack and project its last time step.

    Args:
        x: input tensor; per the original notes its shape is
            (batch_size, timesteps, n_input).
        weights: mapping whose 'out' entry is the output projection matrix.
        biases: mapping whose 'out' entry is the output projection bias.
        dropout: accepted for interface compatibility; not used by this
            implementation.

    Returns:
        The linear activation computed from the final time step's output.
    """
    # static_rnn consumes a Python list holding one tensor per time step,
    # so split the input along its time axis (axis 1).
    step_inputs = tf.unstack(x, timesteps, 1)

    # Three identically configured IndRNN layers stacked on top of each other.
    layers = [IndRNNCell(num_hidden, recurrent_max_abs=recurrent_max)
              for _ in range(3)]
    stacked_cell = tf.nn.rnn_cell.MultiRNNCell(layers)

    step_outputs, _ = tf.nn.static_rnn(stacked_cell, step_inputs,
                                       dtype=tf.float32)

    # Linear activation on the output of the last unrolled step.
    act = tf.matmul(step_outputs[-1], weights['out']) + biases['out']
    tf.summary.histogram("activations", act)
    return act
예제 #2
0
def indRNN_model(feed_in, hidden_units=128):
    """Build a two-layer IndRNN over `feed_in` and flatten its outputs.

    Args:
        feed_in: rank-3 input tensor (batch, time, features). The time
            dimension must be statically known because the recurrent weight
            bounds are derived from it; the batch dimension may be unknown.
        hidden_units: number of units in each IndRNN layer.

    Returns:
        A tensor of shape (batch * time, hidden_units) holding the output of
        the top layer at every time step.
    """
    from ind_rnn_cell import IndRNNCell
    with tf.variable_scope('indRNN-layer'):
        _, seq_length, _ = feed_in.get_shape().as_list()
        TIME_STEPS = seq_length
        input_init = tf.random_uniform_initializer(-0.001, 0.001)
        # Use a float numerator so the exponent is not truncated to 0 by
        # integer division on Python 2.
        LAST_LAYER_LOWER_BOUND = pow(0.5, 1.0 / TIME_STEPS)
        # Init only the last layer's recurrent weights around 1
        recurrent_init_lower_0 = 0
        recurrent_init_lower_1 = LAST_LAYER_LOWER_BOUND
        # Regulate each neuron's recurrent weight as recommended in the paper
        RECURRENT_MAX = pow(2, 1.0 / TIME_STEPS)

        recurrent_init_0 = tf.random_uniform_initializer(
            recurrent_init_lower_0, RECURRENT_MAX)
        recurrent_init_1 = tf.random_uniform_initializer(
            recurrent_init_lower_1, RECURRENT_MAX)

        indRnnCells = tf.contrib.rnn.MultiRNNCell([
            IndRNNCell(hidden_units,
                       recurrent_max_abs=RECURRENT_MAX,
                       input_kernel_initializer=input_init,
                       recurrent_kernel_initializer=recurrent_init_0),
            IndRNNCell(hidden_units,
                       recurrent_max_abs=RECURRENT_MAX,
                       input_kernel_initializer=input_init,
                       recurrent_kernel_initializer=recurrent_init_1),
        ])
        outputs, _ = tf.nn.dynamic_rnn(indRnnCells, feed_in, dtype=tf.float32)
        # -1 lets TensorFlow infer batch * time, so a placeholder with an
        # unknown batch dimension no longer fails on `None * seq_length`.
        outputs = tf.reshape(outputs, (-1, hidden_units))
    return outputs
예제 #3
0
    def __init__(self, features, recurrent_max_abs):
        """Store the attended features and create the filter-producing IndRNN.

        Args:
            features: tensor whose third axis must have a static size; that
                size is used as the channel count (DICT_SIZE in the original
                annotation).
            recurrent_max_abs: bound forwarded to the inner IndRNNCell.
        """
        super(AttentionCell, self).__init__()

        in_channels = features.get_shape()[2].value  # DICT_SIZE
        filter_width = 5

        self._in_channels = in_channels
        self._features = features
        self._bias = self.add_variable(
            "bias", shape=[1], initializer=tf.zeros_initializer())
        # Target shape for reshaping the IndRNN output into a filter.
        self._filt_shape = (-1, filter_width, in_channels, 1)
        # The IndRNN emits one value per filter element.
        self._indrnn = IndRNNCell(filter_width * in_channels,
                                  recurrent_max_abs=recurrent_max_abs)
예제 #4
0
def simple_indRNN_model(feed_in, hidden_units=128):
    """Build a two-layer IndRNN with default initializers and flatten it.

    Args:
        feed_in: rank-3 input tensor (batch, time, features). The time
            dimension must be statically known because the recurrent weight
            bound is derived from it; the batch dimension may be unknown.
        hidden_units: number of units in each IndRNN layer.

    Returns:
        A tensor of shape (batch * time, hidden_units) holding the output of
        the top layer at every time step.
    """
    from ind_rnn_cell import IndRNNCell
    with tf.variable_scope('indRNN-layer'):
        _, seq_length, _ = feed_in.get_shape().as_list()
        TIME_STEPS = seq_length
        # Regulate each neuron's recurrent weight as recommended in the paper.
        # Float numerator: avoids the exponent truncating to 0 on Python 2.
        RECURRENT_MAX = pow(2, 1.0 / TIME_STEPS)
        indRnnCells = tf.contrib.rnn.MultiRNNCell([
            IndRNNCell(hidden_units, recurrent_max_abs=RECURRENT_MAX),
            IndRNNCell(hidden_units, recurrent_max_abs=RECURRENT_MAX),
        ])
        outputs, _ = tf.nn.dynamic_rnn(indRnnCells, feed_in, dtype=tf.float32)
        # -1 lets TensorFlow infer batch * time, so a placeholder with an
        # unknown batch dimension no longer fails on `None * seq_length`.
        outputs = tf.reshape(outputs, (-1, hidden_units))
    return outputs
예제 #5
0
    def testIndRNNCellBounds(self):
        """Checks the cell output when recurrent weights start out of bounds."""

        with self.test_session() as sess:
            ones_init = init_ops.constant_initializer(1.)
            with variable_scope.variable_scope("root", initializer=ones_init):
                inputs = array_ops.zeros([1, 4])
                prev_state = array_ops.zeros([1, 4])

                # Scope default gives input weights of 1; the recurrent
                # weights are fixed and partly outside [1, 3] in magnitude.
                out_of_bounds_init = init_ops.constant_initializer(
                    [-5., -2., 0.1, 5.])
                cell = IndRNNCell(4,
                                  recurrent_min_abs=1.,
                                  recurrent_max_abs=3.,
                                  recurrent_initializer=out_of_bounds_init,
                                  activation=array_ops.identity)
                output, _ = cell(inputs, prev_state)

                sess.run([variables.global_variables_initializer()])
                feed = {
                    inputs.name: np.array([[1., 0., 0., 0.]]),
                    prev_state.name: np.array([[2., 2., 2., 2.]]),
                }
                (result,) = sess.run([output], feed)

                # Expected clipped recurrent weights: -3, -2, 1, 3, so the
                # (pre)activations 1*1 + 2*rec_weight are -5, -3, 3, 7.
                self.assertAllEqual(result, [[-5., -3., 3., 7.]])
예제 #6
0
    def testIndRNNCell(self):
        """Checks one step of the cell with fixed weights and identity activation."""

        with self.test_session() as sess:
            ones_init = init_ops.constant_initializer(1.)
            with variable_scope.variable_scope("root", initializer=ones_init):
                inputs = array_ops.zeros([1, 4])
                prev_state = array_ops.zeros([1, 4])

                # Scope default gives input weights of 1; the recurrent
                # weights are pinned to known constants.
                fixed_recurrent_init = init_ops.constant_initializer(
                    [-3., -2., 1., 3.])
                cell = IndRNNCell(4,
                                  recurrent_initializer=fixed_recurrent_init,
                                  activation=array_ops.identity)
                output, _ = cell(inputs, prev_state)

                sess.run([variables.global_variables_initializer()])
                feed = {
                    inputs.name: np.array([[1., 0., 0., 0.]]),
                    prev_state.name: np.array([[2., 2., 2., 2.]]),
                }
                (result,) = sess.run([output], feed)

                # (Pre)activations 1*1 + 2*rec_weight should be -5, -3, 3, 7.
                self.assertAllEqual(result, [[-5., -3., 3., 7.]])
예제 #7
0
def main():
  """Train a two-layer IndRNN regressor on batches from get_batch().

  Builds the graph, then trains indefinitely, printing the mean squared error
  of every 100 optimisation steps. The loop never terminates on its own.
  """
  # Placeholders fed on every training step
  inputs_ph = tf.placeholder(tf.float32, shape=(BATCH_SIZE, TIME_STEPS, 2))
  targets_ph = tf.placeholder(tf.float32, shape=BATCH_SIZE)

  # Two stacked IndRNN layers; only the first one gets an explicit
  # recurrent-kernel initializer.
  first_input_init = tf.random_uniform_initializer(-RECURRENT_MAX,
                                                   RECURRENT_MAX)
  layers = [
      IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX,
                 recurrent_kernel_initializer=first_input_init),
      IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX),
  ]
  cell = tf.nn.rnn_cell.MultiRNNCell(layers)
  # cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) uncomment this for LSTM runs

  output, _ = tf.nn.dynamic_rnn(cell, inputs_ph, dtype=tf.float32)
  final_step = output[:, -1, :]

  # Linear read-out from the last time step to a single value per example
  weight = tf.get_variable("softmax_weight", shape=[NUM_UNITS, 1])
  bias = tf.get_variable("softmax_bias", shape=[1],
                         initializer=tf.constant_initializer(0.1))
  prediction = tf.squeeze(tf.matmul(final_step, weight) + bias)

  loss_op = tf.losses.mean_squared_error(tf.squeeze(targets_ph), prediction)

  # Adam with a staircase learning-rate decay driven by global_step
  global_step = tf.get_variable("global_step", shape=[], trainable=False,
                                initializer=tf.zeros_initializer)
  learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT, global_step,
                                             LEARNING_RATE_DECAY_STEPS, 0.1,
                                             staircase=True)
  optimize = tf.train.AdamOptimizer(learning_rate).minimize(
      loss_op, global_step=global_step)

  # Train the model
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hundreds_done = 0
    while True:
      losses = []
      for _ in range(100):
        # Generate new input data
        batch_inputs, batch_targets = get_batch()
        feed = {inputs_ph: batch_inputs, targets_ph: batch_targets}
        batch_loss, _ = sess.run([loss_op, optimize], feed)
        losses.append(batch_loss)
      hundreds_done += 1
      print("Step [x100] {} MSE {}".format(hundreds_done, np.mean(losses)))
예제 #8
0
class AttentionCell(tf.nn.rnn_cell.RNNCell):
    """RNN cell whose inner IndRNN emits a filter that is applied to `features`.

    At every step the IndRNN output is reshaped into a filter, convolved with
    the stored features via `batchwise_conv_2`, passed through ReLU and a
    softmax over axis 1, pooled, and concatenated with the step input.
    """

    def __init__(self, features, recurrent_max_abs):
        """Store the attended features and create the filter-producing IndRNN.

        Args:
            features: tensor whose third axis must have a static size; that
                size is used as the channel count (DICT_SIZE in the original
                annotation).
            recurrent_max_abs: bound forwarded to the inner IndRNNCell.
        """
        super(AttentionCell, self).__init__()

        in_channels = features.get_shape()[2].value  # DICT_SIZE
        filter_width = 5

        self._in_channels = in_channels
        self._features = features
        self._bias = self.add_variable(
            "bias", shape=[1], initializer=tf.zeros_initializer())
        # Target shape for reshaping the IndRNN output into a filter.
        self._filt_shape = (-1, filter_width, in_channels, 1)
        self._indrnn = IndRNNCell(filter_width * in_channels,
                                  recurrent_max_abs=recurrent_max_abs)

    @property
    def state_size(self):
        """State size of the wrapped IndRNN cell."""
        return self._indrnn.state_size

    @property
    def output_size(self):
        """Declared output width: channel count plus 2."""
        # NOTE(review): the "+ 2" presumably matches a 2-wide step input that
        # is concatenated in __call__ -- confirm against the caller.
        return self._in_channels + 2

    def build(self, inputs_shape):
        """Delegate variable creation to the wrapped IndRNN cell."""
        self._indrnn.build(inputs_shape)

    def __call__(self, inputs, state, scope=None):
        """Run one step; returns (output, new_state)."""
        raw_filter, next_state = self._indrnn(inputs, state, scope)

        # Original shape note: (B, width, in_channels, out_channels)
        kernel = tf.reshape(raw_filter, self._filt_shape)

        # Original shape note: conv result is (B, width, out_channels)
        scores = batchwise_conv_2(self._features, kernel)
        scores = tf.nn.relu(scores + self._bias)

        # TODO: try other methods for squashing or normalizing, etc
        attention = tf.nn.softmax(scores, axis=1)

        # NOTE(review): the original annotated this product as
        # (B, width, dict_size), but both factors derive from the conv
        # result; multiplying by self._features may have been intended.
        # Behaviour is left unchanged here -- confirm with the author.
        weighted = tf.multiply(attention, scores)
        pooled = tf.reduce_mean(weighted, axis=1)

        step_output = tf.concat([pooled, inputs], axis=1)
        return step_output, next_state
예제 #9
0
def indrnn_model(first_input_init, inputs_ph):
    """Build a two-layer IndRNN model and return its prediction tensor.

    Each layer has NUM_UNITS neurons. The original author's note gives the
    total parameter count of the two layers as
    TIME_STEPS*NUM_UNITS + NUM_UNITS*NUM_UNITS + 2*NUM_UNITS (not verified
    here).

    Args:
        first_input_init: initializer passed to the first layer as its
            recurrent kernel initializer.
        inputs_ph: input tensor fed to `tf.nn.dynamic_rnn`.

    Returns:
        The squeezed linear read-out of the last time step.
    """
    layers = [
        IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX,
                   recurrent_kernel_initializer=first_input_init),
        IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX),
    ]
    stacked = tf.nn.rnn_cell.MultiRNNCell(layers)
    # stacked = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) uncomment this for LSTM runs
    outputs, _ = tf.nn.dynamic_rnn(stacked, inputs_ph, dtype=tf.float32)
    final_step = outputs[:, -1, :]

    # Linear read-out from the last time step
    weight = tf.get_variable("softmax_weight", shape=[NUM_UNITS, 1])
    bias = tf.get_variable("softmax_bias", shape=[1],
                           initializer=tf.constant_initializer(0.1))
    return tf.squeeze(tf.matmul(final_step, weight) + bias)
예제 #10
0
 def _get_lstm_cell(self, config, is_training):
   """Return a new IndRNN cell sized by `config.hidden_size`.

   The earlier dispatch on `config.rnn_mode` (BASIC / BLOCK / INDRNN) has
   been disabled, so an IndRNNCell is always returned and `is_training` is
   currently unused. The "rnn_mode not supported" error that used to follow
   the return could never be reached and has been removed.

   Args:
     config: object exposing `hidden_size`.
     is_training: unused; kept for interface compatibility.

   Returns:
     An IndRNNCell with `config.hidden_size` units, bounded by RECURRENT_MAX.
   """
   return IndRNNCell(config.hidden_size, recurrent_max_abs=RECURRENT_MAX)
예제 #11
0
def main():
    """Train a two-layer IndRNN regressor on batches from get_batch().

    Builds the graph, then trains indefinitely, printing the mean squared
    error of every 100 optimisation steps. The loop never terminates on its
    own.
    """
    # Placeholders fed on every training step
    inputs_ph = tf.placeholder(tf.float32, shape=(BATCH_SIZE, TIME_STEPS, 2))
    targets_ph = tf.placeholder(tf.float32, shape=BATCH_SIZE)

    # Two identically configured IndRNN layers
    layers = [IndRNNCell(NUM_UNITS, RECURRENT_MAX) for _ in range(2)]
    cell = MultiRNNCell(layers)

    output, _ = tf.nn.dynamic_rnn(cell, inputs_ph, dtype=tf.float32)
    final_step = output[:, -1, :]

    # Linear read-out from the last time step to a single value per example
    weight = tf.Variable(tf.truncated_normal([NUM_UNITS, 1], stddev=0.01))
    bias = tf.Variable(tf.constant(0.1, shape=[1]))
    prediction = tf.squeeze(tf.matmul(final_step, weight) + bias)

    loss_op = tf.losses.mean_squared_error(tf.squeeze(targets_ph), prediction)
    optimize = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss_op)

    # Train the model
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        hundreds_done = 0
        while True:
            losses = []
            for _ in range(100):
                # Generate new input data
                batch_inputs, batch_targets = get_batch()
                feed = {inputs_ph: batch_inputs, targets_ph: batch_targets}
                batch_loss, _ = sess.run([loss_op, optimize], feed)
                losses.append(batch_loss)
            hundreds_done += 1

            print("Step [x100] {} MSE {}".format(hundreds_done,
                                                 np.mean(losses)))
예제 #12
0
class IndCatCell(tf.nn.rnn_cell.RNNCell):
    """IndRNN cell with a residual connection from the (zero-padded) input."""

    def __init__(self, num_units, recurrent_max_abs):
        """Create the wrapped IndRNN cell.

        Args:
            num_units: number of units of the inner IndRNNCell.
            recurrent_max_abs: bound forwarded to the inner IndRNNCell.
        """
        super(IndCatCell, self).__init__()
        self._indrnn = IndRNNCell(
            num_units,
            recurrent_max_abs=recurrent_max_abs)

    @property
    def state_size(self):
        """State size of the wrapped IndRNN cell."""
        return self._indrnn.state_size

    @property
    def output_size(self):
        """Output size of the wrapped IndRNN cell."""
        return self._indrnn.output_size

    def build(self, inputs_shape):
        """Delegate variable creation to the wrapped IndRNN cell."""
        self._indrnn.build(inputs_shape)

    def __call__(self, inputs, state, scope=None):
        """Run one step and add the padded input to the IndRNN output.

        Returns:
            A pair (output, new_state).
        """
        out, state = self._indrnn(inputs, state, scope)
        # Zero-pad the input on its feature axis up to the cell's output
        # width so the two tensors can be summed.
        pad_size = self._indrnn.output_size - tf.shape(inputs)[1]
        padded_inputs = tf.pad(inputs, [[0, 0], [0, pad_size]])
        # Residual connection: previously the padded input *replaced* `out`,
        # which discarded the IndRNN's computation entirely.
        out = out + padded_inputs
        return out, state
예제 #13
0
def build_rnn(inputs, phase):
  """Stack NUM_LAYERS IndRNN layers, each followed by batch normalization.

  MultiRNNCell is not used because a batch-normalization layer has to sit
  after every RNN layer, so each layer is unrolled on its own.

  Args:
    inputs: input tensor fed to the first layer.
    phase: tensor compared against PHASE_TRAIN / PHASE_BN_STATS to decide
      whether batch normalization runs in training mode and whether its
      update ops are attached to the layer output.

  Returns:
    The last time step of the final layer's output (the original notes give
    the full output shape as [?, TIME_STEPS, NUM_UNITS]).
  """
  input_init = tf.random_uniform_initializer(-0.001, 0.001)
  current_input = inputs
  layer_result = None

  for layer in range(1, NUM_LAYERS + 1):
    # Only the last layer's recurrent weights are initialised around 1
    if layer < NUM_LAYERS:
      recurrent_lower = 0
    else:
      recurrent_lower = LAST_LAYER_LOWER_BOUND
    recurrent_init = tf.random_uniform_initializer(recurrent_lower,
                                                   RECURRENT_MAX)

    cell = IndRNNCell(NUM_UNITS,
                      recurrent_max_abs=RECURRENT_MAX,
                      input_kernel_initializer=input_init,
                      recurrent_kernel_initializer=recurrent_init)

    # Unroll this layer in its own variable scope
    rnn_out, _ = tf.nn.dynamic_rnn(cell, current_input,
                                   dtype=tf.float32,
                                   scope="rnn%d" % layer)

    bn_in_training_mode = tf.logical_or(tf.equal(phase, PHASE_TRAIN),
                                        tf.equal(phase, PHASE_BN_STATS))
    normalized = tf.layers.batch_normalization(rnn_out,
                                               training=bn_in_training_mode,
                                               momentum=0)

    # In the PHASE_BN_STATS phase, make the layer output depend on the BN
    # update ops collected so far so the population statistics get updated.
    def with_population_stats_update():
      pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      with tf.control_dependencies(pending_updates):
        return tf.identity(normalized)

    layer_result = tf.cond(tf.equal(phase, PHASE_BN_STATS),
                           true_fn=with_population_stats_update,
                           false_fn=lambda: normalized)

    current_input = layer_result

  # Output of the last layer at the last time step
  return layer_result[:, -1, :]
예제 #14
0
 def testIndRNNCell(self):
     """Checks one cell step with clipped recurrent weights and default activation."""
     with self.test_session() as sess:
         ones_init = init_ops.constant_initializer(1.)
         with variable_scope.variable_scope("root", initializer=ones_init):
             inputs = array_ops.zeros([1, 4])
             prev_state = array_ops.zeros([1, 4])
             out_of_bounds_init = init_ops.constant_initializer(
                 [-5., -2., 0.1, 5.])
             cell = IndRNNCell(4,
                               recurrent_min_abs=1.,
                               recurrent_max_abs=3.,
                               recurrent_initializer=out_of_bounds_init)
             output, _ = cell(inputs, prev_state)
             sess.run([variables_lib.global_variables_initializer()])
             feed = {
                 inputs.name: np.array([[1., 1., 1., 1.]]),
                 prev_state.name: np.array([[2., 2., 2., 2.]]),
             }
             (result,) = sess.run([output], feed)
             # Expected recurrent weights u after clipping: -3, -2, 1, 3.
             # Pre-activations (4 + 2*u) are then -2, 0, 6, 10; the asserted
             # output has the negative entry at 0.
             self.assertAllEqual(result, [[0., 0., 6., 10.]])
예제 #15
0
파일: model.py 프로젝트: NBGroupp/NewModel
    def __init__(self, is_training, batch_size):
        """Build the two-sided context model graph.

        Encodes the preceding and the following context with separate stacked
        IndRNNs, scores the candidate words with a bilinear attention layer
        and defines cost / accuracy tensors. The training op and the merged
        summary op are only created when ``is_training`` is true.

        :param is_training: True to add dropout, the training op and the
            merged summary op; False to build the forward graph only.
        :param batch_size: the size of one batch; fixes the leading dimension
            of every placeholder.
        """
        # Network-level parameters
        self.learning_rate = tf.Variable(float(LEARNING_RATE),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * LEARNING_RATE_DECAY_FACTOR)
        self.global_step = 0
        self.global_epoch = 0
        self.batch_size = batch_size

        # Input layer: word ids of shape [batch_size, num_steps] plus the
        # true length of every sequence
        self.pre_input = tf.placeholder(tf.int32, [batch_size, None])
        self.pre_input_seq_length = tf.placeholder(tf.int32, [
            batch_size,
        ])
        self.fol_input = tf.placeholder(tf.int32, [batch_size, None])
        self.fol_input_seq_length = tf.placeholder(tf.int32, [
            batch_size,
        ])

        self.candidate_words_input = tf.placeholder(tf.int32,
                                                    [batch_size, None])
        self.is_candidate = tf.placeholder(tf.float32, [batch_size, None])

        self.one_hot_labels = tf.placeholder(tf.float32, [batch_size, None])

        # Expected output: one integer per example, compared below against
        # the argmax of the logits
        self.targets = tf.placeholder(tf.int32, [
            batch_size,
        ])
        embedding = tf.get_variable("embedding",
                                    [VOCAB_SIZE, HIDDEN_SIZE])  # embedding matrix
        self.embedding = embedding

        input_init = tf.random_uniform_initializer(-0.001, 0.001)
        recurrent_init = tf.random_uniform_initializer(0, RECURRENT_MAX)

        def build_stack(num_layers):
            """Return a MultiRNNCell with one independent IndRNN per layer."""
            # A fresh cell per layer: `[cell] * num_layers` would put the SAME
            # cell object in every layer and silently share its parameters.
            # NOTE: checkpoints trained with the shared-cell graph will not
            # restore into multi-layer stacks built this way.
            layers = []
            for _ in range(num_layers):
                layer = IndRNNCell(num_units=PRE_CONTEXT_HIDDEN_SIZE,
                                   recurrent_max_abs=RECURRENT_MAX,
                                   input_kernel_initializer=input_init,
                                   recurrent_kernel_initializer=recurrent_init)
                if is_training:
                    layer = tf.contrib.rnn.DropoutWrapper(
                        layer, output_keep_prob=KEEP_PROB)
                layers.append(layer)
            return tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)

        # Preceding-context model
        with tf.variable_scope('Pre'):
            pre_stack = build_stack(PRE_CONTEXT_NUM_LAYERS)

            # Turn word ids into word vectors
            pre_input = tf.nn.embedding_lookup(embedding, self.pre_input)
            if is_training:
                pre_input = tf.nn.dropout(pre_input, KEEP_PROB)
            _, pre_states = tf.nn.dynamic_rnn(
                pre_stack,
                pre_input,
                sequence_length=self.pre_input_seq_length,
                dtype=tf.float32)
            # Final state of the preceding-context RNN (one entry per layer)
            self.pre_final_state = pre_states

        # Following-context model
        # NOTE(review): this side also uses PRE_CONTEXT_HIDDEN_SIZE, exactly
        # as the original did -- confirm that is intended.
        with tf.variable_scope('Fol'):
            fol_stack = build_stack(FOL_CONTEXT_NUM_LAYERS)

            # Turn word ids into word vectors
            fol_input = tf.nn.embedding_lookup(embedding, self.fol_input)
            if is_training:
                fol_input = tf.nn.dropout(fol_input, KEEP_PROB)
            _, fol_states = tf.nn.dynamic_rnn(
                fol_stack,
                fol_input,
                sequence_length=self.fol_input_seq_length,
                dtype=tf.float32)
            # Final state of the following-context RNN (one entry per layer)
            self.fol_final_state = fol_states

        # Simple concatenation of the top layer's final state from each side
        concat_output = tf.concat([pre_states[-1], fol_states[-1]], axis=-1)

        # Bilinear layer (attention step)
        with tf.variable_scope('bilinear'):
            candidate_words_input_vector = tf.nn.embedding_lookup(
                embedding, self.candidate_words_input)
            bilinear_weight = tf.get_variable("bilinear_weight",
                                              [2 * HIDDEN_SIZE, HIDDEN_SIZE])
            # Matching score between every candidate word and the context;
            # M = [batch_size, candi_num, hidden_size]
            M = candidate_words_input_vector * tf.expand_dims(
                tf.matmul(concat_output, bilinear_weight), axis=1)
            # Attention probability (matching degree): [batch_size, candi_num]
            alpha = tf.nn.softmax(tf.reduce_sum(M, axis=2))

        # Zero out the probability of non-candidate words
        tmp_prob = alpha * self.is_candidate

        # Renormalise over the remaining candidates, then clip away exact
        # 0 / 1 so the log below stays finite
        self.logits = tmp_prob / tf.expand_dims(
            tf.reduce_sum(tmp_prob, axis=1), axis=1)
        self.logits = tf.clip_by_value(self.logits, 1e-7, 1.0 - 1e-7)

        # Cross-entropy per example
        loss = -tf.reduce_sum(self.one_hot_labels * tf.log(self.logits),
                              axis=1)

        # Record the cost
        # NOTE(review): self.global_step is the Python int 0 while the graph
        # is built, so the running-average ops below bake in the constants 0
        # and 1 and reduce to "assign the current value" -- confirm whether a
        # graph-side step counter was intended.
        with tf.variable_scope('cost'):
            self.cost = tf.reduce_mean(loss)
            self.ave_cost = tf.Variable(0.0, trainable=False, dtype=tf.float32)
            self.ave_cost_op = self.ave_cost.assign(
                tf.divide(
                    tf.add(tf.multiply(self.ave_cost, self.global_step),
                           self.cost), self.global_step + 1))
            # global_step starts from 0
            tf.summary.scalar('cost', self.cost)
            tf.summary.scalar('ave_cost', self.ave_cost)

        # Record the accuracy
        with tf.variable_scope('accuracy'):
            correct_prediction = tf.equal(
                self.targets, tf.cast(tf.argmax(self.logits, -1), tf.int32))
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))
            self.ave_accuracy = tf.Variable(0.0,
                                            trainable=False,
                                            dtype=tf.float32)
            self.ave_accuracy_op = self.ave_accuracy.assign(
                tf.divide(
                    tf.add(tf.multiply(self.ave_accuracy, self.global_step),
                           self.accuracy), self.global_step + 1))
            # global_step starts from 0
            tf.summary.scalar('accuracy', self.accuracy)
            tf.summary.scalar('ave_accuracy', self.ave_accuracy)

        # Back-propagation ops are only defined when training the model
        if not is_training:
            return

        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.cost)

        self.merged_summary_op = tf.summary.merge_all()  # collect summary nodes
예제 #16
0
def main():
    """Train a stacked IndRNN language model and log per-epoch costs.

    Builds the graph, then runs NUM_EPOCHS epochs of training followed by
    validation. Mean train and validation costs are printed after every
    epoch and appended to 'ptb_ind.txt'.

    Test-set evaluation is disabled, so it is no longer reported; the epoch
    summary previously referenced an undefined `test_per` and raised
    NameError at the end of the first epoch.
    """
    inputs_ph = tf.placeholder(tf.int64, shape=(None, None))
    labels_ph = tf.placeholder(tf.int64, shape=(None, None))

    embedding = tf.get_variable("embedding", [vocab_size, NUM_UNITS],
                                dtype=tf.float32)
    inputs = tf.nn.embedding_lookup(embedding, inputs_ph)
    in_training = True

    cell = MultiRNNCell([
        IndRNNCell(NUM_UNITS,
                   recurrent_max_abs=RECURRENT_MAX,
                   batch_norm=False,
                   in_training=in_training) for _ in range(NUM_LAYERS)
    ])
    # cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) #uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
    softmax_w = tf.get_variable("softmax_w", [NUM_UNITS, vocab_size],
                                dtype=tf.float32)
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=tf.float32)
    # Flatten time into the batch axis for the shared output projection
    output = tf.reshape(output, [-1, NUM_UNITS])
    logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    print(logits)
    # Reshape logits to be a 3-D tensor for sequence loss
    logits = tf.reshape(logits, [BATCH_SIZE, -1, vocab_size])

    # Use the contrib sequence loss and average over the batches.
    # NOTE(review): the weight tensor hard-codes 50 as its second dimension,
    # presumably the sequence length -- confirm against the data pipeline.
    loss = tf.contrib.seq2seq.sequence_loss(logits,
                                            labels_ph,
                                            tf.ones([BATCH_SIZE, 50],
                                                    dtype=tf.float32),
                                            average_across_timesteps=False,
                                            average_across_batch=True)

    # Total cost: per-timestep losses summed
    _cost = tf.reduce_sum(loss)
    if not in_training:
        return

    global_step = tf.get_variable("global_step",
                                  shape=[],
                                  trainable=False,
                                  initializer=tf.zeros_initializer)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT,
                                               global_step,
                                               LEARNING_RATE_DECAY_STEPS,
                                               0.1,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    optimize = optimizer.minimize(_cost, global_step=global_step)

    # Train the model. The context manager guarantees the log file is closed
    # even if training is interrupted.
    with open('ptb_ind.txt', 'w') as fout:
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for epoch in range(NUM_EPOCHS):
                train_per = []
                for iteration in range(ITERATIONS_PER_EPOCH):
                    x, y = ptb.train.next_batch(BATCH_SIZE)
                    cost, _ = sess.run([_cost, optimize],
                                       feed_dict={
                                           inputs_ph: x,
                                           labels_ph: y
                                       })
                    train_per.append(cost)
                    # NOTE(review): iteration < ITERATIONS_PER_EPOCH, so this
                    # fires only when iteration == 20; `iteration % 20 == 0`
                    # may have been intended. Behaviour is left unchanged.
                    if iteration % ITERATIONS_PER_EPOCH == 20:
                        print("%d/%d  %f" % (iteration, ITERATIONS_PER_EPOCH,
                                             np.mean(train_per[-20:])))
                        sys.stdout.flush()

                valid_per = []
                for _ in range(VAL_ITERS):
                    x, y = ptb.valid.next_batch()
                    cost = sess.run(_cost,
                                    feed_dict={inputs_ph: x, labels_ph: y})
                    valid_per.append(cost)

                train_mean = np.mean(train_per)
                valid_mean = np.mean(valid_per)
                print("epoch %d, train=%f, valid=%f" %
                      (epoch, train_mean, valid_mean))
                fout.write("%d %.4f %.4f\n" % (epoch, train_mean, valid_mean))
                sys.stdout.flush()
                fout.flush()
예제 #17
0
 def __init__(self, num_units, recurrent_max_abs):
     """Create the wrapped IndRNN cell.

     Args:
         num_units: number of units of the inner IndRNNCell.
         recurrent_max_abs: bound forwarded to the inner IndRNNCell.
     """
     super(IndCatCell, self).__init__()
     inner_cell = IndRNNCell(num_units, recurrent_max_abs=recurrent_max_abs)
     self._indrnn = inner_cell
예제 #18
0
def main():
    """Train a stacked IndRNN classifier on pixel-permuted MNIST sequences.

    Builds ``NUM_LAYERS`` ``IndRNNCell`` layers, unrolls them with
    ``tf.nn.dynamic_rnn`` over inputs of ``TIME_STEPS`` scalar steps, and
    classifies from the output of the last time step. Training uses Adam with
    a staircase exponential learning-rate decay.

    After every epoch the mean train/validation/test accuracy is printed and
    written as one line to ``ind_semi_W_clipcrossnorm_bn.txt``.
    """
    # Placeholders for training data
    print("here")
    sys.stdout.flush()
    inputs_ph = tf.placeholder(tf.float32, shape=(None, TIME_STEPS))
    # `(None)` is just `None` (unknown rank); `(None,)` declares the intended
    # 1-D vector of class ids.
    targets_ph = tf.placeholder(tf.int64, shape=(None,))
    # dynamic_rnn expects a feature axis: (batch, time) -> (batch, time, 1).
    inputs_ph1 = tf.expand_dims(inputs_ph, -1)

    in_training = tf.placeholder(tf.bool, shape=[])

    # One IndRNN cell per layer; the cells use their default initializers.
    cells = [
        IndRNNCell(NUM_UNITS,
                   recurrent_max_abs=RECURRENT_MAX,
                   batch_norm=False,
                   in_training=in_training,
                   layer_idx=layer_idx)
        for layer_idx in range(NUM_LAYERS)
    ]
    print("here1")
    sys.stdout.flush()

    # Build the graph
    cell = MultiRNNCell(cells, BATCH_SIZE)
    # cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) #uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell, inputs_ph1, dtype=tf.float32)
    last = output[:, -1, :]

    weight = tf.get_variable("softmax_weight", shape=[NUM_UNITS, OUTPUT_SIZE])
    # NOTE(review): the bias has shape [1], i.e. a single scalar shared by all
    # OUTPUT_SIZE logits -- confirm a per-class bias was not intended.
    bias = tf.get_variable("softmax_bias",
                           shape=[1],
                           initializer=tf.constant_initializer(0.1))
    # No tf.squeeze here: squeezing would drop the batch axis for a batch of
    # one example and break the argmax over axis 1 below.
    prediction = tf.matmul(last, weight) + bias
    loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                             labels=targets_ph)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(prediction, 1), targets_ph), tf.float32))
    print("here2")
    sys.stdout.flush()

    global_step = tf.get_variable("global_step",
                                  shape=[],
                                  trainable=False,
                                  initializer=tf.zeros_initializer)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT,
                                               global_step,
                                               LEARNING_RATE_DECAY_STEPS,
                                               0.1,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    optimize = optimizer.minimize(loss_op, global_step=global_step)

    # Train the model
    # Fixed seed so the same pixel permutation is used on every run.
    np.random.seed(1234)
    perm = np.random.permutation(TIME_STEPS)
    print("here3")
    sys.stdout.flush()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    session_config = tf.ConfigProto(gpu_options=gpu_options,
                                    log_device_placement=False)

    # The log file is managed by `with` so it is closed even if training
    # raises or is interrupted.
    with open('ind_semi_W_clipcrossnorm_bn.txt', 'w') as fout, \
            tf.Session(config=session_config) as sess:

        def mean_accuracy(dataset, num_iters):
            # Average accuracy over `num_iters` batches without a train step.
            accs = []
            for _ in range(num_iters):
                x, y = dataset.next_batch(BATCH_SIZE)
                accs.append(sess.run(accuracy, {
                    inputs_ph: x[:, perm],
                    targets_ph: y,
                    in_training: False
                }))
            return np.mean(accs)

        sess.run(tf.global_variables_initializer())
        for epoch in range(NUM_EPOCHS):
            print("epoch:", epoch)
            sys.stdout.flush()
            train_acc = []
            for iteration in range(ITERATIONS_PER_EPOCH):
                x, y = mnist.train.next_batch(BATCH_SIZE)
                # NOTE(review): in_training is fed False even on training
                # steps; batch_norm is disabled on the cells above, so this is
                # presumably inert -- confirm against IndRNNCell.
                _, acc = sess.run([optimize, accuracy], {
                    inputs_ph: x[:, perm],
                    targets_ph: y,
                    in_training: False
                })
                train_acc.append(acc)
                print(iteration, ITERATIONS_PER_EPOCH)
                sys.stdout.flush()

            epoch_stats = (epoch,
                           np.mean(train_acc),
                           mean_accuracy(mnist.validation, VAL_ITERS),
                           mean_accuracy(mnist.test, TEST_ITERS))

            print("epoch %d, train=%f, valid=%f, test=%f" % epoch_stats)
            fout.write("%d %.4f %.4f %.4f\n" % epoch_stats)
            sys.stdout.flush()
            fout.flush()
def _INDRNNCells(unit_list, time_steps):
    """Stack one ``IndRNNCell`` per entry of ``unit_list`` in a ``MultiRNNCell``.

    Every cell is created with the same recurrent bound,
    ``2 ** (1 / time_steps)``.

    Args:
        unit_list: iterable whose entries are passed, in order, as the first
            positional argument of each ``IndRNNCell``.
        time_steps: value the bound is derived from; must be non-zero.

    Returns:
        The ``MultiRNNCell`` wrapping the cells, built with
        ``state_is_tuple=True``.
    """
    # Force true division: under Python 2, `1 / time_steps` on an int floors
    # to 0 for any time_steps > 1, which would silently make the bound 1.
    recurrent_max = pow(2, 1.0 / time_steps)
    cells = [
        IndRNNCell(unit, recurrent_max_abs=recurrent_max) for unit in unit_list
    ]
    return MultiRNNCell(cells, state_is_tuple=True)
예제 #20
0
def main():
    """Train a residual IndRNN stack to predict the next value of its input.

    Each of the ``NUM_LAYERS`` layers is an ``IndRNNCell`` wrapped in a
    residual connection and output dropout. The output at step ``t`` is used
    as logits for the quantised input at step ``t + 1``. A penalty on the
    tensors in the ``"recurrent_kernel"`` collection is added to the loss.

    Loss, output distribution and penalty summaries are written to
    ``../train_logs/correlated_noise`` on every step. The loop has no exit
    condition: it runs until the process is interrupted or an error occurs.
    """
    # Placeholders for training data
    inputs_ph = tf.placeholder(tf.float32, shape=(BATCH_SIZE, TIME_STEPS))

    def padded_residual(inp, out):
        # Zero-pad the input's second axis up to the output's width so the
        # skip connection can be added even when the two widths differ.
        width_gap = tf.shape(out)[1] - tf.shape(inp)[1]
        return out + tf.pad(inp, [[0, 0], [0, width_gap]])

    # Build the graph
    cell = tf.nn.rnn_cell.MultiRNNCell([
        tf.contrib.rnn.DropoutWrapper(
            tf.contrib.rnn.ResidualWrapper(
                IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX),
                padded_residual),
            output_keep_prob=0.75)
        for _ in range(NUM_LAYERS)
    ])
    #cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell,
                                  tf.expand_dims(inputs_ph, 2),
                                  dtype=tf.float32)

    # Outputs of steps 0..T-2 are scored against the inputs of steps 1..T-1.
    logits = output[:, :-1]
    # assumes inputs lie in [-1, 1], giving class ids 0..127 -- TODO confirm
    # against get_batch(); the ids must also stay below the logit width.
    targets = tf.cast((inputs_ph + 1) / 2 * 127, tf.int32)[:, 1:]
    loss_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets,
                                                       logits=logits))

    # k * (k - RECURRENT_MAX) is positive only for k < 0 or k > RECURRENT_MAX
    # (for a positive RECURRENT_MAX), so only those weights are penalised.
    kernels = tf.get_collection("recurrent_kernel")
    penalty = sum(
        tf.reduce_mean(tf.maximum(0.0, (k * (k - RECURRENT_MAX))))
        for k in kernels) / len(kernels)

    summary = tf.summary.merge([
        tf.summary.scalar('loss', loss_op),
        tf.summary.histogram('distribution', tf.nn.softmax(logits)),
        tf.summary.scalar('penalty', penalty)
    ])

    global_step = tf.get_variable("global_step",
                                  shape=[],
                                  trainable=False,
                                  initializer=tf.zeros_initializer)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT,
                                               global_step,
                                               LEARNING_RATE_DECAY_STEPS,
                                               0.1,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimize = optimizer.minimize(loss_op + 10 * penalty,
                                      global_step=global_step)

    # Train the model
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter('../train_logs/correlated_noise',
                                             sess.graph)
        # The loop below only ends through an exception or an interrupt, so
        # close the writer in `finally` to flush buffered summaries to disk.
        try:
            sess.run(tf.global_variables_initializer())
            step = 0
            while True:
                losses = []
                for _ in range(100):
                    # Generate new input data
                    noise = get_batch()
                    loss, _, progress = sess.run(
                        [loss_op, optimize, summary], {inputs_ph: noise})
                    losses.append(loss)
                    train_writer.add_summary(progress, step)
                    step += 1
                print("Step {} loss {}".format(step, np.mean(losses)))
        finally:
            train_writer.close()
예제 #21
0
def main():
    """Train a two-layer IndRNN on batches produced by ``get_batch()``.

    The network reads inputs of shape ``(BATCH_SIZE, TIME_STEPS, 2)``, takes
    the output of the last time step, and passes it through a fully connected
    layer plus batch normalisation to obtain ``NUM_UNITS`` logits. The targets
    are quantised to integer class ids and trained with sparse softmax
    cross-entropy. A penalty on the tensors in the ``"recurrent_kernel"``
    collection is added to the loss.

    Summaries are written to
    ``../train_logs/addition_with_penalty_decay_100000`` on every step. The
    loop has no exit condition: it runs until the process is interrupted or
    an error occurs.
    """
    # Placeholders for training data
    inputs_ph = tf.placeholder(tf.float32, shape=(BATCH_SIZE, TIME_STEPS, 2))
    targets_ph = tf.placeholder(tf.float32, shape=BATCH_SIZE)

    # Build the graph
    # Only the first layer gets an explicit recurrent-kernel initializer,
    # uniform in [0, RECURRENT_MAX]; the second uses the cell's default.
    first_recurrent_init = tf.random_uniform_initializer(0, RECURRENT_MAX)
    first_layer = IndRNNCell(2,
                             recurrent_max_abs=RECURRENT_MAX,
                             recurrent_kernel_initializer=first_recurrent_init)
    second_layer = IndRNNCell(2, recurrent_max_abs=RECURRENT_MAX)

    cell = tf.nn.rnn_cell.MultiRNNCell([
        first_layer,
        second_layer,
    ])
    #cell = tf.nn.rnn_cell.BasicLSTMCell(NUM_UNITS) uncomment this for LSTM runs

    output, _ = tf.nn.dynamic_rnn(cell, inputs_ph, dtype=tf.float32)
    last = output[:, -1, :]

    # NOTE(review): batch_normalization is called without `training=`, so it
    # uses the layer's default mode -- confirm that is intended while training.
    projected = tf.contrib.layers.fully_connected(last, NUM_UNITS)
    last = tf.layers.batch_normalization(projected)

    # assumes targets lie in [0, 1], giving class ids 0..127 -- TODO confirm
    # against get_batch(); the ids must also stay below NUM_UNITS.
    targets_int = tf.cast(targets_ph * 127, tf.int32)
    loss_op = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets_int,
                                                       logits=last))

    # k * (k - RECURRENT_MAX) is positive only for k < 0 or k > RECURRENT_MAX
    # (for a positive RECURRENT_MAX), so only those weights are penalised.
    kernels = tf.get_collection("recurrent_kernel")
    penalty = sum(
        tf.reduce_mean(tf.maximum(0.0, (k * (k - RECURRENT_MAX))))
        for k in kernels) / len(kernels)

    summary = tf.summary.merge([
        tf.summary.scalar('loss', loss_op),
        tf.summary.histogram('distribution', tf.nn.softmax(last)),
        tf.summary.scalar('penalty', penalty)
    ])

    global_step = tf.get_variable("global_step",
                                  shape=[],
                                  trainable=False,
                                  initializer=tf.zeros_initializer)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_INIT,
                                               global_step,
                                               LEARNING_RATE_DECAY_STEPS,
                                               0.1,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimize = optimizer.minimize(loss_op + 10 * penalty,
                                      global_step=global_step)

    # Train the model
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(
            '../train_logs/addition_with_penalty_decay_100000', sess.graph)
        # The loop below only ends through an exception or an interrupt, so
        # close the writer in `finally` to flush buffered summaries to disk.
        try:
            sess.run(tf.global_variables_initializer())
            step = 0
            while True:
                losses = []
                for _ in range(100):
                    # Generate new input data
                    inputs, targets = get_batch()
                    loss, _, progress = sess.run(
                        [loss_op, optimize, summary], {
                            inputs_ph: inputs,
                            targets_ph: targets
                        })
                    losses.append(loss)
                    train_writer.add_summary(progress, step)
                    step += 1
                print("Step {} loss {}".format(step, np.mean(losses)))
        finally:
            train_writer.close()