Example No. 1
# Assumed TF 1.x imports for the contrib cuDNN API this example targets.
import tensorflow as tf
from tensorflow.contrib.cudnn_rnn import CudnnLSTM
from tensorflow.contrib.rnn import LSTMStateTuple


def build_cudnn_encoder(encoder_cell,
                        num_layers,
                        encoder_inputs,
                        encoder_inputs_length,  # unused by the cuDNN path
                        time_major=False,
                        scope=None):
    assert num_layers > 0
    batch_size = encoder_inputs.get_shape()[0]
    num_units = encoder_cell.output_size
    input_size = encoder_inputs.get_shape()[-1]
    with tf.variable_scope(scope):
        model = CudnnLSTM(num_layers, num_units, input_size)
        params_size_t = calc_cudnn_num_params(num_layers, num_units,
                                              input_size)
        #params_size_t = model.params_size()
        if not time_major:
            encoder_inputs = tf.transpose(encoder_inputs, [1, 0, 2])
        input_h = tf.zeros([num_layers, batch_size, num_units])
        input_c = tf.zeros([num_layers, batch_size, num_units])
        params = tf.Variable(tf.random_normal([params_size_t]))
        output, output_h, output_c = model(is_training=True,
                                           input_data=encoder_inputs,
                                           input_h=input_h,
                                           input_c=input_c,
                                           params=params)
        print("output", output)
        print("output_h", output_h)

        if not time_major:
            output = tf.transpose(output, [1, 0, 2])

    # Note: this returns the *first* layer's final state; for num_layers > 1
    # the top layer's state would be output_h[-1] / output_c[-1].
    return output, LSTMStateTuple(c=output_c[0], h=output_h[0])
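
Example No. 1 relies on a project-local helper, calc_cudnn_num_params, that is
not shown (the commented-out model.params_size() call is the library
alternative). A minimal sketch of what such a helper could look like, assuming
the standard cuDNN LSTM layout of four input-to-hidden matrices, four
hidden-to-hidden matrices, and eight bias vectors per layer:

def calc_cudnn_num_params(num_layers, num_units, input_size):
    """Plausible reimplementation (an assumption, not the project's code):
    parameter count of a unidirectional cuDNN LSTM."""
    total = 0
    for layer in range(num_layers):
        in_dim = input_size if layer == 0 else num_units
        total += 4 * num_units * in_dim      # input-to-hidden weights
        total += 4 * num_units * num_units   # hidden-to-hidden weights
        total += 8 * num_units               # cuDNN keeps two biases per gate
    return total

Unlike model.params_size(), which returns a scalar tensor, this sketch returns
a plain Python int; tf.random_normal above accepts either.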
Example No. 2
File: lstm2.py Project: eblics/ea
def lstm():  # Builds the LSTM graph; inputs come from module-level globals.
    global input_x, input_rnn, output
    w_in = weights['in']
    b_in = biases['in']
    # Flatten to 2-D for the input projection; the result feeds the hidden layer.
    input_x = tf.reshape(X, [-1, input_size])
    input_rnn = tf.matmul(input_x, w_in) + b_in
    # Back to 3-D, the shape the LSTM expects.
    input_rnn = tf.reshape(input_rnn, [-1, num_steps, num_units])
    rnn = CudnnLSTM(num_layers,
                    num_units,
                    input_size,
                    input_mode='linear_input',
                    direction='unidirectional',
                    # Dropout only while training; the original passed
                    # keep_prob in the else-branch, which looks inverted.
                    dropout=0.5 if is_training else 0.0,
                    seed=0)
    params_size_t = rnn.params_size()
    params = tf.Variable(tf.random_uniform([params_size_t],
                                           minval=-0.1,
                                           maxval=0.1,
                                           dtype=tf.float32),
                         validate_shape=False)
    output, output_h, output_c = rnn(is_training=is_training,
                                     input_data=input_rnn,
                                     input_h=H,
                                     input_c=C,
                                     params=params)
    output = tf.reshape(output, [-1, num_units])  # input to the output layer
    w_out = weights['out']
    b_out = biases['out']
    pred = tf.matmul(output, w_out) + b_out
    return pred
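
The function reads every input from module-level globals. A hypothetical setup
showing the shapes those globals would need for the call above; all sizes,
initializers, and values here are assumptions, not taken from eblics/ea:

import tensorflow as tf
from tensorflow.contrib.cudnn_rnn import CudnnLSTM  # TF 1.x contrib API

batch_size, num_steps = 32, 20
input_size, num_units, num_layers = 10, 128, 2
is_training, keep_prob = True, 1.0

X = tf.placeholder(tf.float32, [batch_size, num_steps, input_size])
weights = {'in': tf.get_variable('w_in', [input_size, num_units]),
           'out': tf.get_variable('w_out', [num_units, 1])}
biases = {'in': tf.get_variable('b_in', [num_units]),
          'out': tf.get_variable('b_out', [1])}
# input_rnn reaches CudnnLSTM without a transpose to time-major order, so
# cuDNN reads its first axis as time; the "batch" axis of the initial states
# is therefore num_steps.
H = tf.zeros([num_layers, num_steps, num_units])
C = tf.zeros([num_layers, num_steps, num_units])

pred = lstm()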
Example No. 3
	# A method of cudnn_LSTMCell, an RNNCell-style wrapper around CudnnLSTM;
	# assumes the TF 1.x imports tf, logging, variable_scope as vs, math_ops,
	# and rnn_cell_impl are in scope.
	def __init__(self, num_units, input_size, direction='unidirectional',
				use_peepholes=False, cell_clip=None,
				initializer=None, num_proj=None, proj_clip=None,
				num_unit_shards=None, num_proj_shards=None,
				forget_bias=0.0, state_is_tuple=True,
				activation=None, reuse=None):
		super(cudnn_LSTMCell, self).__init__(_reuse=reuse)
		if not state_is_tuple:
			logging.warn("%s: Using a concatenated state is slower and will soon be "
						"deprecated.  Use state_is_tuple=True.", self)
		if num_unit_shards is not None or num_proj_shards is not None:
			logging.warn(
				"%s: The num_unit_shards and proj_unit_shards parameters are "
				"deprecated and will be removed in Jan 2017.  "
				"Use a variable scope with a partitioner instead.", self)

		# The cuDNN kernel does not implement peephole connections.
		if use_peepholes:
			raise ValueError("Using peepholes is not supported.")

		self._num_units = num_units
		self._input_size = input_size
		self._direction = direction
		self._use_peepholes = use_peepholes
		self._cell_clip = cell_clip
		self._initializer = initializer
		self._num_proj = num_proj
		self._proj_clip = proj_clip
		self._num_unit_shards = num_unit_shards
		self._num_proj_shards = num_proj_shards
		self._forget_bias = forget_bias
		self._state_is_tuple = state_is_tuple
		self._activation = activation or math_ops.tanh

		self._CudnnLSTM = CudnnLSTM(num_layers=1, 
									num_units=num_units,
									input_size=input_size,
									direction=direction)
		self._params_size = LSTM_params_size(1, num_units, input_size)
		if direction == 'bidirectional':
			# A bidirectional layer holds an independent parameter set per direction.
			self._params_size *= 2
		self._params = vs.get_variable(
			"params", shape=[self._params_size], dtype=tf.float32)

		if num_proj:
			self._state_size = (
				rnn_cell_impl.LSTMStateTuple(num_units, num_proj)
				if state_is_tuple else num_units + num_proj)
			self._output_size = num_proj
		else:
			self._state_size = (
				rnn_cell_impl.LSTMStateTuple(num_units, num_units)
				if state_is_tuple else 2 * num_units)
			self._output_size = num_units
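
Only __init__ is shown; the _reuse plumbing and the state_size/output_size
bookkeeping mark this as an RNNCell subclass, so the intended use is
presumably the standard cell protocol. A hedged sketch, assuming the rest of
the class implements __call__ compatibly (every shape and name below is
illustrative):

import tensorflow as tf

batch_size, num_steps, input_size, num_units = 16, 50, 64, 128
inputs = tf.placeholder(tf.float32, [batch_size, num_steps, input_size])

with tf.variable_scope("encoder"):
    cell = cudnn_LSTMCell(num_units=num_units, input_size=input_size)
    outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)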
Example No. 4
  def __init__(self, is_training, config, debug=False):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    self.size = size = config.hidden_size
    vocab_size = config.vocab_size
    self.num_layers = config.num_layers

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    embedding = tf.get_variable("embedding", [vocab_size, size], dtype=data_type(is_lstm_layer=False))
    inputs = tf.nn.embedding_lookup(embedding, self._input_data, name="inputs_to_rnn")
    if debug:
        variable_summaries(inputs, "inputs_to_rnn")

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    rnn = CudnnLSTM(config.num_layers, size, size, input_mode='linear_input',
                    direction='unidirectional', dropout=config.keep_prob,
                    seed=0, seed2=0)
    params_size_t = rnn.params_size()
    # (An earlier version zero-initialized these states as tf.Variables.)
    self._initial_input_h = tf.placeholder(
        data_type(is_lstm_layer=True),
        shape=[config.num_layers, batch_size, size])
    self._initial_input_c = tf.placeholder(
        data_type(is_lstm_layer=True),
        shape=[config.num_layers, batch_size, size])
    #self.params = tf.get_variable("params", [params_size_t], validate_shape=False, dtype=data_type(is_lstm_layer=False))
    self.params = tf.Variable(
        tf.random_uniform([params_size_t],
                          minval=-config.init_scale,
                          maxval=config.init_scale,
                          dtype=data_type(is_lstm_layer=True)),
        validate_shape=False)
    self.params_size_t = params_size_t  # reuse the value computed above

    # cuDNN expects time-major input, hence the transpose. The original passed
    # self.input_h / self.input_c, which this snippet never defines; the
    # placeholders above are presumably what was meant.
    outputs, output_h, output_c = rnn(
        is_training=is_training,
        input_data=tf.transpose(
            tf.cast(inputs, dtype=data_type(is_lstm_layer=True)), [1, 0, 2]),
        input_h=self._initial_input_h,
        input_c=self._initial_input_c,
        params=self.params)

    self._output_h = output_h
    self._output_c = output_c

    # The original wrapped a single tensor in tf.concat, which adds nothing;
    # transpose back to batch-major and flatten to [batch * num_steps, size].
    output = tf.reshape(tf.transpose(outputs, [1, 0, 2]), [-1, size])

    if debug:
        variable_summaries(output, 'multiRNN_output')

    softmax_w = tf.get_variable("softmax_w", [size, vocab_size], dtype=data_type(is_lstm_layer=False))
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type(is_lstm_layer=False))
    logits = tf.matmul(
        output if output.dtype == data_type(is_lstm_layer=False) else
        tf.cast(output, data_type(is_lstm_layer=False)),
        softmax_w) + softmax_b

    if debug:
       variable_summaries(logits, 'logits')

    #loss = tf.contrib.nn.seq2seq.sequence_loss_by_example(
    loss = sequence_loss_by_example(
        [logits],
        [tf.reshape(self._targets, [-1])],
        [tf.ones([batch_size * num_steps], dtype=data_type(is_lstm_layer=False))])

    self._cost = cost = tf.reduce_sum(loss) / batch_size
    if FLAGS.cost_function == 'avg':
      self._cost_to_optimize = cost_to_optimize = tf.reduce_mean(loss)
    else:
      self._cost_to_optimize = cost_to_optimize = cost

    tvars = tf.trainable_variables()
    # L2 regularization over every trainable variable.
    for v in tvars:
        cost_to_optimize += FLAGS.reg_term * tf.cast(
            tf.nn.l2_loss(v),
            dtype=data_type(False)) / (batch_size * config.num_steps)
    self._cost_to_optimize = cost_to_optimize

    if debug:
        tf.summary.scalar('cost no regularization', cost)
        tf.summary.scalar('cost_to_optimize', cost_to_optimize)

    #self._final_state = state

    if not is_training:
        self.merged = tf.summary.merge_all()
        return

    self._lr = tf.Variable(0.0, trainable=False, dtype=data_type(is_lstm_layer=False))
    #if debug:
    #        tf.scalar_summary('learning rate', self._lr)

    #tvars = tf.trainable_variables()
    type2vars = dict()
    print("**************************")
    print("Trainable Variables")
    print("**************************")
    for var in tvars:
        print('Variable name: %s. With dtype: %s and shape: %s' % (var.name, var.dtype, var.get_shape()))
        if var.dtype not in type2vars:
            type2vars[var.dtype] = [var]
        else:
            type2vars[var.dtype].append(var)

    print("**************************")
    print("Gradients Variables")
    print("**************************")
    _grads = tf.gradients(cost_to_optimize, tvars)
    type2grads = dict()
    for g in _grads:
        print('Gradient name: %s. With dtype: %s' % (g.name, g.dtype))
        if g.dtype not in type2grads:
            type2grads[g.dtype] = [g]
        else:
            type2grads[g.dtype].append(g)

    type2clippedGrads = dict()
    for dtype in type2grads:
        cgrads, _ = tf.clip_by_global_norm(type2grads[dtype], config.max_grad_norm)
        type2clippedGrads[dtype] = cgrads


    if debug:
        # Same dict key-order assumption as noted below.
        for (gkey, vkey) in zip(type2clippedGrads.keys(), type2vars.keys()):
            for (clipped_gradient, variable) in zip(type2clippedGrads[gkey], type2vars[vkey]):
                variable_summaries(clipped_gradient, "clipped_dcost/d"+variable.name)
                variable_summaries(variable, variable.name)


    if FLAGS.optimizer == 'MomentumOptimizer':
        optimizer = tf.train.MomentumOptimizer(learning_rate=self._lr, momentum=0.9)
    elif FLAGS.optimizer == 'AdamOptimizer':
        optimizer = tf.train.AdamOptimizer()
    elif FLAGS.optimizer == 'RMSPropOptimizer':
        optimizer = tf.train.RMSPropOptimizer(learning_rate=self._lr)
    elif FLAGS.optimizer == 'AdagradOptimizer':
        optimizer = tf.train.AdagradOptimizer(learning_rate=self._lr)
    else:
        optimizer = tf.train.GradientDescentOptimizer(self._lr)

    allgrads = []
    allvars = []
    for dtype in type2clippedGrads:
        allgrads += type2clippedGrads[dtype]

    #WARNING: assumes type2vars yields its keys in the same order as type2clippedGrads
    for dtype in type2vars:
        allvars += type2vars[dtype]

    self._train_op = optimizer.apply_gradients(zip(allgrads, allvars))

    self._new_lr = tf.placeholder(dtype=data_type(False), shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
    self.merged = tf.summary.merge_all()
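
Because the initial h/c states are placeholders, the caller has to feed them
and can carry the returned states across consecutive batches (truncated
backpropagation through time). A hedged sketch of such a driver loop; the
model instance m, the session, and the batches iterable are assumptions:

import numpy as np

state_shape = (m.num_layers, m.batch_size, m.size)
h = np.zeros(state_shape, dtype=np.float32)
c = np.zeros(state_shape, dtype=np.float32)

for x, y in batches:
    # Feed the previous final states back in as the next initial states.
    cost, h, c, _ = session.run(
        [m._cost, m._output_h, m._output_c, m._train_op],
        feed_dict={m._input_data: x,
                   m._targets: y,
                   m._initial_input_h: h,
                   m._initial_input_c: c})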
Example No. 5
    def __init__(self, is_training, config, debug=False):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.size = size = config.hidden_size
        self.num_layers = config.num_layers

        self._input_data = tf.placeholder(tf.float32,
                                          [batch_size, num_steps, 1])
        self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])

        inputs = self._input_data

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # The input placeholder carries a single feature per step, so the
        # cuDNN input_size is 1; the original passed `size` here, which would
        # not match the params buffer under input_mode='linear_input'.
        rnn = CudnnLSTM(config.num_layers,
                        size,
                        1,
                        input_mode='linear_input',
                        direction='unidirectional',
                        dropout=config.keep_prob,
                        seed=0)
        params_size_t = rnn.params_size()
        # The input is fed batch-major (no transpose to time-major), so cuDNN
        # reads its first axis (batch_size) as time; the "batch" axis of the
        # state placeholders is therefore num_steps. (An earlier version
        # zero-initialized these states as tf.Variables.)
        self._initial_input_h = tf.placeholder(
            data_type(is_lstm_layer=True),
            shape=[config.num_layers, config.num_steps, size])
        self._initial_input_c = tf.placeholder(
            data_type(is_lstm_layer=True),
            shape=[config.num_layers, config.num_steps, size])
        #self.params = tf.get_variable("params", [params_size_t], validate_shape=False, dtype=data_type(is_lstm_layer=False))
        self.params = tf.Variable(
            tf.random_uniform([params_size_t],
                              minval=-config.init_scale,
                              maxval=config.init_scale,
                              dtype=data_type(is_lstm_layer=True)),
            validate_shape=False)
        self.params_size_t = params_size_t  # reuse the value computed above

        # outputs, output_h, output_c = rnn(is_training=is_training, input_data=tf.transpose(tf.cast(inputs, dtype=data_type(is_lstm_layer=True)), [1, 0, 2]), input_h=self.input_h,
        # input_c=self.input_c, params=self.params)
        # As in the previous example, self.input_h / self.input_c are never
        # defined; the placeholders above are presumably what was meant.
        output, output_h, output_c = rnn(is_training=is_training,
                                         input_data=inputs,
                                         input_h=self._initial_input_h,
                                         input_c=self._initial_input_c,
                                         params=self.params)
        self._output_h = output_h
        self._output_c = output_c
        # output = tf.reshape(tf.transpose(outputs, [1, 0, 2]), [-1, size])

        softmax_w = tf.get_variable("softmax_w", [batch_size, size, 1],
                                    dtype=data_type(is_lstm_layer=False))
        softmax_b = tf.get_variable("softmax_b", [batch_size, num_steps],
                                    dtype=data_type(is_lstm_layer=False))
        # logits = tf.matmul(output if output.dtype == data_type(is_lstm_layer=False) else tf.cast(output, data_type(is_lstm_layer=False)), softmax_w) + softmax_b
        logits = tf.matmul(output, softmax_w)
        logits = tf.reshape(logits, [batch_size, num_steps])
        logits = logits + softmax_b
        self._logits = logits

        loss = tf.sqrt(tf.losses.mean_squared_error(logits, self._targets))
        self._cost = cost = loss
        if not is_training:
            return

        self._lr = tf.Variable(0.0,
                               trainable=False,
                               dtype=data_type(is_lstm_layer=False))
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self._cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)
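
The _lr_update op pairs with the _new_lr placeholder, so the learning-rate
schedule is driven from outside the graph. A hedged sketch of a typical decay
loop; m, session, run_epoch, max_epochs, and the config fields used here are
assumptions:

max_epochs = 13  # assumed

for epoch in range(max_epochs):
    # Exponential decay after config.max_epoch warm epochs (assumed schedule).
    lr_decay = config.lr_decay ** max(epoch - config.max_epoch, 0)
    session.run(m._lr_update,
                feed_dict={m._new_lr: config.learning_rate * lr_decay})
    run_epoch(session, m)  # hypothetical per-epoch training function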