def build(self, input_number, sequence_length, layers_number, units_number, output_number):
    """Build the stacked-LSTM graph and hand the hyper-parameters to `self.save`.

    Creates the input/target placeholders, the output projection variables
    and a `layers_number`-deep stack of `BasicLSTMCell`s unrolled with
    `rnn.static_rnn`.

    Args:
        input_number: size of the last axis of the input placeholder.
        sequence_length: number of time steps the network is unrolled for.
        layers_number: number of stacked LSTM layers.
        units_number: number of units in every LSTM layer.
        output_number: width of the target placeholder / output projection.
    """
    self.x = tf.placeholder("float", [None, sequence_length, input_number])
    self.y = tf.placeholder("float", [None, output_number])
    self.sequence_length = sequence_length

    self.weights = {
        'out': tf.Variable(tf.random_normal([units_number, output_number]))
    }
    self.biases = {
        'out': tf.Variable(tf.random_normal([output_number]))
    }

    # static_rnn consumes a Python list with one tensor per time step:
    # move the time axis first, flatten, then split into `sequence_length` pieces.
    x = tf.transpose(self.x, [1, 0, 2])
    x = tf.reshape(x, [-1, input_number])
    x = tf.split(x, sequence_length, 0)

    # One distinct cell object per layer.
    lstm_layers = [rnn.BasicLSTMCell(units_number) for _ in range(layers_number)]
    deep_lstm = rnn.MultiRNNCell(lstm_layers)
    self.outputs, _ = rnn.static_rnn(deep_lstm, x, dtype=tf.float32)

    # A single-argument print(...) call is valid on both Python 2 and Python 3;
    # the former `print "..."` statement was a syntax error on Python 3.
    print("Build model with input_number: {}, sequence_length: {}, layers_number: {}, "
          "units_number: {}, output_number: {}".format(input_number, sequence_length, layers_number,
                                                       units_number, output_number))
    self.save(input_number, sequence_length, layers_number, units_number, output_number)
def prune_model(model,batchsize = 50,ckpt="model_pruned"):
    """Train a prunable GRU classifier seeded from `model`'s weights and checkpoint it.

    The first five arrays returned by `model.get_weights()` are used, in
    order, as the GRU input kernel, recurrent kernel and bias, followed by
    the output-layer kernel and bias. The graph is trained for 1000 SGD
    steps on batches drawn from `build_dataset()` / `get_batch(...)`.

    Args:
        model: object exposing `get_weights()` with at least five entries.
        batchsize: number of sequences per training batch.
        ckpt: checkpoint path prefix passed to `tf.train.Saver.save`.

    Returns:
        None. The result is the checkpoint written to `ckpt`; the loss is
        printed after every step.
    """
    weights = model.get_weights()
    W = weights[0]
    U = weights[1]
    bias = weights[2]
    W_out = weights[3]
    bias_out = weights[4]

    GRU = PrunableGRU(W,U,bias)
    Logits = PrunableLogits(W_out,bias_out)

    # The placeholder is unstacked along axis 0 below, so axis 0 is the step
    # axis: 40 steps of [batchsize, 2] inputs (both sizes are hard-coded here).
    X = tf.placeholder("float", [40, batchsize, 2])
    Y = tf.placeholder("float", [None, W_out.shape[1]])

    x = tf.unstack(X,axis=0)
    outputs, states = static_rnn(GRU, x, dtype=tf.float32)
    logits = Logits(outputs[-1])
    prediction = tf.nn.softmax(logits)

    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(loss_op)

    correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    init = tf.global_variables_initializer()
    dataset = build_dataset()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        for i in range(1000):
            batch_x, batch_y = get_batch(dataset,batchsize=batchsize,batchtype="train")
            # Swap the first two axes so the batch matches X's step-first layout
            # (presumably get_batch returns batch-first data -- verify against it).
            batch_x = np.swapaxes(batch_x,1,0)
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            loss, _ = sess.run([loss_op, accuracy], feed_dict={X: batch_x, Y: batch_y})
            # Checkpoint once every 100 steps. The former `if i%100:` was inverted
            # and wrote a checkpoint on every step that was NOT a multiple of 100.
            if i % 100 == 0:
                saver.save(sess,ckpt)
            # NOTE(review): the original indentation was lost; the loss is printed
            # on every step here -- confirm that was the intent.
            print(loss)
        # Always persist the final weights, as the original did on its last step.
        saver.save(sess,ckpt)
def _basic_rnn_seq2seq(encoder_inputs, decoder_inputs, cell, feed_previous, dtype=dtypes.float32, scope=None):
    """Basic RNN sequence-to-sequence model.

    An encoder RNN turns `encoder_inputs` into a final state; a decoder,
    initialised with that state, is then run over `decoder_inputs`. The
    encoder works on a deep copy of `cell`, the decoder on `cell` itself.

    Args:
        encoder_inputs: A list of 2D Tensors [batch_size x input_size].
        decoder_inputs: A list of 2D Tensors [batch_size x input_size].
        cell: RNN cell; deep-copied for the encoder, passed as-is to the decoder.
        feed_previous: Boolean; when true, `_loop_function` is handed to the
            decoder as its loop function, otherwise the decoder is called
            without one and consumes `decoder_inputs` as given.
        dtype: dtype passed to the encoder's `static_rnn` call
            (default: tf.float32).
        scope: VariableScope for the created subgraph;
            default: "basic_rnn_seq2seq".

    Returns:
        Whatever `_rnn_decoder` returns; per the original documentation a
        tuple `(outputs, state)` with one output per decoder input and the
        decoder state at the final time step.
    """
    with variable_scope.variable_scope(scope or "basic_rnn_seq2seq"):
        encoder_cell = copy.deepcopy(cell)
        _, encoder_final_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

        if not feed_previous:
            # Standard decoding: use decoder_inputs exactly as supplied.
            return _rnn_decoder(decoder_inputs, encoder_final_state, cell)
        return _rnn_decoder(decoder_inputs, encoder_final_state, cell, _loop_function)
def __init__(self, config, is_training=False):
    """Build the Adaptive Computation Time (ACT) language-model graph.

    Args:
        config: object providing `batch_size`, `num_steps`, `hidden_size`,
            `vocab_size`, `max_grad_norm`, `use_lstm`, `epsilon`,
            `max_computation`, `ponder_time_penalty` and `learning_rate`.
        is_training: when true, the gradient-clipping / Adam training ops
            (`self.lr`, `self.train_op`) are added to the graph.
    """
    self.config = config
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    self.hidden_size = hidden_size = config.hidden_size
    self.num_layers = 1
    vocab_size = config.vocab_size
    self.max_grad_norm = config.max_grad_norm
    self.use_lstm = config.use_lstm

    # Placeholders for inputs.
    self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # Tensor.set_shape() returns None, so it must not be chained onto the
    # assignment: the original one-liner left self.initial_state set to None.
    initial_state = array_ops.zeros(tf.stack([self.batch_size, self.num_steps]), dtype=tf.float32)
    initial_state.set_shape([None, self.num_steps])
    self.initial_state = initial_state

    embedding = tf.get_variable('embedding', [self.config.vocab_size, self.config.hidden_size])

    # Set up ACT cell and inner rnn-type cell for use inside the ACT cell.
    with tf.variable_scope("rnn"):
        if self.use_lstm:
            inner_cell = BasicLSTMCell(self.config.hidden_size)
        else:
            inner_cell = GRUCell(self.config.hidden_size)

    with tf.variable_scope("ACT"):
        act = ACTCell(self.config.hidden_size, inner_cell, config.epsilon,
                      max_computation=config.max_computation, batch_size=self.batch_size)

    # One [batch_size, hidden_size] tensor per time step for static_rnn.
    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    inputs = [tf.squeeze(single_input, [1]) for single_input in tf.split(inputs, self.config.num_steps, 1)]

    self.outputs, _ = static_rnn(act, inputs, dtype=tf.float32)

    # Softmax to get probability distribution over vocab.
    output = tf.reshape(tf.concat(self.outputs, 1), [-1, hidden_size])
    softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b  # dim (numsteps*batchsize, vocabsize)

    loss = sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([batch_size * num_steps])],
        vocab_size)

    # Add up loss and retrieve batch-normalised ponder cost: sum N + sum Remainder.
    ponder_cost = act.calculate_ponder_cost(time_penalty=self.config.ponder_time_penalty)
    self.cost = (tf.reduce_sum(loss) / batch_size) + ponder_cost
    self.final_state = self.outputs[-1]

    if is_training:
        # NOTE(review): self.lr is created but the optimizer below is built from
        # config.learning_rate -- confirm which one is meant to drive training.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def RNN(x, weights, biases):
    """Run a two-layer LSTM over `x` and project its last output.

    Args:
        x: input tensor; reshaped to `[-1, n_input]` and split into
            `n_input` single-column steps.
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.

    Returns:
        `outputs[-1] @ weights['out'] + biases['out']`.
    """
    flat = tf.reshape(x, [-1, n_input])
    step_inputs = tf.split(flat, n_input, 1)

    layers = [rnn.BasicLSTMCell(n_hidden), rnn.BasicLSTMCell(n_hidden)]
    stacked_cell = rnn.MultiRNNCell(layers)

    step_outputs, _ = rnn.static_rnn(stacked_cell, step_inputs, dtype=tf.float32)
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def _lstm_model(X, y):
    """Model function: stacked LSTM, dense layers, then linear regression.

    Args:
        X: input tensor, unstacked along axis 1 into `num_units` steps.
        y: regression targets handed to `tflearn.models.linear_regression`.

    Returns:
        Tuple `(prediction, loss, train_op)`.
    """
    cell_stack = rnn.MultiRNNCell(lstm_cells(rnn_layers), state_is_tuple=True)
    step_inputs = tf.unstack(X, axis=1, num=num_units)
    step_outputs, _ = rnn.static_rnn(cell_stack, step_inputs, dtype=dtypes.float32)

    # Only the last step's output feeds the dense head.
    features = dnn_layers(step_outputs[-1], dense_layers)
    prediction, loss = tflearn.models.linear_regression(features, y)

    global_step = tf.contrib.framework.get_global_step()
    train_op = tf.contrib.layers.optimize_loss(
        loss,
        global_step,
        optimizer=optimizer,
        learning_rate=learning_rate)
    return prediction, loss, train_op
def rnn_estimator(x, y):
    """RNN estimator with target predictor function on top.

    Transforms `x` with `input_op_fn`, encodes it with a uni- or
    bi-directional multi-layer RNN (optionally attention-wrapped) and
    feeds the final encoding together with `y` to `target_predictor_fn`.

    Raises:
        ValueError: if `cell_type` is not one of 'rnn', 'gru' or 'lstm'.
    """
    x = input_op_fn(x)

    if cell_type == 'rnn':
        cell_fn = contrib_rnn.BasicRNNCell
    elif cell_type == 'gru':
        cell_fn = contrib_rnn.GRUCell
    elif cell_type == 'lstm':
        # TODO(ipolosukhin): state_is_tuple=False is deprecated
        cell_fn = functools.partial(contrib_rnn.BasicLSTMCell, state_is_tuple=False)
    else:
        raise ValueError('cell_type {} is not supported. '.format(cell_type))

    def _maybe_attend(base_cell):
        # Wrap the cell with attention only when an attention length was requested.
        if attn_length is None:
            return base_cell
        return contrib_rnn.AttentionCellWrapper(
            base_cell,
            attn_length=attn_length,
            attn_size=attn_size,
            attn_vec_size=attn_vec_size,
            state_is_tuple=False)

    def _stack(layer_cell):
        # NOTE(review): the same cell object is repeated for every layer --
        # confirm this is intended for the TensorFlow version in use.
        return contrib_rnn.MultiRNNCell([layer_cell] * num_layers, state_is_tuple=False)

    if not bidirectional:
        rnn_cell = _maybe_attend(cell_fn(rnn_size))
        cell = _stack(rnn_cell)
        _, encoding = contrib_rnn.static_rnn(cell,
                                             x,
                                             dtype=dtypes.float32,
                                             sequence_length=sequence_length,
                                             initial_state=initial_state)
    else:
        # Forward and backward direction cells.
        fw_cell = cell_fn(rnn_size)
        bw_cell = cell_fn(rnn_size)
        fw_cell = _maybe_attend(fw_cell)
        bw_cell = _maybe_attend(bw_cell)
        rnn_fw_cell = _stack(fw_cell)
        rnn_bw_cell = _stack(bw_cell)
        # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
        _, encoding = bidirectional_rnn(rnn_fw_cell,
                                        rnn_bw_cell,
                                        x,
                                        dtype=dtypes.float32,
                                        sequence_length=sequence_length,
                                        initial_state_fw=initial_state,
                                        initial_state_bw=initial_state)

    return target_predictor_fn(encoding, y)
def __init__(self, args, deterministic=False):
    """Build an RNN sequence classifier graph from `args`.

    Args:
        args: namespace providing `model` ('rnn', 'gru', 'lstm' or
            'bn-lstm'), `rnn_size`, `num_layers`, `seq_length`,
            `vocab_size` and `label_size`.
        deterministic: not used by this constructor.

    Raises:
        Exception: if `args.model` names an unsupported cell type.
    """
    self.args = args

    model_type = args.model
    if model_type == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif model_type == 'gru':
        cell_fn = rnn.GRUCell
    elif model_type == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    elif model_type == 'bn-lstm':
        cell_fn = BatchNormLSTMCell
    else:
        raise Exception('model type not supported: {}'.format(args.model))

    # NOTE(review): self.is_training is read here but never assigned in this
    # constructor -- presumably provided at class level; verify.
    cell_args = (args.rnn_size, self.is_training) if args.model == 'bn-lstm' else (args.rnn_size,)
    single_cell = cell_fn(*cell_args)

    # NOTE(review): one cell object is repeated for every layer -- confirm
    # that is intended for the TensorFlow version in use.
    self.cell = rnn.MultiRNNCell([single_cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int64, [None, args.seq_length])
    # Targets are class labels (one per sequence), not per-step tokens.
    self.targets = tf.placeholder(tf.int64, [None, ])

    with tf.variable_scope('embeddingLayer'):
        with tf.device('/cpu:0'):
            W = tf.get_variable('W', [args.vocab_size, args.rnn_size])
            embedded = tf.nn.embedding_lookup(W, self.input_data)

            # Split along the step axis and drop it, giving static_rnn one
            # tensor per time step.
            per_step = tf.split(embedded, args.seq_length, 1)
            inputs = [tf.squeeze(step, [1]) for step in per_step]

    outputs, last_state = rnn.static_rnn(self.cell, inputs, dtype=tf.float32, scope='rnnLayer')

    with tf.variable_scope('softmaxLayer'):
        softmax_w = tf.get_variable('w', [args.rnn_size, args.label_size])
        softmax_b = tf.get_variable('b', [args.label_size])
        logits = tf.matmul(outputs[-1], softmax_w) + softmax_b
        self.probs = tf.nn.softmax(logits)

    # Softmax loss over integer class labels.
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.targets)
    self.cost = tf.reduce_mean(per_example_loss)
    self.final_state = last_state

    self.lr = tf.Variable(0.0, trainable=False)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.cost)

    predicted_label = tf.argmax(self.probs, 1)
    self.correct_pred = tf.equal(predicted_label, self.targets)
    self.correct_num = tf.reduce_sum(tf.cast(self.correct_pred, tf.float32))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
def recurrent_neural_network(x):
    """Single-layer LSTM classifier: returns logits computed from the last step.

    Args:
        x: rank-3 input; its first two axes are swapped, it is flattened to
            rows of `chunk_size` values and split into `n_chunks` steps.

    Returns:
        `outputs[-1] @ weights + biases` for a freshly created output layer.
    """
    output_weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    output_biases = tf.Variable(tf.random_normal([n_classes]))
    layer = {'weights': output_weights, 'biases': output_biases}

    # Rearrange into the list-of-steps form that static_rnn expects.
    steps = tf.transpose(x, [1, 0, 2])
    steps = tf.reshape(steps, [-1, chunk_size])
    steps = tf.split(steps, n_chunks, 0)

    cell = rnn.BasicLSTMCell(rnn_size)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    return tf.matmul(step_outputs[-1], layer['weights']) + layer['biases']
def simple_rnn(features, labels, mode):
    """Estimator model_fn: one LSTM layer followed by a linear output layer.

    Args:
        features: dict; `features[TIMESERIES_COL]` is split along axis 1
            into `N_INPUTS` steps.
        labels: regression targets (used in TRAIN and EVAL modes only).
        mode: a `tf.estimator.ModeKeys` value.

    Returns:
        A `tf.estimator.EstimatorSpec`.
    """
    # 0. Reformat input shape to become a sequence
    step_inputs = tf.split(features[TIMESERIES_COL], N_INPUTS, 1)

    # 1. Configure the RNN and keep only the output of its last step
    cell = rnn.BasicLSTMCell(LSTM_SIZE, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)
    last_output = step_outputs[-1]

    # Output is a linear function of the last RNN output
    weight = tf.get_variable("weight", initializer=tf.initializers.random_normal,
                             shape=[LSTM_SIZE, N_OUTPUTS])
    bias = tf.get_variable("bias", initializer=tf.initializers.random_normal,
                           shape=[N_OUTPUTS])
    predictions = tf.matmul(last_output, weight) + bias

    # 2. Loss function, training/eval ops (absent in PREDICT mode)
    loss = None
    train_op = None
    eval_metric_ops = None
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        loss = tf.losses.mean_squared_error(labels, predictions)
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=0.01,
            optimizer="SGD")
        eval_metric_ops = {
            "rmse": tf.metrics.root_mean_squared_error(labels, predictions)
        }

    # 3. Create predictions
    predictions_dict = {"predicted": predictions}

    # 4. Create export outputs
    export_outputs = {
        "predict_export_outputs": tf.estimator.export.PredictOutput(outputs=predictions)
    }

    # 5. Return EstimatorSpec
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs)
def generate_rnn_output(self):
    """Encode the embedded source tokens with a (bi)directional RNN.

    Returns:
        Tuple `(encoder_outputs, encoder_state, attention_states)`:
        the per-step encoder outputs, the concatenated state of the last
        layer (forward and backward joined when bidirectional), and the
        per-step outputs concatenated along axis 1 for use as attention
        states.
    """
    with tf.variable_scope("generate_seq_output"):
        # Both the uni- and bi-directional paths start from the same lookup.
        embedding = tf.get_variable("embedding",
                                    [self.source_vocab_size, self.word_embedding_size])
        encoder_emb_inputs = [tf.nn.embedding_lookup(embedding, token_ids)
                              for token_ids in self.encoder_inputs]

        if self.bidirectional_rnn:
            encoder_outputs, encoder_state_fw, encoder_state_bw = static_bidirectional_rnn(
                self.cell_fw, self.cell_bw, encoder_emb_inputs,
                sequence_length=self.sequence_length, dtype=tf.float32)
            # with state_is_tuple = True, if num_layers > 1,
            # simply use the state from the last layer as the encoder state
            last_fw = tf.concat(encoder_state_fw[-1], 1)
            last_bw = tf.concat(encoder_state_bw[-1], 1)
            encoder_state = tf.concat([last_fw, last_bw], 1)
            output_width = self.cell_fw.output_size + self.cell_bw.output_size
        else:
            encoder_outputs, layer_states = static_rnn(
                self.cell_fw, encoder_emb_inputs,
                sequence_length=self.sequence_length, dtype=tf.float32)
            # with state_is_tuple = True, if num_layers > 1,
            # use the state from the last layer as the encoder state
            encoder_state = tf.concat(layer_states[-1], 1)
            output_width = self.cell_fw.output_size

        # Give every step output a length-1 middle axis, then join along it.
        top_states = [tf.reshape(step_output, [-1, 1, output_width])
                      for step_output in encoder_outputs]
        attention_states = tf.concat(top_states, 1)

    return encoder_outputs, encoder_state, attention_states
def RNN(x, weights, biases):
    """Single-layer LSTM followed by a linear projection of its last output.

    Args:
        x: input tensor, unstacked along axis 1 into `n_steps` tensors
            (per the original comments: (batch_size, n_steps, n_input)).
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.

    Returns:
        `outputs[-1] @ weights['out'] + biases['out']`.
    """
    # static_rnn needs a Python list with one tensor per time step.
    step_inputs = tf.unstack(x, n_steps, 1)

    cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear activation on the last step's output.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def recurrent_neural_network(x):
    """Single-layer LSTM classifier: returns logits computed from the last step.

    Args:
        x: rank-3 input; its first two axes are swapped, it is flattened to
            rows of `chunk_size` values and split into `n_chunks` steps.

    Returns:
        `outputs[-1] @ weights + biases` for a freshly created output layer.
    """
    layer = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes])),
    }

    # transpose permutes the tensor's axes; per the original author this turns
    # each image into a sequence of row-wise inputs.
    steps = tf.transpose(x, [1, 0, 2])
    steps = tf.reshape(steps, [-1, chunk_size])
    steps = tf.split(axis=0, num_or_size_splits=n_chunks, value=steps)

    # The basic unit that is applied at every step.
    cell = BasicLSTMCell(rnn_size)

    # Unroll the cell over the sequence.
    step_outputs, _ = static_rnn(cell, steps, dtype=tf.float32)

    # Fully connected layer producing the prediction from the last step.
    prediction = tf.matmul(step_outputs[-1], layer['weights']) + layer['biases']
    return prediction
def simple_rnn(features, targets, mode):
    """contrib.learn model_fn: one LSTM layer followed by a linear output layer.

    Args:
        features: dict; `features[TIMESERIES_COL]` is split along axis 1
            into `N_INPUTS` steps.
        targets: regression targets (used in TRAIN and EVAL modes only).
        mode: a `tf.contrib.learn.ModeKeys` value.

    Returns:
        A `tflearn.ModelFnOps`.
    """
    # 0. Reformat input shape to become a sequence
    step_inputs = tf.split(features[TIMESERIES_COL], N_INPUTS, 1)

    # 1. Configure the RNN and keep only the output of its last step
    cell = rnn.BasicLSTMCell(LSTM_SIZE, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)
    last_output = step_outputs[-1]

    # Output is a linear function of the last RNN output
    weight = tf.Variable(tf.random_normal([LSTM_SIZE, N_OUTPUTS]))
    bias = tf.Variable(tf.random_normal([N_OUTPUTS]))
    predictions = tf.matmul(last_output, weight) + bias

    # 2. Loss function, training/eval ops (absent in inference mode)
    loss = None
    train_op = None
    eval_metric_ops = None
    if mode in (tf.contrib.learn.ModeKeys.TRAIN, tf.contrib.learn.ModeKeys.EVAL):
        loss = tf.losses.mean_squared_error(targets, predictions)
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=0.01,
            optimizer="SGD")
        eval_metric_ops = {
            "rmse": tf.metrics.root_mean_squared_error(targets, predictions)
        }

    # 3. Create predictions
    predictions_dict = {"predicted": predictions}

    # 4. Return ModelFnOps
    return tflearn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)
def _half_seq_len_vs_unroll_half_rnn_benchmark(inputs_list_t, sequence_length):
    """Build a benchmark op: peephole LSTM forward pass plus its gradients.

    Args:
        inputs_list_t: list of rank-2 tensors, one per time step; the
            second dimension of the first tensor sets the cell size.
        sequence_length: passed through to `static_rnn`.

    Returns:
        A grouped op covering the final state, all gradients and all outputs.
    """
    _, input_size = inputs_list_t[0].get_shape().as_list()

    cell = contrib_rnn.LSTMCell(
        num_units=input_size,
        use_peepholes=True,
        initializer=init_ops.random_uniform_initializer(-0.01, 0.01, seed=127),
        state_is_tuple=False)

    outputs, final_state = contrib_rnn.static_rnn(
        cell,
        inputs_list_t,
        sequence_length=sequence_length,
        dtype=dtypes.float32)

    # Differentiate every output and the final state w.r.t. all trainables.
    trainable_variables = ops_lib.get_collection(ops_lib.GraphKeys.TRAINABLE_VARIABLES)
    targets = outputs + [final_state]
    gradients = gradients_impl.gradients(targets, trainable_variables)

    dependencies = gradients + outputs
    return control_flow_ops.group(final_state, *dependencies)
def RNN(x, weights, biases):
    """Single-layer LSTM over `n_input` scalar steps, projecting the last output.

    Args:
        x: input tensor; reshaped to `[-1, n_input]` and split into
            `n_input` single-column steps
            (e.g. [had] [a] [general] -> [20] [6] [33]).
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.

    Returns:
        `outputs[-1] @ weights['out'] + biases['out']`.
    """
    flat = tf.reshape(x, [-1, n_input])
    step_inputs = tf.split(flat, n_input, 1)

    # 1-layer LSTM with n_hidden units. Author's note: ~90.60% average
    # accuracy at 50k iterations, versus ~95.20% for a 2-layer MultiRNNCell.
    cell = rnn.BasicLSTMCell(n_hidden)

    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # There is one output per step, but only the last one is used.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def MultiRNN(x, weights, biases, n_layers=1):
    """Stacked LSTM followed by a linear projection of the last output.

    Args:
        x: input tensor, unstacked along axis 1 into `n_steps` tensors.
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.
        n_layers: number of LSTM layers in the stack.

    Returns:
        Tuple `(logits, states)` where `states` is the final state
        returned by `static_rnn`.
    """
    step_inputs = tf.unstack(x, n_steps, 1)

    # A fresh LSTMCell per layer, wrapped in a MultiRNNCell.
    layer_cells = [rnn.LSTMCell(n_hidden) for _ in range(n_layers)]
    stacked_cell = rnn.MultiRNNCell(layer_cells)

    step_outputs, final_states = rnn.static_rnn(stacked_cell, step_inputs, dtype=tf.float32)

    # Linear activation on the last step's output.
    logits = tf.matmul(step_outputs[-1], weights['out']) + biases['out']
    return logits, final_states
def Rnn(x, nunits, nlayer, n_timetamps):
    """Run an LSTM (stacked when `nlayer > 1`) and return its last output.

    Args:
        x: input tensor, unstacked along axis 1 into `n_timetamps` steps.
        nunits: number of hidden units per LSTM layer.
        nlayer: number of layers; values of 1 or less use a single bare cell.
        n_timetamps: number of time steps.

    Returns:
        The output tensor of the final time step.
    """
    step_inputs = tf.unstack(x, n_timetamps, 1)

    if nlayer > 1:
        # Several layers: one LSTMCell per layer inside a MultiRNNCell.
        layer_cells = [rnn.LSTMCell(nunits) for _ in range(nlayer)]
        cell = tf.contrib.rnn.MultiRNNCell(layer_cells)
    else:
        # Single layer: a plain LSTMCell without the MultiRNNCell wrapper.
        cell = rnn.LSTMCell(nunits)

    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype="float32")
    return step_outputs[-1]
def RNN(x, weights, biases):
    """Single-layer LSTM followed by a linear projection of its last output.

    Args:
        x: input tensor, unstacked along axis 1 into `timesteps` tensors
            (per the original comments: (batch_size, timesteps, n_input)).
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.

    Returns:
        `outputs[-1] @ weights['out'] + biases['out']`.
    """
    # In plain words: the time axis becomes the list index, so one rank-3
    # tensor turns into a list of rank-2 tensors. This is what unstack does.
    step_inputs = tf.unstack(x, timesteps, 1)

    # forget_bias=1.0 matches the value used in the course video.
    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Use only the last entry of the outputs for the linear activation.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def lstm_model(features, mode):
    """
    LSTM model structure: LSTM -> flatten -> dense(512) -> dropout -> logits(10)
    :param features: input tensor, unstacked along axis 1 into 28 steps
    :param mode: tf.estimator.ModeKeys value; dropout is active only in TRAIN
    :return: logits tensor with 10 units
    """
    step_inputs = tf.unstack(value=features, num=28, axis=1, name='input')

    cell = rnn.BasicLSTMCell(num_units=128, name='lstm')
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Only the last step's output feeds the dense head.
    flat = layers.flatten(step_outputs[-1], name='flatten')
    hidden = layers.dense(inputs=flat, units=512, name='dense')

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    dropped = layers.dropout(inputs=hidden, rate=0.5, training=is_training, name='dropout')

    logits = layers.dense(inputs=dropped, units=10, name='logits')
    return logits
def model(x, x_w, weights, biases):
    """Blend an LSTM prediction with a two-layer prediction from weather data.

    Args:
        x: sequence input, unstacked along axis 1 into `timesteps` tensors.
        x_w: weather-information input for the fully connected branch.
        weights: dict with 'rnn', 'fc' and 'out' matrices.
        biases: dict with 'rnn', 'fc' and 'out' biases.

    Returns:
        `alpha * y_rnn + (1 - alpha) * y_w`.
    """
    step_inputs = tf.unstack(x, timesteps, 1)

    # 1-layer LSTM with n_hidden units; only its last output is projected.
    cell = rnn.BasicLSTMCell(n_hidden)
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)
    y_rnn = tf.matmul(step_outputs[-1], weights['rnn']) + biases['rnn']

    # Weather branch: two affine layers with no non-linearity in between.
    weather_hidden = tf.matmul(x_w, weights['fc']) + biases['fc']
    y_w = tf.matmul(weather_hidden, weights['out']) + biases['out']

    # Weighted mix of the two branches.
    rnn_part = y_rnn * alpha
    weather_part = y_w * (1.0 - alpha)
    return tf.add(rnn_part, weather_part)
def RNN(x, weights, biases):
    """Single-layer LSTM followed by a linear projection of its last output.

    Args:
        x: rank-3 input (per the original comments:
            (batch_size, n_steps, n_input)).
        weights: dict whose "out" entry is the output projection matrix.
        biases: dict whose "out" entry is the output bias.

    Returns:
        `outputs[-1] @ weights["out"] + biases["out"]`.
    """
    # Swap the first two axes, flatten to rows of n_input values, then cut
    # the rows into n_steps equal pieces -- one tensor per time step.
    time_major = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, n_input])
    step_inputs = tf.split(flat, n_steps, 0)

    cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear activation on the last step's output.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights["out"]) + biases["out"]
def RNN(x, weights, biases, num_hidden, timesteps):
    """LSTM whose outputs from ALL time steps feed one linear layer.

    Args:
        x: input tensor, unstacked along axis 1 into `timesteps` tensors
            (per the original comments: (batch_size, timesteps, n_input)).
        weights: projection matrix multiplied with the concatenated
            outputs, whose last dimension is `timesteps * num_hidden`.
        biases: bias added to the projection.
        num_hidden: number of LSTM units.
        timesteps: number of time steps to unroll.

    Returns:
        `concat(outputs, axis=1) @ weights + biases`.
    """
    # static_rnn needs a Python list with one tensor per time step.
    x = tf.unstack(x, timesteps, 1)

    lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, reuse=None)
    stacked_lstm = tf.contrib.rnn.MultiRNNCell([lstm_cell])

    outputs, _ = rnn.static_rnn(stacked_lstm, x, dtype=tf.float32)

    # Join the per-step outputs along the feature axis so each row holds the
    # outputs of exactly one sample. The former tf.reshape on the Python list
    # stacked the steps first and then reinterpreted that step-first buffer,
    # which interleaved different samples in one row when batch_size > 1.
    outputs = tf.concat(outputs, 1)

    # Linear activation over the outputs of every time step.
    return tf.matmul(outputs, weights) + biases
def questionLSTM(self, q, q_real_len, reuse=False, scope="questionLSTM"):
    """Embed each question as the LSTM output at its last real token.

    Args
        q: zero padded questions (documented as shape [batch_size, q_max_len])
        q_real_len: original question lengths (documented as shape
            [batch_size, 1])
        reuse: forwarded to the `BasicLSTMCell` constructor
        scope: name of the variable scope wrapping the graph
    Returns
        embedded_q: rows gathered from the flattened LSTM outputs
            (documented as shape [batch_size, hidden_dim])
    """
    with tf.variable_scope(scope):
        word_vectors = tf.nn.embedding_lookup(self.word_embed_matrix, q)
        step_inputs = tf.unstack(word_vectors, num=self.q_max_len, axis=1)

        cell = rnn.BasicLSTMCell(self.hidden_dim, reuse=reuse)
        step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

        # Stack the per-step outputs and swap the first two axes so that the
        # step axis comes second.
        stacked = tf.stack(step_outputs)
        batch_major = tf.transpose(stacked, [1, 0, 2])

        # Flat row index of the last real step of every question.
        # NOTE(review): if q_real_len really is [batch_size, 1], this addition
        # broadcasts against the rank-1 range -- confirm the intended shape.
        row_starts = tf.range(0, self.batch_size) * self.q_max_len
        index = row_starts + (q_real_len - 1)

        flat_outputs = tf.reshape(batch_major, [-1, self.hidden_dim])
        embedded_q = tf.gather(flat_outputs, index)
        return embedded_q
def sentence_encoder(self, enc_inputs):
    """Encode every utterance with the shared text encoder cell.

    Args:
        enc_inputs: sequence of utterances; each entry is the list of
            per-step tensors that `static_rnn` consumes (per the original
            comments, overall dimension (max_utter, max_len, batch_size)).

    Returns:
        List with the final encoder state of each utterance, in input order.
    """
    utterance_states = []
    with tf.variable_scope(self.enc_scope_text) as scope:
        for utterance_idx in range(len(enc_inputs)):
            # Variables are created for the first utterance and shared by the rest.
            if utterance_idx > 0:
                scope.reuse_variables()
            # Only the final state is kept; per the original comments it has
            # dimension (batch_size, cell_size).
            _, final_state = rnn.static_rnn(self.enc_cells_text,
                                            enc_inputs[utterance_idx],
                                            scope=scope,
                                            dtype=tf.float32)
            utterance_states.append(final_state)
    # One state per utterance: (max_utter, batch_size, cell_size).
    return utterance_states
def LSTM_Model(x, weights, biases):
    """Dropout-wrapped single-layer LSTM with a linear output projection.

    Args:
        x: input tensor, unstacked along axis 1 into `timesteps` tensors
            (per the original comments: (batch_size, timesteps, n_input)).
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.

    Returns:
        `outputs[-1] @ weights['out'] + biases['out']`.
    """
    # static_rnn needs a Python list with one tensor per time step.
    step_inputs = tf.unstack(x, timesteps, 1)

    # A single LSTM layer (the original author also sketched a multi-layer
    # variant), wrapped with dropout.
    base_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    # NOTE(review): 0.5 is passed positionally and unconditionally -- confirm
    # which keep-probability it sets and whether it should be disabled at inference.
    cell = tf.contrib.rnn.DropoutWrapper(base_cell, 0.5)

    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear activation on the last step's output.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def RNN(x):
    """Single-layer LSTM plus a freshly created linear output layer.

    Args:
        x: input tensor, unstacked along axis 1 into `timesteps` tensors
            (per the original comments: (batch_size, timesteps, n_input)).

    Returns:
        The tensor `outputs[-1] @ weights + biases`, created with the op
        name "result_list".
    """
    # static_rnn needs a Python list with one tensor per time step.
    step_inputs = tf.unstack(x, timesteps, 1)

    # NOTE(review): the original indentation was lost; the scope is assumed to
    # cover only the LSTM cell and its unrolling -- confirm the boundary.
    with tf.variable_scope('lstm1', reuse=tf.AUTO_REUSE):
        cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
        step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Output layer parameters.
    weights = tf.Variable(tf.random_normal([num_hidden, num_classes]))
    biases = tf.Variable(tf.random_normal([num_classes]))

    # Linear activation on the last step's output.
    projected = tf.matmul(step_outputs[-1], weights)
    return tf.add(projected, biases, name="result_list")
def RNN(x, weights, biases):
    """Stack of `n_layers` LSTM layers with a linear output projection.

    Args:
        x: input tensor, unstacked along axis 1 into `n_steps` tensors
            (per the original comments: (batch_size, n_steps, n_input)).
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.

    Returns:
        `outputs[-1] @ weights['out'] + biases['out']`.
    """
    # static_rnn needs a Python list with one tensor per time step.
    step_inputs = tf.unstack(x, n_steps, 1)

    # One BasicLSTMCell (forget_bias=0.0) per layer.
    layer_cells = [rnn.BasicLSTMCell(n_hidden, forget_bias=0.0) for _ in range(n_layers)]
    multi_lstm_cell = rnn.MultiRNNCell(layer_cells)

    step_outputs, _ = rnn.static_rnn(multi_lstm_cell, step_inputs, dtype=tf.float32)

    # Linear activation on the last step's output.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def RNN(x, weights, biases):
    """Three stacked LSTM layers with a linear output projection.

    Args:
        x: input tensor, unstacked along axis 1 into `timesteps` tensors
            (per the original comments: (batch_size, timesteps, n_input)).
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.

    Returns:
        `outputs[-1] @ weights['out'] + biases['out']`.
    """
    # static_rnn needs a Python list with one tensor per time step.
    step_inputs = tf.unstack(x, timesteps, 1)

    # Three independent LSTM layers stacked on top of each other.
    cells = [rnn.BasicLSTMCell(num_hidden, forget_bias=1.0) for _ in range(3)]
    multirnn = rnn.MultiRNNCell(cells)

    step_outputs, _ = rnn.static_rnn(multirnn, step_inputs, dtype=tf.float32)

    # Linear activation on the last step's output.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def recurrent_neural_network(x):
    """Single-layer LSTM classifier: returns logits computed from the last step.

    Args:
        x: rank-3 input; its first two axes are swapped, it is flattened to
            rows of `chunk_size` values and split into `n_chunks` steps.

    Returns:
        `outputs[-1] @ weights + biases` for a freshly created output layer.
    """
    # Output layer: (last_output * weights) + biases
    out_weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    out_biases = tf.Variable(tf.random_normal([n_classes]))
    layer = {'weights': out_weights, 'biases': out_biases}

    # Rearrange into the list-of-steps form that static_rnn expects.
    steps = tf.transpose(x, [1, 0, 2])
    steps = tf.reshape(steps, [-1, chunk_size])
    steps = tf.split(steps, n_chunks, 0)

    cell = rnn.BasicLSTMCell(rnn_size)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    logits = tf.matmul(step_outputs[-1], layer['weights']) + layer['biases']
    return logits
def RAE(x, probability, num_hidden_nodes):
    """LSTM with output dropout followed by a dense layer of `signal_length` units.

    Args:
        x: rank-3 input; its first two axes are swapped before unstacking
            (per the original comments: batch_size, timesteps, num_input).
        probability: `output_keep_prob` of the dropout wrapper.
        num_hidden_nodes: number of LSTM units.

    Returns:
        Output of the dense 'output_layer' applied to the last LSTM output.
    """
    # Swap the first two axes and unstack along the new leading axis, giving
    # one tensor per step.
    time_major = tf.transpose(x, perm=[1, 0, 2])
    step_inputs = tf.unstack(time_major)

    base_cell = rnn.LSTMCell(num_hidden_nodes, forget_bias=1.0)
    cell = rnn.DropoutWrapper(cell=base_cell, output_keep_prob=probability)

    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)
    last_output = step_outputs[-1]
    print(last_output.get_shape())

    # Linear activation on the last step's output.
    logit = tf.layers.dense(
        last_output,
        signal_length,
        activation=None,
        use_bias=True,
        name='output_layer',
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
        bias_initializer=tf.ones_initializer())
    print(logit.get_shape())
    return logit
def twoLayerLSTM(x, weights, biases):
    """Two-layer LSTM over `seq_length` scalar steps, projecting the last output.

    Args:
        x: input tensor; reshaped to `[-1, seq_length]` and split into
            `seq_length` single-column steps
            (e.g. [had] [a] [general] -> [20] [6] [33]).
        weights: dict whose 'out' entry is the output projection matrix.
        biases: dict whose 'out' entry is the output bias.

    Returns:
        `outputs[-1] @ weights['out'] + biases['out']`.
    """
    flat = tf.reshape(x, [-1, seq_length])
    step_inputs = tf.split(flat, seq_length, 1)

    # Adds an op named "words" to the graph holding the split inputs; the
    # Python handle itself is not used again in this function.
    words = tf.identity(step_inputs, name="words")

    # 2-layer LSTM, each layer has num_hidden units.
    layers = [rnn.BasicLSTMCell(num_hidden), rnn.BasicLSTMCell(num_hidden)]
    stacked_cell = rnn.MultiRNNCell(layers)

    step_outputs, _ = rnn.static_rnn(stacked_cell, step_inputs, dtype=tf.float32)

    # There are seq_length outputs but only the last one is used.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def classifier(self, x, training=True, reuse=None, name=None):
    """LSTM classifier over 28x28 inputs; returns sigmoid probabilities and logits.

    Args:
        x: input tensor, reshaped to `[-1, 28, 28]` and unstacked along
            axis 1 into 28 steps.
        training: not used by this method.
        reuse: forwarded to the LSTM cell and to the dense layer.
        name: prefix for the "_logits" / "_probs" op names.
            NOTE(review): the default None fails at `name + "_logits"`, so
            callers presumably always pass a string -- verify.

    Returns:
        Tuple `(probs, logits)`.
    """
    # Reshape and unstack for the LSTM layer.
    images = tf.reshape(x, [-1, 28, 28])
    step_inputs = tf.unstack(images, 28, 1)

    # LSTM cell wrapped with output dropout.
    # NOTE(review): the keep probabilities are fixed and do not depend on
    # `training` -- confirm that is intended.
    lstm = rnn.BasicLSTMCell(128,
                             activation=tf.nn.leaky_relu,
                             reuse=reuse,
                             name='lstm_cells')
    cells = rnn.DropoutWrapper(lstm, input_keep_prob=1.0, output_keep_prob=0.8)

    # Per-step outputs; the final state is not needed.
    h_list, _ = rnn.static_rnn(cells, step_inputs, dtype=tf.float32)

    # Project the last step's output to label_count units.
    h = dense(h_list[-1], self.label_count, reuse=reuse, name='c_dense_1')

    # Give the outputs stable, prefixed op names.
    logits = tf.identity(h, name=name + "_logits")
    probs = tf.nn.sigmoid(logits, name=name + "_probs")
    return probs, logits
def _lstm_model(X, y):
    """Model function: stacked LSTM, dense layers, linear regression, decayed LR.

    Args:
        X: input tensor, unstacked along axis 1 into `time_steps` steps.
        y: regression targets handed to `tflearn.models.linear_regression`.

    Returns:
        Tuple `(prediction, loss, train_op)`.
    """
    cell_stack = rnn.MultiRNNCell(lstm_cells(rnn_layers), state_is_tuple=True)
    step_inputs = tf.unstack(X, num=time_steps, axis=1)
    step_outputs, _ = rnn.static_rnn(cell_stack, step_inputs, dtype=dtypes.float32)

    # Only the last step's output feeds the dense head.
    features = dnn_layers(step_outputs[-1], dense_layers)
    prediction, loss = tflearn.models.linear_regression(features, y)

    global_step = tf.contrib.framework.get_global_step()
    # Learning rate decays by a factor of 0.9 per 1000 steps, continuously.
    decayed_learning_rate = tf.train.exponential_decay(
        learning_rate,
        tf.contrib.framework.get_global_step(),
        decay_steps=1000,
        decay_rate=0.9,
        staircase=False,
        name=None)
    train_op = tf.contrib.layers.optimize_loss(
        loss,
        global_step,
        optimizer=optimizer,
        learning_rate=decayed_learning_rate)

    print('learning_rate', learning_rate)
    return prediction, loss, train_op
def myrnn(x):
    """Run a ReLU ``BasicRNNCell`` over ``x`` and project the last step.

    ``x`` is transposed with ``(1, 0, 2)``, flattened to rows of 28 values
    and split into 28 per-step inputs (assumes ``x`` is
    ``(batch, 28, 28)`` -- TODO confirm against the caller). The output of
    the final step goes through a ReLU dense layer with 10 units.

    The initial state is left to ``static_rnn`` (via ``dtype``), which
    builds a zero state matching the actual batch size, so the graph is no
    longer tied to batches of exactly 128 examples.

    Returns:
        The dense layer's output tensor.
    """
    time_steps = 28
    output_size = 10
    rnn_size = 28

    x = tf.transpose(x, (1, 0, 2))
    x = tf.reshape(x, (-1, rnn_size))
    x = tf.split(x, time_steps, 0)

    # Create a basic RNN cell.
    cell = rnn.BasicRNNCell(rnn_size, activation=tf.nn.relu)

    # No explicit initial_state: static_rnn creates zeros sized to the
    # incoming batch, which equals the old fixed zero state for batch 128.
    outputs, _ = rnn.static_rnn(cell, x, dtype=tf.float32)

    return tf.layers.dense(outputs[-1], output_size,
                           activation=tf.nn.relu, use_bias=True)
def recurrent_neural_network_model(x):
    """Single-layer LSTM followed by a linear output layer.

    ``x`` is rearranged into ``n_chunks`` per-step inputs of width
    ``chunk_size``; the last LSTM output is mapped to ``n_classes`` values
    as ``output @ weights + biases``.

    ``rnn_size``, ``n_classes``, ``chunk_size`` and ``n_chunks`` are read
    from the enclosing scope.
    """
    # Output layer parameters: (input_data * weights) + biases.
    out_weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    out_biases = tf.Variable(tf.random_normal([n_classes]))

    # Swap the first two axes, flatten, then cut into one tensor per chunk.
    swapped = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(swapped, [-1, chunk_size])
    chunks = tf.split(flat, n_chunks, 0)

    cell = rnn.BasicLSTMCell(rnn_size)
    step_outputs, _ = rnn.static_rnn(cell, chunks, dtype=tf.float32)

    projected = tf.matmul(step_outputs[-1], out_weights)
    return tf.add(projected, out_biases)
def recurrentNeuralNetwork(x):
    """Single-layer LSTM followed by a linear output layer.

    ``x`` is rearranged into ``n_chunks`` per-step inputs of width
    ``chunk_size``; the last LSTM output is mapped to ``n_classes`` values.

    ``rnn_size``, ``n_classes``, ``chunk_size`` and ``n_chunks`` are read
    from the enclosing scope.

    Returns:
        ``outputs[-1] @ weights + biases``.
    """
    layer = {
        # Shape of the output weight matrix.
        'weights': tf.Variable(tf.truncated_normal(
            [rnn_size, n_classes], stddev=0.1)),
        # Biases added after the weights are multiplied.
        'biases': tf.Variable(tf.constant(0.1, shape=[n_classes])),
    }

    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, chunk_size])
    x = tf.split(x, n_chunks, 0)

    lstm_cell = rnn.BasicLSTMCell(rnn_size)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # The bias belongs on the matmul result. Adding it to the weight matrix
    # before multiplying (as was done previously) scales it by the
    # activations instead of acting as an offset.
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']
    return output
def RNN(x, weights, biases, num_hidden, timesteps, useLSTM):
    """Run a basic RNN or LSTM cell over ``x`` and project the last output.

    ``x`` is unstacked along axis 0 into ``timesteps`` tensors, which are
    fed to ``static_rnn``. The final step's output is multiplied by
    ``weights['out']`` and offset by ``biases['out']``.

    NOTE(review): the original comments described a
    ``(batch_size, timesteps, n_input)`` input, which would call for
    unstacking along axis 1 -- confirm the layout the caller passes.

    Args:
        x: Input tensor whose first dimension has size ``timesteps``.
        weights: Mapping with an ``'out'`` weight matrix.
        biases: Mapping with an ``'out'`` bias.
        num_hidden: Number of units in the recurrent cell.
        timesteps: Number of tensors to unstack ``x`` into.
        useLSTM: ``False`` selects ``BasicRNNCell``; anything else selects
            ``LSTMCell``.
    """
    x = tf.unstack(x, timesteps, 0)

    # Debug output.
    print("timesteps = ", timesteps)
    print("x shape = ", tf.shape(x))
    print(x)

    # Compared with `== False` on purpose: only a value equal to False
    # picks the basic RNN cell.
    if useLSTM == False:
        make_cell = tf.contrib.rnn.BasicRNNCell
    else:
        make_cell = tf.contrib.rnn.LSTMCell
    rnn_cell = make_cell(num_hidden)

    step_outputs, _ = rnn.static_rnn(rnn_cell, x, dtype=tf.float32)

    # Linear activation on the last output of the unrolled loop.
    return tf.matmul(step_outputs[-1], weights['out']) + biases['out']
def RNN(x, weights, biases):
    """Single-layer LSTM with a linear projection of the last output.

    ``x`` (expected, per the original comments, to be
    ``(batch_size, n_steps, n_input)``) is converted into a list of
    ``n_steps`` tensors of shape ``(batch_size, n_input)`` for
    ``static_rnn``.

    ``n_input``, ``n_steps`` and ``n_hidden`` are read from the enclosing
    scope.
    """
    # Swap batch_size and n_steps.
    time_major = tf.transpose(x, [1, 0, 2])
    # Flatten to (n_steps * batch_size, n_input).
    flat = tf.reshape(time_major, [-1, n_input])
    # Cut into n_steps tensors of shape (batch_size, n_input).
    steps = tf.split(flat, n_steps, 0)

    cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)
    print(step_outputs)

    # Linear activation on the last output of the unrolled loop.
    return tf.matmul(step_outputs[-1], weights['out']) + biases['out']
def RNN(x, weights, biases):
    """Two-layer stacked LSTM with a linear projection of the last output.

    ``x`` is flattened to ``[-1, n_input]`` and split column-wise into
    ``n_input`` single-column inputs (e.g. [had] [a] [general] ->
    [20] [6] [33]).

    ``n_input`` and ``n_hidden`` are read from the enclosing scope.
    """
    flat = tf.reshape(x, [-1, n_input])
    columns = tf.split(flat, n_input, 1)

    # To try a single layer instead, replace the stack with
    # rnn.BasicLSTMCell(n_hidden).
    layers = [rnn.BasicLSTMCell(n_hidden) for _ in range(2)]
    rnn_cell = rnn.MultiRNNCell(layers)

    step_outputs, _ = rnn.static_rnn(rnn_cell, columns, dtype=tf.float32)

    # There are n_input outputs; only the last one is used.
    final_output = step_outputs[-1]
    return tf.matmul(final_output, weights['out']) + biases['out']
def recurrent_neural_network(x, cv_i):
    """Single-layer LSTM built under variable scopes suffixed with ``cv_i``.

    The cell is created under ``'cell_def<cv_i>'`` and unrolled under
    ``'rnn_def<cv_i>'``. The last output is projected to ``n_classes``
    values.

    ``rnn_size``, ``n_classes``, ``chunk_size`` and ``n_chunks`` are read
    from the enclosing scope.

    Args:
        x: Input tensor, rearranged into ``n_chunks`` per-step inputs of
            width ``chunk_size``.
        cv_i: Value appended (via ``str``) to both scope names.
    """
    out_weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    out_biases = tf.Variable(tf.random_normal([n_classes]))

    swapped = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(swapped, [-1, chunk_size])
    chunks = tf.split(flat, n_chunks, 0)

    scope_suffix = str(cv_i)

    # Define the LSTM cell.
    with tf.variable_scope('cell_def' + scope_suffix):
        cell = rnn.BasicLSTMCell(rnn_size)

    # Unroll the cell over the chunks.
    with tf.variable_scope('rnn_def' + scope_suffix):
        step_outputs, _ = rnn.static_rnn(cell, chunks, dtype=tf.float32)

    return tf.matmul(step_outputs[-1], out_weights) + out_biases
def model(X, W, B, lstm_size):
    """Single-layer LSTM with a linear projection of the last output.

    Args:
        X: Input, expected (per the original comments) to be
            ``(batch_size, time_step_size, input_vec_size)``.
        W: Output weight matrix.
        B: Output bias.
        lstm_size: Number of LSTM units. It is also used as the row width
            when flattening the input, so it has to equal the input vector
            size.

    ``time_step_size`` is read from the enclosing scope.

    Returns:
        ``(logits, state_size)``: the projected last output and the cell's
        ``state_size``.
    """
    # Swap time_step_size and batch_size:
    # (time_step_size, batch_size, input_vec_size), e.g. [28, 128, 28].
    time_major = tf.transpose(X, [1, 0, 2])

    # (time_step_size * batch_size, input_vec_size); each row is the input
    # for one LSTM step (lstm_size == input_vec_size).
    rows = tf.reshape(time_major, [-1, lstm_size])

    # time_step_size tensors, each (batch_size, input_vec_size),
    # e.g. 28 arrays of shape (128, 28).
    step_inputs = tf.split(rows, time_step_size, 0)

    lstm = rnn.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=True)

    # static_rnn returns one output per time step, each
    # (batch_size, lstm_size). Only the last step matters here, so
    # step_outputs[-1] is taken below.
    step_outputs, _ = rnn.static_rnn(lstm, step_inputs, dtype=tf.float32)

    # Linear activation on the last output, plus the state size that is
    # needed to initialise the state.
    logits = tf.matmul(step_outputs[-1], W) + B
    return logits, lstm.state_size
def RNN(x, weights, biases):
    """Single-layer LSTM with a linear projection; prints debug shapes.

    ``x`` (expected, per the original comments, to be
    ``(batch_size, timesteps, n_input)``) is unstacked along axis 1 into
    ``timesteps`` tensors for ``static_rnn``. The shapes of the input list
    and of the output list, wrapped in NumPy arrays, are printed between
    two blank lines.

    ``timesteps`` and ``num_hidden`` are read from the enclosing scope.
    """
    print()
    steps = tf.unstack(x, timesteps, 1)

    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Debug: shape of the list of per-step inputs.
    print(np.array(steps).shape)

    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    # Debug: shape of the list of per-step outputs.
    print(np.array(step_outputs).shape)
    print()

    # Linear activation on the last output of the unrolled loop.
    return tf.matmul(step_outputs[-1], weights['out']) + biases['out']
def RNN(x, weights, biases):
    """Single-layer LSTM whose logits and softmax are exposed as named ops.

    ``x`` (expected, per the original comments, to be
    ``(batch_size, timesteps, n_input)``) is unstacked along axis 1 into
    ``timesteps`` tensors for ``static_rnn``. The projected last output is
    registered as op ``'outt'`` and its softmax as op ``'Y1'``.

    ``timesteps`` and ``num_hidden`` are read from the enclosing scope.

    Returns:
        The logits tensor (the ``'outt'`` op), not the softmax.
    """
    steps = tf.unstack(x, timesteps, 1)

    cell = tf.nn.rnn_cell.LSTMCell(num_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    # Linear activation on the last output of the unrolled loop.
    projected = tf.matmul(step_outputs[-1], weights['out'])
    logits = tf.add(projected, biases['out'], name='outt')

    # Not returned: created so that a softmax op named 'Y1' exists.
    tf.nn.softmax(logits, name='Y1')
    return logits
def _create_loss(self):
    '''
    Build the risk-estimation loss.

    The network output is the planned position to hold until the next day.
    With self.y being the next day's change rate, two kinds of money are
    lost:
    - self.y * self.position is the trade loss;
    - cost * self.position is a constant loss (tax, and e.g. the missed
      profit of buying national debt instead).
    The loss is therefore
    100 * (- self.y * self.position + cost * self.position)
        = -100 * ((self.y - cost) * self.position)

    Sets self.position, self.avg_position and self.loss.
    :return: None
    '''
    steps = tf.unstack(self.x, self.step, 1)
    cell = rnn.LSTMCell(self.hidden_size, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    raw_signal = tf.matmul(step_outputs[-1], self.weights['out']) + self.biases['out']
    normalized = self.batch_norm_layer(raw_signal, scope="activation_batch_norm")

    # relu6 limits the value to [0, 6]; dividing by 6 rescales it to [0, 1].
    self.position = tf.nn.relu6(normalized) / 6.
    self.avg_position = tf.reduce_mean(self.position)

    excess_return = self.y - self.cost
    risk = tf.multiply(excess_return, self.position, name="estimated_risk")
    self.loss = -100. * tf.reduce_mean(risk)
def RNN(x):
    """Single-layer LSTM followed by a fully connected output layer.

    ``x`` (expected, per the original comments, to be
    ``(batch_size, timesteps, n_input)``) is unstacked along axis 1 into
    ``timesteps`` tensors for ``static_rnn``. The last output goes through
    a ``num_classes``-unit fully connected layer with no activation.

    ``timesteps``, ``num_hidden`` and ``num_classes`` are read from the
    enclosing scope.
    """
    steps = tf.unstack(x, timesteps, 1)

    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    # Linear activation on the last output of the unrolled loop.
    return tf.contrib.layers.fully_connected(
        step_outputs[-1], num_classes, activation_fn=None)
def RNN(x, weights, biases):
    """Two-layer stacked LSTM with a linear projection of the last output.

    ``x`` is flattened to ``[-1, n_input]`` and split column-wise into
    ``n_input`` single-column inputs (e.g. [had] [a] [general] ->
    [20] [6] [33]).

    ``n_input`` and ``n_hidden`` are read from the enclosing scope.
    """
    flat = tf.reshape(x, [-1, n_input])
    columns = tf.split(flat, n_input, 1)

    # Original author's measurements at 50k iterations:
    #   2-layer LSTM, n_hidden units each: average accuracy 95.20%
    #   1-layer LSTM, n_hidden units:      average accuracy 90.60%
    # To try the single layer, replace the stack below with
    # rnn.BasicLSTMCell(n_hidden).
    first_layer = rnn.BasicLSTMCell(n_hidden)
    second_layer = rnn.BasicLSTMCell(n_hidden)
    rnn_cell = rnn.MultiRNNCell([first_layer, second_layer])

    step_outputs, _ = rnn.static_rnn(rnn_cell, columns, dtype=tf.float32)

    # There are n_input outputs; only the last one is used.
    return tf.matmul(step_outputs[-1], weights['out']) + biases['out']
def rnn_seq2seq(encoder_inputs,
                decoder_inputs,
                encoder_cell,
                decoder_cell=None,
                dtype=dtypes.float32,
                scope=None):
    """Builds an RNN sequence-to-sequence model.

    The encoder is unrolled over `encoder_inputs`; its final state becomes
    the initial state handed to `rnn_decoder`.

    Args:
      encoder_inputs: List of tensors, inputs for the encoder.
      decoder_inputs: List of tensors, inputs for the decoder.
      encoder_cell: RNN cell used for the encoder.
      decoder_cell: RNN cell used for the decoder; `encoder_cell` is used
        when this is None (or otherwise falsy).
      dtype: Type the encoder state is initialized with.
      scope: Variable scope to use; defaults to "rnn_seq2seq".

    Returns:
      Whatever `rnn_decoder` returns: list of tensors for outputs and
      states for the training and sampling sub-graphs.
    """
    scope_name = scope or "rnn_seq2seq"
    with vs.variable_scope(scope_name):
        encoder_result = rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)
        final_encoder_state = encoder_result[1]
        cell_for_decoding = decoder_cell or encoder_cell
        return rnn_decoder(decoder_inputs, final_encoder_state,
                           cell_for_decoding)
def __init__(self, config, training=False):
    """Build the recurrent sequence-model graph.

    The input placeholder is time-major: ``[time_batch_len, batch,
    input_dim]``. It is unstacked into one tensor per time step, run
    through a stack of recurrent cells, and every step's output is
    projected back to ``input_dim`` logits.

    Args:
        config: Object providing ``time_batch_len``, ``input_dim``,
            ``hidden_size``, ``num_layers``, ``dropout_prob``,
            ``input_dropout_prob``, ``cell_type``, ``learning_rate`` and
            ``learning_rate_decay``.
        training: When true, dropout is applied to the input and to each
            cell's output. Both "dropout" config values are passed to
            TensorFlow as keep probabilities.

    Raises:
        Exception: If either dropout value is outside ``(0, 1]`` or
            ``cell_type`` is not "vanilla", "gru" or "lstm".
    """
    self.config = config
    self.time_batch_len = config.time_batch_len
    self.input_dim = input_dim = config.input_dim
    hidden_size = config.hidden_size
    num_layers = config.num_layers
    dropout_prob = config.dropout_prob
    input_dropout_prob = config.input_dropout_prob
    cell_type = config.cell_type

    self.seq_input = \
        tf.placeholder(tf.float32, shape=[self.time_batch_len, None, input_dim])

    if (dropout_prob <= 0.0 or dropout_prob > 1.0):
        raise Exception("Invalid dropout probability: {}".format(dropout_prob))

    if (input_dropout_prob <= 0.0 or input_dropout_prob > 1.0):
        raise Exception("Invalid input dropout probability: {}".format(input_dropout_prob))

    # setup variables
    with tf.variable_scope("rnnlstm"):
        output_W = tf.get_variable("output_w", [hidden_size, input_dim])
        output_b = tf.get_variable("output_b", [input_dim])
        self.lr = tf.constant(config.learning_rate, name="learning_rate")
        self.lr_decay = tf.constant(config.learning_rate_decay, name="learning_rate_decay")

    def create_cell(input_size):
        # Build one recurrent layer of the configured type.
        if cell_type == "vanilla":
            cell_class = rnn_cell.BasicRNNCell
        elif cell_type == "gru":
            # NOTE(review): confirm that rnn_cell really exposes
            # BasicGRUCell; stock TensorFlow names this class GRUCell.
            cell_class = rnn_cell.BasicGRUCell
        elif cell_type == "lstm":
            cell_class = rnn_cell.BasicLSTMCell
        else:
            raise Exception("Invalid cell type: {}".format(cell_type))

        cell = cell_class(hidden_size, input_size = input_size)
        if training:
            return rnn_cell.DropoutWrapper(cell, output_keep_prob = dropout_prob)
        else:
            return cell

    if training:
        self.seq_input_dropout = tf.nn.dropout(self.seq_input, keep_prob = input_dropout_prob)
    else:
        self.seq_input_dropout = self.seq_input

    # The first layer consumes the raw input; later layers consume the
    # previous layer's hidden state.
    self.cell = rnn_cell.MultiRNNCell(
        [create_cell(input_dim)] + [create_cell(hidden_size) for i in range(1, num_layers)])

    # The placeholder is [time, batch, dim], so the batch size is axis 1.
    # Axis 0 is time_batch_len and would size the zero state by the number
    # of time steps.
    batch_size = tf.shape(self.seq_input_dropout)[1]
    self.initial_state = self.cell.zero_state(batch_size, tf.float32)
    inputs_list = tf.unstack(self.seq_input_dropout)

    # rnn outputs a list of [batch_size x H] outputs
    outputs_list, self.final_state = rnn.static_rnn(self.cell, inputs_list,
                                                    initial_state=self.initial_state)

    outputs = tf.stack(outputs_list)
    outputs_concat = tf.reshape(outputs, [-1, hidden_size])
    logits_concat = tf.matmul(outputs_concat, output_W) + output_b
    logits = tf.reshape(logits_concat, [self.time_batch_len, -1, input_dim])

    # probabilities of each note
    self.probs = self.calculate_probs(logits)
    self.loss = self.init_loss(logits, logits_concat)
    self.train_step = tf.train.RMSPropOptimizer(self.lr, decay = self.lr_decay) \
                        .minimize(self.loss)
out_weights = tf.Variable(tf.random_normal([num_units, n_classes])) out_bias = tf.Variable(tf.random_normal([n_classes])) # defining placeholders # input image placeholder x = tf.placeholder("float", [None, time_steps, n_input]) # input label placeholder y = tf.placeholder("float", [None, n_classes]) # processing the input tensor from [batch_size,n_steps,n_input] to "time_steps" number of [batch_size,n_input] tensors input = tf.unstack(x, time_steps, 1) # defining the network lstm_layer = rnn.BasicLSTMCell(num_units, forget_bias=1) outputs, _ = rnn.static_rnn(lstm_layer, input, dtype="float32") # converting last output of dimension [batch_size,num_units] to [batch_size,n_classes] by out_weight multiplication prediction = tf.matmul(outputs[-1], out_weights) + out_bias # loss_function loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) # optimization opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss) # model evaluation correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # initialize variables init = tf.global_variables_initializer() with tf.Session() as sess:
def bidirectional_rnn(cell_fw,
                      cell_bw,
                      inputs,
                      initial_state_fw=None,
                      initial_state_bw=None,
                      dtype=None,
                      sequence_length=None,
                      scope=None):
    """Creates a bidirectional recurrent neural network.

    Similar to the unidirectional case (rnn) but takes input and builds
    independent forward and backward RNNs with the final forward and
    backward outputs depth-concatenated, such that the output will have the
    format [time][batch][cell_fw.output_size + cell_bw.output_size]. The
    input_size of forward and backward cell must match. The initial state
    for both directions is zero by default (but can be set optionally) and
    no intermediate states are ever returned -- the network is fully
    unrolled for the given (passed in) length(s) of the sequence(s) or
    completely unrolled if length(s) is not given.

    Args:
      cell_fw: An instance of RNNCell, to be used for forward direction.
      cell_bw: An instance of RNNCell, to be used for backward direction.
      inputs: A length T list of inputs, each a tensor of shape
        [batch_size, cell.input_size].
      initial_state_fw: (optional) An initial state for the forward RNN.
        This must be a tensor of appropriate type and shape
        [batch_size x cell.state_size].
      initial_state_bw: (optional) Same as for initial_state_fw.
      dtype: (optional) The data type for the initial state. Required if
        either of the initial states are not provided.
      sequence_length: (optional) An int64 vector (tensor) of size
        [batch_size], containing the actual lengths for each of the
        sequences.
      scope: VariableScope for the created subgraph; defaults to "BiRNN"

    Returns:
      A pair (outputs, state) where:
        outputs is a length T list of outputs (one for each input), which
          are depth-concatenated forward and backward outputs
        state is the concatenated final state of the forward and backward
          RNN

    Raises:
      TypeError: If "cell_fw" or "cell_bw" is not an instance of RNNCell,
        or if "inputs" is not a list.
      ValueError: If inputs is an empty list.
    """
    if not isinstance(cell_fw, contrib_rnn.RNNCell):
        raise TypeError('cell_fw must be an instance of RNNCell')
    if not isinstance(cell_bw, contrib_rnn.RNNCell):
        raise TypeError('cell_bw must be an instance of RNNCell')
    if not isinstance(inputs, list):
        raise TypeError('inputs must be a list')
    if not inputs:
        raise ValueError('inputs must not be empty')

    name = scope or 'BiRNN'

    # Forward direction
    with vs.variable_scope(name + '_FW'):
        output_fw, state_fw = contrib_rnn.static_rnn(
            cell_fw, inputs, initial_state_fw, dtype, sequence_length)

    # Backward direction: run on the reversed sequence, then reverse the
    # outputs back so that they line up with the forward outputs.
    with vs.variable_scope(name + '_BW'):
        tmp, state_bw = contrib_rnn.static_rnn(
            cell_bw, _reverse_seq(inputs, sequence_length),
            initial_state_bw, dtype, sequence_length)
    output_bw = _reverse_seq(tmp, sequence_length)

    # Concat each of the forward/backward outputs along the depth axis.
    # Keyword arguments spell out the (values, axis) order of the
    # TensorFlow 1.x concat that ships alongside static_rnn; the positional
    # form used before passed the axis where the values are expected.
    outputs = [
        array_ops_.concat(values=[fw, bw], axis=1)
        for fw, bw in zip(output_fw, output_bw)
    ]

    return outputs, array_ops_.concat(values=[state_fw, state_bw], axis=1)
tf.placeholder(tf.float32, (None,) + (size if isinstance(size, tuple) else (size,))) for size in cell.state_size ) for cell in net.cells ) + (tf.placeholder(tf.float32, (None, output_size)),) # out, finstate = rnn.dynamic_rnn(net, (x, y, modulation), initial_state=state, dtype=tf.float32) x_list = tf.unstack(x, num_steps, 1) y_list = tf.unstack(y, num_steps, 1) modulation_list = tf.unstack(modulation, num_steps, 1) out, finstate = rnn.static_rnn(net, zip(x_list, y_list, modulation_list), initial_state=state, dtype=tf.float32) optimizer = tf.train.AdamOptimizer(0.005) # optimizer = tf.train.GradientDescentOptimizer(10.0) grads_and_vars = [] for li, s in enumerate(finstate[:-1]): grads_and_vars += [ (tf.reduce_mean(s[5], 0)/num_steps, net.cells[li]._params[0]), (tf.reduce_mean(s[6], 0)/num_steps, net.cells[li]._params[1]), ] # if li < len(net.cells)-1: # grads_and_vars.append( # (tf.reduce_mean(s[7], 0)/num_steps, net.cells[li]._params[2]),
def act_step(self,batch_mask,prob_compare,prob,counter,state,input,acc_outputs,acc_states): ''' General idea: generate halting probabilites and accumulate them. Stop when the accumulated probs reach a halting value, 1-eps. At each timestep, multiply the prob with the rnn output/state. There is a subtlety here regarding the batch_size, as clearly we will have examples halting at different points in the batch. This is dealt with using logical masks to protect accumulated probabilities, states and outputs from a timestep t's contribution if they have already reached 1 - es at a timstep s < t. On the last timestep for each element in the batch the remainder is multiplied with the state/output, having been accumulated over the timesteps, as this takes into account the epsilon value. ''' # If all the probs are zero, we are seeing a new input => binary flag := 1, else 0. binary_flag = tf.cond(tf.reduce_all(tf.equal(prob, 0.0)), lambda: tf.ones([self.batch_size, 1], dtype=tf.float32), lambda: tf.zeros([self.batch_size, 1], tf.float32)) input_with_flags = tf.concat([binary_flag, input], 1) if self._state_is_tuple: (c, h) = tf.split(state, 2, 1) state = tf.contrib.rnn.LSTMStateTuple(c, h) output, new_state = static_rnn(cell=self.cell, inputs=[input_with_flags], initial_state=state, scope=type(self.cell).__name__) if self._state_is_tuple: new_state = tf.concat(new_state, 1) with tf.variable_scope('sigmoid_activation_for_pondering'): p = tf.squeeze(tf.layers.dense(new_state, 1, activation=tf.sigmoid, use_bias=True), squeeze_dims=1) # Multiply by the previous mask as if we stopped before, we don't want to start again # if we generate a p less than p_t-1 for a given example. new_batch_mask = tf.logical_and(tf.less(prob + p, self.one_minus_eps), batch_mask) new_float_mask = tf.cast(new_batch_mask, tf.float32) # Only increase the prob accumulator for the examples # which haven't already passed the threshold. 
This # means that we can just use the final prob value per # example to determine the remainder. prob += p * new_float_mask # This accumulator is used solely in the While loop condition. # we multiply by the PREVIOUS batch mask, to capture probabilities # that have gone over 1-eps THIS iteration. prob_compare += p * tf.cast(batch_mask, tf.float32) # Only increase the counter for those probabilities that # did not go over 1-eps in this iteration. counter += new_float_mask # Halting condition (halts, and uses the remainder when this is FALSE): # If any batch element still has both a prob < 1 - epsilon AND counter < N we # continue, using the outputed probability p. counter_condition = tf.less(counter, self.max_computation) final_iteration_condition = tf.logical_and(new_batch_mask, counter_condition) use_remainder = tf.expand_dims(1.0 - prob, -1) use_probability = tf.expand_dims(p, -1) update_weight = tf.where(final_iteration_condition, use_probability, use_remainder) float_mask = tf.expand_dims(tf.cast(batch_mask, tf.float32), -1) acc_state = (new_state * update_weight * float_mask) + acc_states acc_output = (output[0] * update_weight * float_mask) + acc_outputs return [new_batch_mask, prob_compare, prob, counter, new_state, input, acc_output, acc_state]
def model(self, x, y, weights, biases, training=True):
    """Build a two-layer LSTM encoder/decoder for the first sample in the batch.

    The first ``max_seq_len`` steps of ``x`` are the encoder input and the
    next ``max_seq_len`` steps are the decoder input. The decoder is
    unrolled one step at a time: while training it is fed the ground-truth
    decoder input for each step, otherwise it is fed its own previous
    projected output.

    The shape comments below are the original author's example dimensions
    (sequence length 8, word vector size 128, hidden size 1000, one-hot
    size 6), not values enforced by the code.

    Args:
        x: Input tensor; only sample 0 is used.
        y: Target tensor; only sample 0 is used.
        weights: Mapping with ``'enc2dec'`` and ``'hid2tar'`` matrices.
        biases: Mapping with ``'enc2dec'`` and ``'hid2tar'`` biases.
        training: Selects teacher forcing (true) or feeding back the
            decoder's own output (false).

    Returns:
        ``(optimizer, cost, decoder_layer2_outputs)``: the Adam minimize op,
        the mean squared error, and the projected decoder outputs.
    """
    # Note: the 6 in the shape comments below is one_hot_word_vectors_dim
    # Take the first sample's ABC
    encoder_inputs = tf.slice(x, [0, 0, 0], [1, self.max_seq_len, self.word_vec_dim])  # shape=(1, 8, 128)
    # Unstack into 2-D tensors
    encoder_inputs = tf.unstack(encoder_inputs, self.max_seq_len, 1)  # [<tf.Tensor shape=(1, 128)>,...] holding 8 tensors

    # Take the first sample's <EOS>WXYZ
    decoder_inputs = tf.slice(x, [0, self.max_seq_len, 0], [1, self.max_seq_len, self.word_vec_dim])  # shape=(1, 8, 128)
    decoder_inputs = decoder_inputs[0]  # shape=(8, 128)
    # Project to the decoder's input/output shape
    decoder_inputs = tf.matmul(decoder_inputs, weights['enc2dec']) + biases['enc2dec']
    # Unstack into 2-D tensors
    decoder_inputs = tf.unstack([decoder_inputs], axis=1)  # [<tf.Tensor shape=(1, 6)>,...] holding 8 tensors

    # Take the first sample's WXYZ
    target_outputs = tf.slice(y, [0, 0, 0], [1, self.max_seq_len, self.one_hot_word_vectors_dim])  # shape=(1, 8, 6)
    target_outputs = target_outputs[0]  # shape=(8, 6)

    # Build the network: two layers
    encoder_layer1 = rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
    encoder_layer2 = rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
    decoder_layer1 = rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
    decoder_layer2 = rnn.BasicLSTMCell(self.n_hidden, forget_bias=1.0)

    # Input is 8 tensors of shape=(1, 128), output is 8 tensors of shape=(1, 1000)
    encoder_layer1_outputs, encoder_layer1_states = rnn.static_rnn(encoder_layer1, encoder_inputs, dtype=tf.float32, scope='encoder_layer1')
    # Input is 8 tensors of shape=(1, 1000), output is 8 tensors of shape=(1, 1000)
    encoder_layer2_outputs, encoder_layer2_states = rnn.static_rnn(encoder_layer2, encoder_layer1_outputs, dtype=tf.float32, scope='encoder_layer2')

    # Feed the decoder's <EOS> input; each decoder layer starts from the
    # final state of the matching encoder layer.
    # Input is 1 tensor of shape=(1, 6) (<EOS>), output is 1 tensor of shape=(1, 1000)
    decoder_layer1_outputs, decoder_layer1_states = rnn.static_rnn(decoder_layer1, decoder_inputs[:1], initial_state=encoder_layer1_states, dtype=tf.float32, scope='decoder_layer1')
    # Input is 1 tensor of shape=(1, 1000), output is 1 tensor of shape=(1, 1000)
    decoder_layer2_outputs, decoder_layer2_states = rnn.static_rnn(decoder_layer2, decoder_layer1_outputs, initial_state=encoder_layer2_states, dtype=tf.float32, scope='decoder_layer2')

    # Collects the layer-2 output of every decoder step.
    decoder_layer2_outputs_combine = []
    decoder_layer2_outputs_combine.append(decoder_layer2_outputs)
    for i in range(self.max_seq_len - 1):
        decoder_layer2_outputs = tf.unstack(decoder_layer2_outputs, axis=1)[0]
        # NOTE(review): this indexes biases['hid2tar'] by the step number,
        # while the final projection below adds the whole bias -- confirm
        # the bias has a per-step leading dimension.
        decoder_layer2_outputs = tf.matmul(decoder_layer2_outputs, weights['hid2tar']) + biases['hid2tar'][i]
        # Input is 1 tensor of shape=(1, 6), output is 1 tensor of shape=(1, 1000)
        if training:
            # Teacher forcing: feed the ground-truth input for the next step.
            decoder_layer1_outputs, decoder_layer1_states = rnn.static_rnn(decoder_layer1, decoder_inputs[i+1:i+2], initial_state=decoder_layer1_states, dtype=tf.float32, scope='decoder_layer1')
        else:
            # Inference: feed back the previous step's projected output.
            decoder_layer1_outputs, decoder_layer1_states = rnn.static_rnn(decoder_layer1, [decoder_layer2_outputs], initial_state=decoder_layer1_states, dtype=tf.float32, scope='decoder_layer1')
        # Input is 1 tensor of shape=(1, 1000), output is 1 tensor of shape=(1, 1000)
        decoder_layer2_outputs, decoder_layer2_states = rnn.static_rnn(decoder_layer2, decoder_layer1_outputs, initial_state=decoder_layer2_states, dtype=tf.float32, scope='decoder_layer2')
        decoder_layer2_outputs_combine.append(decoder_layer2_outputs)

    # The steps below turn the 8 arrays of shape=(1, 1000) into 8 tensors of shape=(1, 1000)
    decoder_layer2_outputs_combine = tf.unstack(decoder_layer2_outputs_combine, axis=1)[0]
    decoder_layer2_outputs_combine = tf.unstack(decoder_layer2_outputs_combine, axis=1)[0]
    decoder_layer2_outputs_combine = tf.unstack([decoder_layer2_outputs_combine], axis=1)

    # Reassign decoder_layer2_outputs
    decoder_layer2_outputs = decoder_layer2_outputs_combine

    decoder_layer2_outputs = tf.unstack(decoder_layer2_outputs, axis=1)[0]  # shape=(8, 1000)
    decoder_layer2_outputs = tf.matmul(decoder_layer2_outputs, weights['hid2tar']) + biases['hid2tar']  # shape=(8, 6)

    cost = tf.losses.mean_squared_error(decoder_layer2_outputs, target_outputs)
    optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(cost)

    return optimizer, cost, decoder_layer2_outputs
def _build_network(self, dropout):
    """Build the embedding -> stacked LSTM -> softmax graph.

    Legend for tensor shapes below:
        B := batch size
        C := number of classes
        H := number of hidden units (aka layer size)
        S := sequence length

    Sets ``self._input_ids``, ``self._target_ids``, ``self._stack``,
    ``self._state``, ``self._probs`` and ``self._loss``.

    Args:
        dropout: Dropout rate for each LSTM layer's output; values <= 0
            disable dropout. The keep probability is ``1 - dropout``.
    """
    # keep a reference to _config to make code below simpler
    config = self._config

    # Create size BxS input and target placeholder tensors
    # These will be filled in with actual values at session runtime
    data_dims = [self._batch_size, self._seq_len]
    self._input_ids = tf.placeholder(tf.int32, data_dims)
    self._target_ids = tf.placeholder(tf.int64, data_dims)

    # Create an embedding tensor to represent integer inputs into H dimensions
    # This must be done on the CPU, according to:
    # https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/word2vec/word2vec_basic.py#L143
    # (Ops and variables pinned to the CPU because of missing GPU implementation)
    with tf.device("/cpu:0"):
        # embeddings is a CxH tensor
        embeddings = tf.get_variable('embeddings', [config.num_classes, config.num_hidden])
        # embedded is a BxSxH tensor
        embedded = tf.nn.embedding_lookup(embeddings, self._input_ids)

    # sequences is a list of length S containing Bx1xH tensors
    sequences = tf.split(embedded, self._seq_len, 1)
    # perform a "squeeze" on each item in the sequence list
    # inputs is a list of length S containing BxH tensors
    inputs = [tf.squeeze(seq, [1]) for seq in sequences]

    def make_cell():
        # One independent LSTM layer, optionally wrapped with output dropout.
        cell = BasicLSTMCell(config.num_hidden)
        if dropout > 0:
            keep_prob = 1 - dropout
            cell = DropoutWrapper(cell, output_keep_prob=keep_prob)
        return cell

    # Every layer gets its own cell object. Repeating one object with
    # [cell] * n makes all layers share a single cell, which newer
    # TensorFlow 1.x releases either reject or treat as shared weights.
    self._stack = MultiRNNCell([make_cell() for _ in range(config.num_layers)])
    self._state = self._stack.zero_state(self._batch_size, tf.float32)

    # Pump the inputs through the RNN layers
    # outputs is a list of length S containing BxH tensors
    outputs, self._state = static_rnn(self._stack, inputs, initial_state=self._state)

    # Softmax weight tensor is HxC
    W_soft = tf.get_variable('W_soft', [config.num_hidden, config.num_classes])
    # Softmax bias tensor is Cx1
    b_soft = tf.get_variable('b_soft', [config.num_classes])

    # Reshape the output so that we can use it with the softmax weights and bias:
    # - concat makes list into a BxSH tensor,
    # - reshape converts the BxSH tensor into a BSxH tensor
    output = tf.reshape(tf.concat(outputs, 1), [-1, config.num_hidden])

    # logits is a (BSxH).(HxC) + 1xC = BSxC + 1xC = BSxC tensor
    logits = tf.nn.xw_plus_b(output, W_soft, b_soft)

    # probs is a BSxC tensor, with entry (i,j) containing the probability
    # that row i (one batch/step position) is class j
    self._probs = tf.nn.softmax(logits)

    # targets is a BSx1 tensor
    targets = tf.reshape(self._target_ids, [self._batch_size*self._seq_len])
    # cross_entropy is a BSx1 tensor
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets)

    # loss is a scalar containing the mean of cross_entropy losses
    self._loss = tf.reduce_mean(cross_entropy)