def __init__(self, name: str, env, temp=0.1):
    """Build recurrent policy and value networks for a gym environment.

    :param name: variable scope under which the graph is created
    :param env: gym env; its observation_space and action_space are read
    :param temp: temperature of boltzmann distribution (not read in this method)
    """
    hidden_units = 256
    obs_shape = list(env.observation_space.shape)
    act_space = env.action_space

    def _gru_features(seq_in):
        """Run a new 256-unit GRU over seq_in; return (flattened outputs, final state)."""
        cell = WeightedNormGRUCell(
            hidden_units,
            activation=nn.relu,
            kernel_initializer=tf.initializers.orthogonal(),
            bias_initializer=tf.zeros_initializer())
        seq_out, last_state = nn.dynamic_rnn(cell, inputs=seq_in, dtype=tf.float32)
        return tf.reshape(seq_out, [-1, hidden_units]), last_state

    with tf.variable_scope(name):
        self.obs = tf.placeholder(dtype=tf.float32,
                                  shape=[None] + obs_shape,
                                  name='obs')
        # A leading axis of size one is added before the observations enter the RNNs.
        rnn_in = tf.expand_dims(self.obs, [0])

        with tf.variable_scope('policy_net'):
            policy_flat, policy_state = _gru_features(rnn_in)
            # Linear head (no nonlinearity): one value per discrete action.
            self.act_probs = dense(policy_flat, act_space.n, nonlinearity=None)
            self.policy_states = policy_state

        with tf.variable_scope('value_net'):
            value_flat, value_state = _gru_features(rnn_in)
            self.v_preds = tf.layers.dense(
                value_flat,
                units=1,
                activation=None,
                kernel_initializer=tf.glorot_normal_initializer(),
                bias_initializer=tf.zeros_initializer())
            self.value_states = value_state

        # Sample one action per row, then flatten the [N, 1] sample to [N].
        sampled = tf.multinomial(tf.nn.log_softmax(self.act_probs), num_samples=1)
        self.act_stochastic = tf.reshape(sampled, shape=[-1])
        self.act_deterministic = tf.argmax(self.act_probs, axis=1)
        self.scope = tf.get_variable_scope().name
def __call__(self, inputs, batch_sz):
    """Run a GRU or LSTM over ``inputs`` and classify from its final hidden state.

    Args:
        inputs: batch-major sequence tensor, [batch_size, max_time, ...].
        batch_sz: batch size used to build the zero initial state.

    Returns:
        Output of a dense layer with ``self.n_classes`` units applied to the
        final hidden state, i.e. [batch_sz, n_classes].
    """
    print("rnntype: " + self.rnntype)
    if self.rnntype == "GRU":
        cell = nn.rnn_cell.GRUCell(self.n_hidden)
    else:
        cell = nn.rnn_cell.LSTMCell(self.n_hidden)

    initial_state = cell.zero_state(batch_sz, tf.float32)
    # dynamic_rnn inputs shape = [batch_size, max_time, ...]
    # outs shape = [batch_size, max_time, cell.output_size]
    outs, states = nn.dynamic_rnn(cell,
                                  inputs,
                                  initial_state=initial_state,
                                  dtype=tf.float32)
    print('outs shape: ')
    print(outs.shape)  # (batch_sz, max_time, n_hidden)

    # A GRU returns a single state tensor, but an LSTM returns an
    # LSTMStateTuple(c, h), which has no ``.shape`` and must not be fed to the
    # dense layer as a tuple. Use the hidden state ``h`` in that case.
    final_hidden = getattr(states, "h", states)
    print(final_hidden.shape)  # (batch_sz, n_hidden)

    # NOTE(review): tf.float32 is passed as the first positional argument of
    # xavier_initializer, kept as in the original — confirm it was meant as dtype.
    FC = tl.Dense(self.n_classes,
                  use_bias=True,
                  kernel_initializer=tc.layers.xavier_initializer(tf.float32))
    return FC(final_hidden)
def _build_sum(self, cell):
    """Generate user memory states from the behavior sequence.

    Args:
        cell: an initialized SUM cell.

    Returns:
        obj: the final RNN state truncated to its first ``slots * hidden_size``
        columns, described by the original author as a flattened representation
        of user memory states of shape (BatchSize, SlotsNum x HiddenSize).
    """
    hp = self.hparams
    with tf.variable_scope("sum"):
        self.mask = self.iterator.mask
        # Number of valid steps per row, obtained by summing the mask over axis 1.
        self.sequence_length = tf.reduce_sum(self.mask, 1)

        history = self.history_embedding
        start_state = cell.zero_state(tf.shape(self.history_embedding)[0], tf.float32)
        rum_outputs, last_state = dynamic_rnn(
            cell,
            inputs=history,
            dtype=tf.float32,
            sequence_length=self.sequence_length,
            scope="sum",
            initial_state=start_state,
        )

        memory_width = hp.slots * hp.hidden_size
        memory_state = last_state[:, :memory_width]

        # Expose cell internals on the model.
        self.heads = cell.heads
        self.alpha = cell._alpha
        self.beta = cell._beta
        tf.summary.histogram("SUM_outputs", rum_outputs)

    return memory_state
def _forward_score(self):
    """Compute the fuzzy-CRF forward score and store it in ``self.forward_score``.

    The first time step (features multiplied by the float-cast tags) seeds the
    recurrence; the remaining steps are consumed by a ``fuzzyCrfForwardCell``
    through ``dynamic_rnn`` and the final state is reduced with log-sum-exp
    over axis 1.
    """
    with tf.variable_scope("fuzzy_crf_forward"):
        # Time step 0 becomes the initial state: [batch, num_tag].
        head_fea = tf.squeeze(
            tf.slice(self.token_vec_place, [0, 0, 0], [-1, 1, -1]), [1])
        head_tag = tf.squeeze(
            tf.slice(self.tag_place, [0, 0, 0], [-1, 1, -1]), [1])
        init_state = tf.multiply(head_fea, tf.cast(head_tag, tf.float32))

        # Everything after the first step is fed to the RNN.
        tail_fea = tf.slice(self.token_vec_place, [0, 1, 0], [-1, -1, -1])
        tail_tag = tf.slice(self.tag_place, [0, 1, 0], [-1, -1, -1])
        tail_unk = self._sinpath_mask()  # [batch, max_seq_len - 1, num_tag, num_tag]

        cell = fuzzyCrfForwardCell(self.transitions)

        # The first step is already consumed; never let the length go negative.
        zero = tf.constant(0, dtype=self.seq_len_place.dtype)
        steps_left = tf.maximum(zero, self.seq_len_place - 1)

        _, final_scores = dynamic_rnn(cell=cell,
                                      inputs=(tail_fea, tail_tag, tail_unk),
                                      sequence_length=steps_left,
                                      initial_state=init_state,
                                      dtype=tf.float32)
        self.forward_score = tf.reduce_logsumexp(final_scores, [1])  # [batch]
def prediction(self):
    """Build the two-layer GRU classifier.

    Returns:
        (prediction, weight_l1, bias_l1, weight_class, bias_class): the softmax
        output together with the variables of the tanh layer and of the
        classification layer.
    """
    layer_sizes = [self._num_RNN, self._num_RNN]
    stacked_rnn = tf.contrib.rnn.MultiRNNCell(
        [nn.rnn_cell.GRUCell(size) for size in layer_sizes])

    # Recurrent network.
    output_RNN, _ = nn.dynamic_rnn(
        stacked_rnn,
        self.data,
        dtype=tf.float32,
        sequence_length=self.length,
    )

    batch_size = tf.shape(output_RNN)[0]
    static_shape = output_RNN.get_shape()
    max_length = int(static_shape[1])
    output_size = int(static_shape[2])

    # Apply the tanh layer to every time step at once by flattening batch and time.
    flat_rnn = tf.reshape(output_RNN, [-1, output_size])
    weight_l1, bias_l1 = self._weight_and_bias(self._num_RNN, self.layer1)
    hidden = tf.nn.tanh(tf.matmul(flat_rnn, weight_l1) + bias_l1)
    # NOTE(review): 0.2 is passed positionally to tf.nn.dropout (keep_prob in the
    # TF1 signature) and is applied unconditionally — confirm this is intended.
    hidden = tf.nn.dropout(hidden, 0.2, seed=47)
    hidden_seq = tf.reshape(hidden, [batch_size, max_length, self.layer1])

    last = self._last_relevant(hidden_seq, self.length)
    n_classes = int(self.target.get_shape()[1])
    weight_class, bias_class = self._weight_and_bias(self.layer1, n_classes)

    # Softmax layer.
    prediction = tf.nn.softmax(tf.matmul(last, weight_class) + bias_class)
    return prediction, weight_l1, bias_l1, weight_class, bias_class
def build_cell(cell_input, input_len, batch_size):
    """Run a stack of LSTMNUM BasicLSTMCells (LSTMSIZE units each) over cell_input.

    Args:
        cell_input: input passed to ``dynamic_rnn``.
        input_len: per-example sequence lengths.
        batch_size: batch size used to build the zero initial state.

    Returns:
        (output, state_input, state_output): RNN outputs, the zero initial
        state, and the final state.
    """
    layers = [nn.rnn_cell.BasicLSTMCell(LSTMSIZE) for _ in range(LSTMNUM)]
    stacked = nn.rnn_cell.MultiRNNCell(layers)
    zero_state = stacked.zero_state(batch_size, dtype=tf.float32)
    rnn_out, last_state = nn.dynamic_rnn(stacked,
                                         cell_input,
                                         sequence_length=input_len,
                                         initial_state=zero_state)
    return rnn_out, zero_state, last_state
def build_rnn(cell, input, input_len, batch_size, name):
    """Unroll ``cell`` over ``input`` from a zero state under variable scope ``name``.

    Args:
        cell: RNN cell to unroll.
        input: input passed to ``dynamic_rnn``.
        input_len: per-example sequence lengths.
        batch_size: batch size used to build the zero initial state.
        name: scope handed to ``dynamic_rnn``.

    Returns:
        (output, state_input, state_output): RNN outputs, the zero initial
        state, and the final state.
    """
    zero_state = cell.zero_state(batch_size, dtype=tf.float32)
    rnn_out, last_state = nn.dynamic_rnn(cell,
                                         input,
                                         sequence_length=input_len,
                                         initial_state=zero_state,
                                         scope=name)
    return rnn_out, zero_state, last_state
def GRULayer(input_tensor, num_layers=1):
    """Run ``num_layers`` stacked 128-unit GRU layers with output dropout.

    Args:
        input_tensor: tensor reshaped here to
            [-1, ConfigUtil.seq_length, ConfigUtil.hidden_size].
        num_layers: number of stacked GRU layers (default 1).

    Returns:
        The per-step outputs of the (top) GRU layer.
    """
    input_tensor = tf.reshape(
        input_tensor,
        shape=[-1, ConfigUtil.seq_length, ConfigUtil.hidden_size])

    def _make_layer():
        """Create one independent GRU cell wrapped with output dropout."""
        gru = GRUCell(num_units=128, kernel_initializer=create_initializer())
        return DropoutWrapper(gru,
                              output_keep_prob=(1 - ConfigUtil.dropout_prob))

    if num_layers > 1:
        # Each layer needs its own cell object: ``[cell] * num_layers`` repeats
        # one instance, so every layer would try to use the same cell
        # (rejected or weight-shared depending on the TF version).
        cell = MultiRNNCell([_make_layer() for _ in range(num_layers)])
    else:
        cell = _make_layer()

    outputs, _ = dynamic_rnn(cell, input_tensor, dtype=tf.float32)
    return outputs
def build_encoder(input, input_len, embedding, batch_size, training):
    """Preprocess the input and unroll the encoder RNN from a zero state.

    Args:
        input: raw encoder input handed to ``build_preprocess``.
        input_len: per-example sequence lengths.
        embedding: embedding handed to ``build_preprocess``.
        batch_size: batch size used to build the zero initial state.
        training: flag forwarded to ``build_preprocess`` and ``build_cell``.

    Returns:
        (output, state_input, state_output): encoder outputs, the zero initial
        state, and the final state.
    """
    with tf.variable_scope('encoder_pre'):
        prepared = build_preprocess(input, embedding, training)

    encoder_cell = build_cell(training)
    zero_state = encoder_cell.zero_state(batch_size, dtype=tf.float32)
    encoded, last_state = nn.dynamic_rnn(encoder_cell,
                                         prepared,
                                         sequence_length=input_len,
                                         initial_state=zero_state,
                                         scope='encoder')
    return encoded, zero_state, last_state
def decode(self, fea_vec, transitions, seq_len):
    """Decode the highest-scoring tag sequence with a forward/backward RNN pass.

    Args:
        fea_vec: unary scores, [batch, len, num_tag] per the shape comments below.
        transitions: transition matrix handed to the forward cell.
        seq_len: [batch] sequence lengths.

    Returns:
        (decode_tags, best_score): the decoded tags, [batch, len], and the
        maximum final score per sequence.
    """
    forward_cell = CrfDecodeForwardRnnCell(transitions)
    first_vec = tf.squeeze(tf.slice(fea_vec, [0, 0, 0], [-1, 1, -1]),
                           [1])  # [batch, num_tag]
    # The slice is already rank 3, so it is not squeezed: an axis-less
    # tf.squeeze would also drop the batch/time/tag axis whenever it has size 1.
    rest_vec = tf.slice(fea_vec, [0, 1, 0],
                        [-1, -1, -1])  # [batch, len - 1, num_tag]
    # The first step is consumed as the initial state; clamp at zero.
    sequence_lengths_less_one = tf.maximum(
        tf.constant(0, dtype=seq_len.dtype), seq_len - 1)

    # Forward pass: collect backpointers and the final scores.
    backpointers, scores = dynamic_rnn(
        cell=forward_cell,
        inputs=rest_vec,
        sequence_length=sequence_lengths_less_one,
        initial_state=first_vec,
        dtype=tf.int32)
    backpointers = tf.reverse_sequence(
        backpointers, sequence_lengths_less_one,
        seq_dim=1)  # [batch, len - 1, num_tag]

    # Backward pass: follow the backpointers starting from the best final tag.
    num_tags = tf.dimension_value(tf.shape(transitions)[1])
    backward_cell = CrfDecodeBackwardRnnCell(num_tags)
    init_state = tf.cast(tf.argmax(scores, axis=1), dtype=tf.int32)
    init_state = tf.expand_dims(init_state, axis=1)  # [batch, 1]
    # The best final tag must seed the backtracking; without ``initial_state``
    # dynamic_rnn starts from the cell's zero state instead.
    decode_tags, _ = dynamic_rnn(cell=backward_cell,
                                 inputs=backpointers,
                                 sequence_length=sequence_lengths_less_one,
                                 initial_state=init_state,
                                 dtype=tf.int32)
    decode_tags = tf.squeeze(decode_tags, axis=[-1])  # [batch, len - 1]
    decode_tags = tf.concat([init_state, decode_tags], axis=1)  # [batch, len]
    decode_tags = tf.reverse_sequence(decode_tags, seq_len, seq_dim=1)

    best_score = tf.reduce_max(scores, axis=[1])
    return decode_tags, best_score
def build_cell(cell_input, input_len, batch_size, training):
    """Run a stack of LSTMNUM BasicLSTMCells over cell_input, with dropout when training.

    Args:
        cell_input: input passed to ``dynamic_rnn``.
        input_len: per-example sequence lengths.
        batch_size: batch size used to build the zero initial state.
        training: when truthy, every layer is wrapped in a DropoutWrapper with
            ``output_keep_prob=DROPOUT``.

    Returns:
        (output, state_input, state_output): RNN outputs, the zero initial
        state, and the final state.
    """
    layers = [nn.rnn_cell.BasicLSTMCell(LSTMSIZE) for _ in range(LSTMNUM)]
    if training:
        layers = [
            nn.rnn_cell.DropoutWrapper(layer, output_keep_prob=DROPOUT)
            for layer in layers
        ]
    stacked = nn.rnn_cell.MultiRNNCell(layers)

    zero_state = stacked.zero_state(batch_size, dtype=tf.float32)
    rnn_out, last_state = nn.dynamic_rnn(stacked,
                                         cell_input,
                                         sequence_length=input_len,
                                         initial_state=zero_state)
    return rnn_out, zero_state, last_state
def forward(self, x, computation_mode=MakiRestorable.INFERENCE_MODE):
    """Unroll ``self._cell`` over ``x`` dynamically or statically.

    Args:
        x: batch-major input; time is axis 1.
        computation_mode: accepted for interface compatibility, not read here.

    Returns:
        (hs, c_last, h_last): all hidden states stacked along axis 1, then the
        last candidate value and the last hidden state.
    """
    if not self._dynamic:
        # static_rnn works on a Python list of per-step tensors.
        steps = tf.unstack(x, axis=1)
        hs_list, (c_last, h_last) = static_rnn(self._cell, steps, dtype=tf.float32)
        hs = tf.stack(hs_list, axis=1)
        return hs, c_last, h_last

    # hidden states, (last candidate value, last hidden state)
    hs, (c_last, h_last) = dynamic_rnn(self._cell, x, dtype=tf.float32)
    return hs, c_last, h_last
def prediction(self):
    """Build the GRU + attention classifier and return its softmax output."""
    # Recurrent network.
    gru = nn.rnn_cell.GRUCell(self._num_RNNs)
    output_RNN, _ = nn.dynamic_rnn(
        gru,
        self.data,
        dtype=tf.float32,
        sequence_length=self.length,
    )

    # Reduce the per-step outputs with the model's attention helper.
    attended = self._attention(output_RNN)

    # Softmax layer.
    n_classes = int(self.target.get_shape()[1])
    weight, bias = self._weight_and_bias(self._num_RNNs, n_classes)
    logits = tf.matmul(attended, weight) + bias
    return tf.nn.softmax(logits)
def prediction(self):
    """Build the GRU classifier and return its softmax output.

    The GRU is unrolled over ``self.data`` with per-example lengths
    ``self.length``; the output selected by ``self._last_relevant`` feeds a
    softmax layer sized from the second dimension of ``self.target``.
    """
    # Recurrent network. The input is the model's own ``self.data``; the bare
    # name ``data`` only resolved if a module-level variable happened to exist.
    output, _ = nn.dynamic_rnn(
        nn.rnn_cell.GRUCell(self._num_hidden),
        self.data,
        dtype=tf.float32,
        sequence_length=self.length,
    )
    last = self._last_relevant(output, self.length)

    # Softmax layer.
    weight, bias = self._weight_and_bias(self._num_hidden,
                                         int(self.target.get_shape()[1]))
    prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
    return prediction
def __init__(self, nfeats, nlabels, hidden_size):
    """Build a basic RNN classifier over variable-length sequences.

    Args:
        nfeats: number of features per time step.
        nlabels: number of output labels.
        hidden_size: number of units in the BasicRNNCell.
    """
    super(rnn, self).__init__()
    self._nlabels = nlabels
    self._nfeats = nfeats

    # Inputs: (sequence data, per-example lengths).
    data_ph = tf.placeholder(tf.float32,
                             shape=(None, None, nfeats),
                             name="data")
    lengths_ph = tf.placeholder(tf.int32, shape=(None, ), name="lengths")
    self.input = (data_ph, lengths_ph)
    self.y = tf.placeholder(tf.int32, shape=(None, self._nlabels))

    # Only the final state is kept; it feeds the dense output layer.
    rnn_cell = nn.rnn_cell.BasicRNNCell(hidden_size)
    _, self.state = nn.dynamic_rnn(rnn_cell,
                                   inputs=self.input[0],
                                   sequence_length=self.input[1],
                                   dtype=tf.float32)
    self.output = layers.dense(inputs=self.state, units=nlabels)
def forward(self, X, is_training=False):
    """Apply the RNN wrapper selected by ``self.cell_type`` to ``X``.

    Args:
        X: batch-major input; the static variants unstack it along axis 1
            into ``self.seq_length`` steps.
        is_training: accepted for interface compatibility, not read here.

    Returns:
        Whatever the selected TensorFlow RNN function returns, or ``None``
        when ``self.cell_type`` matches none of the four known types.
    """
    kind = self.cell_type

    if kind == CellType.Bidir_Dynamic:
        return bidirectional_dynamic_rnn(cell_fw=self.cells,
                                         cell_bw=self.cells,
                                         inputs=X,
                                         dtype=tf.float32)

    if kind == CellType.Bidir_Static:
        steps = tf.unstack(X, num=self.seq_length, axis=1)
        return static_bidirectional_rnn(cell_fw=self.cells,
                                        cell_bw=self.cells,
                                        inputs=steps,
                                        dtype=tf.float32)

    if kind == CellType.Dynamic:
        return dynamic_rnn(self.cells, X, dtype=tf.float32)

    if kind == CellType.Static:
        steps = tf.unstack(X, num=self.seq_length, axis=1)
        return static_rnn(self.cells, steps, dtype=tf.float32)

    return None
def __init__(self, num_classes, batch_size=64, num_steps=50, sampling=False,
             num_layers=2, lstm_size=128):
    """Build the LSTM model graph.

    Args:
        num_classes: depth of the one-hot encoding and size of the output layer.
        batch_size: sequences per batch (may be replaced by ``state(...)``).
        num_steps: steps per sequence (may be replaced by ``state(...)``).
        sampling: flag forwarded to ``state(...)`` to select testing/training sizes.
        num_layers: number of stacked LSTM layers.
        lstm_size: units per LSTM layer.

    Attributes set:
        inputs, targets, keep_prob: placeholders from ``set_placeholders``.
        initial_state: the state the RNN starts from (as built by ``build_lstm``).
        final_state: the state returned by ``dynamic_rnn``.
        prediction, logits: outputs of ``build_output``.
    """
    # Testing/Training
    batch_size, num_steps = state(sampling, batch_size, num_steps)

    # Define inputs/outputs on graph
    self.inputs, self.targets, self.keep_prob = set_placeholders(
        batch_size, num_steps)

    # Define LSTM cells in model
    cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size,
                                          self.keep_prob)

    # One-hot encode the inputs
    x_one_hot = tf.one_hot(self.inputs, num_classes)

    # Run the RNN. The result must NOT be bound to a local called ``state``:
    # that would make ``state`` local to this function and the ``state(...)``
    # call above would raise UnboundLocalError.
    outputs, final_state = dynamic_rnn(cell,
                                       x_one_hot,
                                       initial_state=self.initial_state)
    # Keep the starting state intact and expose the resulting state separately
    # so it can be fetched and fed back in as the next initial state.
    self.final_state = final_state

    # Output
    self.prediction, self.logits = build_output(outputs, lstm_size,
                                                num_classes)
def define_rnn(batch_in_tf, seq_lens_tf, n_sharpe, n_time, n_ftrs, n_markets,
               allow_shorting=True, equality=False):
    """Define an LSTM that maps input batches to normalized market positions.

    Args:
        batch_in_tf (n_batch, n_time, n_ftrs): Input data.
        seq_lens_tf (n_batch): Lengths of each batch sequence. Pad with zeros
            afterwards.
        n_sharpe, n_time, n_ftrs: accepted for interface compatibility; not
            read by this function.
        n_markets (int): Number of LSTM units, i.e. outputs per time step.
        allow_shorting (bool): If True, outputs are divided by the sum of their
            absolute values over axis 2; otherwise they are squared and divided
            by their sum over axis 2.
        equality: not read by this function.

    Returns:
        out: Normalized LSTM outputs.
        state_out_tf: Final LSTM state returned by ``dynamic_rnn``.
        (cell_state, hidden_state): Placeholders forming the initial state.
    """
    lstm_cell = tf_rnn.BasicLSTMCell(num_units=n_markets, state_is_tuple=True)

    # The initial state is fed from outside through two placeholders.
    state_width = lstm_cell.state_size[0]
    cell_state = tf.placeholder(TF_DTYPE, [None, state_width])
    hidden_state = tf.placeholder(TF_DTYPE, [None, state_width])
    init_state = tf.contrib.rnn.LSTMStateTuple(cell_state, hidden_state)

    out, state_out_tf = tf_nn.dynamic_rnn(cell=lstm_cell,
                                          inputs=batch_in_tf,
                                          time_major=False,
                                          sequence_length=seq_lens_tf,
                                          initial_state=init_state,
                                          dtype=tf.float32)

    # Normalize across axis 2 so each step's outputs sum to one
    # (in absolute value when shorting is allowed).
    if not allow_shorting:
        out = tf.pow(out, 2)
        norm = tf.reduce_sum(out, axis=2, keep_dims=True)
    else:
        norm = tf.reduce_sum(tf.abs(out), axis=2, keep_dims=True)
    out = out / norm

    return out, state_out_tf, (cell_state, hidden_state)
def _build_gru(self):
    """Model the user history with a GRU.

    Returns:
        obj: The final state of the GRU.
    """
    with tf.name_scope("gru"):
        self.mask = self.iterator.mask
        # Number of valid steps per row, obtained by summing the mask over axis 1.
        self.sequence_length = tf.reduce_sum(self.mask, 1)

        # Item and category history embeddings are joined on axis 2.
        history_parts = [self.item_history_embedding, self.cate_history_embedding]
        self.history_embedding = tf.concat(history_parts, 2)

        gru_cell = GRUCell(self.hidden_size)
        rnn_outputs, last_state = dynamic_rnn(
            gru_cell,
            inputs=self.history_embedding,
            sequence_length=self.sequence_length,
            dtype=tf.float32,
            scope="gru",
        )
        tf.summary.histogram("GRU_outputs", rnn_outputs)
        return last_state
def _multi_seq_fn():
    """Compute the log partition function via the forward (alpha) recursion."""
    # Every step after the first one is consumed by the RNN.
    tail_inputs = tf.slice(inputs, [0, 1, 0], [-1, -1, -1])

    # The forward algorithm's alpha values yield the partition function.
    alpha_cell = CrfForwardRnnCell(transition_params)

    # The first step is used as the initial state; lengths must not go below zero.
    zero_len = tf.constant(0, dtype=sequence_lengths.dtype)
    steps_left = tf.maximum(zero_len, sequence_lengths - 1)

    _, final_alphas = dynamic_rnn(cell=alpha_cell,
                                  inputs=tail_inputs,
                                  sequence_length=steps_left,
                                  initial_state=first_input,
                                  dtype=tf.float32)
    log_norm = tf.reduce_logsumexp(final_alphas, [1])

    # Sequences of length <= 0 contribute a log-norm of zero.
    is_empty = tf.less_equal(sequence_lengths, 0)
    return tf.where(is_empty, tf.zeros_like(log_norm), log_norm)
def prediction(self):
    """Build the GRU regressor and return a per-step prediction.

    Returns:
        Tensor of shape [batch_size, max_length, target_size]:
        ``self.ext * tanh(W x + b)`` applied to every RNN output step.
    """
    # Recurrent network (a single GRU layer wrapped in MultiRNNCell).
    gru_layers = [nn.rnn_cell.GRUCell(units) for units in [self._num_RNN]]
    stacked_rnn = tf.contrib.rnn.MultiRNNCell(gru_layers)
    rnn_out, _ = nn.dynamic_rnn(
        stacked_rnn,
        self.data,
        dtype=tf.float32,
        sequence_length=self.length,
    )

    batch_size = tf.shape(rnn_out)[0]
    out_shape = rnn_out.get_shape()
    max_length = int(out_shape[1])
    output_size = int(out_shape[2])
    target_size = int(self.target.get_shape()[2])

    # Flatten batch and time so one matmul covers every step.
    flat_out = tf.reshape(rnn_out, [-1, output_size])
    weight, bias = self._weight_and_bias(self._num_RNN, target_size)

    # Tanh layer.
    flat_pred = self.ext * tf.nn.tanh(tf.matmul(flat_out, weight) + bias)
    return tf.reshape(flat_pred, [batch_size, max_length, target_size])
def forward(self, x, computation_mode=MakiRestorable.INFERENCE_MODE):
    """Apply the RNN variant selected by ``self._cell_type`` and record its state.

    Args:
        x: batch-major input; the static variants unstack it along axis 1
            into ``self._seq_length`` steps.
        computation_mode: accepted for interface compatibility, not read here.

    Returns:
        The RNN outputs. For BIDIR_DYNAMIC the forward and backward outputs
        are concatenated on the last axis; for BIDIR_STATIC the list returned
        by ``static_bidirectional_rnn`` is passed through; for STATIC the
        per-step outputs are stacked along axis 1. ``None`` is returned for an
        unknown cell type.

    Side effects:
        Stores the final state(s) in ``self._cells_state``.
    """
    if self._cell_type == CellType.BIDIR_DYNAMIC:
        (outputs_f, outputs_b), (states_f, states_b) = \
            bidirectional_dynamic_rnn(cell_fw=self._cells, cell_bw=self._cells, inputs=x, dtype=tf.float32)
        # Creation of the two MakiTensors for both `outputs_f` and `outputs_b` is inappropriate since
        # the algorithm that builds the computational graph does not consider such case and
        # therefore can not handle this situation, it will cause an error.
        self._cells_state = tf.concat([states_f, states_b], axis=-1)
        return tf.concat([outputs_f, outputs_b], axis=-1)
    elif self._cell_type == CellType.BIDIR_STATIC:
        x = tf.unstack(x, num=self._seq_length, axis=1)
        outputs_fb, states_f, states_b = \
            static_bidirectional_rnn(cell_fw=self._cells, cell_bw=self._cells, inputs=x, dtype=tf.float32)
        # Combine the forward AND backward final states, as in the dynamic
        # branch; concatenating `states_f` with itself dropped the backward state.
        self._cells_state = tf.concat([states_f, states_b], axis=-1)
        return outputs_fb
    elif self._cell_type == CellType.DYNAMIC:
        outputs, states = dynamic_rnn(self._cells, x, dtype=tf.float32)
        self._cells_state = states
        return outputs
    elif self._cell_type == CellType.STATIC:
        x = tf.unstack(x, num=self._seq_length, axis=1)
        outputs, states = static_rnn(self._cells, x, dtype=tf.float32)
        self._cells_state = states
        return tf.stack(outputs, axis=1)
def __init__(self, is_training=True):
    """Build an LSTM feature extractor with SBP/DBP regressors and a domain classifier.

    Args:
        is_training: when True, dropout with keep probability ``self.drop_rate``
            is applied to the LSTM outputs; otherwise dropout is disabled.

    Attributes set (besides hyperparameters and placeholders):
        sbp, dbp: regression outputs, one value per example each.
        pred_losses: sum of the two mean-squared-error losses.
        domain_pred: softmax probabilities over ``self.num_sub`` domains.
        domain_losses: per-example softmax cross-entropy of the domain classifier.
    """
    self.max_grad_norm = 5
    self.learning_rate = 0.003
    self.unit_lstm = 30
    self.unit = 30
    self.drop_rate = 0.7  # passed to tf.nn.dropout below as keep_prob
    self.batch_size = 191
    #1:383 3:245 6:101 7:74 9:62 10:44 11:138 12:67 13:87 14:108 15:166
    #1:191 3:122 6:202 7:148 9:124 10:88 11:138 12:67 13:87 14:108 15:166
    self.length = 100
    self.fea_dim = 9
    self.raw = 4
    self.num_sub = 3

    self.input = tf.placeholder(tf.float32, [None, self.length, self.fea_dim])
    self.sbp_label = tf.placeholder(tf.float32, [None, 1])
    self.dbp_label = tf.placeholder(tf.float32, [None, 1])
    self.domain = tf.placeholder(tf.int32, [None, self.num_sub])
    self.l = tf.placeholder(tf.float32, [])
    self.train = tf.placeholder(tf.bool, [])

    with tf.variable_scope('feature_extractor'):
        # The LSTM runs time-major: [seq, batch, feature].
        inputs = tf.transpose(self.input, [1, 0, 2])
        inner_cell = BasicLSTMCell(self.unit_lstm)
        outputs, _ = dynamic_rnn(inner_cell, inputs, time_major=True,
                                 dtype=tf.float32)

        if is_training:
            keep_prob = tf.constant(self.drop_rate)
        else:
            keep_prob = tf.constant(1.0)
        outputs = tf.nn.dropout(outputs, keep_prob)

        outputs = tf.transpose(outputs, [1, 0, 2])  # batch, seq, hidden
        # Keep only the output of the last time step.
        output = tf.slice(outputs, [0, self.length - 1, 0],
                          [-1, 1, self.unit_lstm])
        output = tf.squeeze(output, axis=1)
        output = tf.layers.dense(output, self.unit, activation=tf.nn.relu,
                                 name='shared_dense')

    with tf.variable_scope('label_predictor'):
        sbp_dense1 = tf.layers.dense(output, self.unit, activation=tf.nn.relu,
                                     name='sbp_dense1')
        dbp_dense1 = tf.layers.dense(output, self.unit, activation=tf.nn.relu,
                                     name='dbp_dense1')
        sbp_dense = tf.layers.dense(sbp_dense1, self.unit,
                                    activation=tf.nn.relu, name='sbp_dense2')
        dbp_dense = tf.layers.dense(dbp_dense1, self.unit,
                                    activation=tf.nn.relu, name='dbp_dense2')
        sbp_dense = tf.layers.dense(sbp_dense, self.unit,
                                    activation=tf.nn.relu, name='sbp_dense3')
        dbp_dense = tf.layers.dense(dbp_dense, self.unit,
                                    activation=tf.nn.relu, name='dbp_dense3')

        self.sbp = tf.layers.dense(sbp_dense, 1, name='sbp_out')
        self.dbp = tf.layers.dense(dbp_dense, 1, name='dbp_out')

        loss1 = tf.losses.mean_squared_error(self.sbp_label, self.sbp)
        loss2 = tf.losses.mean_squared_error(self.dbp_label, self.dbp)
        self.pred_losses = loss1 + loss2

    with tf.variable_scope('domain_predictor'):
        # Gradient reversal scaled by self.l sits between features and classifier.
        feat = flip_gradient(output, self.l)
        dom = tf.layers.dense(feat, 30, activation=tf.nn.relu, name='domain1')
        dom = tf.layers.dense(dom, self.num_sub, name='domain2')
        self.domain_pred = tf.nn.softmax(dom)
        # The loss op applies softmax internally, so it must receive the raw
        # logits `dom`; feeding `self.domain_pred` would apply softmax twice.
        self.domain_losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=dom, labels=self.domain)
def __init__(self, **kwargs):
    '''Build the embedding + LSTM two-class classifier graph.

    The following keyword arguments are accepted:

    Parameters
    ----------
    vocab_size : int
        Size of the vocabulary for creating embeddings
    embedding_size : int
        Dimensionality of the embedding space
    memory_size : int
        LSTM memory size
    keep_prob : float
        Inverse of dropout percentage for embedding and LSTM
    subsequence_length : int
        Length of the subsequences (all embeddings are padded to this length)
    optimizer : OptimizerSpec
    '''
    # ---- Hyperparameters ----
    vocab_size = kwargs['vocab_size']
    embedding_size = kwargs['embedding_size']
    memory_size = kwargs['memory_size']
    keep_prob = kwargs['keep_prob']
    subsequence_length = kwargs['subsequence_length']
    optimizer_spec = kwargs['optimizer']
    optimizer = optimizer_spec.create()
    self.learning_rate = optimizer_spec.learning_rate
    self.step_counter = optimizer_spec.step_counter

    # ---- Net inputs ----
    self.batch_size = placeholder(tf.int32, shape=[], name='batch_size')
    self.is_training = placeholder(tf.bool, shape=[], name='is_training')
    self.word_ids = placeholder(tf.int32,
                                shape=(None, subsequence_length),
                                name='word_ids')
    self.labels = placeholder(tf.int32, shape=(None, ), name='labels')
    self.hidden_state = placeholder(tf.float32,
                                    shape=(None, memory_size),
                                    name='hidden_state')
    self.cell_state = placeholder(tf.float32,
                                  shape=(None, memory_size),
                                  name='cell_state')
    lengths = sequence_lengths(self.word_ids)

    # ---- Embedding (dropout only while training) ----
    self.embedding_matrix, _bias = get_weights_and_bias(
        (vocab_size, embedding_size))
    embeddings = cond(
        self.is_training,
        lambda: nn.dropout(nn.embedding_lookup(self.embedding_matrix,
                                               self.word_ids),
                           keep_prob=keep_prob),
        lambda: nn.embedding_lookup(self.embedding_matrix, self.word_ids))

    # ---- LSTM layer ----
    cell = BasicLSTMCell(memory_size, activation=tf.nn.tanh)
    # during inference, use entire ensemble
    output_keep_prob = cond(self.is_training,
                            lambda: constant(keep_prob),
                            lambda: constant(1.0))
    cell = DropoutWrapper(cell, output_keep_prob=output_keep_prob)

    # what's the difference to just creating a zero-filled tensor tuple?
    self.zero_state = cell.zero_state(self.batch_size, tf.float32)
    # LSTMStateTuple fields are (c, h): c is the cell state, h the hidden
    # state. Bind each placeholder to the field matching its name.
    state = LSTMStateTuple(c=self.cell_state, h=self.hidden_state)

    # A dynamic rnn can deal with embeddings of different lengths. We do not
    # need to unstack the embedding tensor to get rows, instead we compute the
    # actual sequence lengths and pass that.
    # TODO: open questions from the original author — should the cost be masked
    # so cell outputs for _NOT_A_WORD_ inputs are ignored? Is the final cell
    # state relevant if it was last updated with _NOT_A_WORD_ input?
    outputs, self.state = nn.dynamic_rnn(cell,
                                         embeddings,
                                         sequence_length=lengths,
                                         initial_state=state)

    # Flatten time and memory into one feature axis per example.
    outputs = reshape(concat(outputs, 1),
                      [-1, subsequence_length * memory_size])
    self.outputs = reduce_mean(outputs)

    # ---- Fully connected layer, loss, and training ----
    ff1 = fully_connected(outputs, 2, with_activation=False, use_bias=True)
    loss = reduce_mean(
        nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                    logits=ff1))
    self.train_step = optimizer.minimize(loss, global_step=self.step_counter)
    self.predictions = nn.softmax(ff1)
    correct_prediction = equal(cast(argmax(self.predictions, 1), tf.int32),
                               self.labels)
    self.accuracy = reduce_mean(cast(correct_prediction, tf.float32))

    # ---- Summaries ----
    with tf.variable_scope('summary'):
        self.summary_loss = tf.summary.scalar('loss', loss)
        self.summary_accuracy = tf.summary.scalar('accuracy', self.accuracy)
def Build_Im2txt(kwargs):
    '''Build the IM2TXT (image captioning) graph on top of Inception-ResNet.

    Reads from ``kwargs``: 'State' ('Train' or 'Test'), 'Padded_length' and
    'Model_name'; the whole dict is also forwarded to ``Builder`` and to
    ``Build_Inception_Resnet_v2a``.

    Placeholders, logits and (depending on the state) the loss or the LSTM
    state tensors are published through graph collections prefixed with
    ``kwargs['Model_name']``.

    Returns:
        The string 'Sequence'.
    '''
    # String values must be compared with ==; `is` tests object identity and
    # is only true by accident of string interning.
    is_train = kwargs['State'] == 'Train'
    is_test = kwargs['State'] == 'Test'

    with tf.name_scope('IM2TXT'):
        with Builder(**kwargs) as im2txt_builder:
            # The image input placeholder is created by the feature extractor.
            if is_train:
                input_seq_placeholder = tf.placeholder(tf.int32, shape=[None, kwargs['Padded_length']], name='Input_Seq')
                target_seq_placeholder = tf.placeholder(tf.int32, shape=[None, kwargs['Padded_length']], name='Target_Seq')
            elif is_test:
                # One token per step at test time.
                input_seq_placeholder = tf.placeholder(tf.int32, shape=[None, 1], name='Input_Seq')
                target_seq_placeholder = tf.placeholder(tf.int32, shape=[None, 1], name='Target_Seq')

            mask_placeholder = tf.placeholder(tf.int32, shape=[None, kwargs['Padded_length']], name='Seq_Mask')
            Lstm_state_placeholder = tf.placeholder(tf.float32, shape=[])

            # TODO:
            #   Get input_seq, mask and target seq from reader
            #   Init inception-resnet correctly and attach input from reader to
            #   input_placeholder of inception-resnet
            #   Understand and build deploy state
            #   Separate implementation of loss and construction of network

            # show-and-tell style initializer
            initializer = tf.random_uniform_initializer(minval=-0.08, maxval=0.08)

            # Building feature extractor
            Build_Inception_Resnet_v2a(kwargs)

            # Extracting necessary variables from feature extractor
            with tf.name_scope('Feature_Extractor'):
                inception_output = tf.get_collection(kwargs['Model_name'] + '_Incepout')[0]
                inception_state = tf.get_collection(kwargs['Model_name'] + '_State')[0]
                inception_dropout = tf.get_collection(kwargs['Model_name'] + '_Dropout_prob_ph')[0]

            # Setting control params
            im2txt_builder.control_params(Dropout_control=inception_dropout, State=inception_state)

            # Image and sequence embeddings
            with tf.name_scope('Lstm_Embeddings'):
                image_embeddings = im2txt_builder.FC_layer(inception_output, filters=512)
                image_embeddings_size = tf.shape(image_embeddings)
                embeddings_map = tf.get_variable(name='Map', shape=[40, 512], initializer=initializer)
                seq_embeddings = tf.nn.embedding_lookup(embeddings_map, input_seq_placeholder)

            lstm_cell = im2txt_builder.Lstm_cell_LayerNorm()

            with tf.variable_scope("lstm") as lstm_scope:
                # The image embedding is fed once to produce the initial state.
                zero_state = lstm_cell.zero_state(batch_size=image_embeddings_size[0], dtype=tf.float32)
                _, initial_state = lstm_cell(image_embeddings, zero_state)
                lstm_scope.reuse_variables()

                if is_test:
                    # Single-step inference: the state is fed in and read back
                    # as one concatenated tensor.
                    state_feed = tf.placeholder(dtype=tf.float32, shape=[None, sum(lstm_cell.state_size)], name='State_feed')
                    state_tuple = tf.split(value=state_feed, num_or_size_splits=2, axis=1)
                    lstm_outputs, state_tuple = lstm_cell(inputs=tf.squeeze(seq_embeddings, axis=[1]), state=state_tuple)
                    concat_input = tf.concat(values=initial_state, axis=1)
                    concat_state = tf.concat(values=state_tuple, axis=1)
                elif is_train:
                    sequence_length = tf.reduce_sum(mask_placeholder, 1)
                    lstm_outputs, _ = nn.dynamic_rnn(cell=lstm_cell, inputs=seq_embeddings, sequence_length=sequence_length, initial_state=initial_state, dtype=tf.float32, scope=lstm_scope)

            with tf.name_scope('Lstm_output'):
                lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size])
                logits = im2txt_builder.FC_layer(lstm_outputs, filters=40, readout=True)

            # Target seq and losses
            with tf.name_scope('Lstm_loss'):
                if is_train:
                    targets = tf.reshape(target_seq_placeholder, [-1])  # flattening target seqs
                    weights = tf.to_float(tf.reshape(mask_placeholder, [-1]))
                    with tf.name_scope('Softmax_CE_loss'):
                        seq_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits)
                        # Masked mean; the denominator is clamped to avoid dividing by zero.
                        batch_loss = tf.div(tf.reduce_sum(tf.multiply(seq_loss, weights)), tf.maximum(tf.reduce_sum(weights), 1))

            tf.add_to_collection(kwargs['Model_name'] + '_Input_seq_ph', input_seq_placeholder)
            tf.add_to_collection(kwargs['Model_name'] + '_Output_ph', target_seq_placeholder)
            tf.add_to_collection(kwargs['Model_name'] + '_Mask_ph', mask_placeholder)
            tf.add_to_collection(kwargs['Model_name'] + '_Output', logits)

            if is_test:
                tf.add_to_collection(kwargs['Model_name'] + '_Initial_state', concat_input)
                tf.add_to_collection(kwargs['Model_name'] + '_Lstm_state_feed', state_feed)
                tf.add_to_collection(kwargs['Model_name'] + '_Lstm_state', concat_state)
            elif is_train:
                tf.add_to_collection(kwargs['Model_name'] + '_Loss', batch_loss)

            # Test output next
            return 'Sequence'
def fit(self, X, y, num_epochs, embedding_dims, V, K, hidden_dims, lr,
        beta1=0.95, beta2=0.95, batch_sz=32):
    """Train an embedding + LSTM sequence tagger and plot the cost curves.

    Args:
        X: list of input id sequences.
        y: list of target id sequences, aligned with ``X``.
        num_epochs: number of passes over the training split.
        embedding_dims: embedding size.
        V: number of rows in the embedding matrix.
        K: number of output classes.
        hidden_dims: LSTM size.
        lr: Adam learning rate.
        beta1, beta2: Adam moment decay rates.
        batch_sz: mini-batch size.

    Side effects:
        Sets ``self.tf_X``, ``self.tf_y`` and ``self.predictions``, runs
        training in ``self.session``, prints progress and shows a plot.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.25)

    # Pad each split to its own longest sequence.
    len_t = max(len(x) for x in X_train)
    len_v = max(len(x) for x in X_valid)
    X_train = pad_sequences(X_train, len_t)
    y_train = pad_sequences(y_train, len_t)
    X_valid = pad_sequences(X_valid, len_v)
    y_valid = pad_sequences(y_valid, len_v)

    We = np.random.randn(V, embedding_dims)
    Wo = np.random.randn(hidden_dims, K)
    bo = np.zeros(K)
    N = X_train.shape[0]

    tf_X = tf.placeholder(tf.int32, (None, None))
    tf_y = tf.placeholder(tf.int32, (None, None))
    self.tf_X = tf_X
    self.tf_y = tf_y

    tf_We = tf.Variable(We, dtype=tf.float32)
    tf_Wo = tf.Variable(Wo, dtype=tf.float32)
    tf_bo = tf.Variable(bo, dtype=tf.float32)

    emb = tf.nn.embedding_lookup(tf_We, tf_X)
    cell = LSTMCell(hidden_dims, activation=tf.nn.relu)
    outputs, states = dynamic_rnn(cell, emb, dtype=tf.float32)

    # Flatten (batch, time) so one matmul scores every position.
    outputs = tf.reshape(
        outputs, (tf.shape(tf_X)[0] * tf.shape(tf_X)[1], hidden_dims))
    logits = tf.matmul(outputs, tf_Wo) + tf_bo
    labels = tf.reshape(tf_y, [-1])
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                       logits=logits))
    train_op = tf.train.AdamOptimizer(learning_rate=lr,
                                      beta1=beta1,
                                      beta2=beta2).minimize(cost)

    predictions = tf.argmax(logits, 1)
    predictions = tf.reshape(predictions,
                             (tf.shape(tf_X)[0], tf.shape(tf_X)[1]))
    self.predictions = predictions

    num_batches = N // batch_sz
    t_costs = []
    v_costs = []

    init = tf.global_variables_initializer()
    self.session.run(init)

    for epoch in range(num_epochs):
        t0 = datetime.now()
        X_train, y_train = shuffle(X_train, y_train)
        t_cost = 0
        v_cost = 0
        # Defined up front so the epoch summary below cannot hit an unbound
        # name when no evaluation step runs (e.g. fewer samples than batch_sz).
        t_acc = float('nan')
        v_acc = float('nan')

        for i in range(num_batches):
            X_batch = X_train[i * batch_sz:(i + 1) * batch_sz]
            y_batch = y_train[i * batch_sz:(i + 1) * batch_sz]
            len_b = max(len(x) for x in X_batch)
            X_batch = pad_sequences(X_batch, len_b)
            y_batch = pad_sequences(y_batch, len_b)

            self.session.run(train_op,
                             feed_dict={
                                 tf_X: X_batch,
                                 tf_y: y_batch
                             })

            # Every 100 batches, evaluate on the full train and validation splits.
            if i % 100 == 0:
                t_c, t_pred = self.session.run((cost, predictions),
                                               feed_dict={
                                                   tf_X: X_train,
                                                   tf_y: y_train
                                               })
                t_cost += t_c
                t_acc = accuracy(t_pred, y_train)

                v_c, v_pred = self.session.run((cost, predictions),
                                               feed_dict={
                                                   tf_X: X_valid,
                                                   tf_y: y_valid
                                               })
                v_cost += v_c
                v_acc = accuracy(v_pred, y_valid)

        print('train cost: %f, train accuracy: %f' % (t_cost, t_acc))
        print('valid cost: %f, valid accuracy: %f' % (v_cost, v_acc))
        t_costs.append(t_cost)
        v_costs.append(v_cost)
        print('Epoch completed in %s' % (datetime.now() - t0))

    plt.plot(t_costs)
    plt.plot(v_costs)
    plt.show()
def construct(self):
    """Assemble the encoder/decoder graph and store its tensors on ``self``.

    Creates the input placeholders, runs the encoder with ``dynamic_rnn``,
    and builds two decoders that share a cell and a projection layer: a
    training decoder driven by the expected outputs and an inference
    decoder that feeds each projected output back in as the next input.

    Results are exposed as ``self.enc_state``, ``self.enc_state_centre``,
    ``self.training_decoder_output`` and ``self.inference_decoder_output``.
    """
    self.saved_session_name = os.path.join(self.tmp_folder, self.uuid_code)

    # --- placeholders ------------------------------------------------------
    self.input_data = tf.placeholder(
        dtype=tf.float32, shape=[None, None, self.input_dim])
    self.output_data = tf.placeholder(
        dtype=tf.float32, shape=[None, None, self.output_dim])
    self.start_tokens = tf.placeholder(
        dtype=tf.float32, shape=[None, self.output_dim])
    self.go_tokens = tf.placeholder(
        dtype=tf.float32, shape=[None, 1, self.output_dim])
    self.sequence_length = tf.placeholder(dtype=tf.int32, shape=[None])
    self.mask = tf.placeholder(dtype=tf.float32, shape=[None, None])
    self.target_sequence_length = tf.placeholder(
        dtype=tf.int32, shape=(None, ), name='target_sequence_length')
    self.max_target_sequence_length = tf.reduce_max(
        self.target_sequence_length, name='max_target_len')
    self.source_sequence_length = tf.placeholder(
        dtype=tf.int32, shape=(None, ), name='source_sequence_length')

    # Constant blocks filled with the stop token, one per side.
    self.x_stopping = np.full(
        shape=(self.stop_pad_length, self.input_dim),
        fill_value=self.stop_pad_token,
        dtype=np.float32)
    self.y_stopping = np.full(
        shape=(self.stop_pad_length, self.output_dim),
        fill_value=self.stop_pad_token,
        dtype=np.float32)

    self.learning_rate = tf.placeholder(dtype=tf.float32)
    self.batch_size = tf.placeholder(dtype=tf.float32)

    # --- encoder -----------------------------------------------------------
    encoder_cell = make_cell(self.layer_sizes, self.keep_prob)

    # The stop padding is counted as part of every sequence, on both sides:
    # the encoder learns that the stop token marks the end of the input,
    # the decoder learns to emit it to match the expected output, and at
    # inference time the emitted stop token tells us where to cut.
    self.source_sequence_length_padded = \
        self.source_sequence_length + self.stop_pad_length
    self.target_sequence_length_padded = \
        self.target_sequence_length + self.stop_pad_length
    decode_step_limit = self.max_target_sequence_length + self.stop_pad_length

    encoder_result = dynamic_rnn(
        encoder_cell,
        self.input_data,
        sequence_length=self.source_sequence_length_padded,
        dtype=tf.float32,
        time_major=False,
        swap_memory=True)
    self.enc_state = encoder_result[1]
    self.enc_state_centre = self.enc_state[-1]

    # --- decoder cell ------------------------------------------------------
    # In symmetric mode the decoder mirrors the encoder: layer sizes and
    # the per-layer encoder states are both reversed.
    if self.symmetric:
        self.enc_state = self.enc_state[::-1]
        decoder_layer_sizes = self.layer_sizes[::-1]
    else:
        decoder_layer_sizes = self.layer_sizes
    decoder_cell = make_cell(decoder_layer_sizes, self.keep_prob)

    # Dense layer mapping the decoder output of each time step to a vector
    # of size output_dim.
    output_projection = tf.layers.Dense(
        units=self.output_dim,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                           stddev=0.1))

    # --- training decoder --------------------------------------------------
    with tf.variable_scope("decode"):
        # The helper reads the go token followed by the expected outputs.
        # (In PREDICT mode there is no output data, hence no training model.)
        teacher_inputs = tf.concat([self.go_tokens, self.output_data], 1)
        train_helper = TrainingHelper(
            inputs=teacher_inputs,
            sequence_length=self.target_sequence_length_padded,
            time_major=False)
        train_decoder = BasicDecoder(
            dec_cell := decoder_cell,
            train_helper,
            self.enc_state,
            output_projection) if False else BasicDecoder(
                decoder_cell, train_helper, self.enc_state, output_projection)
        # impute_finished=True because sequences have different lengths and
        # therefore different finishing points.
        train_decode_result = dynamic_decode(
            train_decoder,
            impute_finished=True,
            maximum_iterations=decode_step_limit)
        self.training_decoder_output = train_decode_result[0]

    # --- inference decoder -------------------------------------------------
    # Reuses the parameters created for the training decoder.
    with tf.variable_scope("decode", reuse=True):

        def end_fn(time_step_value):
            # Ideally decoding would stop once the produced value equals
            # the modelled stop token, i.e.
            #   tf.reduce_all(tf.equal(time_step_value, self.y_stopping))
            # but a trained model never reproduces that token exactly.
            # With an embedding layer the stop token could be learned;
            # without one there is no early stop, so always return False.
            return False

        def passthrough_sample(step_output):
            # The sampled value is the projected output itself.
            return step_output

        infer_helper = InferenceHelper(
            sample_fn=passthrough_sample,
            sample_shape=[self.output_dim],
            sample_dtype=dtypes.float32,
            start_inputs=self.start_tokens,
            end_fn=end_fn)
        infer_decoder = BasicDecoder(
            decoder_cell, infer_helper, self.enc_state, output_projection)
        # impute_finished=True for the same variable-length reason as above.
        infer_decode_result = dynamic_decode(
            infer_decoder,
            impute_finished=True,
            maximum_iterations=decode_step_limit)
        self.inference_decoder_output = infer_decode_result[0]
# Minimal encoder/decoder graph: two LSTMs over a shared embedding table,
# with the decoder started from the encoder's final state.

# tf.reset_default_graph()

# Integer token-id placeholders with both dimensions dynamic. Each one has
# its own graph name so the three inputs stay distinguishable when the
# graph is inspected or tensors are looked up by name.
encode_input = tf.placeholder(shape=[None, None],
                              dtype=tf.int32,
                              name='encode_input')
decode_target = tf.placeholder(shape=[None, None],
                               dtype=tf.int32,
                               name='decode_target')
decode_input = tf.placeholder(shape=[None, None],
                              dtype=tf.int32,
                              name='decode_input')

# Embedding table: the first dimension is the number of symbols, the second
# is the embedding size.
embedding = tf.Variable(tf.random_uniform([4, 10], -1.0, 1.0),
                        dtype=tf.float32)
encode_embedding = tf.nn.embedding_lookup(embedding, encode_input)
decode_embedding = tf.nn.embedding_lookup(embedding, decode_input)

# Encoder: only its final state is used further down.
lstm_cell = LSTMCell(4)
outputs, states = dynamic_rnn(lstm_cell, encode_embedding, dtype=tf.float32)
print('states is ', states)
# y=tf.unstack(y,4,1)/

# Decoder: a separate cell under its own scope, initialised with the
# encoder's final state. Its 4-unit outputs are used directly as logits.
lstm_cell2 = LSTMCell(num_units=4)
logit, states2 = dynamic_rnn(lstm_cell2,
                             decode_embedding,
                             dtype=tf.float32,
                             initial_state=states,
                             scope='decode_output')
print('2')

# NOTE(review): `y_target` is not defined in this part of the file, while
# the `decode_target` placeholder above is never used -- confirm whether
# the one-hot labels are meant to be built from `decode_target`.
la = tf.one_hot(y_target, depth=4, dtype=tf.float32)
print(la)
pre = tf.nn.softmax(logit)
print('logit is ', logit)
print('pre is ', pre)
def _build_seq_graph(self):
    """Create the sli_rec section of the model.

    Combines a long-term feature (attention over the item+category
    history) with a short-term feature (attention over Time4LSTM outputs)
    through a learned sigmoid gate, then appends the target item
    embedding.

    Returns:
        obj: the output tensor of the sli_rec section.
    """
    hp = self.hparams
    with tf.variable_scope("sli_rec"):
        history = tf.concat(
            [self.item_history_embedding, self.cate_history_embedding], 2)
        self.mask = self.iterator.mask
        self.sequence_length = tf.reduce_sum(self.mask, 1)

        # Long-term feature: attention over the whole history.
        with tf.variable_scope("long_term_asvd"):
            long_term_att = self._attention(history, hp.attention_size)
            long_term_fea = tf.reduce_sum(long_term_att, 1)
            tf.summary.histogram("att_fea1", long_term_fea)

        # Append the two time features to the item embeddings, one at a
        # time, as extra trailing channels.
        since_first = tf.expand_dims(
            self.iterator.time_from_first_action, -1)
        timed_history = tf.concat(
            [self.item_history_embedding, since_first], -1)
        until_now = tf.expand_dims(self.iterator.time_to_now, -1)
        timed_history = tf.concat([timed_history, until_now], -1)

        # Short-term feature: recurrent pass over the time-augmented history.
        with tf.variable_scope("rnn"):
            lstm_outputs, final_state = dynamic_rnn(
                Time4LSTMCell(hp.hidden_size),
                inputs=timed_history,
                sequence_length=self.sequence_length,
                dtype=tf.float32,
                scope="time4lstm",
            )
            tf.summary.histogram("LSTM_outputs", lstm_outputs)

        with tf.variable_scope("attention_fcn"):
            short_term_att = self._attention_fcn(
                self.target_item_embedding, lstm_outputs)
            short_term_fea = tf.reduce_sum(short_term_att, 1)
            tf.summary.histogram("att_fea2", short_term_fea)

        # Ensemble: a sigmoid gate blends the long- and short-term features.
        with tf.name_scope("alpha"):
            latest_gap = tf.expand_dims(
                self.iterator.time_to_now[:, -1], -1)
            gate_input = tf.concat(
                [
                    self.target_item_embedding,
                    long_term_fea,
                    short_term_fea,
                    latest_gap,
                ],
                1,
            )
            gate_logit = self._fcn_net(
                gate_input, hp.att_fcn_layer_sizes, scope="fcn_alpha")
            gate = tf.sigmoid(gate_logit)
            user_embed = long_term_fea * gate + short_term_fea * (1.0 - gate)

        sli_rec_output = tf.concat(
            [user_embed, self.target_item_embedding], 1)
        tf.summary.histogram("model_output", sli_rec_output)
        return sli_rec_output