def create_cell():
    """Build a stacked LSTM cell, optionally wrapped with dropout.

    Reads ``self``, ``hidden_size`` and ``num_layer`` from the enclosing
    scope.  ``num_layer`` fresh ``BasicLSTMCell(hidden_size)`` instances are
    stacked in a ``MultiRNNCell``; when ``self.dropout_keep_prob`` is below
    1.0 the whole stack is wrapped in a ``DropoutWrapper`` that applies the
    same keep probability to its inputs and outputs.

    Returns:
        The (possibly dropout-wrapped) multi-layer cell.
    """
    use_dropout = self.dropout_keep_prob < 1.0

    # One independent cell object per layer.
    layers = [BasicLSTMCell(hidden_size) for _ in range(num_layer)]
    stacked = MultiRNNCell(layers)

    if not use_dropout:
        return stacked

    return DropoutWrapper(
        stacked,
        input_keep_prob=self.dropout_keep_prob,
        output_keep_prob=self.dropout_keep_prob)
def __init__(self, num_symbols, num_embed_units, num_units, num_labels, embed,
             learning_rate=0.001, max_gradient_norm=5.0):
    """Build the graph of a two-level (word -> sentence) LSTM classifier.

    A bidirectional LSTM runs over the words of every sentence, the word
    outputs are max-pooled over the word axis, and the pooled sentence
    vectors are fed as ONE sequence (batch of size 1) to a second LSTM whose
    outputs are classified with a dense layer.

    Args:
        num_symbols: Not referenced in this constructor.
        num_embed_units: Not referenced in this constructor.
        num_units: Number of units of every LSTM cell.
        num_labels: Number of output classes of the dense layer.
        embed: Initial value of the embedding table.
        learning_rate: Initial value of the (non-trainable) learning-rate
            variable.
        max_gradient_norm: Global-norm threshold used for gradient clipping.
    """
    self.texts = tf.placeholder(tf.int32, [None, None])  # shape: sentence*max_word
    self.text_length = tf.placeholder(tf.int32, [None])  # shape: sentence
    self.labels = tf.placeholder(tf.int32, [None])  # shape: sentence
    # Scalar keep probability shared by all three dropout wrappers below.
    self.keep_prob = tf.placeholder(tf.float32)

    self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
    self.global_step = tf.Variable(0, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    # Running this op increments the epoch counter by one.
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)

    # build the embedding table (index to vector)
    self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)
    self.embed_inputs = tf.nn.embedding_lookup(self.embed, self.texts)  # shape: sentence*max_word*num_embed_units

    # Word level: bidirectional LSTM over the words of each sentence.
    fw_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
    bw_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
    middle_outputs, middle_states = bidirectional_dynamic_rnn(fw_cell, bw_cell, self.embed_inputs, self.text_length, dtype=tf.float32, scope="word_rnn")
    middle_outputs = tf.concat(middle_outputs, 2)  # shape: sentence*max_word*(2*num_units)
    # Max-pool over the word axis, then add a leading axis so the sentences
    # become the time steps of a single sequence.
    middle_inputs = tf.expand_dims(tf.reduce_max(middle_outputs, axis=1), 0)  # shape: 1*sentence*(2*num_units)

    # Sentence level: unidirectional LSTM over the pooled sentence vectors.
    top_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
    outputs, states = dynamic_rnn(top_cell, middle_inputs, dtype=tf.float32, scope="sentence_rnn")
    self.outputs = outputs[0]  # shape: sentence*num_units
    logits = tf.layers.dense(self.outputs, num_labels)

    # self.loss is the SUM of the per-sentence losses; the mean is what gets
    # differentiated below.
    self.loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=logits), name='loss')
    mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
    self.predict_labels = tf.argmax(logits, 1, 'predict_labels', output_type=tf.int32)
    # NOTE: despite the name this is the COUNT of correct predictions
    # (reduce_sum over an int32 cast), not a ratio.
    self.accuracy = tf.reduce_sum(tf.cast(tf.equal(self.labels, self.predict_labels), tf.int32), name='accuracy')

    self.params = tf.trainable_variables()

    # calculate the gradient of parameters
    opt = tf.train.AdamOptimizer(self.learning_rate)
    gradients = tf.gradients(mean_loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
    self.update = opt.apply_gradients(zip(clipped_gradients, self.params), global_step=self.global_step)

    self.saver = tf.train.Saver(max_to_keep=3, pad_step_number=True)
def model(input, vocab_size):
    """Build an embedding -> LSTM -> dense text classifier graph.

    Reads ``embedding_size``, ``max_document_length`` and ``num_classes``
    from the enclosing module scope.

    Args:
        input: Tensor of word indices that is looked up in the embedding
            table and unstacked along axis 1 into ``max_document_length``
            time steps.
        vocab_size: Number of rows of the embedding table.

    Returns:
        A pair ``(predicted_labels, [embeddings, embedded, lstm_cell,
        logits])``: the arg-max class index per example, followed by the
        intermediate objects in that order.
    """
    # Randomly initialised word-embedding matrix of shape
    # [vocab_size, embedding_size].
    embedding_table = tf.get_variable(
        "embeddings",
        [vocab_size, embedding_size],
        initializer=tf.truncated_normal_initializer)
    word_vectors = tf.nn.embedding_lookup(embedding_table, input)

    # static_rnn wants a Python list with one tensor per time step.
    time_steps = tf.unstack(
        word_vectors, max_document_length, axis=1, name="rnn-input")

    # The LSTM itself.
    cell = BasicLSTMCell(20, forget_bias=1.0)
    step_outputs, _ = static_rnn(cell, time_steps, dtype=tf.float32)

    # Classify from the output of the last time step.
    last_output = step_outputs[-1]
    class_scores = tf.layers.dense(last_output, num_classes)
    predictions = tf.argmax(class_scores, axis=1)

    intermediates = [embedding_table, word_vectors, cell, class_scores]
    return predictions, intermediates
def __init__(self, feature_size, max_video_length, num_classes, cell_size, use_lstm,
             learning_rate, learning_rate_decay_factor, min_learning_rate,
             training_steps_per_epoch, max_gradient_norm, keep_prob=0.5,
             is_training=False):
    """Build a recurrent video classifier graph.

    Frame features are run through a GRU (or LSTM) with ``dynamic_rnn``; the
    final state is passed through ReLU (and dropout while training) and fed
    to a fully connected classification layer.

    Args:
        feature_size: Size of one frame feature vector.
        max_video_length: Number of time steps of the input placeholder.
        num_classes: Number of classes.  ``2`` selects a single-logit
            sigmoid head, anything else a softmax head.
        cell_size: Number of units of the recurrent cell.
        use_lstm: Use ``BasicLSTMCell`` (concatenated state) instead of
            ``GRUCell``.
        learning_rate: Initial learning rate of the exponential decay.
        learning_rate_decay_factor: Decay rate of the exponential decay.
        min_learning_rate: Lower bound of the decayed learning rate.
        training_steps_per_epoch: Decay steps of the exponential decay.
        max_gradient_norm: Global-norm threshold for gradient clipping.
        keep_prob: Dropout keep probability, applied only when training.
        is_training: Build loss / train op (True) or prediction op (False).

    Attributes created:
        frame_feature_ph, video_length_ph, video_label_ph: input
        placeholders.  ``logit`` (binary) or ``logits`` (multi-class).
        ``loss`` and ``train_op`` when training, ``prediction`` otherwise.
        ``saver`` always.
    """
    self.frame_feature_ph = tf.placeholder(tf.float32, [None, max_video_length, feature_size])
    self.video_length_ph = tf.placeholder(tf.int32, [None])
    self.video_label_ph = tf.placeholder(tf.int32, [None])

    if is_training:
        self.global_step = tf.Variable(0, trainable=False)
        # Staircase exponential decay, clamped from below.
        self.learning_rate = tf.maximum(
            tf.train.exponential_decay(
                learning_rate,
                self.global_step,
                training_steps_per_epoch,
                learning_rate_decay_factor,
                staircase=True),
            min_learning_rate)

    # Make RNN cells
    cell = GRUCell(cell_size)
    if use_lstm:
        # state_is_tuple=False keeps the final state a single tensor so the
        # ReLU / dropout / dense layers below can consume it directly.
        cell = BasicLSTMCell(cell_size, state_is_tuple=False)

    # RNN
    with tf.variable_scope('DynamicRNN'):
        outputs, state = dynamic_rnn(cell=cell,
                                     inputs=self.frame_feature_ph,
                                     sequence_length=self.video_length_ph,
                                     dtype=tf.float32)
        state = tf.nn.relu(state)
        if is_training:
            state = tf.nn.dropout(state, keep_prob=keep_prob)

    if num_classes == 2:
        with tf.variable_scope('Classification'):
            logit = tf.contrib.layers.fully_connected(
                inputs=state, num_outputs=1, activation_fn=None)  # [batch_size, 1]
            self.logit = tf.squeeze(logit)  # [batch_size]
        if is_training:
            video_label = tf.cast(x=self.video_label_ph, dtype=tf.float32)
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=video_label,
                                                        logits=self.logit))
        else:
            # NOTE(review): this thresholds the raw, unsqueezed logit
            # ([batch_size, 1]) at 0.5; a sigmoid probability of 0.5
            # corresponds to a logit of 0.  Left unchanged because callers
            # may depend on the current output -- confirm the intent.
            self.prediction = tf.cast(tf.greater(x=logit, y=0.5), tf.int32)
    else:
        with tf.variable_scope('Classification'):
            self.logits = tf.contrib.layers.fully_connected(
                inputs=state, num_outputs=num_classes,
                activation_fn=None)  # [batch_size, num_classes]
        if is_training:
            self.loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.video_label_ph, logits=self.logits))
        else:
            # Fixed: the original referenced an undefined local `logits`,
            # which raised NameError whenever a multi-class model was built
            # for inference.
            self.prediction = tf.argmax(self.logits, 1)

    if is_training:
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def __init__(self, state_space_size, action_space_size, scope, trainer):
    """Build a recurrent actor-critic network inside variable scope ``scope``.

    The rows fed to ``self.inputs`` are treated as the consecutive time steps
    of ONE sequence (batch size 1), which matches the ``[1, units]`` LSTM
    state placeholders.

    Args:
        state_space_size: Width of one observation row.
        action_space_size: Number of discrete actions (policy output width).
        scope: Variable scope name.  Any value other than ``'global'`` also
            builds the loss, the local gradients and the op that applies
            them to the variables collected under ``'global'``.
        trainer: Optimizer whose ``apply_gradients`` is used for
            ``self.apply_grads``.

    Attributes created:
        inputs, state_in, state_init, state_out, policy, value; and for
        non-global scopes: actions, actions_onehot, target_v, advantages,
        responsible_outputs, value_loss, entropy, policy_loss, loss,
        gradients, var_norms, grad_norms, apply_grads.
    """
    lstm_units = 256

    with tf.variable_scope(scope):
        # Input
        self.inputs = tf.placeholder(shape=[None, state_space_size], dtype=tf.float32)

        # Recurrent network for temporal dependencies
        lstm_cell = BasicLSTMCell(lstm_units, state_is_tuple=True)

        # Fixed: np.zeros_like((1, n)) treats the tuple as DATA and returns a
        # shape-(2,) array; the zero state must have shape (1, n) to be
        # feedable into the placeholders below.
        c_init = np.zeros((1, lstm_cell.state_size.c), dtype=np.float32)
        h_init = np.zeros((1, lstm_cell.state_size.h), dtype=np.float32)
        self.state_init = [c_init, h_init]

        c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
        h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
        self.state_in = (c_in, h_in)
        state_in = LSTMStateTuple(c_in, h_in)

        # Fixed: dynamic_rnn needs [batch, time, depth] inputs and a
        # [batch]-shaped sequence_length.  Add the batch axis of size 1 and
        # pass the number of rows as a length-1 vector instead of a scalar.
        rnn_in = tf.expand_dims(self.inputs, 0)
        step_size = tf.shape(self.inputs)[:1]
        lstm_outputs, lstm_state = tf.nn.dynamic_rnn(lstm_cell,
                                                     rnn_in,
                                                     initial_state=state_in,
                                                     sequence_length=step_size,
                                                     time_major=False)
        lstm_c, lstm_h = lstm_state
        self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
        # Back to one row per time step.
        rnn_out = tf.reshape(lstm_outputs, [-1, lstm_units])

        # Output layers for policy and value estimations
        self.policy = slim.fully_connected(
            rnn_out, action_space_size,
            activation_fn=tf.nn.softmax,
            weights_initializer=normalized_columns_initializer(0.01),
            biases_initializer=None)
        self.value = slim.fully_connected(
            rnn_out, 1,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(1.0),
            biases_initializer=None)

        # Only the worker network need ops for loss functions and gradient updating.
        if scope != 'global':
            self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
            self.actions_onehot = tf.one_hot(self.actions, action_space_size, dtype=tf.float32)
            self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
            self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

            # Probability the policy assigned to the action actually taken.
            self.responsible_outputs = tf.reduce_sum(self.policy * self.actions_onehot, [1])

            # Loss functions
            self.value_loss = 0.5 * tf.reduce_sum(
                tf.square(self.target_v - tf.reshape(self.value, [-1])))
            self.entropy = - tf.reduce_sum(self.policy * tf.log(self.policy))
            self.policy_loss = -tf.reduce_sum(
                tf.log(self.responsible_outputs) * self.advantages)
            self.loss = 0.5 * self.value_loss + self.policy_loss - self.entropy * 0.01

            # Get gradients from local network using local losses
            local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(self.gradients, 40.0)

            # Apply local gradients to global network
            global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))
def __call__(self, img_ph, location_network, retina_sensor, glimpse_network):
    """Unroll the core LSTM, choosing a new glimpse location at every step.

    Args:
        img_ph: Image tensor handed to ``retina_sensor``.
        location_network: Callable mapping a hidden state to
            ``(location, mean)``.
        retina_sensor: Callable ``(image, location) -> patches``.
        glimpse_network: Callable ``(patches, location) -> glimpse``.

    Returns:
        ``(loc_ts, mean_ts, h_ts)``: the locations and means collected by
        the loop function, and the per-step outputs of ``rnn_decoder``.
    """
    core_cell = BasicLSTMCell(self.hidden_size)

    sampled_locs = []
    loc_means = []

    def next_glimpse(prev_hidden, _):
        """Turn the previous hidden state into the next glimpse input."""
        # previous hidden state -> location -> patches -> glimpse
        location, mean = location_network(prev_hidden)
        sampled_locs.append(location)
        mean_list = loc_means
        mean_list.append(mean)
        patches = retina_sensor(img_ph, location)
        return glimpse_network(patches, location)

    # Zero initial LSTM state.
    start_state = core_cell.zero_state(self.batch_size, tf.float32)

    # The first glimpse is taken at a uniformly random location.
    start_loc = tf.random_uniform(
        (self.batch_size, self.loc_dim), minval=-1, maxval=1)
    start_patches = retina_sensor(img_ph, start_loc)
    first_glimpse = glimpse_network(start_patches, start_loc)

    # Only the first input is real; the remaining slots are placeholders
    # given to rnn_decoder alongside the loop function.
    decoder_inputs = [first_glimpse] + [0] * self.num_glimpses

    hidden_states, _ = rnn_decoder(
        decoder_inputs, start_state, core_cell, loop_function=next_glimpse)
    return sampled_locs, loc_means, hidden_states
def __init__(self, frame_feature_ph, num_classes, cell_size, use_lstm=False):
    """Build a per-frame and per-video recurrent classifier graph.

    Args:
        frame_feature_ph: Input tensor fed to ``dynamic_rnn``.
        num_classes: Output width of the classification layer.
        cell_size: Number of units of the recurrent cell.
        use_lstm: Use ``BasicLSTMCell`` instead of ``GRUCell``.

    Attributes created:
        frame_feature_ph: the input tensor.
        logit: softmax of the per-step scores averaged over axis 1.
        node: arg-max over axis 2 of the per-step softmax.
        prediction: arg-max over axis 1 of ``logit``.
        saver: ``tf.train.Saver`` over all global variables.
    """
    self.frame_feature_ph = frame_feature_ph

    # A GRU cell is always created; it is replaced when an LSTM is requested.
    rnn_cell = GRUCell(cell_size)
    if use_lstm:
        rnn_cell = BasicLSTMCell(cell_size, state_is_tuple=False)

    with tf.variable_scope('DynamicRNN'):
        step_outputs, _ = dynamic_rnn(cell=rnn_cell,
                                      inputs=self.frame_feature_ph,
                                      dtype=tf.float32)
        step_outputs = tf.nn.relu(step_outputs)

    with tf.variable_scope('Classification'):
        step_scores = tf.contrib.layers.fully_connected(
            inputs=step_outputs,
            num_outputs=num_classes,
            activation_fn=None)
        step_probs = tf.nn.softmax(step_scores)
        # Video-level distribution: average the raw scores over axis 1,
        # then normalise.
        mean_scores = tf.reduce_mean(step_scores, 1)
        self.logit = tf.nn.softmax(mean_scores)
        self.node = tf.argmax(step_probs, 2)
        self.prediction = tf.argmax(self.logit, 1)

    all_variables = tf.global_variables()
    self.saver = tf.train.Saver(all_variables, max_to_keep=99999999)
def __init__(self, img_width, img_height, nb_locations, glimpse_width, glimpse_height, g_size, l_size,
             glimpse_output_size, loc_dim, time_dim, variance, cell_size, nb_glimpses, nb_classes,
             learning_rate, learning_rate_decay_factor, min_learning_rate, nb_training_batch,
             max_gradient_norm, is_training=False):
    """Build a recurrent attention classifier that attends to several locations.

    An LSTM is unrolled with ``rnn_decoder``; after the first (random)
    glimpse, every step asks ``LocationNetwork`` for new coordinates from the
    previous output and feeds the resulting glimpse back in.  The last output
    is classified; when ``is_training`` a hybrid REINFORCE + cross-entropy +
    baseline loss and an Adam train op are added.

    Args:
        img_width, img_height: Spatial size of the image placeholder.
        nb_locations, glimpse_width, glimpse_height, g_size, l_size,
            glimpse_output_size: Forwarded to ``GlimpseNetwork``.
        loc_dim, time_dim: Combined into the location sizes passed to
            ``GlimpseNetwork`` and ``LocationNetwork``.
        variance: Forwarded to ``LocationNetwork`` and ``_log_likelihood``.
        cell_size: Number of LSTM units.
        nb_glimpses: Number of decoder steps after the initial glimpse.
        nb_classes: Number of classes.
        learning_rate, learning_rate_decay_factor, min_learning_rate,
            nb_training_batch: Exponential-decay schedule parameters.
        max_gradient_norm: Global-norm threshold for gradient clipping.
        is_training: Sample locations and build the loss / train op.
    """
    self.img_ph = tf.placeholder(tf.float32, [None, img_height, img_width])
    self.lbl_ph = tf.placeholder(tf.int64, [None])

    self.global_step = tf.Variable(0, trainable=False)
    # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / training_batch_num)
    self.learning_rate = tf.maximum(tf.train.exponential_decay(
        learning_rate, self.global_step,
        nb_training_batch,  # batch number
        learning_rate_decay_factor,
        # If the argument staircase is True,
        # then global_step / decay_steps is an integer division
        # and the decayed learning rate follows a staircase function.
        staircase=True),
        min_learning_rate)

    cell = BasicLSTMCell(cell_size)

    with tf.variable_scope('GlimpseNetwork'):
        glimpse_network = GlimpseNetwork(img_width, img_height, glimpse_width, glimpse_height, loc_dim+time_dim, g_size, l_size, glimpse_output_size, nb_locations)
    with tf.variable_scope('LocationNetwork'):
        location_network = LocationNetwork(loc_dim=loc_dim*nb_locations+time_dim,
                                           rnn_output_size=cell.output_size,  # cell_size
                                           variance=variance,
                                           is_sampling=is_training)
    # with tf.variable_scope('CNN'):
    #     cnn = CNN(nb_locations, glimpse_output_size)
    # with tf.variable_scope('CDD'):
    #     cdd = CDD(glimpse_height, nb_locations*glimpse_output_size)

    # Core Network
    batch_size = tf.shape(self.img_ph)[0]
    # Three random starting locations plus one random "time" coordinate.
    init_loc_1 = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
    init_loc_2 = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
    init_loc_3 = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
    init_t = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)  # shape: (batch_size, loc_dim), range: [-1,1)
    init_state = cell.zero_state(batch_size, tf.float32)

    self.init_glimpse = glimpse_network(self.img_ph, init_loc_1, init_loc_2, init_loc_3, init_t)
    # self.init_glimpse_cooperate = cnn(self.init_glimpse)
    # self.imgs_ph, self.imgs_ph_re, self.h_fc1, self.conv_2d_1st, self.conv_2d_2nd, self.conv_2d_flat = cdd(self.init_glimpse)
    # Only the first decoder input is real; the rest are dummies handed to
    # rnn_decoder together with loop_function.
    rnn_inputs = [self.init_glimpse]
    rnn_inputs.extend([0] * nb_glimpses)

    locs, loc_means = [], []

    def loop_function(prev, _):
        # Ask the location network for new coordinates, record them, and
        # return the glimpse taken there as the next decoder input.
        loc, loc_mean = location_network(prev)
        locs.append(loc)
        loc_means.append(loc_mean)
        # Columns 0..3 of `loc` are passed as four separate [batch, 1]
        # arguments, i.e. this slicing expects at least four columns.
        glimpse = glimpse_network(self.img_ph, tf.reshape(loc[:,0],[-1,1]), tf.reshape(loc[:, 1], [-1, 1]), tf.reshape(loc[:, 2], [-1, 1]), tf.reshape(loc[:, 3], [-1, 1]))
        # glimpse_cooperate = cnn(glimpse)
        return glimpse

    rnn_outputs, _ = rnn_decoder(rnn_inputs, init_state, cell, loop_function=loop_function)

    # Time independent baselines
    with tf.variable_scope('Baseline'):
        baseline_w = _weight_variable((cell.output_size, 1))
        baseline_b = _bias_variable((1,))
    baselines = []
    # The first output (from the random initial glimpse) gets no baseline.
    for output in rnn_outputs[1:]:
        baseline = tf.nn.xw_plus_b(output, baseline_w, baseline_b)
        baseline = tf.squeeze(baseline)
        baselines.append(baseline)
    baselines = tf.stack(baselines)  # [timesteps, batch_sz]
    baselines = tf.transpose(baselines)  # [batch_sz, timesteps]

    # Classification. Take the last step only.
    rnn_last_output = rnn_outputs[-1]
    with tf.variable_scope('Classification'):
        logit_w = _weight_variable((cell.output_size, nb_classes))
        logit_b = _bias_variable((nb_classes,))
    logits = tf.nn.xw_plus_b(rnn_last_output, logit_w, logit_b)
    # self.prediction = tf.argmax(logits, 1)
    self.softmax = tf.nn.softmax(logits)
    self.pred = tf.argmax(self.softmax, 1)
    self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.pred, self.lbl_ph), tf.float32))

    if is_training:
        # classification loss
        self.cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.lbl_ph, logits=logits))
        # RL reward
        # 1.0 for a correct final prediction, 0.0 otherwise, repeated for
        # every glimpse step.
        reward = tf.cast(tf.equal(self.pred, self.lbl_ph), tf.float32)
        rewards = tf.expand_dims(reward, 1)  # [batch_sz, 1]
        rewards = tf.tile(rewards, (1, nb_glimpses))  # [batch_sz, timesteps]
        # stop_gradient: the baseline is trained only through baselines_mse.
        advantages = rewards - tf.stop_gradient(baselines)
        self.advantage = tf.reduce_mean(advantages)
        logll = _log_likelihood(loc_means, locs, variance)
        logllratio = tf.reduce_mean(logll * advantages)
        self.reward = tf.reduce_mean(reward)
        # baseline loss
        self.baselines_mse = tf.reduce_mean(tf.square((rewards - baselines)))
        # hybrid loss
        self.loss = -logllratio + self.cross_entropy + self.baselines_mse
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.train_op = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def __init__(self, img_size_width, img_size_height, CNN_patch_width, CNN_patch_height, CNN_patch_number,
             patch_window_width, patch_window_height, g_size, l_size, glimpse_output_size, loc_dim,
             variance, cell_size, num_glimpses, num_classes, learning_rate, learning_rate_decay_factor,
             min_learning_rate, training_batch_num, max_gradient_norm, last_lstm_size, n_time_window,
             is_training=False):
    """Build a CNN + recurrent attention model with a second LSTM classifier.

    The input is passed through ``CNN``, then a glimpse-driven LSTM is
    unrolled with ``rnn_decoder``.  Its last output is classified directly
    (``prediction`` / ``softmax``) and is also regrouped into windows of
    ``n_time_window`` rows that feed a second LSTM (``lstm_prediction`` /
    ``lstm_softmax``).  When ``is_training`` a hybrid loss and an Adam train
    op are added.

    Args:
        img_size_width, img_size_height: Their product is the width of the
            flat image placeholder; both are also forwarded to ``CNN`` and
            ``GlimpseNetwork``.
        CNN_patch_width, CNN_patch_height, CNN_patch_number: Forwarded to
            ``CNN``.
        patch_window_width, patch_window_height, g_size, l_size,
            glimpse_output_size: Forwarded to ``GlimpseNetwork``.
        loc_dim: Width of a location vector.
        variance: Forwarded to ``LocationNetwork`` and ``_log_likelihood``.
        cell_size: Number of units of the attention LSTM.
        num_glimpses: Number of decoder steps after the initial glimpse.
        num_classes: Number of classes.
        learning_rate, learning_rate_decay_factor, min_learning_rate,
            training_batch_num: Exponential-decay schedule parameters.
        max_gradient_norm: Global-norm threshold for gradient clipping.
        last_lstm_size: Number of units of the second LSTM.
        n_time_window: Number of consecutive rows grouped into one sequence
            for the second LSTM.
        is_training: Sample locations and build the loss / train op.
    """
    self.img_ph = tf.placeholder(tf.float32, [None, img_size_width * img_size_height])
    self.lbl_ph = tf.placeholder(tf.int64, [None])

    self.global_step = tf.Variable(0, trainable=False)
    # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / training_batch_num)
    self.learning_rate = tf.maximum(
        tf.train.exponential_decay(
            learning_rate,
            self.global_step,
            training_batch_num,  # batch number
            learning_rate_decay_factor,
            # If the argument staircase is True,
            # then global_step / decay_steps is an integer division
            # and the decayed learning rate follows a staircase function.
            staircase=True),
        min_learning_rate)

    cell = BasicLSTMCell(cell_size)

    with tf.variable_scope('CNN'):
        cnn_network = CNN(img_size_width, img_size_height, CNN_patch_width, CNN_patch_height, CNN_patch_number)

    with tf.variable_scope('GlimpseNetwork'):
        glimpse_network = GlimpseNetwork(img_size_width, img_size_height, patch_window_width, patch_window_height, loc_dim, g_size, l_size, glimpse_output_size)

    with tf.variable_scope('LocationNetwork'):
        location_network = LocationNetwork(
            loc_dim=loc_dim,
            rnn_output_size=cell.output_size,  # cell_size
            variance=variance,
            is_sampling=is_training)

    # Core Network
    # NOTE(review): this rebinds self.img_ph from the input placeholder to
    # the CNN output, so the attribute no longer refers to the placeholder
    # after construction -- confirm that callers feed the intended tensor.
    self.img_ph = cnn_network(self.img_ph)
    batch_size = tf.shape(self.img_ph)[0]  # training_batch_size * M
    init_loc = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)  # shape: (batch_size, loc_dim), range: [-1,1)
    init_state = cell.zero_state(batch_size, tf.float32)

    init_glimpse = glimpse_network(self.img_ph, init_loc)
    # Only the first decoder input is real; the rest are dummies handed to
    # rnn_decoder together with loop_function.
    rnn_inputs = [init_glimpse]
    rnn_inputs.extend([0] * num_glimpses)

    self.locs, loc_means = [], []

    def loop_function(prev, _):
        # Pick the next location from the previous output, record it, and
        # return the glimpse taken there as the next decoder input.
        loc, loc_mean = location_network(prev)
        self.locs.append(loc)
        loc_means.append(loc_mean)
        glimpse = glimpse_network(self.img_ph, loc)
        return glimpse

    rnn_outputs, _ = rnn_decoder(rnn_inputs, init_state, cell, loop_function=loop_function)

    # Time independent baselines
    with tf.variable_scope('Baseline'):
        baseline_w = _weight_variable((cell.output_size, 1))
        baseline_b = _bias_variable((1, ))
    baselines = []
    # The first output (from the random initial glimpse) gets no baseline.
    for output in rnn_outputs[1:]:
        baseline = tf.nn.xw_plus_b(output, baseline_w, baseline_b)
        baseline = tf.squeeze(baseline)
        baselines.append(baseline)
    baselines = tf.stack(baselines)  # [timesteps, batch_sz]
    baselines = tf.transpose(baselines)  # [batch_sz, timesteps]

    # Classification. Take the last step only.
    rnn_last_output = rnn_outputs[-1]
    with tf.variable_scope('Classification'):
        logit_w = _weight_variable((cell.output_size, num_classes))
        logit_b = _bias_variable((num_classes, ))
    logits = tf.nn.xw_plus_b(rnn_last_output, logit_w, logit_b)
    self.prediction = tf.argmax(logits, 1)
    self.softmax = tf.nn.softmax(logits)

    with tf.variable_scope('LSTM_Classification'):
        # Project the attention output to last_lstm_size and regroup every
        # n_time_window consecutive rows into one sequence.
        last_lstm_w_in = _weight_variable(
            (cell.output_size, last_lstm_size))
        last_lstm_b_in = _bias_variable((last_lstm_size, ))
        last_lstm_in = tf.matmul(rnn_last_output, last_lstm_w_in) + last_lstm_b_in
        last_lstm_in = tf.reshape(last_lstm_in, [-1, n_time_window, last_lstm_size])

        # Pick the LSTM cell class by TensorFlow version.  From here on
        # `cell` refers to the second LSTM, so cell.output_size below is
        # that cell's output size.
        if int((tf.__version__).split('.')[1]) < 12 and int(
                (tf.__version__).split('.')[0]) < 1:
            cell = tf.nn.rnn_cell.BasicLSTMCell(last_lstm_size, forget_bias=1.0, state_is_tuple=True)
        else:
            cell = tf.contrib.rnn.BasicLSTMCell(last_lstm_size)
        # lstm cell is divided into two parts (c_state, h_state)
        init_state_last_lstm = cell.zero_state(batch_size // n_time_window, dtype=tf.float32)
        lstm_outputs, final_state = tf.nn.dynamic_rnn(
            cell, last_lstm_in, initial_state=init_state_last_lstm, time_major=False)

        last_lstm_w_out = _weight_variable((cell.output_size, num_classes))
        last_lstm_b_out = _bias_variable((num_classes, ))

        # Make the output time-major and split it into a per-step list
        # (tf.unpack was the older name of tf.unstack).
        if int((tf.__version__).split('.')[1]) < 12 and int(
                (tf.__version__).split('.')[0]) < 1:
            lstm_outputs = tf.unpack(tf.transpose(
                lstm_outputs, [1, 0, 2]))  # states is the last outputs
        else:
            lstm_outputs = tf.unstack(tf.transpose(lstm_outputs, [1, 0, 2]))

        lstm_logits = tf.matmul(lstm_outputs[-1], last_lstm_w_out) + last_lstm_b_out
        # Repeat each window's logits n_time_window times so there is one
        # row of logits per original input row again.
        lstm_logits = tf.reshape(tf.tile(lstm_logits, (1, n_time_window)), [-1, num_classes])
        self.lstm_prediction = tf.argmax(lstm_logits, 1)
        self.lstm_softmax = tf.nn.softmax(lstm_logits)

    if is_training:
        # classification loss
        self.cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.lbl_ph, logits=logits))
        self.lstm_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.lbl_ph, logits=lstm_logits))

        # RL reward
        # 1.0 for a correct (non-LSTM) prediction, 0.0 otherwise, repeated
        # for every glimpse step.
        reward = tf.cast(tf.equal(self.prediction, self.lbl_ph), tf.float32)
        rewards = tf.expand_dims(reward, 1)  # [batch_sz, 1]
        rewards = tf.tile(rewards, (1, num_glimpses))  # [batch_sz, timesteps]
        # stop_gradient: the baseline is trained only through baselines_mse.
        advantages = rewards - tf.stop_gradient(baselines)
        self.advantage = tf.reduce_mean(advantages)
        logll = _log_likelihood(loc_means, self.locs, variance)
        logllratio = tf.reduce_mean(logll * advantages)
        self.reward = tf.reduce_mean(reward)

        # baseline loss
        self.baselines_mse = tf.reduce_mean(
            tf.square((rewards - baselines)))

        # hybrid loss
        self.loss = -logllratio + self.cross_entropy + self.baselines_mse + self.lstm_cross_entropy
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.train_op = tf.train.AdamOptimizer(
            self.learning_rate).apply_gradients(
                zip(clipped_gradients, params),
                global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def __init__(self, kwd_voc_size, *args, **kwargs):
    """Initialise the cell and remember the keyword-vocabulary size.

    Args:
        kwd_voc_size: Stored as ``self.key_words_voc_size``.
        *args: Forwarded unchanged to ``BasicLSTMCell.__init__``.
        **kwargs: Forwarded unchanged to ``BasicLSTMCell.__init__``.
    """
    # Run the base-class initialiser before attaching extra attributes.
    BasicLSTMCell.__init__(self, *args, **kwargs)
    self.key_words_voc_size = kwd_voc_size
def single_cell():
    """Create one LSTM layer of ``rnnHiddenSize`` units.

    ``rnnHiddenSize`` is read from the enclosing scope.  The cell is built
    with ``state_is_tuple=False``.

    Returns:
        A new ``BasicLSTMCell`` instance.
    """
    lstm_layer = BasicLSTMCell(rnnHiddenSize, state_is_tuple=False)
    return lstm_layer
def model(self):
    """Build the full training / evaluation graph of the attention model.

    Creates a re-initialisable dataset iterator (train / test / valid), tiles
    each batch ``self.glimpse_times`` times, unrolls the recurrent decoder,
    and builds rewards, losses, the train op, summaries and a second
    "multi" test pipeline.  Everything is stored on ``self``; nothing is
    returned.

    Configuration read from ``self``: ``result_mode`` ('single' or other),
    ``reinforce_mode`` ('baseline' or other), ``mode`` ('fine_tune',
    'origin' or other), ``train_method`` ('Adam' or other), plus
    ``glimpse_times``, ``num_glimpses``, ``lstm_cell_size``, ``variance``,
    ``max_gradient_norm``, ``decay_learning_rate`` and ``global_step``.

    NOTE(review): the literal 200 used in the reshapes / one_hot below
    appears to be the number of classes, and 224 / 10 the image size and
    number of views per sample -- TODO confirm and consider naming them.
    """
    self.train_mode = tf.placeholder(tf.bool)

    # One iterator shared by the three datasets; running an *_init_op
    # switches which dataset it reads from.
    self.train_dataset = self._generate_train_image_label()
    self.test_dataset = self.generate_test_image_label()
    self.valid_dataset = self.generate_valid_image_label()
    self.iter = tf.data.Iterator.from_structure(
        self.train_dataset.output_types, self.train_dataset.output_shapes)
    self.train_init_op = self.iter.make_initializer(self.train_dataset)
    self.test_init_op = self.iter.make_initializer(self.test_dataset)
    self.valid_init_op = self.iter.make_initializer(self.valid_dataset)
    self.images, self.labels = self.iter.get_next()

    # Repeat every batch glimpse_times times along the batch axis.
    self.img_ph = tf.tile(self.images, [self.glimpse_times, 1, 1, 1])
    self.lbl_ph = tf.tile(self.labels, [self.glimpse_times])

    cell = BasicLSTMCell(self.lstm_cell_size)
    init_state = cell.zero_state(tf.shape(self.img_ph)[0], tf.float32)
    self.rgbs, self.rnn_outputs, self.locs, self.loc_means = self.rnn_decode(
        init_state, cell, self.img_ph)

    baselines = self.baseline_network(self.rnn_outputs)
    # Classification from the last decoder output.
    logits, probs = self.prob_network(self.rnn_outputs[-1])
    predict_label = tf.argmax(logits, 1, output_type=tf.int32)

    if self.result_mode == 'single':
        # Reward: 1.0 when the last-step prediction is correct, repeated
        # for every glimpse.
        rewards = tf.cast(tf.equal(predict_label, self.lbl_ph), tf.float32)
        rewards = tf.tile(tf.expand_dims(rewards, 1), [1, self.num_glimpses])
        # Test label: average the probabilities over the glimpse_times
        # copies of each sample, then take the arg-max.
        self.test_label = tf.argmax(tf.reduce_mean(tf.reshape(
            probs, [self.glimpse_times, -1, 200]), axis=0), 1, output_type=tf.int32)
        self.test_acc = tf.reduce_mean(
            tf.cast(tf.equal(self.test_label, self.labels), tf.float32))
        self.m_test_acc = self.test_acc
    else:
        # Classify EVERY decoder output; rewards and the classification
        # loss are computed per step.
        m_logits, m_probs = self.prob_network(tf.stack(self.rnn_outputs))
        m_predict_label = tf.argmax(m_logits, -1, output_type=tf.int32)
        m_lbl = tf.tile(tf.expand_dims(self.lbl_ph, 0), [self.num_glimpses, 1])
        rewards = tf.transpose(
            tf.cast(tf.equal(m_predict_label, m_lbl), tf.float32), [1, 0])
        self.classification_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=m_lbl, logits=m_logits))
        self.test_label = tf.argmax(tf.reduce_mean(tf.reshape(
            probs, [self.glimpse_times, -1, 200]), axis=0), 1, output_type=tf.int32)
        self.test_acc = tf.reduce_mean(
            tf.cast(tf.equal(self.test_label, self.labels), tf.float32))
        # Same as test_label but with probabilities first averaged over
        # all decoder steps.
        m_test_label = tf.argmax(tf.reduce_mean(tf.reshape(
            tf.reduce_mean(m_probs, 0), [self.glimpse_times, -1, 200]), axis=0), 1, output_type=tf.int32)
        self.m_test_acc = tf.reduce_mean(
            tf.cast(tf.equal(m_test_label, self.labels), tf.float32))

    log_action_prob = self._log_likelihood(self.loc_means, self.locs, self.variance)

    if self.reinforce_mode == 'baseline':
        # REINFORCE with a learned baseline.
        advantages = tf.stop_gradient(rewards - baselines)
        if self.result_mode == 'single':
            self.classification_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.lbl_ph, logits=logits))
        self.reinforce_loss = tf.reduce_mean(log_action_prob * advantages)
        self.baselines_loss = tf.reduce_mean(
            tf.square((tf.stop_gradient(rewards) - baselines)))
        # Computed and stored, but not part of self.loss below.
        self.location_loss = self._loc_loss(self.loc_means)
        self.loss = -self.reinforce_loss + self.classification_loss + self.baselines_loss
    else:
        # Plain REINFORCE: raw rewards as advantages, no baseline term.
        advantages = rewards
        if self.result_mode == 'single':
            self.classification_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.lbl_ph, logits=logits))
        self.reinforce_loss = tf.reduce_mean(log_action_prob * advantages)
        self.baselines_loss = tf.constant(0)
        # Computed and stored, but not part of self.loss below.
        self.location_loss = self._loc_loss(self.loc_means)
        self.loss = -self.reinforce_loss + self.classification_loss

    # Choose which variables are trained: everything in 'fine_tune' mode,
    # otherwise every variable whose name does not contain 'resnet'.
    if self.mode == 'fine_tune':
        params = tf.trainable_variables()
    elif self.mode == 'origin':
        params = [
            param for param in tf.trainable_variables()
            if 'resnet' not in param.name
        ]
    else:
        # params = tf.trainable_variables()
        params = [
            param for param in tf.trainable_variables()
            if 'resnet' not in param.name
        ]
        # params = [param for param in tf.trainable_variables() if 'resnet' not in param.name and
        #           'glimpse_feature' not in param.name]

    gradients = tf.gradients(self.loss, params)
    clipped_gradients, norm = tf.clip_by_global_norm(
        gradients, self.max_gradient_norm)

    # Adam when requested, otherwise SGD with momentum 0.9.
    if self.train_method == 'Adam':
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.decay_learning_rate).\
            apply_gradients(zip(clipped_gradients, params), global_step=self.global_step)
    else:
        self.train_op = tf.train.MomentumOptimizer(self.decay_learning_rate, 0.9). \
            apply_gradients(zip(clipped_gradients, params), global_step=self.global_step)

    self.reward = tf.reduce_mean(rewards)
    self.advantage = tf.reduce_mean(advantages)

    # Summaries: probability assigned to the true class, every trained
    # parameter, and the scalar losses.
    tf.summary.histogram(
        'probs', tf.reduce_sum(probs * tf.one_hot(self.lbl_ph, 200), axis=-1))
    for param in params:
        tf.summary.histogram(param.name, param)
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('reward', self.reward)
    tf.summary.scalar('advantage', self.advantage)
    tf.summary.scalar('classification_loss', self.classification_loss)
    tf.summary.scalar('reinforce_loss', self.reinforce_loss)
    tf.summary.scalar('baseline_loss', self.baselines_loss)
    self.merged = tf.summary.merge_all()

    # Second evaluation pipeline ("multi"): a separate iterator whose
    # images are flattened to [-1, 224, 224, 3] and decoded with the same
    # cell object.
    self.test_multi_dataset = self.generate_test_multi_image_label()
    self.multi_iter = tf.data.Iterator.from_structure(
        self.test_multi_dataset.output_types,
        self.test_multi_dataset.output_shapes)
    self.test_multi_init_op = self.multi_iter.make_initializer(
        self.test_multi_dataset)
    self.multi_images, self.multi_labels = self.multi_iter.get_next()
    self.multi_images = tf.reshape(self.multi_images, [-1, 224, 224, 3])
    self.multi_img_ph = tf.tile(self.multi_images, [self.glimpse_times, 1, 1, 1])
    multi_init_state = cell.zero_state(
        tf.shape(self.multi_img_ph)[0], tf.float32)
    _, self.multi_rnn_outputs, __, ___ = self.rnn_decode(
        multi_init_state, cell, self.multi_img_ph)
    _, multi_probs = self.prob_network(self.multi_rnn_outputs[-1])
    # Average over the glimpse_times copies, then over groups of 10 rows,
    # then take the arg-max.
    self.multi_test_labels = tf.reduce_mean(tf.reshape(
        multi_probs, [self.glimpse_times, -1, 200]), axis=0)
    self.multi_test_labels = tf.reduce_mean(tf.reshape(
        self.multi_test_labels, [-1, 10, 200]), axis=1)
    self.multi_test_labels = tf.argmax(self.multi_test_labels, 1, output_type=tf.int32)
    self.multi_accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.multi_test_labels, self.multi_labels), tf.float32))
# 容器,存放输入输出 datas_placeholder = tf.placeholder(tf.int32, [None, max_document_length]) labels_placeholder = tf.placeholder(tf.int32, [None]) # 词向量表 embeddings = tf.get_variable("embeddings", [vocab_size, embedding_size], initializer=tf.truncated_normal_initializer) # 将词索引号转换为词向量[None, max_document_length] => [None, max_document_length, embedding_size] embedded = tf.nn.embedding_lookup(embeddings, datas_placeholder) # 转换为LSTM的输入格式,要求是数组,数组的每个元素代表某个时间戳一个Batch的数据 rnn_input = tf.unstack(embedded, max_document_length, axis=1) # 定义LSTM lstm_cell = BasicLSTMCell(20, forget_bias=1.0) rnn_outputs, rnn_states = static_rnn(lstm_cell, rnn_input, dtype=tf.float32) #利用LSTM最后的输出进行预测 logits = tf.layers.dense(rnn_outputs[-1], num_classes) predicted_labels = tf.argmax(logits, axis=1) # 定义损失和优化器 losses = tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot( labels_placeholder, num_classes), logits=logits) mean_loss = tf.reduce_mean(losses) optimizer = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(mean_loss)
def __init__(self,
             img_channel,
             img_size,
             pth_size,
             g_size,
             l_size,
             glimpse_output_size,
             loc_dim,
             variance,
             cell_size,
             num_glimpses,
             num_classes,
             learning_rate,
             learning_rate_decay_factor,
             min_learning_rate,
             training_steps_per_epoch,
             max_gradient_norm,
             fc1_size,
             base_channels,
             output_dim,
             is_training=False):
    """Build the recurrent-attention white-balance graph.

    An LSTM core repeatedly (a) samples a glimpse location, (b) estimates a
    per-channel gain, (c) applies that gain to the running image and
    (d) extracts the next glimpse. The last LSTM output is projected and
    L2-normalized to form ``self.prediction``.

    Args:
        img_channel: number of image channels (the gain code indexes
            channels 0..2, so 3 is expected).
        img_size: side length of the square input image.
        pth_size: glimpse patch size, forwarded to the glimpse network.
        g_size, l_size, glimpse_output_size: glimpse network layer sizes.
        loc_dim: dimensionality of a glimpse location.
        variance: variance of the location policy.
        cell_size: number of LSTM units.
        num_glimpses: number of glimpses taken after the initial one.
        num_classes: width of the final projection.
        learning_rate, learning_rate_decay_factor, min_learning_rate,
            training_steps_per_epoch: staircase exponential decay schedule,
            floored at ``min_learning_rate``.
        max_gradient_norm: global-norm clipping threshold.
        fc1_size, base_channels: critic network sizes (used only when
            ``FLAGS.USE_CRITIC`` is set).
        output_dim: width of the label placeholder and of the gain vector.
        is_training: when true, losses, rewards and train ops are built and
            the location network samples.

    NOTE(review): block nesting was reconstructed from whitespace-collapsed
    source; confirm scope boundaries against the original file.
    """
    self.img_ph = tf.placeholder(tf.float32,
                                 [None, img_size * img_size * img_channel])
    self.lbl_ph = tf.placeholder(tf.float32, [None, output_dim])

    self.global_step = tf.Variable(0, trainable=False)
    self.learning_rate = tf.maximum(
        tf.train.exponential_decay(learning_rate,
                                   self.global_step,
                                   training_steps_per_epoch,
                                   learning_rate_decay_factor,
                                   staircase=True), min_learning_rate)

    cell = BasicLSTMCell(cell_size)

    with tf.variable_scope('GlimpseNetwork'):
        glimpse_network = GlimpseNetwork(img_channel, img_size, pth_size,
                                         loc_dim, g_size, l_size,
                                         glimpse_output_size)
    with tf.variable_scope('Agent'):
        # The agent is responsible for selecting a window and estimating
        # a gain.
        with tf.variable_scope('LocationNetwork'):
            location_network = LocationNetwork(
                loc_dim=loc_dim,
                rnn_output_size=cell.output_size,
                variance=variance,
                is_sampling=is_training)
        with tf.variable_scope('WhiteBalanceNetwork'):
            wb_network = WhiteBalanceNetwork(
                rnn_output_size=cell.output_size, output_dim=output_dim)
    if FLAGS.USE_CRITIC:
        with tf.variable_scope('Critic'):
            critic_network = CriticNetwork(fc1_size, base_channels)

    # Core Network
    batch_size = tf.shape(self.img_ph)[0]
    init_loc = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
    init_state = cell.zero_state(batch_size, tf.float32)

    init_glimpse = glimpse_network(self.img_ph, init_loc)
    # Only the first input is real; the remaining slots are placeholders
    # that rnn_decoder replaces with the loop function's output.
    rnn_inputs = [init_glimpse]
    rnn_inputs.extend([0] * num_glimpses)

    # Filled in as a side effect of _loop_function, one entry per step.
    locs, loc_means = [], []
    gains = []
    img_retouched = []

    def _apply_gain(ill, loc, img, patch_wise=False):
        """Scale each channel of `img` by ill[:, c] / ill[:, 1].

        Returns the image flattened to [batch, img_size*img_size*channels].
        """
        if patch_wise:
            # NOTE(review): no call in this method passes patch_wise=True.
            # `round()` on a tensor and slice assignment into a tensor look
            # unsupported in TF1 graph mode -- confirm before enabling.
            retina = RetinaSensor(img_channel, img_size, pth_size)
            pth = retina(img, loc, serial=False)
            img = tf.reshape(
                img, [tf.shape(img)[0], img_size, img_size, img_channel])
            retouched_channel = []
            for i in range(3):
                tmp = pth[:, :, :, i]
                tmp = tf.reshape(tmp, [tf.shape(tmp)[0], -1])
                tmp_ill = tf.reshape(ill[:, i] / ill[:, 1],
                                     [tf.shape(img)[0], 1])
                tmp_ill = tf.tile(tmp_ill, [1, pth_size * pth_size])
                tmp *= tmp_ill
                retouched_channel.append(tmp)
            retouched = tf.concat(retouched_channel, -1)
            img[:,
                round(img_size * loc[0]) - pth_size:round(img_size * loc[0]) +
                pth_size,
                round(img_size * loc[1]) - pth_size:round(img_size * loc[1]) +
                pth_size, :] = retouched
        else:
            img = tf.reshape(
                img, [tf.shape(img)[0], img_size, img_size, img_channel])
            # Gain for the first three channels, normalized by channel 1.
            channel_gain = ill[:, :3] / ill[:, 1:2]
            img = img * tf.reshape(channel_gain, [tf.shape(img)[0], 1, 1, 3])
            # Flatten while keeping the interleaved [H, W, C] layout. The
            # previous per-channel concat produced a channel-planar vector,
            # which every consumer then reshaped as [H, W, C], scrambling
            # the retouched image.
            img = tf.reshape(
                img, [tf.shape(img)[0], img_size * img_size * img_channel])
        return img

    def _loop_function(prev, _):
        """Produce the next glimpse from the previous LSTM output."""
        loc, loc_mean = location_network(prev)
        locs.append(loc)
        loc_means.append(loc_mean)
        gain = wb_network(prev)
        gains.append(gain)
        if img_retouched:
            # Gains accumulate: retouch the most recent retouched image.
            img_retouched.append(_apply_gain(gain, loc, img_retouched[-1]))
            glimpse = glimpse_network(img_retouched[-1], loc)
        else:
            # First step: the glimpse is still taken from the raw input.
            img_retouched.append(_apply_gain(gain, loc, self.img_ph))
            glimpse = glimpse_network(self.img_ph, loc)
        return glimpse

    rnn_outputs, _ = rnn_decoder(rnn_inputs,
                                 init_state,
                                 cell,
                                 loop_function=_loop_function)

    assert len(gains) == len(locs)

    # Time independent baselines
    with tf.variable_scope('Baseline'):
        baseline_w = weight_variable((cell.output_size, 1))
        baseline_b = bias_variable((1, ))
    baselines = []
    for output in rnn_outputs[1:]:
        baseline = tf.nn.xw_plus_b(output, baseline_w, baseline_b)
        baseline = tf.squeeze(baseline)
        baselines.append(baseline)
    baselines = tf.stack(baselines)  # [timesteps, batch_sz]
    baselines = tf.transpose(baselines)  # [batch_sz, timesteps]

    # Classification. Take the last step only.
    rnn_last_output = rnn_outputs[-1]
    with tf.variable_scope('Classification'):
        logit_w = weight_variable((cell.output_size, num_classes))
        logit_b = bias_variable((num_classes, ))
    logits = tf.nn.xw_plus_b(rnn_last_output, logit_w, logit_b)
    # batch_size * 3
    self.prediction = tf.nn.l2_normalize(logits, axis=1)
    self.locations = locs

    if is_training:
        # angular loss
        self.xent = get_angular_loss(self.prediction, self.lbl_ph)
        tf.summary.scalar('xent', self.xent)

        # RL reward
        if FLAGS.USE_CRITIC:
            img_critic = tf.reshape(self.img_ph, [
                tf.shape(self.img_ph)[0], img_size, img_size, img_channel
            ])
            img_real = apply_gain(img_critic, self.lbl_ph)
            img_real = tf.reshape(
                img_real,
                [tf.shape(img_real)[0], img_size, img_size, img_channel])
            img_fake = apply_gain(img_critic, self.prediction)
            img_fake = tf.reshape(
                img_fake,
                [tf.shape(img_fake)[0], img_size, img_size, img_channel])

            real_logit = critic_network(img_real,
                                        is_train=is_training,
                                        reuse=False)
            fake_logit = critic_network(img_fake,
                                        is_train=is_training,
                                        reuse=True)

            # Score every intermediate retouched image with the critic;
            # these scores are the per-step rewards.
            rnn_fake_logits = []
            for retouched in img_retouched:
                rnn_img_fake = tf.reshape(retouched, [
                    tf.shape(retouched)[0], img_size, img_size, img_channel
                ])
                rnn_fake_logit = critic_network(rnn_img_fake,
                                                is_train=is_training,
                                                reuse=True)
                rnn_fake_logits.append(rnn_fake_logit)
            rewards = tf.stop_gradient(
                tf.convert_to_tensor(
                    rnn_fake_logits))  # shape (timesteps, batch_sz, 1)
            rewards = tf.transpose(tf.squeeze(
                rewards, 2))  # shape [batch_sz, timesteps]

            self.c_loss = tf.reduce_mean(fake_logit - real_logit)

            # Critic variables. Resolved before branching so both update
            # strategies optimize the same list; the clipping branch
            # previously read `theta_c` and `params` before assignment.
            theta_c = tf.trainable_variables(scope='critic')

            if FLAGS.grad_penalty < 0:
                # use grad clip
                gradients = tf.gradients(self.c_loss, theta_c)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.opt_c = tf.train.AdamOptimizer(
                    self.learning_rate).apply_gradients(
                        zip(clipped_gradients, theta_c),
                        global_step=self.global_step)
            else:
                # Critic gradient norm and penalty
                alpha_dist = tf.contrib.distributions.Uniform(low=0.,
                                                              high=1.)
                alpha = alpha_dist.sample((batch_size, 1, 1, 1))
                interpolated = img_real + alpha * (img_fake - img_real)

                inte_logit = critic_network(images=interpolated,
                                            is_train=is_training,
                                            reuse=True)
                gradients = tf.gradients(inte_logit, [
                    interpolated,
                ])[0]
                # 1e-6 keeps the sqrt differentiable at zero norm.
                gradient_norm = tf.sqrt(
                    1e-6 + tf.reduce_sum(gradients**2, axis=[1, 2, 3]))
                gradient_penalty = FLAGS.grad_penalty * tf.reduce_mean(
                    tf.maximum(gradient_norm - 1.0, 0.0)**2)

                self.c_loss += gradient_penalty
                gradients = tf.gradients(self.c_loss, theta_c)
                self.opt_c = tf.train.AdamOptimizer(
                    self.learning_rate).apply_gradients(
                        zip(gradients, theta_c),
                        global_step=self.global_step)
        else:
            # NOTE(review): this reward grows with the prediction error;
            # confirm the intended sign.
            reward = tf.norm(self.prediction - self.lbl_ph, axis=1)
            rewards = tf.expand_dims(reward, 1)
            rewards = tf.tile(rewards,
                              (1, num_glimpses))  # [batch_sz, timesteps]

        advantages = rewards - tf.stop_gradient(baselines)
        self.advantage = tf.reduce_mean(advantages)
        logll = log_likelihood(loc_means, locs, variance)
        logllratio = tf.reduce_mean(logll * advantages)
        self.reward = tf.reduce_mean(rewards)
        tf.summary.scalar('reward', self.reward)

        # baseline loss
        self.baselines_mse = tf.reduce_mean(
            tf.square((rewards - baselines)))

        # hybrid loss
        self.loss = -logllratio + self.xent + self.baselines_mse
        tf.summary.scalar('loss', self.loss)

        # exclude the variables in critic scope
        params = [
            var for var in tf.trainable_variables()
            if 'critic' not in var.op.name
        ]
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.train_op = tf.train.AdamOptimizer(
            self.learning_rate).apply_gradients(
                zip(clipped_gradients, params),
                global_step=self.global_step)

    img = tf.reshape(
        self.img_ph,
        [tf.shape(self.img_ph)[0], img_size, img_size, img_channel])
    tf.summary.image('input', img)
    tf.summary.image('gt', apply_gain(img, self.lbl_ph))
    tf.summary.image('est', apply_gain(img, self.prediction))

    self.sum_total = tf.summary.merge_all()
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def __init__(self,
             img_shape,
             pth_size,
             g_size,
             l_size,
             glimpse_output_size,
             loc_dim,
             variance,
             cell_size,
             num_glimpses,
             num_classes,
             learning_rate,
             learning_rate_decay_factor,
             min_learning_rate,
             training_steps_per_epoch,
             max_gradient_norm,
             is_training=False):
    """Assemble the recurrent attention classifier graph.

    An LSTM core alternates between sampling a glimpse location and reading
    a glimpse of the input image at that location; the final LSTM output is
    projected to class logits. When ``is_training`` is true the hybrid loss
    (REINFORCE term + cross entropy + baseline MSE) and a gradient-clipped
    Adam train op are built as well.

    Args:
        img_shape: indexable with three entries used as the non-batch
            dimensions of the image placeholder.
        pth_size, g_size, l_size, glimpse_output_size: forwarded to the
            glimpse network.
        loc_dim: dimensionality of a glimpse location.
        variance: variance of the location policy.
        cell_size: number of LSTM units.
        num_glimpses: glimpses taken after the initial random one.
        num_classes: number of output classes.
        learning_rate, learning_rate_decay_factor, min_learning_rate,
            training_steps_per_epoch: staircase exponential decay schedule,
            floored at ``min_learning_rate``.
        max_gradient_norm: global-norm clipping threshold.
        is_training: build losses/train op and enable location sampling.
    """
    self.is_training = is_training

    # Inputs: image batch and integer class labels.
    self.img_ph = tf.placeholder(
        tf.float32, [None, img_shape[0], img_shape[1], img_shape[2]])
    self.lbl_ph = tf.placeholder(tf.int64, [None])

    # Step counter and decayed (but floored) learning rate.
    self.global_step = tf.Variable(0, trainable=False)
    decayed_lr = tf.train.exponential_decay(learning_rate,
                                            self.global_step,
                                            training_steps_per_epoch,
                                            learning_rate_decay_factor,
                                            staircase=True)
    self.learning_rate = tf.maximum(decayed_lr, min_learning_rate)

    cell = BasicLSTMCell(cell_size)

    with tf.variable_scope('GlimpseNetwork'):
        glimpse_network = GlimpseNetwork(img_shape, pth_size, loc_dim,
                                         g_size, l_size,
                                         glimpse_output_size)
    with tf.variable_scope('LocationNetwork'):
        location_network = LocationNetwork(
            loc_dim=loc_dim,
            rnn_output_size=cell.output_size,
            variance=variance,
            is_sampling=self.is_training)

    # Core network: start from a uniformly random location.
    batch_size = tf.shape(self.img_ph)[0]
    init_loc = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
    init_state = cell.zero_state(batch_size, tf.float32)
    init_glimpse = glimpse_network(self.img_ph, init_loc)

    # Only the first entry is a real input; the zeros are slots that
    # rnn_decoder fills with the loop function's output.
    rnn_inputs = [init_glimpse] + [0] * num_glimpses

    sampled_locs = []
    mean_locs = []

    def next_glimpse(prev_output, _):
        """Pick the next location from the LSTM output and look there."""
        loc, loc_mean = location_network(prev_output, self.is_training)
        sampled_locs.append(loc)
        mean_locs.append(loc_mean)
        return glimpse_network(self.img_ph, loc)

    rnn_outputs, _ = rnn_decoder(rnn_inputs,
                                 init_state,
                                 cell,
                                 loop_function=next_glimpse)

    # Exposed so the visited locations can be displayed.
    self.locs = sampled_locs

    # Time-independent baselines, one per step after the first.
    with tf.variable_scope('Baseline'):
        baseline_w = _weight_variable((cell.output_size, 1))
        baseline_b = _bias_variable((1, ))
    per_step_baselines = [
        tf.squeeze(tf.nn.xw_plus_b(step_output, baseline_w, baseline_b))
        for step_output in rnn_outputs[1:]
    ]
    # stack -> [timesteps, batch_sz]; transpose -> [batch_sz, timesteps]
    baselines = tf.transpose(tf.stack(per_step_baselines))

    # Classification uses the last step only.
    final_output = rnn_outputs[-1]
    with tf.variable_scope('Classification'):
        logit_w = _weight_variable((cell.output_size, num_classes))
        logit_b = _bias_variable((num_classes, ))
    logits = tf.nn.xw_plus_b(final_output, logit_w, logit_b)
    self.prediction = tf.argmax(logits, 1)
    self.softmax = tf.nn.softmax(logits)

    if self.is_training:
        # Classification loss.
        per_example_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.lbl_ph, logits=logits)
        self.xent = tf.reduce_mean(per_example_xent)

        # RL reward: 1 for a correct prediction, 0 otherwise, repeated
        # for every glimpse step.
        is_correct = tf.equal(self.prediction, self.lbl_ph)
        reward = tf.cast(is_correct, tf.float32)
        rewards = tf.expand_dims(reward, 1)  # [batch_sz, 1]
        rewards = tf.tile(rewards,
                          (1, num_glimpses))  # [batch_sz, timesteps]

        # Baselines are held constant in the policy-gradient term.
        advantages = rewards - tf.stop_gradient(baselines)
        self.advantage = tf.reduce_mean(advantages)
        logll = _log_likelihood(mean_locs, sampled_locs, variance)
        logllratio = tf.reduce_mean(logll * advantages)
        self.reward = tf.reduce_mean(reward)

        # Baseline regression loss.
        self.baselines_mse = tf.reduce_mean(tf.square(rewards - baselines))

        # Hybrid loss.
        self.loss = -logllratio + self.xent + self.baselines_mse

        trainable = tf.trainable_variables()
        raw_grads = tf.gradients(self.loss, trainable)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                                  max_gradient_norm)
        adam = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = adam.apply_gradients(zip(clipped_grads, trainable),
                                             global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def __init__(self, config, decay_step, is_training=False, is_translate=False):
    """Build the attention model graph from a config object.

    Args:
        config: object whose attributes (``input_img_size``, ``img_size``,
            ``learning_rate``, ``decay_factor``, ``min_learning_rate``,
            ``cell_size``, ``loc_dim``, ``num_glimpses``,
            ``max_gradient_norm``) are read here; it is also passed to the
            sub-networks.
        decay_step: number of steps between learning-rate decays.
        is_training: build the loss and train op, and let the actor sample.
        is_translate: run the input through ``self._translate_image`` first.

    ``self.image`` is fed as [batch_size, input_img_size^2] and
    ``self.label`` holds integer (not one-hot) labels.

    NOTE(review): name-scope nesting was reconstructed from
    whitespace-collapsed source; confirm against the original layout.
    """
    self.config = config
    self.decay_step = decay_step
    self.is_training = is_training
    self.is_translate = is_translate

    flat_input_len = config.input_img_size * config.input_img_size

    # Input data placeholders.
    with tf.name_scope('input'):
        self.image = tf.placeholder(tf.float32, [None, flat_input_len])
        self.label = tf.placeholder(tf.int64, [None])

    with tf.name_scope('image_translate'):
        if not self.is_translate:
            self.proc_image = self.image
        else:
            # Translate the MNIST data: go 2D -> 4D, translate, then
            # flatten back to [batch_size, img_size^2].
            image_4d = tf.reshape(self.image, [
                tf.shape(self.image)[0], config.input_img_size,
                config.input_img_size, 1
            ],
                                  name='2D_2_4D')
            self.proc_image = self._translate_image(image_4d)
            self.proc_image = tf.reshape(self.proc_image, [
                tf.shape(self.image)[0], config.img_size * config.img_size
            ],
                                         name='4D_2_2D')

    with tf.name_scope('global_step'):
        self.global_step = tf.Variable(0, trainable=False)

    # Staircase-decayed learning rate, floored at the configured minimum.
    with tf.name_scope('learning_rate'):
        decayed_lr = tf.train.exponential_decay(config.learning_rate,
                                                self.global_step,
                                                decay_step,
                                                config.decay_factor,
                                                staircase=True)
        self.learning_rate = tf.maximum(decayed_lr,
                                        config.min_learning_rate)
        tf.summary.scalar("learning_rate", self.learning_rate)

    # Glimpse network.
    with tf.name_scope('glimpse_net'):
        self.glimpse_network = GlimpseNetwork(
            config=config, is_translate=self.is_translate)

    # Actor network.
    with tf.name_scope('actor_net'):
        self.actor_network = ActorNetwork(config=config,
                                          rnn_output_size=config.cell_size,
                                          is_sampling=self.is_training)

    # LSTM network.
    with tf.name_scope('lstm'):
        lstm = BasicLSTMCell(config.cell_size, name='basic_lstm_cell')

        with tf.name_scope('initialization'):
            with tf.name_scope('batch_size'):
                batch_size = tf.shape(self.image)[0]
            with tf.name_scope('init_locs'):
                init_locs = tf.random_uniform(
                    shape=[batch_size, config.loc_dim],
                    minval=-1,
                    maxval=1,
                    name='sampling')
            with tf.name_scope('init_state'):
                init_state = lstm.zero_state(batch_size, tf.float32)
            # The first glimpse is the only real decoder input; the zeros
            # are slots filled by the loop function's output.
            with tf.name_scope('init_glimpse'):
                init_glimpse = self.glimpse_network(self.proc_image,
                                                    init_locs)
            with tf.name_scope('rnn_inputs'):
                rnn_inputs = [init_glimpse] + [0] * config.num_glimpses
            with tf.name_scope('init_list'):
                self.locs = []
                self.loc_means = []
                self.retina_reprsent = []

        def step(prev_output, _):
            """Choose the next location and return the glimpse taken there."""
            loc, loc_mean = self.actor_network(prev_output)
            self.locs.append(loc)
            self.loc_means.append(loc_mean)
            next_glimpse = self.glimpse_network(self.proc_image, loc)
            # Record what the retina sensor produced for this glimpse.
            self.retina_reprsent.append(
                self.glimpse_network.retina_sensor.retina_reprsent)
            return next_glimpse

        self.rnn_outputs, _ = rnn_decoder(rnn_inputs,
                                          init_state,
                                          lstm,
                                          loop_function=step)

    # Critic network.
    with tf.name_scope('critic_net'):
        self.critic_network = CriticNetwork(
            config=config, rnn_output_size=lstm.output_size)

    # Classification network, applied to the last LSTM output only.
    with tf.name_scope('classify_net'):
        self.classify_network = ClassifyNetwork(
            config=config, rnn_output_size=lstm.output_size)
        last_output = self.rnn_outputs[-1]
        self.logits = self.classify_network(last_output)

    with tf.name_scope('argmax'):
        self.prediction = tf.argmax(self.logits, 1)  # [batch_size]
    with tf.name_scope('softmax'):
        self.softmax = tf.nn.softmax(self.logits)

    if is_training:
        # Hybrid loss: classification loss, RL reward, baseline loss.
        with tf.name_scope('total_loss'):
            self.loss = self.total_loss()
            tf.summary.scalar("total_loss", self.loss)

        with tf.name_scope('train'):
            trainable_vars = tf.trainable_variables()
            raw_grads = tf.gradients(self.loss, trainable_vars)
            clipped_grads, _ = tf.clip_by_global_norm(
                raw_grads, config.max_gradient_norm)
            adam = tf.train.AdamOptimizer(self.learning_rate)
            self.train_op = adam.apply_gradients(
                zip(clipped_grads, trainable_vars),
                global_step=self.global_step)

    with tf.name_scope('merge'):
        self.merged = tf.summary.merge_all()
# 词向量表-随机初始化 train_x, vocab_size = get_vocabulary(train_x) test_x, vocab_size_test = get_vocabulary(test_x) print("datas shape:",train_x.shape) embeddings = tf.get_variable("embeddings", [vocab_size, embedding_size], initializer=tf.truncated_normal_initializer) # 将词索引号转换为词向量[None, max_document_length] => [None, max_document_length, embedding_size] embedded = tf.nn.embedding_lookup(embeddings, datas_placeholder) # 转换为LSTM的输入格式,要求是数组,数组的每个元素代表一个Batch下,一个时序的数据(即一个词) rnn_input = tf.unstack(embedded, max_document_length, axis=1) # 定义LSTM网络结构 lstm_cell = BasicLSTMCell(num_units=num_units, forget_bias=1.0) # cell lstm_cell = DropoutWrapper(cell=lstm_cell, input_keep_prob=1.0, output_keep_prob=keep_prob) rnn_outputs, rnn_states = static_rnn(cell=lstm_cell, inputs=rnn_input, dtype=tf.float32) # network # 最后一层 logits = tf.layers.dense(units=num_classes,inputs=rnn_outputs[-1]) # fully-connected pred_labels = tf.arg_max(input=logits,dimension=1) # 概率最大的类别为预测的类别 # 定义损失函数, logists为网络最后一层输出, labels为真实标签 losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf.one_hot(labels_placeholder, num_classes)) mean_losses = tf.reduce_mean(losses) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(mean_losses) with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess: # 初始化变量
def lstm_cell():
    """Create one LSTM cell with ``self.config.hidden_dim`` units."""
    hidden_units = self.config.hidden_dim
    return BasicLSTMCell(hidden_units, state_is_tuple=True)