def create_dc_actor_critic(self, h_size, num_layers):
    num_streams = 1
    hidden_streams = self.create_new_obs(num_streams, h_size, num_layers)
    hidden = hidden_streams[0]
    if self.use_recurrent:
        tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
        self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32, name='prev_action')
        self.prev_action_oh = c_layers.one_hot_encoding(self.prev_action, self.a_size)
        hidden = tf.concat([hidden, self.prev_action_oh], axis=1)
        self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32,
                                        name='recurrent_in')
        hidden, self.memory_out = self.create_recurrent_encoder(hidden, self.memory_in)
        self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
    self.policy = tf.layers.dense(hidden, self.a_size, activation=None, use_bias=False,
                                  kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
    self.all_probs = tf.nn.softmax(self.policy, name="action_probs")
    self.output = tf.multinomial(self.policy, 1)
    self.output = tf.identity(self.output, name="action")
    self.value = tf.layers.dense(hidden, 1, activation=None)
    self.value = tf.identity(self.value, name="value_estimate")
    self.entropy = -tf.reduce_sum(self.all_probs * tf.log(self.all_probs + 1e-10), axis=1)
    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
    self.selected_actions = c_layers.one_hot_encoding(self.action_holder, self.a_size)
    self.all_old_probs = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32,
                                        name='old_probabilities')
    # We reshape these tensors to [batch x 1] so they have the same rank as the
    # continuous-control probabilities.
    self.probs = tf.expand_dims(tf.reduce_sum(self.all_probs * self.selected_actions, axis=1), 1)
    self.old_probs = tf.expand_dims(tf.reduce_sum(self.all_old_probs * self.selected_actions, axis=1), 1)
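# A minimal sketch (not from the original code) of how the [batch x 1]
# `probs` / `old_probs` tensors built above are typically consumed by a
# clipped PPO surrogate objective. `advantages` and `epsilon` are
# illustrative placeholders, not names defined in this model.
def ppo_surrogate_loss(probs, old_probs, advantages, epsilon=0.2):
    ratio = probs / (old_probs + 1e-10)  # importance-sampling ratio
    unclipped = ratio * advantages
    clipped = tf.clip_by_value(ratio, 1.0 - epsilon, 1.0 + epsilon) * advantages
    # Maximize the pessimistic (elementwise minimum) surrogate.
    return -tf.reduce_mean(tf.minimum(unclipped, clipped))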
def unroll(iter, state, hidden_states): """ """ hidden_conv = tf.cond(self.update_bool, lambda: tf.gather_nd(self.hidden_conv, self.indices+iter), lambda: self.hidden_conv) inf_prev_action = tf.cond(self.update_bool, lambda: tf.gather_nd(self.inf_prev_action, self.indices+iter), lambda: self.inf_prev_action) inf_hidden = state.h if self.parameters['attention']: inf_hidden = attention(hidden_conv, inf_hidden) else: inf_hidden = select_dpatch(hidden_conv) inf_prev_action_onehot = c_layers.one_hot_encoding(inf_prev_action, self.act_size) inf_hidden = tf.concat([inf_hidden, inf_prev_action_onehot], axis=1) inf_hidden, state = net.rnn(inf_hidden, state, self.parameters['inf_num_rec_units'], self.inf_seq_len, 'inf_rnn') hidden_states = hidden_states.write(iter, inf_hidden) iter += 1 return [iter, state, hidden_states]
def unroll(iter, state, hidden_states):  # , softmax_weights):
    """Single inference step over the feature vector: attend, crop a dpatch
    automatically, or crop it manually, then advance the RNN."""
    hidden = tf.cond(self.update_bool,
                     lambda: tf.gather_nd(self.feature_vector, self.indices + iter),
                     lambda: self.feature_vector)
    inf_prev_action = tf.cond(self.update_bool,
                              lambda: tf.gather_nd(self.inf_prev_action, self.indices + iter),
                              lambda: self.inf_prev_action)
    inf_hidden = state.h
    if self.parameters['attention']:
        inf_hidden = attention(hidden, inf_hidden)
    elif self.parameters['automatic_dpatch']:
        inf_hidden = automatic_dpatch(hidden)
    else:
        inf_hidden = manual_dpatch(hidden)
    inf_prev_action_onehot = c_layers.one_hot_encoding(inf_prev_action, self.act_size)
    # inf_hidden = tf.concat([inf_hidden, inf_prev_action_onehot], axis=1)
    inf_hidden, state = net.rnn(inf_hidden, state, self.parameters['inf_num_rec_units'],
                                self.inf_seq_len, 'inf_rnn')
    hidden_states = hidden_states.write(iter, inf_hidden)
    iter += 1
    return [iter, state, hidden_states]  # , softmax_weights]
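# Hedged sketch: `unroll` above has the shape of a tf.while_loop body. A
# typical driver (all names here are illustrative, not from the original
# code) accumulates the per-step hidden states in a TensorArray and stacks
# them afterwards.
hidden_states_ta = tf.TensorArray(dtype=tf.float32, size=seq_len)
_, final_state, hidden_states_ta = tf.while_loop(
    cond=lambda i, *_: i < seq_len,
    body=unroll,
    loop_vars=[tf.constant(0), initial_state, hidden_states_ta])
stacked_hidden_states = hidden_states_ta.stack()  # [seq_len, batch, units]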
def create_discrete_state_encoder(self, s_size, h_size, num_streams, activation, num_layers):
    """
    Builds a set of hidden state encoders from discrete state input.
    :param s_size: state input size (discrete).
    :param h_size: Hidden layer size.
    :param num_streams: Number of state streams to construct.
    :param activation: What type of activation function to use for layers.
    :param num_layers: Number of hidden layers per stream.
    :return: List of hidden layer tensors.
    """
    self.state_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='state')
    state_in = tf.reshape(self.state_in, [-1])
    state_onehot = c_layers.one_hot_encoding(state_in, s_size)
    streams = []
    for i in range(num_streams):
        hidden = state_onehot
        for j in range(num_layers):
            hidden = tf.layers.dense(hidden, h_size, use_bias=False, activation=activation)
        streams.append(hidden)
    return streams
def pred(x, a: tf.int32):
    x = tf.concat((x, layers.one_hot_encoding(a, self.n_actions)), axis=1)
    x = layers.fully_connected(x, 100)
    x = layers.fully_connected(x, 50)
    x = layers.fully_connected(x, 50)
    x = layers.fully_connected(x, 100)
    x = layers.fully_connected(x, self.n_state, None)
    return x
def build_main_model(self):
    """
    Builds the neural network model that approximates the policy and value functions.
    """
    if self.parameters['obs_type'] == 'image':
        self.observation = tf.placeholder(shape=[None, self.parameters["frame_height"],
                                                 self.parameters["frame_width"],
                                                 self.parameters["num_frames"]],
                                          dtype=tf.float32, name='observation')
    else:
        self.observation = tf.placeholder(shape=[None, self.parameters["vec_size"]],
                                          dtype=tf.float32, name='observation')
    hidden = self.observation
    # Normalize the input.
    if self.parameters['env_type'] == 'atari':
        self.observation_norm = tf.cast(self.observation, tf.float32) / 255.
        hidden = self.observation_norm
    if self.convolutional:
        self.hidden_conv = net.cnn(self.observation, self.parameters["num_conv_layers"],
                                   self.parameters["num_filters"],
                                   self.parameters["kernel_sizes"],
                                   self.parameters["strides"], tf.nn.relu, False, 'cnn')
        hidden = c_layers.flatten(self.hidden_conv)
    if self.fully_connected:
        hidden = net.fcn(hidden, self.parameters["num_fc_layers"],
                         self.parameters["num_fc_units"], tf.nn.relu, 'fcn')
    if self.recurrent:
        self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32, name='prev_action')
        self.prev_action_onehot = c_layers.one_hot_encoding(self.prev_action, self.act_size)
        hidden = tf.concat([hidden, self.prev_action_onehot], axis=1)
        c_in = tf.placeholder(tf.float32, [None, self.parameters['num_rec_units']],
                              name='c_state')
        h_in = tf.placeholder(tf.float32, [None, self.parameters['num_rec_units']],
                              name='h_state')
        self.seq_len = tf.placeholder(shape=None, dtype=tf.int32, name='sequence_length')
        self.state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
        hidden, self.state_out = net.rnn(hidden, self.state_in,
                                         self.parameters['num_rec_units'],
                                         self.seq_len, 'rnn')
    self.hidden = hidden
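# Hedged usage sketch for the recurrent branch above (TF 1.x graph mode):
# feed a zero LSTM state on the first step and thread `state_out` back in on
# subsequent steps. `model`, `sess`, `obs_batch` and `prev_actions` are
# illustrative placeholders, not names from the original code.
num_units = model.parameters['num_rec_units']
zero_state = np.zeros((len(obs_batch), num_units), dtype=np.float32)
feed = {model.observation: obs_batch,
        model.prev_action: prev_actions,
        model.state_in.c: zero_state,
        model.state_in.h: zero_state,
        model.seq_len: 1}
hidden, lstm_state = sess.run([model.hidden, model.state_out], feed)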
def __init__(self, lr, brain, h_size, epsilon, beta, max_step, normalize, num_layers):
    """
    Creates a Discrete Control Actor-Critic model.
    :param lr: Learning rate.
    :param brain: Brain whose state and action spaces define the model.
    :param h_size: Hidden layer size.
    """
    super(DiscreteControlModel, self).__init__()
    self._create_global_steps()
    self._create_reward_encoder()
    self.normalize = normalize
    hidden_state, hidden_visual, hidden = None, None, None
    if brain.number_observations > 0:
        height_size, width_size = (brain.camera_resolutions[0]['height'],
                                   brain.camera_resolutions[0]['width'])
        bw = brain.camera_resolutions[0]['blackAndWhite']
        hidden_visual = self._create_visual_encoder(height_size, width_size, bw, h_size, 1,
                                                    tf.nn.elu, num_layers)[0]
    if brain.state_space_size > 0:
        s_size = brain.state_space_size
        if brain.state_space_type == "continuous":
            hidden_state = self._create_continuous_state_encoder(s_size, h_size, 1,
                                                                 tf.nn.elu, num_layers)[0]
        else:
            hidden_state = self._create_discrete_state_encoder(s_size, h_size, 1,
                                                               tf.nn.elu, num_layers)[0]
    if hidden_visual is None and hidden_state is None:
        raise Exception("No valid network configuration possible. "
                        "There are no states or observations in this brain")
    elif hidden_visual is not None and hidden_state is None:
        hidden = hidden_visual
    elif hidden_visual is None and hidden_state is not None:
        hidden = hidden_state
    elif hidden_visual is not None and hidden_state is not None:
        hidden = tf.concat([hidden_visual, hidden_state], axis=1)

    a_size = brain.action_space_size
    self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
    self.policy = tf.layers.dense(hidden, a_size, activation=None, use_bias=False,
                                  kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
    self.probs = tf.nn.softmax(self.policy, name="action_probs")
    self.output = tf.multinomial(self.policy, 1)
    self.output_max = tf.argmax(self.probs, name='action_max', axis=1)
    self.output = tf.identity(self.output, name="action")
    self.value = tf.layers.dense(hidden, 1, activation=None, use_bias=False,
                                 kernel_initializer=c_layers.variance_scaling_initializer(factor=1.0))
    self.value = tf.identity(self.value, name="value_estimate")
    self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1)
    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
    self.selected_actions = c_layers.one_hot_encoding(self.action_holder, a_size)
    self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32,
                                    name='old_probabilities')
    self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
    self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)
    self._create_ppo_optimizer(self.responsible_probs, self.old_responsible_probs, self.value,
                               self.entropy, beta, epsilon, lr, max_step)
def __init__(self, lr, s_size, a_size, h_size, epsilon, beta, max_step):
    """
    Creates a Discrete Control Actor-Critic model.
    :param s_size: State-space size.
    :param a_size: Action-space size.
    :param h_size: Hidden layer size.
    """
    self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state')
    self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
    hidden_1 = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.elu)
    hidden_2 = tf.layers.dense(hidden_1, h_size, use_bias=False, activation=tf.nn.elu)
    self.policy = tf.layers.dense(hidden_2, a_size, activation=None, use_bias=False,
                                  kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1))
    self.probs = tf.nn.softmax(self.policy)
    self.action = tf.multinomial(self.policy, 1)
    self.output = tf.identity(self.action, name='action')
    self.value = tf.layers.dense(hidden_2, 1, activation=None, use_bias=False)
    self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1)
    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
    self.selected_actions = c_layers.one_hot_encoding(self.action_holder, a_size)
    self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32,
                                    name='old_probabilities')
    self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
    self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)
    PPOModel.__init__(self, self.responsible_probs, self.old_responsible_probs,
                      self.value, self.entropy, beta, epsilon, lr, max_step)
def create_discrete_state_encoder(self, s_size, h_size, activation, num_layers):
    """
    Builds a hidden state encoder from discrete state input.
    :param s_size: state input size (discrete).
    :param h_size: Hidden layer size.
    :param activation: What type of activation function to use for layers.
    :param num_layers: number of hidden layers to create.
    :return: Tensor for the final hidden layer.
    """
    vector_in = tf.reshape(self.vector_in, [-1])
    state_onehot = c_layers.one_hot_encoding(vector_in, s_size)
    hidden = state_onehot
    for j in range(num_layers):
        hidden = tf.layers.dense(hidden, h_size, use_bias=False, activation=activation)
    return hidden
def __init__(self, lr, s_size, a_size):
    # These lines establish the feed-forward part of the network:
    # the agent takes a state and produces an action.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    state_in_OH = slim.one_hot_encoding(self.state_in, s_size)
    output = slim.fully_connected(state_in_OH, a_size,
                                  biases_initializer=None,
                                  activation_fn=tf.nn.sigmoid,
                                  weights_initializer=tf.ones_initializer())
    self.output = tf.reshape(output, [-1])
    self.chosen_action = tf.argmax(self.output, 0)

    # The next six lines establish the training procedure. We feed the reward
    # and chosen action into the network to compute the loss, and use it to
    # update the network.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
    self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = optimizer.minimize(self.loss)
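# Hedged usage sketch for the contextual bandit agent above (TF 1.x),
# assuming the __init__ belongs to a class named, say, `Agent`; `get_reward`
# is a hypothetical environment call, not part of the original snippet.
agent = Agent(lr=1e-3, s_size=3, a_size=4)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    state = 0
    action = sess.run(agent.chosen_action, {agent.state_in: [state]})
    reward = get_reward(state, action)  # hypothetical environment call
    sess.run(agent.update, {agent.state_in: [state],
                            agent.action_holder: [action],
                            agent.reward_holder: [reward]})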
def build_actor_critic(self):
    """
    Adds actor and critic heads to the TensorFlow graph.
    """
    if self.influence:
        hidden = tf.concat([self.hidden, self.inf_hidden], axis=1)
    else:
        hidden = self.hidden
    self.logits = tf.layers.dense(hidden, self.act_size, activation=None, use_bias=False,
                                  kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
    self.action_probs = tf.nn.softmax(self.logits, name="action_probs")
    self.action = tf.reduce_sum(tf.multinomial(self.logits, 1), axis=1)
    self.action = tf.identity(self.action, name="action")
    self.value = tf.reduce_sum(tf.layers.dense(hidden, 1, activation=None), axis=1)
    self.value = tf.identity(self.value, name="value_estimate")
    self.entropy = -tf.reduce_sum(self.action_probs * tf.log(self.action_probs + 1e-10),
                                  axis=1)
    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32, name='action_holder')
    self.actions_onehot = c_layers.one_hot_encoding(self.action_holder, self.act_size)
    self.old_action_probs = tf.placeholder(shape=[None, self.act_size], dtype=tf.float32,
                                           name='old_probs')
    self.action_prob = tf.reduce_sum(self.action_probs * self.actions_onehot, axis=1)
    self.old_action_prob = tf.reduce_sum(self.old_action_probs * self.actions_onehot, axis=1)
def build_tagging_graph(self, inputs, hidden_layers, channels, num_tags, use_crf, lamd,
                        dropout_emb, dropout_hidden, kernel_size, use_bn, use_wn,
                        active_type):
    """
    Build a deep neural model for sequence tagging.
    """
    stag_ids = tf.placeholder(dtype=INT_TYPE, shape=[None, None], name='stag_ids')
    seq_lengths = tf.placeholder(dtype=INT_TYPE, shape=[None], name='seq_lengths')

    # Default is not train.
    is_train = tf.placeholder(dtype=tf.bool, shape=[], name='is_train')

    masks = tf.cast(tf.sequence_mask(seq_lengths), FLOAT_TYPE)

    # Dropout on embedding output.
    if dropout_emb:
        inputs = tf.cond(is_train,
                         lambda: tf.nn.dropout(inputs, 1 - dropout_emb),
                         lambda: inputs)

    hidden_output = inputs
    pre_channels = inputs.get_shape()[-1].value
    for i in range(hidden_layers):
        k = kernel_size
        cur_channels = channels[i]
        filter_w = tf.get_variable('filter_w_%d' % i,
                                   shape=[k, pre_channels, cur_channels], dtype=FLOAT_TYPE)
        filter_v = tf.get_variable('filter_v_%d' % i,
                                   shape=[k, pre_channels, cur_channels], dtype=FLOAT_TYPE)
        bias_b = tf.get_variable('bias_b_%d' % i, shape=[cur_channels],
                                 initializer=tf.zeros_initializer(dtype=FLOAT_TYPE))
        bias_c = tf.get_variable('bias_c_%d' % i, shape=[cur_channels],
                                 initializer=tf.zeros_initializer(dtype=FLOAT_TYPE))

        # Weight normalization.
        if use_wn:
            epsilon = 1e-12
            g_w = tf.get_variable('g_w_%d' % i, shape=[k, 1, cur_channels], dtype=FLOAT_TYPE)
            g_v = tf.get_variable('g_v_%d' % i, shape=[k, 1, cur_channels], dtype=FLOAT_TYPE)
            # Perform weight normalization.
            filter_w = g_w * filter_w / (tf.sqrt(
                tf.reduce_sum(filter_w ** 2, 1, keep_dims=True)) + epsilon)
            filter_v = g_v * filter_v / (tf.sqrt(
                tf.reduce_sum(filter_v ** 2, 1, keep_dims=True)) + epsilon)

        w = tf.nn.conv1d(hidden_output, filter_w, 1, 'SAME') + bias_b
        v = tf.nn.conv1d(hidden_output, filter_v, 1, 'SAME') + bias_c

        if use_bn:
            # Normalize each branch from its own pre-activation (the original
            # snippet crossed the inputs, which looks like a transcription bug).
            w = layers.batch_norm(inputs=w, decay=0.9, is_training=is_train,
                                  center=True, scale=True, scope='BatchNorm_w_%d' % i)
            v = layers.batch_norm(inputs=v, decay=0.9, is_training=is_train,
                                  center=True, scale=True, scope='BatchNorm_v_%d' % i)

        if active_type == 'glu':
            hidden_output = w * tf.nn.sigmoid(v)
        elif active_type == 'relu':
            hidden_output = tf.nn.relu(w)
        elif active_type == 'gtu':
            hidden_output = tf.tanh(w) * tf.nn.sigmoid(v)
        elif active_type == 'tanh':
            hidden_output = tf.tanh(w)
        elif active_type == 'linear':
            hidden_output = w
        elif active_type == 'bilinear':
            hidden_output = w * v

        # Mask paddings.
        hidden_output = hidden_output * tf.expand_dims(masks, -1)

        # Dropout on hidden output.
        if dropout_hidden:
            hidden_output = tf.cond(
                is_train,
                lambda: tf.nn.dropout(hidden_output, 1 - dropout_hidden),
                lambda: hidden_output)

        pre_channels = cur_channels

    # Un-scaled log probabilities.
    scores = layers.fully_connected(hidden_output, num_tags, tf.identity)

    if use_crf:
        cost, transitions = crf.crf_log_likelihood(inputs=scores,
                                                   tag_indices=stag_ids,
                                                   sequence_lengths=seq_lengths)
        cost = -tf.reduce_mean(cost)
    else:
        reshaped_scores = tf.reshape(scores, [-1, num_tags])
        reshaped_stag_ids = tf.reshape(stag_ids, [-1])
        real_distribution = layers.one_hot_encoding(reshaped_stag_ids, num_tags)
        cost = tf.nn.softmax_cross_entropy_with_logits(logits=reshaped_scores,
                                                       labels=real_distribution)
        cost = tf.reduce_sum(tf.reshape(cost, tf.shape(stag_ids)) * masks) / \
            tf.cast(tf.shape(inputs)[0], FLOAT_TYPE)

    # Calculate L2 penalty.
    l2_penalty = 0
    if lamd > 0:
        for v in tf.trainable_variables():
            if '/B:' not in v.name and '/biases:' not in v.name:
                l2_penalty += lamd * tf.nn.l2_loss(v)
    train_cost = cost + l2_penalty

    # Summary cost.
    tf.summary.scalar('cost', cost)
    summaries = tf.summary.merge_all()

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        updates = tf.group(*update_ops)
        with tf.control_dependencies([updates]):
            cost = tf.identity(cost)

    return stag_ids, seq_lengths, is_train, cost, train_cost, scores, summaries
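# Hedged usage sketch: driving one training step with the handles returned by
# build_tagging_graph. The batch arrays, `model`, and the constructor
# arguments are illustrative placeholders, not names from the original code.
(stag_ids, seq_lengths, is_train, cost, train_cost,
 scores, summaries) = model.build_tagging_graph(
    inputs, hidden_layers, channels, num_tags, use_crf, lamd,
    dropout_emb, dropout_hidden, kernel_size, use_bn, use_wn, active_type)
train_op = tf.train.AdamOptimizer(1e-3).minimize(train_cost)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, batch_cost = sess.run(
        [train_op, cost],
        feed_dict={stag_ids: tags_batch,        # [batch, max_len] int tag ids
                   seq_lengths: lengths_batch,  # [batch] true sequence lengths
                   is_train: True})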
def build_model(self):
    self.placeholders = _get_placeholders(self.spatial_dim)

    with tf.variable_scope("theta"):
        units_embedded = layers.embed_sequence(
            self.placeholders.screen_unit_type,
            vocab_size=SCREEN_FEATURES.unit_type.scale,
            embed_dim=self.unit_type_emb_dim,
            scope="unit_type_emb",
            trainable=self.trainable
        )

        # Let's not one-hot zero, which is background.
        player_relative_screen_one_hot = layers.one_hot_encoding(
            self.placeholders.player_relative_screen,
            num_classes=SCREEN_FEATURES.player_relative.scale
        )[:, :, :, 1:]
        player_relative_minimap_one_hot = layers.one_hot_encoding(
            self.placeholders.player_relative_minimap,
            num_classes=MINIMAP_FEATURES.player_relative.scale
        )[:, :, :, 1:]

        channel_axis = 3
        screen_numeric_all = tf.concat(
            [self.placeholders.screen_numeric, units_embedded,
             player_relative_screen_one_hot],
            axis=channel_axis
        )
        minimap_numeric_all = tf.concat(
            [self.placeholders.minimap_numeric, player_relative_minimap_one_hot],
            axis=channel_axis
        )

        # Build the conv nets.
        screen_output = self._build_convs(screen_numeric_all, "screen_network")
        minimap_output = self._build_convs(minimap_numeric_all, "minimap_network")

        # State representation (last layer before separation, as described in the paper).
        self.map_output = tf.concat([screen_output, minimap_output], axis=channel_axis)

        # Build the ConvLSTM.
        self.rnn_in = tf.reshape(self.map_output, [1, -1, 32, 32, 64])
        self.cell = tf.contrib.rnn.Conv2DLSTMCell(input_shape=[32, 32, 1],  # input dims
                                                  kernel_shape=[3, 3],      # for a 3x3 conv
                                                  output_channels=64)       # number of feature maps
        c_init = np.zeros((1, 32, 32, 64), np.float32)
        h_init = np.zeros((1, 32, 32, 64), np.float32)
        self.state_init = [c_init, h_init]
        step_size = tf.shape(self.map_output)[:1]  # Get step_size from input dimensions.
        c_in = tf.placeholder(tf.float32, [None, 32, 32, 64])
        h_in = tf.placeholder(tf.float32, [None, 32, 32, 64])
        self.state_in = (c_in, h_in)
        state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
        self.step_size = tf.placeholder(tf.float32, [1])
        (self.outputs, self.state) = tf.nn.dynamic_rnn(self.cell, self.rnn_in,
                                                       initial_state=state_in,
                                                       sequence_length=step_size,
                                                       time_major=False,
                                                       dtype=tf.float32)
        lstm_c, lstm_h = self.state
        self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
        rnn_out = tf.reshape(self.outputs, [-1, 32, 32, 64])

        # 1x1 conv layer to generate our spatial policy.
        self.spatial_action_logits = layers.conv2d(
            rnn_out,
            data_format="NHWC",
            num_outputs=1,
            kernel_size=1,
            stride=1,
            activation_fn=None,
            scope='spatial_action',
            trainable=self.trainable
        )
        spatial_action_probs = tf.nn.softmax(layers.flatten(self.spatial_action_logits))

        map_output_flat = tf.reshape(self.outputs, [-1, 65536])  # 32 * 32 * 64

        # Fully connected layer for value predictions and action_id.
        self.fc1 = layers.fully_connected(
            map_output_flat,
            num_outputs=256,
            activation_fn=tf.nn.relu,
            scope="fc1",
            trainable=self.trainable
        )
        # fc/action_id
        action_id_probs = layers.fully_connected(
            self.fc1,
            num_outputs=len(actions.FUNCTIONS),
            activation_fn=tf.nn.softmax,
            scope="action_id",
            trainable=self.trainable
        )
        # fc/value
        self.value_estimate = tf.squeeze(layers.fully_connected(
            self.fc1,
            num_outputs=1,
            activation_fn=None,
            scope='value',
            trainable=self.trainable
        ), axis=1)

        # Disregard non-allowed actions by setting zero prob and
        # re-normalizing to 1 (the mask).
        action_id_probs *= self.placeholders.available_action_ids
        action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True)

        def logclip(x):
            return tf.log(tf.clip_by_value(x, 1e-12, 1.0))

        spatial_action_log_probs = (
            logclip(spatial_action_probs)
            * tf.expand_dims(self.placeholders.is_spatial_action_available, axis=1)
        )
        # Non-available actions get log(1e-12), but that's fine because it's never used.
        action_id_log_probs = logclip(action_id_probs)

        self.action_id_probs = action_id_probs
        self.spatial_action_probs = spatial_action_probs
        self.action_id_log_probs = action_id_log_probs
        self.spatial_action_log_probs = spatial_action_log_probs

    selected_spatial_action_flat = ravel_index_pairs(
        self.placeholders.selected_spatial_action, self.spatial_dim
    )
    selected_log_probs = self._get_select_action_probs(selected_spatial_action_flat)

    # The maximum is to avoid 0 / 0, because this is used to calculate some means.
    sum_spatial_action_available = tf.maximum(
        1e-10, tf.reduce_sum(self.placeholders.is_spatial_action_available)
    )
    neg_entropy_spatial = tf.reduce_sum(
        self.spatial_action_probs * self.spatial_action_log_probs
    ) / sum_spatial_action_available
    neg_entropy_action_id = tf.reduce_mean(tf.reduce_sum(
        self.action_id_probs * self.action_id_log_probs, axis=1
    ))

    # Sample actions from the corresponding distributions defined by the
    # policy network theta.
    self.sampled_action_id = weighted_random_sample(self.action_id_probs)
    self.sampled_spatial_action = weighted_random_sample(self.spatial_action_probs)

    policy_loss = -tf.reduce_mean(selected_log_probs.total * self.placeholders.advantage)
    value_loss = tf.losses.mean_squared_error(
        self.placeholders.value_target, self.value_estimate)

    loss = (
        policy_loss
        + value_loss * self.loss_value_weight
        + neg_entropy_spatial * self.entropy_weight_spatial
        + neg_entropy_action_id * self.entropy_weight_action_id
    )

    self.train_op = layers.optimize_loss(
        loss=loss,
        global_step=tf.train.get_global_step(),
        optimizer=self.optimiser,
        clip_gradients=self.max_gradient_norm,
        summaries=OPTIMIZER_SUMMARIES,
        learning_rate=None,
        name="train_op"
    )

    self._scalar_summary("value/estimate", tf.reduce_mean(self.value_estimate))
    self._scalar_summary("value/target", tf.reduce_mean(self.placeholders.value_target))
    self._scalar_summary("action/is_spatial_action_available",
                         tf.reduce_mean(self.placeholders.is_spatial_action_available))
    self._scalar_summary("action/selected_id_log_prob",
                         tf.reduce_mean(selected_log_probs.action_id))
    self._scalar_summary("loss/policy", policy_loss)
    self._scalar_summary("loss/value", value_loss)
    self._scalar_summary("loss/neg_entropy_spatial", neg_entropy_spatial)
    self._scalar_summary("loss/neg_entropy_action_id", neg_entropy_action_id)
    self._scalar_summary("loss/total", loss)
    self._scalar_summary("value/advantage", tf.reduce_mean(self.placeholders.advantage))
    self._scalar_summary("action/selected_total_log_prob",
                         tf.reduce_mean(selected_log_probs.total))
    self._scalar_summary("action/selected_spatial_log_prob",
                         tf.reduce_sum(selected_log_probs.spatial) / sum_spatial_action_available)

    self.init_op = tf.global_variables_initializer()
    self.saver = tf.train.Saver(max_to_keep=2)
    self.all_summary_op = tf.summary.merge_all(tf.GraphKeys.SUMMARIES)
    self.scalar_summary_op = tf.summary.merge(tf.get_collection(self._scalar_summary_key))
def build(self): """build Build the actual network, using the values passed over the from agent object, which themselves are derived from the Obs object. This has no concept of transfer learning. """ # Maps a series of symbols to embeddings, # where an embedding is a mapping from discrete objects, # such as words, to vectors of real numbers. # In this case it is from the unit types. units_embedded = layers.embed_sequence( self.placeholders.screen_unit_type, vocab_size=SCREEN_FEATURES.unit_type.scale, embed_dim=self.unittype_emb_dim, scope="unit_type_emb", trainable=self.trainable, ) # "One hot" encoding performs "binarization" on the input # meaning we end up with features we can suitably learn # from. # Basically, learning from categories isn't possible, # but learning from ints (i.e. 0/1/2 for 3 categories) # ends up with further issues, like the ML algorithm # picking up some pattern in the categories, when none exists. # Instead we want it in a binary form instead, to prevent this. # This is not needed for the background, since it is # not used, which is why we ignore channel 0 in the # last sub-array. player_relative_screen_one_hot = layers.one_hot_encoding( self.placeholders.player_relative_screen, num_classes=SCREEN_FEATURES.player_relative.scale, )[:, :, :, 1:] player_relative_minimap_one_hot = layers.one_hot_encoding( self.placeholders.player_relative_minimap, num_classes=MINIMAP_FEATURES.player_relative.scale, )[:, :, :, 1:] channel_axis = 3 # Group together all the inputs, such that a conv # layer can be built upon them. screen_numeric_all = tf.concat( [ self.placeholders.screen_numeric, units_embedded, player_relative_screen_one_hot, ], axis=channel_axis, ) minimap_numeric_all = tf.concat( [self.placeholders.minimap_numeric, player_relative_minimap_one_hot], axis=channel_axis, ) non_spatial_features = tf.cast( self.placeholders.non_spatial_features, tf.float32 ) log_non_spatial_features = tf.log(non_spatial_features + 1.0) four_d_non_spatial = reference_tiling_method(self, log_non_spatial_features) if DEBUG: # We want to print the values of the tensor four_d_non_spatial = tf.Print( four_d_non_spatial, [four_d_non_spatial], "4D non spatial tensor values: ", summarize=1024, # this is the number of values TF will print from the Tensor ) # Build the 2 convolutional layers based on the screen # and the mini-map. screen_conv_layer_output = self.build_conv_layers_for_input( screen_numeric_all, "screen_network" ) minimap_conv_layer_output = self.build_conv_layers_for_input( minimap_numeric_all, "minimap_network" ) # Group these two convolutional layers now, and the non_spatial # features. build a further convolutional layer on top of it. visual_inputs = tf.concat( [screen_conv_layer_output, minimap_conv_layer_output, four_d_non_spatial], axis=channel_axis, ) spatial_actions = layers.conv2d( visual_inputs, data_format="NHWC", num_outputs=1, kernel_size=1, stride=1, activation_fn=None, scope="spatial_action", trainable=self.trainable, ) if self.trainable: tf.summary.image( f"spatial_action", tf.reshape(spatial_actions, [-1, 32, 32, 1]), 3 ) # Take the softmax of this final convolutional layer. spatial_action_probs = tf.nn.softmax(layers.flatten(spatial_actions)) # Build a full connected layer of this final convolutional layer. # Could possibly pass in additional variables here, alongside the # convolutional layer. 
map_output_flat = layers.flatten(visual_inputs) fully_connected_layer1 = layers.fully_connected( map_output_flat, num_outputs=256, activation_fn=tf.nn.relu, scope="fully_connected_layer1", trainable=self.trainable, ) # Generate the probability of a given action from the # fully connected layer. Finally, produce a value # estimate for the given actions. action_id_probs = layers.fully_connected( fully_connected_layer1, num_outputs=len(actions.FUNCTIONS), activation_fn=tf.nn.softmax, scope="action_id", trainable=self.trainable, ) value_estimate = tf.squeeze( layers.fully_connected( fully_connected_layer1, num_outputs=1, activation_fn=None, scope="value", trainable=self.trainable, ), axis=1, ) # Disregard all the non-allowed actions by giving them a # probability of zero, before re-normalizing to 1. action_id_probs *= self.placeholders.available_action_ids action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True) spatial_action_log_probs = self.logclip(spatial_action_probs) * tf.expand_dims( self.placeholders.is_spatial_action_available, axis=1 ) action_id_log_probs = self.logclip(action_id_probs) self.value_estimate = value_estimate self.action_id_probs = action_id_probs self.spatial_action_probs = spatial_action_probs self.action_id_log_probs = action_id_log_probs self.spatial_action_log_probs = spatial_action_log_probs return self
embedded1 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    e = layers.embed_sequence(
        f,
        vocab_size=n,
        embed_dim=embedding_dim,
        initializer=tf.constant_initializer(w)
    )
    embedded1.append(e)
out11 = tf.concat(embedded1, axis=2)

# 1.2) onehot on channel -> 1x1 conv separately -> concat on channel
embedded2 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    one_hot = layers.one_hot_encoding(f, num_classes=n)
    conv_out = layers.conv2d(
        inputs=one_hot,
        num_outputs=embedding_dim,
        weights_initializer=tf.constant_initializer(w),
        kernel_size=1,
        stride=1
    )
    embedded2.append(conv_out)
out12 = tf.concat(embedded2, axis=2)

# 2.1) sum embeddings on channel instead of concatenating
out21 = tf.add_n(embedded1)
def toOneHot(input, num_classes):
    # Thin wrapper around tf.contrib.layers.one_hot_encoding.
    return contrib_layers.one_hot_encoding(input, num_classes)
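# Minimal, self-contained check of the wrapper above (TF 1.x graph mode
# assumed): three class ids become one-hot rows.
import tensorflow as tf
from tensorflow.contrib import layers as contrib_layers

ids = tf.constant([0, 2, 1], dtype=tf.int32)
one_hot = toOneHot(ids, num_classes=3)

with tf.Session() as sess:
    print(sess.run(one_hot))
    # [[1. 0. 0.]
    #  [0. 0. 1.]
    #  [0. 1. 0.]]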
def __init__(self, lr, o_size_h, o_size_w, a_size, h_size, epsilon, beta, max_step):
    """
    Creates a Discrete Control Actor-Critic model for use with visual observations (images).
    :param o_size_h: Observation height.
    :param o_size_w: Observation width.
    :param a_size: Action-space size.
    :param h_size: Hidden layer size.
    """
    self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, 1],
                                         dtype=tf.float32, name='observation_0')
    self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3],
                                  strides=[2, 2], use_bias=False, activation=tf.nn.elu)
    self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3],
                                  strides=[2, 2], use_bias=False, activation=tf.nn.elu)
    self.batch_size = tf.placeholder(shape=None, dtype=tf.int32)
    hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size,
                             use_bias=False, activation=tf.nn.elu)
    self.policy = tf.layers.dense(hidden, a_size, activation=None, use_bias=False,
                                  kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1))
    self.probs = tf.nn.softmax(self.policy)
    self.action = tf.multinomial(self.policy, 1)
    self.output = tf.identity(self.action, name='action')
    self.value = tf.layers.dense(hidden, 1, activation=None, use_bias=False)
    self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1)
    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
    self.selected_actions = c_layers.one_hot_encoding(self.action_holder, a_size)
    self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32,
                                    name='old_probabilities')
    self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
    self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)
    PPOModel.__init__(self, self.responsible_probs, self.old_responsible_probs,
                      self.value, self.entropy, beta, epsilon, lr, max_step)
def _build_fullyconv_network(self):
    units_embedded = layers.embed_sequence(
        self.ph_screen_unit_type,
        vocab_size=SCREEN_FEATURES.unit_type.scale,
        embed_dim=self.unit_type_emb_dim,
        scope="unit_type_emb"
    )

    # Let's not one-hot zero, which is background.
    player_relative_screen_one_hot = layers.one_hot_encoding(
        self.ph_player_relative_screen,
        num_classes=SCREEN_FEATURES.player_relative.scale
    )[:, :, :, 1:]
    player_relative_minimap_one_hot = layers.one_hot_encoding(
        self.ph_player_relative_minimap,
        num_classes=MINIMAP_FEATURES.player_relative.scale
    )[:, :, :, 1:]

    channel_axis = 3
    screen_numeric_all = tf.concat(
        [self.ph_screen_numeric, units_embedded, player_relative_screen_one_hot],
        axis=channel_axis
    )
    minimap_numeric_all = tf.concat(
        [self.ph_minimap_numeric, player_relative_minimap_one_hot],
        axis=channel_axis
    )

    screen_output = _build_convs(screen_numeric_all, "screen_network")
    minimap_output = _build_convs(minimap_numeric_all, "minimap_network")
    map_output = tf.concat([screen_output, minimap_output], axis=channel_axis)

    spatial_action_logits = layers.conv2d(
        map_output,
        data_format="NHWC",
        num_outputs=1,
        kernel_size=1,
        stride=1,
        activation_fn=None,
        scope='spatial_action'
    )
    spatial_action_probs = tf.nn.softmax(layers.flatten(spatial_action_logits))

    map_output_flat = layers.flatten(map_output)
    fc1 = layers.fully_connected(
        map_output_flat,
        num_outputs=256,
        activation_fn=tf.nn.relu,
        scope="fc1"
    )
    action_id_probs = layers.fully_connected(
        fc1,
        num_outputs=len(actions.FUNCTIONS),
        activation_fn=tf.nn.softmax,
        scope="action_id"
    )
    value_estimate = tf.squeeze(layers.fully_connected(
        fc1,
        num_outputs=1,
        activation_fn=None,
        scope='value'
    ), axis=1)

    # Disregard non-allowed actions by setting zero prob and re-normalizing to 1.
    action_id_probs *= self.ph_available_action_ids
    action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keep_dims=True)

    return spatial_action_probs, action_id_probs, value_estimate
import matplotlib.pyplot as plt

conv = layers.convolution2d
pool = layers.max_pool2d
fc = layers.fully_connected
bn = layers.batch_norm

h = 50
w = 50
h_trans = 30
w_trans = 30

x = tf.placeholder(tf.float32, [None, h * w])
x_tensor = tf.reshape(x, [-1, h, w, 1])
y = tf.placeholder(tf.int32, [None])
y_one_hot = layers.one_hot_encoding(y, 10)

# localization net
"""
loc = pool(conv(x_tensor, 8, [5, 5], padding='VALID'), [2, 2])
loc = pool(conv(loc, 16, [5, 5], padding='VALID'), [2, 2])
loc = fc(layers.flatten(loc), 50)
"""
loc = pool(x_tensor, [2, 2])
loc = conv(loc, 5, [5, 5], padding='VALID')
loc = pool(loc, [2, 2])
loc = conv(loc, 10, [5, 5], padding='VALID')
"""
loc = fc(fc(x, 500), 50)
# 1.1) embed on channel -> concat on channel
embedded1 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    e = layers.embed_sequence(f, vocab_size=n, embed_dim=embedding_dim,
                              initializer=tf.constant_initializer(w))
    embedded1.append(e)
out11 = tf.concat(embedded1, axis=2)

# 1.2) onehot on channel -> 1x1 conv separately -> concat on channel
embedded2 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    one_hot = layers.one_hot_encoding(f, num_classes=n)
    conv_out = layers.conv2d(inputs=one_hot, num_outputs=embedding_dim,
                             weights_initializer=tf.constant_initializer(w),
                             kernel_size=1, stride=1)
    embedded2.append(conv_out)
out12 = tf.concat(embedded2, axis=2)

# 2.1) sum embeddings on channel instead of concatenating
out21 = tf.add_n(embedded1)

# 2.2) onehot on channel -> concat on channel -> 1x1 conv
one_hotted_features = tf.concat([
def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reuse=False):
    nbatch = nenv * nsteps
    nh, nw, nc = ob_space.shape
    ob_shape = (nbatch, nh, nw, nc * nstack)
    X = tf.placeholder(tf.int32, ob_shape)  # obs
    with tf.variable_scope("fullyconv_model", reuse=reuse):
        # Assuming we have only one channel.
        x_onehot = layers.one_hot_encoding(
            X[:, :, :, 0],
            num_classes=SCREEN_FEATURES.player_relative.scale)

        # Don't one-hot the 0 category.
        x_onehot = x_onehot[:, :, :, 1:]

        h = layers.conv2d(x_onehot, num_outputs=16, kernel_size=5,
                          stride=1, padding='SAME', scope="conv1")
        h2 = layers.conv2d(h, num_outputs=32, kernel_size=3,
                           stride=1, padding='SAME', scope="conv2")
        pi = layers.flatten(layers.conv2d(h, num_outputs=1, kernel_size=1, stride=1,
                                          scope="spatial_action", activation_fn=None))
        pi *= 3.0  # make it a little more deterministic; not sure if this is a good idea

        f = layers.fully_connected(layers.flatten(h2), num_outputs=64,
                                   activation_fn=tf.nn.relu, scope="value_h_layer")
        vf = layers.fully_connected(f, num_outputs=1, activation_fn=None,
                                    scope="value_out")

    v0 = vf[:, 0]
    a0 = sample(pi)
    self.initial_state = []  # not stateful

    def step(ob, *_args, **_kwargs):
        a, v = sess.run([a0, v0], {X: ob})
        return a, v, []  # dummy state

    def value(ob, *_args, **_kwargs):
        return sess.run(v0, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def build_transfer(self, previous_model):
    """build_transfer

    Build the actual network, using the values passed over from the agent
    object, which themselves are derived from the Obs object.

    This model is built using a previous model.
    """
    # Maps a series of symbols to embeddings,
    # where an embedding is a mapping from discrete objects,
    # such as words, to vectors of real numbers.
    # In this case it is from the unit types.
    units_embedded = layers.embed_sequence(
        self.placeholders.screen_unit_type,
        vocab_size=SCREEN_FEATURES.unit_type.scale,
        embed_dim=self.unittype_emb_dim,
        scope="unit_type_emb",
        trainable=self.trainable,
    )

    # "One hot" encoding performs "binarization" on the input,
    # meaning we end up with features we can suitably learn from
    # (see build() for the full rationale).
    # This is not needed for the background, since it is not used,
    # which is why we ignore channel 0 in the last sub-array.
    player_relative_screen_one_hot = layers.one_hot_encoding(
        self.placeholders.player_relative_screen,
        num_classes=SCREEN_FEATURES.player_relative.scale,
    )[:, :, :, 1:]
    player_relative_minimap_one_hot = layers.one_hot_encoding(
        self.placeholders.player_relative_minimap,
        num_classes=MINIMAP_FEATURES.player_relative.scale,
    )[:, :, :, 1:]

    channel_axis = 3
    # Group together all the inputs, such that a conv layer can be built
    # upon them.
    screen_numeric_all = tf.concat(
        [
            self.placeholders.screen_numeric,
            units_embedded,
            player_relative_screen_one_hot,
        ],
        axis=channel_axis,
    )
    minimap_numeric_all = tf.concat(
        [self.placeholders.minimap_numeric, player_relative_minimap_one_hot],
        axis=channel_axis,
    )

    # Build the 2 convolutional layers based on the screen and the mini-map.
    screen_conv_layer_output = self.build_conv_layers_for_input(
        screen_numeric_all, "screen_network", previous_model.screen_conv_1
    )
    # And now the minimap.
    minimap_conv_layer_output = self.build_conv_layers_for_input(
        minimap_numeric_all, "minimap_network", previous_model.minimap_conv_1
    )

    # Group these two convolutional layers now, and build a further
    # convolutional layer on top of it.
    visual_inputs = tf.concat(
        [screen_conv_layer_output, minimap_conv_layer_output], axis=channel_axis
    )
    spatial_actions_normal = layers.conv2d(
        visual_inputs,
        data_format="NHWC",
        num_outputs=1,
        kernel_size=1,
        stride=1,
        activation_fn=None,
        scope=f"spatial_actions/model_{self.curriculum_number}",
        trainable=self.trainable,
    )

    # Sort the previous models' spatial action layers.
    previous_spatial_actions = []
    for model_number, prev_out in enumerate(previous_model.concat_2):
        spatial_actions_previous = layers.conv2d(
            prev_out,
            data_format="NHWC",
            num_outputs=1,
            kernel_size=1,
            stride=1,
            activation_fn=None,
            scope=f"spatial_actions/model_{model_number}",
            trainable=self.trainable,
        )
        previous_spatial_actions.append(spatial_actions_previous)

    previous_spatial_actions_added = self.add_all_previous(
        previous_spatial_actions, "spatial_actions"
    )
    joint_spatial_actions = tf.add(
        spatial_actions_normal,
        previous_spatial_actions_added,
        "spatial_actions_add",
    )

    if self.trainable:
        tf.summary.image(
            "spatial_action_normal",
            tf.reshape(spatial_actions_normal, [-1, 32, 32, 1]),
            3,
        )
        tf.summary.image(
            "spatial_action_previous",
            tf.reshape(spatial_actions_previous, [-1, 32, 32, 1]),
            3,
        )
        tf.summary.image(
            "joint_connected_layers",
            tf.reshape(joint_spatial_actions, [-1, 32, 32, 1]),
            3,
        )

    # Take the softmax of this final convolutional layer.
    spatial_action_probs = tf.nn.softmax(layers.flatten(joint_spatial_actions))

    # Build a fully connected layer off this final convolutional layer.
    # Additional variables could be passed in here, alongside the
    # convolutional layer.
    map_output_flat = layers.flatten(visual_inputs)
    fully_connected_layer_normal = layers.fully_connected(
        map_output_flat,
        num_outputs=256,
        activation_fn=None,
        scope=f"fully_connected_layer1/model_{self.curriculum_number}",
        trainable=self.trainable,
    )

    previous_fully_con_1 = []
    for model_number, prev_out in enumerate(previous_model.flatten_1):
        fully_connected_previous = layers.fully_connected(
            prev_out,
            num_outputs=256,
            activation_fn=None,
            scope=f"fully_connected_layer1/model_{model_number}",
            trainable=self.trainable,
        )
        previous_fully_con_1.append(fully_connected_previous)

    previous_fully_con_1_added = self.add_all_previous(
        previous_fully_con_1, "fully_connected_layer1"
    )

    # Combine the new and old models' values, and then apply ReLU to the result.
    joint_connected_layers = tf.add(
        fully_connected_layer_normal,
        previous_fully_con_1_added,
        "fully_connected_layer_add",
    )
    relu_connected_layer = tf.nn.relu(
        joint_connected_layers, name="fully_connected_layer1_normal_relu"
    )

    # Generate the probability of a given action from the fully connected
    # layer. Finally, produce a value estimate for the given actions.
    action_id_probs_new = layers.fully_connected(
        relu_connected_layer,
        num_outputs=len(actions.FUNCTIONS),
        activation_fn=None,
        scope=f"action_id/model_{self.curriculum_number}",
        trainable=self.trainable,
    )

    previous_action_ids = []
    for model_number, prev_out in enumerate(previous_model.fully_connected_layer1):
        previous_action_id_probs = layers.fully_connected(
            prev_out,
            num_outputs=len(actions.FUNCTIONS),
            activation_fn=None,
            scope=f"action_id/model_{model_number}",
            trainable=self.trainable,
        )
        previous_action_ids.append(previous_action_id_probs)

    previous_action_ids_added = self.add_all_previous(
        previous_action_ids, "action_id"
    )
    joint_action_ids = tf.add(
        action_id_probs_new, previous_action_ids_added, "id_probs_add"
    )

    # Combine the new and old models' values, and then apply softmax to the result.
    action_id_probs = tf.nn.softmax(joint_action_ids)

    # Sort the value estimate.
    value_estimate_new = layers.fully_connected(
        relu_connected_layer,
        num_outputs=1,
        activation_fn=None,
        scope=f"value/model_{self.curriculum_number}",
        trainable=self.trainable,
    )

    previous_value_estimates = []
    for model_number, prev_out in enumerate(previous_model.fully_connected_layer1):
        value_estimate_previous = layers.fully_connected(
            prev_out,
            num_outputs=1,
            activation_fn=None,
            scope=f"value/model_{model_number}",
            trainable=self.trainable,
        )
        previous_value_estimates.append(value_estimate_previous)

    previous_value_estimates_added = self.add_all_previous(
        previous_value_estimates, "value"
    )

    # Combine the new and old models' values, and then squeeze the result.
    joint_value_estimate = tf.add(
        value_estimate_new, previous_value_estimates_added, "value_estimate_add"
    )
    value_estimate = tf.squeeze(joint_value_estimate, axis=1)

    # Disregard all the non-allowed actions by giving them a probability
    # of zero, before re-normalizing to 1.
    action_id_probs *= self.placeholders.available_action_ids
    action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True)

    spatial_action_log_probs = self.logclip(spatial_action_probs) * tf.expand_dims(
        self.placeholders.is_spatial_action_available, axis=1
    )
    action_id_log_probs = self.logclip(action_id_probs)

    self.value_estimate = value_estimate
    self.action_id_probs = action_id_probs
    self.spatial_action_probs = spatial_action_probs
    self.action_id_log_probs = action_id_log_probs
    self.spatial_action_log_probs = spatial_action_log_probs

    return self
def build(self):
    units_embedded = layers.embed_sequence(
        self.placeholders.screen_unit_type,
        vocab_size=SCREEN_FEATURES.unit_type.scale,
        embed_dim=self.unittype_emb_dim,
        scope="unit_type_emb",
        trainable=self.trainable)
    print("*model* units_embedded={}, input={}, dim={}".format(
        units_embedded.shape, self.placeholders.screen_unit_type.shape,
        self.unittype_emb_dim))

    # Let's not one-hot zero, which is background.
    player_relative_screen_one_hot = layers.one_hot_encoding(
        self.placeholders.player_relative_screen,
        num_classes=SCREEN_FEATURES.player_relative.scale)[:, :, :, 1:]
    player_relative_minimap_one_hot = layers.one_hot_encoding(
        self.placeholders.player_relative_minimap,
        num_classes=MINIMAP_FEATURES.player_relative.scale)[:, :, :, 1:]
    print("*model* player_relative_screen_one_hot={}, input={}, num_classes={}".format(
        player_relative_screen_one_hot.shape,
        self.placeholders.player_relative_screen.shape,
        SCREEN_FEATURES.player_relative.scale))
    print("*model* player_relative_minimap_one_hot={}, input={}, num_classes={}".format(
        player_relative_minimap_one_hot.shape,
        self.placeholders.player_relative_minimap.shape,
        MINIMAP_FEATURES.player_relative.scale))

    channel_axis = 3
    screen_numeric_all = tf.concat([
        self.placeholders.screen_numeric,
        units_embedded,
        player_relative_screen_one_hot
    ], axis=channel_axis)
    print("*model* screen_numeric_all={}, input=[{},{},{}]".format(
        screen_numeric_all.shape, self.placeholders.screen_numeric.shape,
        units_embedded.shape, player_relative_screen_one_hot.shape))

    minimap_numeric_all = tf.concat([
        self.placeholders.minimap_numeric,
        player_relative_minimap_one_hot
    ], axis=channel_axis)
    print("*model* minimap_numeric_all={}, input=[{},{}]".format(
        minimap_numeric_all.shape, self.placeholders.minimap_numeric.shape,
        player_relative_minimap_one_hot.shape))

    screen_output = self._build_convs(screen_numeric_all, "screen_network")
    minimap_output = self._build_convs(minimap_numeric_all, "minimap_network")
    print("*model* conv_screen={}, input={}".format(
        screen_output.shape, screen_numeric_all.shape))
    print("*model* conv_minimap={}, input={}".format(
        minimap_output.shape, minimap_numeric_all.shape))

    map_output = tf.concat([screen_output, minimap_output], axis=channel_axis)

    spatial_action_logits = layers.conv2d(
        map_output,
        data_format="NHWC",
        num_outputs=1,
        kernel_size=1,
        stride=1,
        activation_fn=None,
        scope='spatial_action',
        trainable=self.trainable)
    spatial_action_probs = tf.nn.softmax(layers.flatten(spatial_action_logits))
    print("*model* action_probs={}, action_logits={}, map_output={}".format(
        spatial_action_probs.shape, spatial_action_logits.shape, map_output.shape))

    map_output_flat = layers.flatten(map_output)
    fc1 = layers.fully_connected(
        map_output_flat,
        num_outputs=256,
        activation_fn=tf.nn.relu,
        scope="fc1",
        trainable=self.trainable)
    action_id_probs = layers.fully_connected(
        fc1,
        num_outputs=len(actions.FUNCTIONS),
        activation_fn=tf.nn.softmax,
        scope="action_id",
        trainable=self.trainable)
    value_estimate = tf.squeeze(layers.fully_connected(
        fc1,
        num_outputs=1,
        activation_fn=None,
        scope='value',
        trainable=self.trainable), axis=1)
    print("*model* action_id_probs={}, value_estimate={}, map_output_flat={}".format(
        action_id_probs.shape, value_estimate.shape, map_output_flat.shape))

    # Disregard non-allowed actions by setting zero prob and re-normalizing to 1.
    action_id_probs *= self.placeholders.available_action_ids
    action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keep_dims=True)

    def logclip(x):
        return tf.log(tf.clip_by_value(x, 1e-12, 1.0))

    spatial_action_log_probs = (
        logclip(spatial_action_probs)
        * tf.expand_dims(self.placeholders.is_spatial_action_available, axis=1))
    # Non-available actions get log(1e-12), but that's fine because it's never used.
    action_id_log_probs = logclip(action_id_probs)

    self.value_estimate = value_estimate
    self.action_id_probs = action_id_probs
    self.spatial_action_probs = spatial_action_probs
    self.action_id_log_probs = action_id_log_probs
    self.spatial_action_log_probs = spatial_action_log_probs

    return self
def run_module_unit_test(use_fake_data=False, test_mode="full_model"):
    # test_mode can be "module", "stem", "classifier_auxiliary",
    # "classifier_basic" or "full_model".
    fl = tf.app.flags.FLAGS
    BATCH_SIZE = fl.batch_size
    L2_WEIGHT = fl.l2_lambda_weight

    if use_fake_data:
        # Load fake data. ImageNet uses 224x224x3, but put whatever you want here.
        train_X = np.random.rand(BATCH_SIZE * 5, 224, 224, 3)
        train_y = np.random.randint(low=0, high=1000, size=(BATCH_SIZE * 5, 1))
        NUM_LABELS = 1000
        test_X = np.random.rand(BATCH_SIZE, 224, 224, 3)
        test_y = np.random.randint(low=0, high=1000, size=(BATCH_SIZE, 1))
    else:
        # TODO - toss away this NUM_LABELS when done testing
        (train_X, train_y), (test_X, test_y), NUM_LABELS = data_utils.load_dataset(fl.dataset)

    # Extract a random validation set from the training set.
    validation_size = np.floor(train_X.shape[0] * fl.validation_ratio).astype(int)
    shuf = np.random.permutation(train_X.shape[0])
    train_X = train_X[shuf]
    train_y = train_y[shuf]
    validation_X, validation_y = train_X[:validation_size], train_y[:validation_size]
    train_X, train_y = train_X[validation_size:], train_y[validation_size:]

    IMAGE_LEN = train_X.shape[1]
    IMAGE_WID = train_X.shape[2]
    IMAGE_SIZE = IMAGE_LEN
    NUM_CHANNELS = train_X.shape[3]

    g = tf.Graph()
    with g.as_default():
        tf_train_X = tf.placeholder(tf.float32,
                                    shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
        tf_train_y = tf.placeholder(tf.int32, shape=(BATCH_SIZE, 1))
        tf_validation_X = tf.placeholder(tf.float32, shape=validation_X.shape)
        tf_validation_y = tf.placeholder(tf.int32, shape=(validation_y.shape[0], 1))
        tf_test_X = tf.placeholder(tf.float32, shape=test_X.shape)
        tf_test_y = tf.placeholder(tf.int32, shape=(test_y.shape[0], 1))

        if test_mode == "stem":
            # The length and width of the image are each halved three times, hence the 8.
            expected_output_shape = [BATCH_SIZE, IMAGE_SIZE // 8, IMAGE_SIZE // 8, 192]
            inception_model = InceptionStemV1(filter_sizes=[64, 64, 192],
                                              input_shape=tf_train_X.get_shape(),
                                              output_shape=expected_output_shape,
                                              scope="stem1")
        elif test_mode == "module":
            expected_output_shape = [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 64]
            inception_model = InceptionModuleV1(
                dtype=tf.float32,
                input_shape=tf_train_X.get_shape().as_list(),
                output_shape=expected_output_shape,
                # indexes 0,2,4,5 must add up to output[-1]
                filter_sizes=[16, 24, 32, 4, 8, 8],
                scope="module1")
        elif test_mode == "classifier_auxiliary":
            expected_output_shape = [BATCH_SIZE, NUM_LABELS]
            inception_model = InceptionClassifierV1(
                dtype=tf.float32,
                auxiliary_weight_constant=0.3,
                filter_sizes=[10, 1024],
                auxiliary_classifier=True,
                input_shape=tf_train_X.get_shape().as_list(),
                output_shape=expected_output_shape,
                scope="classifier_auxiliary1")
        elif test_mode == "classifier_basic":
            expected_output_shape = [BATCH_SIZE, NUM_LABELS]
            inception_model = InceptionClassifierV1(
                dtype=tf.float32,
                input_shape=tf_train_X.get_shape().as_list(),
                output_shape=expected_output_shape,
                scope="classifier_basic1")
        elif test_mode == "full_model":
            expected_output_shape = [BATCH_SIZE, NUM_LABELS]
            inception_model = InceptionV1(
                dtype=tf.float32,
                filter_size_dict="imagenet_auto",
                filter_size_reduction_factor=4,
                auxiliary_classifier_weights=[0.3, 0.3],
                use_mini_model=False,
                # 224x224x3 imagenet images
                input_shape=tf_train_X.get_shape().as_list(),
                output_shape=expected_output_shape,
                scope="inception1")

        inception_model.create_model()

        global_step = tf.Variable(0)
        # Set up a learning rate and learning rate decay mechanism.
        lr_calc = tf.train.exponential_decay(0.01, global_step, 100, 0.999, staircase=True)
        lr_min = 0.0001
        lr = tf.maximum(lr_calc, lr_min)

        # Set up an l2 regularization weight and its decay operation.
        l2_lambda_weight = tf.Variable(fl.l2_lambda_weight, dtype=tf.float32)
        l2_lambda_decay = tf.constant(fl.l2_lambda_weight_decay, dtype=tf.float32)
        l2_lambda_decay_op = l2_lambda_weight.assign(l2_lambda_weight * l2_lambda_decay)

        # Reshape the images and their labels.
        # flat_inputs = flatten(tf_train_X, scope="flatten_pixel_channels")
        one_hot_train_outputs = one_hot_encoding(tf.squeeze(tf_train_y), NUM_LABELS,
                                                 on_value=1.0, off_value=0.0)
        one_hot_validation_outputs = one_hot_encoding(tf.squeeze(tf_validation_y), NUM_LABELS,
                                                      on_value=1.0, off_value=0.0)
        one_hot_test_outputs = one_hot_encoding(tf.squeeze(tf_test_y), NUM_LABELS,
                                                on_value=1.0, off_value=0.0)

        # A cheap model that tosses a fully-connected layer onto the flattened
        # result of the 4d Tensor.
        if test_mode in ["module", "stem"]:
            # Not testing any classification, so we build a dummy FC layer to
            # connect to logits.
            flattened_incept_out_size = (expected_output_shape[1]
                                         * expected_output_shape[2]
                                         * expected_output_shape[3])
            w_l2 = tf.get_variable("w_l2",
                                   shape=(flattened_incept_out_size,
                                          one_hot_train_outputs.get_shape()[1]),
                                   dtype=tf.float32,
                                   initializer=xavier_initializer())
            b_l2 = tf.get_variable("b_l2",
                                   shape=(one_hot_train_outputs.get_shape()[1]),
                                   dtype=tf.float32,
                                   initializer=tf.zeros_initializer())

            def model_with_linear_classifier(inp, training=True):
                inception_out = inception_model.run_model(inp)
                flat_inputs = flatten(inception_out)
                return tf.matmul(flat_inputs, w_l2) + b_l2

            train_out = model_with_linear_classifier(tf_train_X)
            train_predictions = tf.nn.softmax(train_out)
            validation_out = model_with_linear_classifier(tf_validation_X, training=False)
            validation_predictions = tf.nn.softmax(validation_out)
            test_out = model_with_linear_classifier(tf_test_X, training=False)
            test_predictions = tf.nn.softmax(test_out)
        # Using a model with a classifier in it.
        else:
            train_out = inception_model.run_model(tf_train_X, training=True)
            train_predictions = tf.nn.softmax(train_out)
            validation_out = inception_model.run_model(tf_validation_X, training=False)
            validation_predictions = tf.nn.softmax(validation_out)
            test_out = inception_model.run_model(tf_test_X, training=False)
            test_predictions = tf.nn.softmax(test_out)

        # Separate the losses so we can compare them in the session.
        ce_loss = loss.softmax_cross_entropy_with_laplace_smoothing(
            train_out, one_hot_train_outputs, laplace_pseudocount=0.00001,
            scale=[0.3, 0.3, 1.0] if test_mode == 'full_model' else 1.0)

        # Collect all the parameters in the model to do l2 regularization.
        regularization_parameters = inception_model.model_parameters
        if test_mode in ["module", "stem"]:
            regularization_parameters.extend((w_l2, b_l2))
        reg_loss = loss.regularizer(regularization_parameters, reg_type='l2',
                                    weight_lambda=0.001)
        total_loss = tf.reduce_mean(ce_loss + reg_loss)

        opt = tf.train.GradientDescentOptimizer(lr).minimize(total_loss,
                                                             global_step=global_step)

        # We also declare this in the graph and run it in the session.
        init_op = tf.global_variables_initializer()

    with tf.Session(graph=g, config=tf.ConfigProto(log_device_placement=True)) as sess:
        sess.run(init_op)
        total_steps = 0
        num_epochs = 100
        for epoch in range(num_epochs):
            shuf = np.random.permutation(train_X.shape[0])
            train_X = train_X[shuf]
            train_y = train_y[shuf]
            processed = 0
            while processed + BATCH_SIZE <= train_X.shape[0]:
                batch_X = train_X[processed:processed + BATCH_SIZE]
                batch_y = train_y[processed:processed + BATCH_SIZE]
                processed += BATCH_SIZE
                feed_dict = {tf_train_X: batch_X, tf_train_y: batch_y}
                _, l, rl, pred, l2lw = sess.run(
                    [opt, total_loss, reg_loss, train_predictions, l2_lambda_weight],
                    feed_dict=feed_dict)
                total_steps += 1
                if total_steps % fl.l2_lambda_weight_decay_steps == 0:
                    sess.run(l2_lambda_decay_op)
                # Validation set.
                if total_steps % fl.validation_frequency == 0:
                    feed_dict = {tf_validation_X: validation_X,
                                 tf_validation_y: validation_y}
                    pred_labels, true_labels = sess.run(
                        [validation_predictions, one_hot_validation_outputs],
                        feed_dict=feed_dict)
                    print("Validation Top-1 accuracy is "
                          + str(100.0 * data_utils.n_accuracy(pred_labels, true_labels, 1))
                          + "%")

        # Test set.
        feed_dict = {tf_test_X: test_X, tf_test_y: test_y}
        pred_labels, true_labels = sess.run([test_predictions, one_hot_test_outputs],
                                            feed_dict=feed_dict)
        print("Test Top-1 accuracy is "
              + str(100.0 * data_utils.n_accuracy(pred_labels, true_labels, 1))
              + "%")