def automatic_dpatch(hidden):
    """Project `hidden` onto `inf_num_predictors` learned predictor heads.

    For image observations the convolutional feature map is first
    flattened over its spatial dimensions, passed through a single fully
    connected layer per predictor, and then flattened into one vector per
    batch element. For vector observations a single fully connected
    projection is applied directly.

    NOTE(review): uses `self`, `net` and `tf` from the enclosing scope —
    assumes this function is defined inside a method; confirm.

    Args:
        hidden: feature tensor; assumed (batch, H, W, C) for image
            observations and (batch, features) otherwise — TODO confirm.

    Returns:
        A rank-2 tensor of per-sample predictor features.
    """
    if self.parameters['obs_type'] == 'image':
        # Collapse the spatial grid into one "regions" axis:
        # (batch, H, W, C) -> (batch, H*W, C).
        shape = hidden.get_shape().as_list()
        num_regions = shape[1] * shape[2]
        hidden = tf.reshape(hidden, [-1, num_regions, shape[3]])
        inf_hidden = net.fcn(hidden, 1,
                             self.parameters["inf_num_predictors"],
                             None, 'fcn_auto_dset')
        # Flatten (batch, regions, units) into one vector per sample.
        hidden_size = (inf_hidden.get_shape().as_list()[2]
                       * self.parameters['inf_num_predictors'])
        inf_hidden = tf.reshape(inf_hidden, shape=[-1, hidden_size])
    else:
        # Vector observations: one fully connected projection suffices.
        inf_hidden = net.fcn(hidden, 1,
                             self.parameters["inf_num_predictors"],
                             None, 'fcn_auto_dset')
    return inf_hidden
def attention(hidden_conv, inf_hidden):
    """Multi-head soft attention over convolutional feature regions.

    Each head scores every spatial region of `hidden_conv` against
    `inf_hidden`, forms a softmax-weighted summary of the region features
    (the "d-patch"), and emits that summary concatenated with its
    flattened attention map. The outputs of all heads are concatenated
    into a single vector per batch element.

    NOTE(review): uses `self`, `net` and `tf` from the enclosing scope —
    assumes this function is defined inside a method; confirm.
    """
    conv_shape = hidden_conv.get_shape().as_list()
    regions = conv_shape[1] * conv_shape[2]
    # (batch, H, W, C) -> (batch, regions, C) so attention runs per region.
    features = tf.reshape(hidden_conv, [-1, regions, conv_shape[3]])
    head_outputs = []
    for head_idx in range(self.parameters['num_heads']):
        proj_conv = net.fcn(features, 1,
                            self.parameters['num_att_units'],
                            None, 'att', 'att1_' + str(head_idx))
        proj_hidden = net.fcn(inf_hidden, 1,
                              self.parameters['num_att_units'],
                              None, 'att', 'att2_' + str(head_idx))
        # Broadcast the hidden projection across regions and combine.
        scores = tf.nn.tanh(proj_conv + tf.expand_dims(proj_hidden, 1))
        weights = net.fcn(scores, 1, [1],
                          None, 'att', 'att3_' + str(head_idx))
        # Softmax over the regions axis -> a distribution per head.
        weights = tf.nn.softmax(weights, axis=1)
        summary = tf.reduce_sum(weights * features, axis=1)
        flat_weights = tf.reshape(weights, shape=[-1, regions])
        head_outputs.append(tf.concat([summary, flat_weights], axis=1))
    return tf.concat(head_outputs, axis=1)
def build_main_model(self):
    """Build the network that approximates the policy and value functions.

    Creates the observation placeholder, the optional CNN feature
    extractor, the optional fully connected and recurrent stages, and
    stores the final feature tensor in `self.hidden`.

    Side effects: assigns `self.observation`, `self.feature_vector`,
    `self.hidden`, and — when recurrent — `self.prev_action`,
    `self.prev_action_onehot`, `self.seq_len`, `self.state_in`,
    `self.state_out`.
    """
    if self.parameters['obs_type'] == 'image':
        self.observation = tf.placeholder(
            shape=[None,
                   self.parameters["frame_height"],
                   self.parameters["frame_width"],
                   self.parameters["num_frames"]],
            dtype=tf.float32,
            name='observation')
    else:
        self.observation = tf.placeholder(
            shape=[None, self.parameters['obs_size']],
            dtype=tf.float32,
            name='observation')

    # Normalize Atari frames to [0, 1]. Keep the placeholder handle in
    # self.observation: the original rebound self.observation to the
    # scaled tensor, so feeding it via feed_dict silently bypassed the
    # /255. normalization.
    observation = self.observation
    if self.parameters['env_type'] == 'atari':
        observation = tf.cast(observation, tf.float32) / 255.

    if self.convolutional:
        self.feature_vector = net.cnn(observation,
                                      self.parameters["num_conv_layers"],
                                      self.parameters["num_filters"],
                                      self.parameters["kernel_sizes"],
                                      self.parameters["strides"],
                                      tf.nn.relu, False, 'cnn')
        network_input = c_layers.flatten(self.feature_vector)
    else:
        self.feature_vector = observation
        network_input = self.feature_vector

    # Default to the raw features so self.hidden is always defined (the
    # original raised NameError when neither head below was enabled).
    hidden = network_input
    if self.fully_connected:
        hidden = net.fcn(network_input,
                         self.parameters["num_fc_layers"],
                         self.parameters["num_fc_units"],
                         tf.nn.relu, 'fcn')

    if self.recurrent:
        self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32,
                                          name='prev_action')
        self.prev_action_onehot = c_layers.one_hot_encoding(
            self.prev_action, self.act_size)
        c_in = tf.placeholder(tf.float32,
                              [None, self.parameters['num_rec_units']],
                              name='c_state')
        h_in = tf.placeholder(tf.float32,
                              [None, self.parameters['num_rec_units']],
                              name='h_state')
        self.seq_len = tf.placeholder(shape=None, dtype=tf.int32,
                                      name='sequence_length')
        self.state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
        # NOTE(review): the RNN consumes network_input directly, not the
        # fc `hidden` above — mirrors the original; confirm intended.
        hidden, self.state_out = net.rnn(network_input, self.state_in,
                                         self.parameters['num_rec_units'],
                                         self.seq_len, 'rnn')

    self.hidden = hidden