def hook_abstraction(vis, num_abstract_states, batch_size, I=None):
    """Build the abstraction head: two fully connected layers followed by `hardmax`.

    Args:
        vis: input tensor fed to the first fully connected layer.
        num_abstract_states: width of the second (identity-activated) layer.
        batch_size: forwarded unchanged to `hardmax`.
        I: optional value forwarded to `hardmax` as its `I` keyword.

    Returns:
        Whatever `hardmax` returns for the second layer's output.
    """
    def _identity(t):
        # The output layer is linear: no non-linearity before `hardmax`.
        return t

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(vis, 512, tf.nn.sigmoid)
    with tf.variable_scope('fc2'):
        scores = th.fully_connected(hidden, num_abstract_states, _identity)
    return hardmax(scores, batch_size, I=I)
def hook_base(vis, num_actions):
    """Build a two-layer fully connected Q-value head on top of `vis`.

    Args:
        vis: input tensor fed to the first fully connected layer.
        num_actions: number of outputs of the final linear layer.

    Returns:
        The final layer's output reshaped to `[-1, num_actions]`.
    """
    def _identity(t):
        return t

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(vis, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        # The reshape is kept inside the scope so the op is created under 'fc2'.
        linear_out = th.fully_connected(hidden, num_actions, _identity)
        q_values = tf.reshape(linear_out, [-1, num_actions])
    return q_values
def make_encoder_fc(x):
    """Build a fully connected encoder with separate mean and sigma branches.

    `x` is flattened to `[-1, 11*11]` independently for each branch. Each
    branch has a 50-unit ELU hidden layer followed by a 10-unit output layer.

    Args:
        x: input tensor; must be reshapeable to `[-1, 121]`.

    Returns:
        Tuple `(mu_z, sigma_z)`: the linear mean output and the sigma output,
        which uses `tf.square` as its activation.
    """
    def _identity(t):
        return t

    flat_shape = [-1, 11 * 11]

    # Mean branch.
    with tf.variable_scope('fc1_mu'):
        flat_for_mu = tf.reshape(x, flat_shape)
        hidden_mu = th.fully_connected(flat_for_mu, 50, tf.nn.elu)
    with tf.variable_scope('enc_mu'):
        mu_z = th.fully_connected(hidden_mu, 10, _identity)

    # Sigma branch: squaring the pre-activation keeps the output non-negative.
    with tf.variable_scope('fc1_sigma'):
        flat_for_sigma = tf.reshape(x, flat_shape)
        hidden_sigma = th.fully_connected(flat_for_sigma, 50, tf.nn.elu)
    with tf.variable_scope('enc_sigma'):
        sigma_z = th.fully_connected(hidden_sigma, 10, tf.square)

    return mu_z, sigma_z
def make_decoder_fc(z):
    """Build a fully connected decoder with separate mean and sigma branches.

    Each branch maps `z` through a 50-unit ELU hidden layer and then a linear
    layer with `11*11` outputs.

    Args:
        z: latent input tensor.

    Returns:
        Tuple `(mu_x, sigma_x)` of the two branches' linear outputs.
    """
    def _identity(t):
        return t

    output_dim = 11 * 11

    # Mean branch.
    with tf.variable_scope('fc2_mu'):
        hidden_mu = th.fully_connected(z, 50, tf.nn.elu)
    with tf.variable_scope('dec_mu'):
        mu_x = th.fully_connected(hidden_mu, output_dim, _identity)

    # Sigma branch (note: linear output, no positivity constraint here).
    with tf.variable_scope('fc2_sigma'):
        hidden_sigma = th.fully_connected(z, 50, tf.nn.elu)
    with tf.variable_scope('dec_sigma'):
        sigma_x = th.fully_connected(hidden_sigma, output_dim, _identity)

    return mu_x, sigma_x
def make_embedding_network(state, embedding_size):
    """Embed `state` with three convolutions followed by two dense layers.

    A trailing channel dimension of size 1 is appended to `state`, which is
    then converted to float32 before the convolutional stack.

    Args:
        state: input tensor whose non-batch dimensions are statically known.
        embedding_size: number of outputs of the final linear layer.

    Returns:
        The linear embedding produced by the last fully connected layer.
    """
    def _identity(t):
        return t

    # Static shape of everything after the batch dimension.
    trailing_dims = [dim.value for dim in state.get_shape()[1:]]
    with_channel = tf.reshape(state, [-1] + trailing_dims + [1])
    state = tf.image.convert_image_dtype(with_channel, tf.float32)

    # Positional args are assumed to be (kernel, stride, in_channels,
    # out_channels, activation) -- TODO confirm against th.down_convolution.
    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(state, 5, 2, 1, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 5, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 5, 2, 64, 64, tf.nn.relu)

    # Flatten the conv output using its static per-example size.
    flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
    with tf.variable_scope('fc1'):
        flattened = tf.reshape(conv3, [-1, flat_size])
        hidden = th.fully_connected(flattened, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        E = th.fully_connected(hidden, embedding_size, _identity)
    return E
def construct_heads_network(input, num_actions, num_abstract_states):
    """Build a multi-head Q-network with one head per abstract-state pair.

    The number of heads is `num_abstract_states ** 2`.

    Args:
        input: input tensor fed to the first fully connected layer.
        num_actions: number of actions per head.
        num_abstract_states: number of abstract states.

    Returns:
        Q-values reshaped to `[-1, num_heads, num_actions]`.
    """
    def _identity(t):
        return t

    head_count = num_abstract_states * num_abstract_states

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(input, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        flat_q = th.fully_connected_multi_shared_bias(
            hidden, num_actions, head_count, _identity)
        q_values = tf.reshape(flat_q, [-1, head_count, num_actions])
    return q_values
def construct_dqn_with_subgoal_embedding(input, abs_state1, abs_state2, frame_history, num_actions):
    """Build a Q-network conditioned on a pair of abstract states.

    The two abstract states are concatenated and passed through a 50-unit
    ReLU layer. That feature vector is joined with the flattened output of a
    three-layer convolutional stack before the final dense layers.

    Args:
        input: image tensor; converted to float32 before the convolutions.
        abs_state1: first abstract-state tensor.
        abs_state2: second abstract-state tensor.
        frame_history: passed as the fourth argument of the first convolution
            (presumably its input channel count -- TODO confirm).
        num_actions: number of outputs of the final shared-bias linear layer.

    Returns:
        The Q-value tensor from the final layer.
    """
    def _identity(t):
        return t

    frames = tf.image.convert_image_dtype(input, tf.float32)

    # Subgoal branch.
    with tf.variable_scope('a1'):
        subgoal = legacy_concat(data=[abs_state1, abs_state2], dim=1)
        subgoal_features = th.fully_connected(subgoal, 50, tf.nn.relu)

    # Visual branch.
    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)

    flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
    visual_features = tf.reshape(conv3, [-1, flat_size])

    # Join both branches along the feature axis.
    joined = legacy_concat(dim=1, data=[subgoal_features, visual_features])

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(joined, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        q_values = th.fully_connected_shared_bias(hidden, num_actions, _identity)
    return q_values
def construct_embedding_network(abs_state1, abs_state2, hidden_size, embedding_size, weight_size):
    """Embed a pair of abstract states and derive a weight vector from the embedding.

    Both abstract states go through the same two-layer ReLU network (variables
    shared via scope 'A' with `reuse=True`). The two outputs are concatenated
    along axis 1 and mapped to a linear embedding, from which a second small
    network produces `weight_size` linear outputs.

    Args:
        abs_state1: first abstract-state tensor.
        abs_state2: second abstract-state tensor; must be compatible with the
            variables created for `abs_state1`.
        hidden_size: width of the shared per-state layers; the
            'pre_embedding' layer has `2 * hidden_size` units.
        embedding_size: width of the embedding and of the 'pre_weights' layer.
        weight_size: number of outputs of the final 'weights' layer.

    Returns:
        Tuple `(embedding, weights)`.
    """
    def identity(t):
        return t

    def shared_abs(inp, neurons):
        # Two-layer ReLU tower applied to each abstract state.
        with tf.variable_scope('fc1'):
            fc1 = th.fully_connected(inp, neurons, tf.nn.relu)
        with tf.variable_scope('fc2'):
            A = th.fully_connected(fc1, neurons, tf.nn.relu)
        return A

    with tf.variable_scope('A'):
        A1 = shared_abs(abs_state1, hidden_size)
    # Re-enter the same scope with reuse so both states share the tower's weights.
    with tf.variable_scope('A', reuse=True):
        A2 = shared_abs(abs_state2, hidden_size)

    with tf.variable_scope('pre_embedding'):
        # Use the file's version-agnostic concat helper rather than the
        # pre-1.0 `tf.concat(axis, values)` argument order, which fails on
        # TensorFlow >= 1.0 where the signature is `tf.concat(values, axis)`.
        joined = legacy_concat(dim=1, data=[A1, A2])
        pre_embedding = th.fully_connected(joined, hidden_size * 2, tf.nn.relu)
    with tf.variable_scope('embedding'):
        embedding = th.fully_connected(pre_embedding, embedding_size, identity)
    with tf.variable_scope('pre_weights'):
        pre_weights = th.fully_connected(embedding, embedding_size, tf.nn.relu)
    with tf.variable_scope('weights'):
        weights = th.fully_connected(pre_weights, weight_size, identity)
    return embedding, weights
def make_encoder(inp, encoding_size):
    """Build a convolutional encoder with separate mean and sigma heads.

    Three convolutions feed a flattened feature vector into two independent
    two-layer heads under scopes 'mu_zGx' and 'sigma_zGx'.

    Args:
        inp: input tensor for the first convolution.
        encoding_size: width of both layers in each head.

    Returns:
        Tuple `(mu, sigma)`; `mu` is linear, `sigma` is passed through ReLU.
    """
    def _identity(t):
        return t

    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(inp, 5, 2, 1, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 5, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 5, 2, 64, 64, tf.nn.relu)

    flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
    features = tf.reshape(conv3, [-1, flat_size])

    # The 'fc1'/'fc2' scopes are nested in each head's scope so the two heads
    # do not collide on variable names.
    with tf.variable_scope('mu_zGx'):
        with tf.variable_scope('fc1'):
            mu_hidden = th.fully_connected(features, encoding_size, tf.nn.relu)
        with tf.variable_scope('fc2'):
            mu = th.fully_connected(mu_hidden, encoding_size, _identity)

    with tf.variable_scope('sigma_zGx'):
        with tf.variable_scope('fc1'):
            sigma_hidden = th.fully_connected(features, encoding_size, tf.nn.relu)
        with tf.variable_scope('fc2'):
            # ReLU keeps sigma non-negative.
            sigma = th.fully_connected(sigma_hidden, encoding_size, tf.nn.relu)

    return mu, sigma
def make_decoder(z):
    """Build a decoder that expands `z` into mean and sigma outputs.

    `z` is projected to `21*21*64` ReLU units and reshaped to
    `[-1, 21, 21, 64]`. One shared up-convolution follows, then two separate
    up-convolutions produce the mean (sigmoid) and sigma (ReLU) outputs.

    Args:
        z: latent input tensor.

    Returns:
        Tuple `(mu_x, sigma_x)`.
    """
    with tf.variable_scope('fc1'):
        projected = th.fully_connected(z, 21 * 21 * 64, tf.nn.relu)
        feature_map = tf.reshape(projected, [-1, 21, 21, 64])
    with tf.variable_scope('d1'):
        shared_up = th.up_convolution(feature_map, 5, 64, 32, tf.nn.relu)

    # Both heads read the same shared feature map.
    with tf.variable_scope('d2_mu'):
        mu_x = th.up_convolution(shared_up, 5, 32, 1, tf.nn.sigmoid)
    with tf.variable_scope('d2_sigma'):
        sigma_x = th.up_convolution(shared_up, 5, 32, 1, tf.nn.relu)
    return mu_x, sigma_x
def construct_q_network(self, input):
    """Build a convolutional Q-network: three convolutions and two dense layers.

    Reads `self.frame_history`, `self.num_actions` and `self.shared_bias`.

    Args:
        input: image tensor; converted to float32 before the convolutions.

    Returns:
        Q-value tensor with `self.num_actions` outputs. It comes from
        `th.fully_connected_shared_bias` when `self.shared_bias` is truthy,
        otherwise from `th.fully_connected`.
    """
    def _identity(t):
        return t

    frames = tf.image.convert_image_dtype(input, tf.float32)

    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, self.frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)

    flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
    features = tf.reshape(conv3, [-1, flat_size])

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(features, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        # Both layer builders take the same arguments; only the bias handling differs.
        output_layer = (th.fully_connected_shared_bias
                        if self.shared_bias else th.fully_connected)
        q_values = output_layer(hidden, self.num_actions, _identity)
    return q_values
def construct_dqn_with_embedding(input, abs_state1, abs_state2, frame_history, num_actions):
    """Build a Q-network whose final layer's weights come from an embedding network.

    `construct_embedding_network` emits, per example, `512 * num_actions`
    weights plus one shared bias. These replace the parameters of the final
    linear layer that sits on top of the convolutional stack and a 512-unit
    hidden layer.

    Args:
        input: image tensor; converted to float32 before the convolutions.
        abs_state1: first abstract-state tensor.
        abs_state2: second abstract-state tensor.
        frame_history: passed as the fourth argument of the first convolution
            (presumably its input channel count -- TODO confirm).
        num_actions: number of Q-value outputs.

    Returns:
        Q-value tensor reshaped to `[-1, num_actions]` plus the generated bias.
    """
    weight_count = 512 * num_actions
    # One extra output holds the bias shared by all actions.
    _embedding, weights = construct_embedding_network(
        abs_state1, abs_state2, 50, 50, weight_count + 1)
    w = tf.reshape(weights[:, 0:weight_count], [-1, 512, num_actions])
    b = tf.reshape(weights[:, weight_count:weight_count + 1], [-1, 1])

    frames = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)

    flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
    features = tf.reshape(conv3, [-1, flat_size])

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(features, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        # Batched matmul: each example's row vector times its own weight matrix.
        hidden_rows = tf.reshape(hidden, [-1, 1, 512])
        projected = tf.matmul(hidden_rows, w)
        q_values = tf.reshape(projected, [-1, num_actions]) + b
    return q_values
def construct_q_network_weights_only_final(input, dqn_numbers, dqn_max_number, frame_history, num_actions):
    """Build a Q-network whose final layer is created by `th.fully_connected_weights`.

    The convolutional stack and 512-unit hidden layer are ordinary layers.
    Only the final layer receives `dqn_numbers` and `dqn_max_number`.

    Args:
        input: image tensor; converted to float32 before the convolutions.
        dqn_numbers: forwarded to `th.fully_connected_weights`.
        dqn_max_number: forwarded to `th.fully_connected_weights`.
        frame_history: passed as the fourth argument of the first convolution
            (presumably its input channel count -- TODO confirm).
        num_actions: number of Q-value outputs.

    Returns:
        Tuple `(q_values, None)`. The second slot is always `None`; a
        commented-out "explore" head in the original suggests it once held a
        second output.
    """
    def _identity(t):
        return t

    frames = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)

    # Static flattened size; requires the conv output shape to be known at
    # graph-construction time.
    flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
    features = tf.reshape(conv3, [-1, flat_size])

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(features, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        q_values = th.fully_connected_weights(
            hidden, dqn_numbers, dqn_max_number, num_actions, _identity)
    return q_values, None
def construct_dqn_with_embedding_2_layer(input, abs_state1, abs_state2, frame_history, num_actions):
    """Build a Q-network whose final layer uses explicit variables under scope 'moop'.

    NOTE: `abs_state1` and `abs_state2` are currently unused. The original
    source carries a commented-out variant that generated `w` and `b` from
    `construct_embedding_network`; plain trainable variables are used instead.

    Args:
        input: image tensor; converted to float32 before the convolutions.
        abs_state1: unused.
        abs_state2: unused.
        frame_history: passed as the fourth argument of the first convolution
            (presumably its input channel count -- TODO confirm).
        num_actions: number of Q-value outputs.

    Returns:
        `tf.matmul(fc1, w) + b`, where `w` has shape `[512, num_actions]` and
        `b` has shape `[1]`.
    """
    with tf.variable_scope('moop'):
        w = tf.get_variable(
            'w',
            shape=[512, num_actions],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable(
            'b',
            shape=[1],
            initializer=tf.constant_initializer(0))

    frames = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)

    flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
    features = tf.reshape(conv3, [-1, flat_size])

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(features, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        # The single-element bias broadcasts across all actions.
        projected = tf.matmul(hidden, w)
        q_values = projected + b
    return q_values
def shared_abs(inp, neurons):
    """Apply two ReLU fully connected layers of width `neurons` to `inp`.

    Variables are created under the scopes 'fc1' and 'fc2', relative to
    whatever variable scope is active at the call site.

    Args:
        inp: input tensor.
        neurons: width of both layers.

    Returns:
        Output tensor of the second layer.
    """
    with tf.variable_scope('fc1'):
        first_layer = th.fully_connected(inp, neurons, tf.nn.relu)
    with tf.variable_scope('fc2'):
        second_layer = th.fully_connected(first_layer, neurons, tf.nn.relu)
    return second_layer
def hook_l1(inp_abstracted, num_abstract_actions):
    """Build a single linear layer from the abstracted input to Q-values.

    Args:
        inp_abstracted: input tensor.
        num_abstract_actions: number of outputs of the linear layer.

    Returns:
        The linear layer's output (no non-linearity applied).
    """
    def _identity(t):
        return t

    with tf.variable_scope('fc1'):
        q_values = th.fully_connected(
            inp_abstracted, num_abstract_actions, _identity)
    return q_values
def construct_q_network(self, input):
    """Build a Q-network with a relation-network stage over conv feature columns.

    Pipeline: three convolutions, then one "object" per spatial location of
    the last feature map (its depth column tagged with normalized
    coordinates). `rn.mlp_g` is applied to every unordered object pair, the
    pair outputs are summed, `rn.mlp_f` is applied, and a final linear layer
    gives the Q-values.

    Reads `self.frame_history`, `self.num_actions` and `self.shared_bias`.

    NOTE(review): the source formatting does not show whether the 'fc2' scope
    is nested inside 'rn'. It is placed at top level here, matching the plain
    Q-network layout -- confirm against existing checkpoints.

    Args:
        input: image tensor; converted to float32 before the convolutions.

    Returns:
        Q-value tensor with `self.num_actions` outputs.
    """
    def _identity(t):
        return t

    # Standard CNN layers. The RN paper used 4, but this builds on the
    # standard q-network.
    frames = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, self.frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)

    with tf.variable_scope('rn'):
        # Static spatial extent of the final feature map.
        rows = c3.get_shape()[1].value
        cols = c3.get_shape()[2].value

        # One object per spatial cell: the depth column plus its location.
        objects = []
        for row in range(rows):
            for col in range(cols):
                column = tf.slice(c3, [0, row, col, 0], [-1, 1, 1, -1])
                # Collapse to 2-D: [num_batches, num_kernels].
                column = tf.reshape(column, [-1, column.get_shape()[3].value])
                # Normalized coordinates, repeated for each batch element.
                coords = tf.constant([[float(row) / rows, float(col) / cols]])
                tiled_coords = tf.tile(coords, [tf.shape(column)[0], 1])
                # Resulting shape: [num_batches, num_kernels + 2].
                objects.append(tf.concat([column, tiled_coords], 1))

        # Shared variables for g; its input is two concatenated objects, so
        # the per-object width is doubled.
        rn.init_g(objects[0].get_shape()[1] * 2)

        # Apply g to every unordered pair; each pair tensor has shape
        # [num_batches, 2 * (num_kernels + 2)].
        relations = [
            rn.mlp_g(tf.concat([first, second], 1))
            for first, second in it.combinations(objects, 2)
        ]

        # Element-wise sum over all pairs; the stacked axis has one entry per
        # object pair.
        # NOTE: is this the best way to combine results, overall?
        stacked = tf.stack(relations, axis=2)
        summed_relations = tf.reduce_sum(stacked, axis=2)

        # TODO: does dropout make sense here? should we have different
        # dropout val for the target network?
        f = rn.mlp_f(summed_relations, 0.5)

    # Final linear output layer with second dimension self.num_actions.
    with tf.variable_scope('fc2'):
        output_layer = (th.fully_connected_shared_bias
                        if self.shared_bias else th.fully_connected)
        q_values = output_layer(f, self.num_actions, _identity)
    return q_values