import itertools as it

import numpy as np
import tensorflow as tf

# `th` and `rn` are project-local helper modules (layer builders such as
# down_convolution / fully_connected, and the relation-network MLPs used below).


def construct_root_network(input, frame_history):
    # Standard DQN convolutional trunk: three conv layers, flattened to a feature vector.
    input = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(input, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 3, 1, 64, 64, tf.nn.relu)
    N = np.prod([x.value for x in c3.get_shape()[1:]])
    c3 = tf.reshape(c3, [-1, N])
    return c3

def hook_visual(inp, frame_history):
    inp = tf.image.convert_image_dtype(inp, tf.float32)
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(inp, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 3, 1, 64, 64, tf.nn.relu)
    N = np.prod([x.value for x in c3.get_shape()[1:]])
    c3 = tf.reshape(c3, [-1, N])
    # with tf.variable_scope('fc1'):
    #     vis = th.fully_connected(c3, 512, tf.nn.relu)
    return c3

def make_embedding_network(state, embedding_size):
    shape = [x.value for x in state.get_shape()[1:]]
    state = tf.image.convert_image_dtype(tf.reshape(state, [-1] + shape + [1]), tf.float32)
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(state, 5, 2, 1, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 5, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 5, 2, 64, 64, tf.nn.relu)
    N = np.prod([x.value for x in c3.get_shape()[1:]])
    with tf.variable_scope('fc1'):
        fc1 = th.fully_connected(tf.reshape(c3, [-1, N]), 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        E = th.fully_connected(fc1, embedding_size, lambda x: x)
    return E

def construct_dqn_with_embedding(input, abs_state1, abs_state2, frame_history, num_actions):
    # The embedding network acts as a hypernetwork here: it emits the weights
    # of the final Q-value layer (512 * num_actions entries plus 1 shared bias).
    embedding, weights = construct_embedding_network(abs_state1, abs_state2, 50, 50,
                                                     512 * num_actions + 1)  # plus 1 for shared bias
    w = tf.reshape(weights[:, 0:512 * num_actions], [-1, 512, num_actions])
    b = tf.reshape(weights[:, 512 * num_actions:512 * num_actions + 1], [-1, 1])
    input = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(input, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 3, 1, 64, 64, tf.nn.relu)
    N = np.prod([x.value for x in c3.get_shape()[1:]])
    c3 = tf.reshape(c3, [-1, N])
    with tf.variable_scope('fc1'):
        fc1 = th.fully_connected(c3, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        # Batched matmul with per-example weights:
        # [B, 1, 512] x [B, 512, num_actions] -> [B, 1, num_actions];
        # the shared bias b broadcasts across actions.
        q_values = tf.reshape(tf.matmul(tf.reshape(fc1, [-1, 1, 512]), w), [-1, num_actions]) + b
    return q_values

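# A minimal shape sanity check for the per-example head above (illustrative
# sketch only; the batch size and action count here are assumptions, not
# values from this repo):
def _embedding_head_shape_check():
    B, num_actions = 4, 6
    fc1 = tf.zeros([B, 512])                # visual features
    w = tf.zeros([B, 512, num_actions])     # one weight matrix per example
    q = tf.reshape(tf.matmul(tf.reshape(fc1, [-1, 1, 512]), w), [-1, num_actions])
    assert q.get_shape().as_list() == [B, num_actions]
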
def construct_dqn_with_subgoal_embedding(input, abs_state1, abs_state2, frame_history, num_actions):
    input = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('a1'):
        # Embed the concatenated abstract states into a 50-unit vector.
        a1 = th.fully_connected(legacy_concat(data=[abs_state1, abs_state2], dim=1), 50, tf.nn.relu)
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(input, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 3, 1, 64, 64, tf.nn.relu)
    N = np.prod([x.value for x in c3.get_shape()[1:]])
    c3 = tf.reshape(c3, [-1, N])
    # Concatenate the abstract-state embedding with the flattened visual features.
    ac3 = legacy_concat(dim=1, data=[a1, c3])
    with tf.variable_scope('fc1'):
        fc1 = th.fully_connected(ac3, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        q_values = th.fully_connected_shared_bias(fc1, num_actions, lambda x: x)
    return q_values

def make_encoder(inp, encoding_size):
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(inp, 5, 2, 1, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 5, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 5, 2, 64, 64, tf.nn.relu)
    N = np.prod([x.value for x in c3.get_shape()[1:]])
    c3 = tf.reshape(c3, [-1, N])
    # Two heads for the approximate posterior q(z | x): mean and scale.
    with tf.variable_scope('mu_zGx'):
        with tf.variable_scope('fc1'):
            fc1 = th.fully_connected(c3, encoding_size, tf.nn.relu)
        with tf.variable_scope('fc2'):
            mu = th.fully_connected(fc1, encoding_size, lambda x: x)
    with tf.variable_scope('sigma_zGx'):
        with tf.variable_scope('fc1'):
            fc1 = th.fully_connected(c3, encoding_size, tf.nn.relu)
        with tf.variable_scope('fc2'):
            # ReLU keeps sigma non-negative.
            sigma = th.fully_connected(fc1, encoding_size, tf.nn.relu)
    return mu, sigma

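# A hedged usage sketch (not part of the original code): the (mu, sigma) pair
# returned above supports the VAE reparameterization trick, sampling a latent
# code as z = mu + sigma * eps with eps ~ N(0, I).
def _sample_encoding_sketch(inp, encoding_size):
    mu, sigma = make_encoder(inp, encoding_size)
    eps = tf.random_normal(tf.shape(mu))
    return mu + sigma * eps  # differentiable w.r.t. mu and sigma
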
def construct_q_network_weights_only_final(input, dqn_numbers, dqn_max_number, frame_history, num_actions):
    input = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(input, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 3, 1, 64, 64, tf.nn.relu)
    N = np.prod([x.value for x in c3.get_shape()[1:]])
    # N = tf.reduce_prod(tf.shape(c3)[1:4])
    # N = []
    c3 = tf.reshape(c3, [-1, N])
    with tf.variable_scope('fc1'):
        fc1 = th.fully_connected(c3, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        # Only the final layer's weights are selected per DQN number; the trunk is shared.
        q_values = th.fully_connected_weights(fc1, dqn_numbers, dqn_max_number, num_actions, lambda x: x)
        # q_values_explore = th.fully_connected_weights_2(fc1, dqn_numbers_explore, dqn_max_number, num_actions, lambda x: x)
    return q_values, None

def construct_q_network(self, input):
    input = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(input, 8, 4, self.frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 3, 1, 64, 64, tf.nn.relu)
    N = np.prod([x.value for x in c3.get_shape()[1:]])
    c3 = tf.reshape(c3, [-1, N])
    with tf.variable_scope('fc1'):
        fc1 = th.fully_connected(c3, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        if self.shared_bias:
            q_values = th.fully_connected_shared_bias(fc1, self.num_actions, lambda x: x)
        else:
            q_values = th.fully_connected(fc1, self.num_actions, lambda x: x)
    return q_values

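# A minimal sketch of the shared-bias head idea (an assumption about the th
# helper's behavior, not its actual implementation): identical to a dense
# layer except that a single scalar bias is broadcast across all actions,
# matching the inline w/b construction in construct_dqn_with_embedding_2_layer
# below.
def _shared_bias_head_sketch(x, num_outputs):
    in_dim = x.get_shape()[1].value
    w = tf.get_variable('w', shape=[in_dim, num_outputs],
                        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable('b', shape=[1], initializer=tf.constant_initializer(0.0))
    return tf.matmul(x, w) + b  # b broadcasts over the action dimension
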
def construct_dqn_with_embedding_2_layer(input, abs_state1, abs_state2, frame_history, num_actions):
    # embedding, weights = construct_embedding_network(abs_state1, abs_state2, 200, 200,
    #                                                  512 * num_actions + 1)  # plus 1 for shared bias
    # w = tf.reshape(weights[:, 0:512 * num_actions], [-1, 512, num_actions])
    # b = tf.reshape(weights[:, 512 * num_actions:512 * num_actions + 1], [-1, 1])
    with tf.variable_scope('moop'):
        w = tf.get_variable('w', shape=[512, num_actions],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('b', shape=[1], initializer=tf.constant_initializer(0))
    input = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(input, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 3, 1, 64, 64, tf.nn.relu)
    N = np.prod([x.value for x in c3.get_shape()[1:]])
    c3 = tf.reshape(c3, [-1, N])
    with tf.variable_scope('fc1'):
        fc1 = th.fully_connected(c3, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        # q_values = tf.reshape(tf.matmul(tf.reshape(fc1, [-1, 1, 512]), w), [-1, num_actions]) + b
        q_values = tf.matmul(fc1, w) + b
    return q_values

def construct_q_network(self, input):
    # Standard CNN layers - the RN paper used 4, but this builds off of the
    # standard q-network trunk.
    input = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        c1 = th.down_convolution(input, 8, 4, self.frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        c2 = th.down_convolution(c1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        c3 = th.down_convolution(c2, 3, 1, 64, 64, tf.nn.relu)
    with tf.variable_scope('rn'):
        # All pairs of "objects" (depth columns of the conv map) are sent through mlp_g.
        x_dim, y_dim = c3.get_shape()[1].value, c3.get_shape()[2].value
        # Extract every depth column and tag it with its (normalized) location.
        objects = []
        for i in range(x_dim):
            for j in range(y_dim):
                depth_col = tf.slice(c3, [0, i, j, 0], [-1, 1, 1, -1])
                # Reshape to a 2-D array of [num_batches, num_kernels].
                depth_col = tf.reshape(depth_col, [-1, depth_col.get_shape()[3].value])
                # Tag with normalized location info.
                loc = tf.constant([[float(i) / x_dim, float(j) / y_dim]])
                location_info = tf.tile(loc, [tf.shape(depth_col)[0], 1])
                # Concatenate along axis 1: shape [num_batches, num_kernels + 2].
                depth_appended = tf.concat([depth_col, location_info], 1)
                objects.append(depth_appended)
        # Initialize shared variables for g with the doubled object dimension.
        rn.init_g(objects[0].get_shape()[1] * 2)
        relations = []
        for obj1, obj2 in it.combinations(objects, 2):
            # Concatenate the pair: shape [num_batches, 2 * (num_kernels + 2)].
            object_pair = tf.concat([obj1, obj2], 1)
            g_rel = rn.mlp_g(object_pair)  # shape [num_batches, 256]
            relations.append(g_rel)
        # Sum the relation vectors elementwise over all C(x_dim*y_dim, 2) pairs.
        # NOTE: is this the best way to combine results, overall?
        stacked = tf.stack(relations, axis=2)  # shape [num_batches, 256, num_pairs]
        sum_relations = tf.reduce_sum(stacked, axis=2)  # shape [num_batches, 256]
        # Put the summed output through mlp_f.
        # TODO: does dropout make sense here? should we have a different
        # dropout val for the target network?
        f = rn.mlp_f(sum_relations, 0.5)
    # Final linear output layer with second dimension self.num_actions.
    with tf.variable_scope('fc2'):
        if self.shared_bias:
            q_values = th.fully_connected_shared_bias(f, self.num_actions, lambda x: x)
        else:
            q_values = th.fully_connected(f, self.num_actions, lambda x: x)
    return q_values

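# Hedged cost note (illustrative numbers, not taken from this repo): the
# relation module is quadratic in the number of objects, since every depth
# column is paired with every other one. A 7x7 feature map, for instance,
# yields C(49, 2) = 1176 object pairs, each passed through mlp_g.
def _pair_count_sketch(x_dim=7, y_dim=7):
    n_objects = x_dim * y_dim
    return n_objects * (n_objects - 1) // 2  # 1176 for a 7x7 map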