Example #1
0
def hook_abstraction(vis, num_abstract_states, batch_size, I=None):
    """Build the abstraction head on top of ``vis``.

    ``vis`` is passed through ``th.fully_connected`` with a sigmoid
    activation (scope ``fc1``) and then through a second
    ``th.fully_connected`` call with an identity activation (scope ``fc2``).
    The result is handed to ``hardmax`` together with ``batch_size`` and
    ``I``; whatever ``hardmax`` returns is returned unchanged.

    NOTE(review): 512 and ``num_abstract_states`` are presumably the layer
    widths -- confirm against ``th.fully_connected``.
    """
    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(vis, 512, tf.nn.sigmoid)

    with tf.variable_scope('fc2'):
        # Identity activation: the raw layer output goes to hardmax.
        scores = th.fully_connected(hidden, num_abstract_states, lambda t: t)

    return hardmax(scores, batch_size, I=I)
Example #2
0
def hook_base(vis, num_actions):
    """Build the base Q-value head on top of ``vis``.

    A ReLU ``th.fully_connected`` layer (scope ``fc1``) feeds an
    identity-activated ``th.fully_connected`` layer (scope ``fc2``). The
    output is reshaped to ``[-1, num_actions]`` and returned.
    """
    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(vis, 512, tf.nn.relu)

    with tf.variable_scope('fc2'):
        linear_out = th.fully_connected(hidden, num_actions, lambda t: t)
        q_values = tf.reshape(linear_out, [-1, num_actions])

    return q_values
def make_encoder_fc(x):
    """Build the fully connected encoder and return ``(mu_z, sigma_z)``.

    Two independent branches each reshape ``x`` to ``[-1, 11 * 11]`` and
    apply an ELU ``th.fully_connected`` layer followed by a second
    ``th.fully_connected`` layer. The mean branch ends in an identity
    activation; the sigma branch ends in ``tf.square``.
    """
    flat_shape = [-1, 11 * 11]

    # Mean branch: scopes 'fc1_mu' -> 'enc_mu'.
    with tf.variable_scope('fc1_mu'):
        flat_for_mu = tf.reshape(x, flat_shape)
        hidden_mu = th.fully_connected(flat_for_mu, 50, tf.nn.elu)
    with tf.variable_scope('enc_mu'):
        mu_z = th.fully_connected(hidden_mu, 10, lambda t: t)

    # Sigma branch: scopes 'fc1_sigma' -> 'enc_sigma'.
    with tf.variable_scope('fc1_sigma'):
        flat_for_sigma = tf.reshape(x, flat_shape)
        hidden_sigma = th.fully_connected(flat_for_sigma, 50, tf.nn.elu)
    with tf.variable_scope('enc_sigma'):
        sigma_z = th.fully_connected(hidden_sigma, 10, tf.square)

    return mu_z, sigma_z
def make_decoder_fc(z):
    """Build the fully connected decoder and return ``(mu_x, sigma_x)``.

    Two independent branches each apply an ELU ``th.fully_connected`` layer
    to ``z`` followed by an identity-activated ``th.fully_connected`` layer
    called with ``11 * 11`` as its size argument.
    """
    output_size = 11 * 11

    # Mean branch: scopes 'fc2_mu' -> 'dec_mu'.
    with tf.variable_scope('fc2_mu'):
        hidden_mu = th.fully_connected(z, 50, tf.nn.elu)
    with tf.variable_scope('dec_mu'):
        mu_x = th.fully_connected(hidden_mu, output_size, lambda t: t)

    # Sigma branch: scopes 'fc2_sigma' -> 'dec_sigma'.
    with tf.variable_scope('fc2_sigma'):
        hidden_sigma = th.fully_connected(z, 50, tf.nn.elu)
    with tf.variable_scope('dec_sigma'):
        sigma_x = th.fully_connected(hidden_sigma, output_size, lambda t: t)

    return mu_x, sigma_x
Example #5
0
def make_embedding_network(state, embedding_size):
    """Build a convolutional embedding of ``state``.

    ``state`` is reshaped to gain a trailing axis of size 1, converted to
    ``tf.float32`` with ``tf.image.convert_image_dtype``, and passed through
    three ``th.down_convolution`` layers (scopes ``c1``-``c3``). The result
    is flattened and passed through a ReLU ``th.fully_connected`` layer
    (``fc1``) and an identity-activated one (``fc2``), which is returned.
    """
    # Static (non-batch) dimensions of the incoming state tensor.
    state_dims = [dim.value for dim in state.get_shape()[1:]]
    with_channel = tf.reshape(state, [-1] + state_dims + [1])
    image = tf.image.convert_image_dtype(with_channel, tf.float32)

    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(image, 5, 2, 1, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 5, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 5, 2, 64, 64, tf.nn.relu)

    # Product of every static dimension except the batch axis.
    flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])

    with tf.variable_scope('fc1'):
        flat = tf.reshape(conv3, [-1, flat_size])
        hidden = th.fully_connected(flat, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        embedding = th.fully_connected(hidden, embedding_size, lambda t: t)

    return embedding
def construct_heads_network(input, num_actions, num_abstract_states):
    """Build a multi-head Q-value network on top of ``input``.

    The number of heads is ``num_abstract_states`` squared. A ReLU
    ``th.fully_connected`` layer (``fc1``) feeds
    ``th.fully_connected_multi_shared_bias`` (``fc2``). Its output is
    reshaped to ``[-1, num_heads, num_actions]`` and returned.
    """
    head_count = num_abstract_states * num_abstract_states

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(input, 512, tf.nn.relu)

    with tf.variable_scope('fc2'):
        flat_q = th.fully_connected_multi_shared_bias(
            hidden, num_actions, head_count, lambda t: t)
        per_head_q = tf.reshape(flat_q, [-1, head_count, num_actions])

    return per_head_q
def construct_dqn_with_subgoal_embedding(input, abs_state1, abs_state2, frame_history, num_actions):
    """Build a DQN whose dense layers also see the two abstract states.

    ``abs_state1`` and ``abs_state2`` are concatenated along axis 1 and
    passed through a ReLU ``th.fully_connected`` layer (scope ``a1``).
    ``input`` is converted to ``tf.float32`` and passed through three
    ``th.down_convolution`` layers (``c1``-``c3``), then flattened. Both
    results are concatenated along axis 1 and fed through ``fc1`` (ReLU) and
    ``fc2`` (``th.fully_connected_shared_bias`` with identity activation),
    whose output is returned.
    """
    frames = tf.image.convert_image_dtype(input, tf.float32)

    with tf.variable_scope('a1'):
        abstract_pair = legacy_concat(data=[abs_state1, abs_state2], dim=1)
        abstract_features = th.fully_connected(abstract_pair, 50, tf.nn.relu)

    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)
        # Product of every static dimension except the batch axis.
        flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
        conv_flat = tf.reshape(conv3, [-1, flat_size])
        # Abstract-state features first, then the flattened conv features.
        combined = legacy_concat(dim=1, data=[abstract_features, conv_flat])

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(combined, 512, tf.nn.relu)
    with tf.variable_scope('fc2'):
        q_values = th.fully_connected_shared_bias(hidden, num_actions, lambda t: t)

    return q_values
def construct_embedding_network(abs_state1, abs_state2, hidden_size, embedding_size, weight_size):
    """Embed a pair of abstract states and emit a weight vector from it.

    Both abstract states go through the same two-layer ReLU tower: the
    second call re-enters variable scope ``'A'`` with ``reuse=True``, so the
    variables are shared. The two tower outputs are concatenated along
    axis 1 and passed through:

    * ``pre_embedding`` -- ReLU layer called with ``hidden_size * 2``
    * ``embedding``     -- identity-activated layer called with ``embedding_size``
    * ``pre_weights``   -- ReLU layer called with ``embedding_size``
    * ``weights``       -- identity-activated layer called with ``weight_size``

    Returns:
        ``(embedding, weights)``.
    """
    def shared_abs(inp, neurons):
        # Two stacked ReLU layers; variable names depend on the enclosing scope.
        with tf.variable_scope('fc1'):
            fc1 = th.fully_connected(inp, neurons, tf.nn.relu)
        with tf.variable_scope('fc2'):
            A = th.fully_connected(fc1, neurons, tf.nn.relu)
        return A

    with tf.variable_scope('A'):
        A1 = shared_abs(abs_state1, hidden_size)
    with tf.variable_scope('A', reuse=True):
        A2 = shared_abs(abs_state2, hidden_size)

    with tf.variable_scope('pre_embedding'):
        # The original called tf.concat(1, [A1, A2]), which is the pre-1.0
        # argument order and fails on TensorFlow >= 1.0. Use the same
        # legacy_concat shim as the neighbouring network builders instead.
        joined = legacy_concat(dim=1, data=[A1, A2])
        pre_embedding = th.fully_connected(joined, hidden_size*2, tf.nn.relu)
    with tf.variable_scope('embedding'):
        embedding = th.fully_connected(pre_embedding, embedding_size, lambda x: x)
    with tf.variable_scope('pre_weights'):
        pre_weights = th.fully_connected(embedding, embedding_size, tf.nn.relu)
    with tf.variable_scope('weights'):
        weights = th.fully_connected(pre_weights, weight_size, lambda x: x)
    return embedding, weights
def make_encoder(inp, encoding_size):
    """Build the convolutional encoder and return ``(mu, sigma)``.

    ``inp`` is passed through three ``th.down_convolution`` layers (scopes
    ``c1``-``c3``) and flattened. Two separate branches then each apply two
    ``th.fully_connected`` layers to the flattened features. The ``mu_zGx``
    branch ends in an identity activation; the ``sigma_zGx`` branch ends in
    ReLU.
    """
    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(inp, 5, 2, 1, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 5, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 5, 2, 64, 64, tf.nn.relu)
        # Product of every static dimension except the batch axis.
        flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
        features = tf.reshape(conv3, [-1, flat_size])

    # Mean branch.
    with tf.variable_scope('mu_zGx'):
        with tf.variable_scope('fc1'):
            mu_hidden = th.fully_connected(features, encoding_size, tf.nn.relu)
        with tf.variable_scope('fc2'):
            mu = th.fully_connected(mu_hidden, encoding_size, lambda t: t)

    # Sigma branch.
    with tf.variable_scope('sigma_zGx'):
        with tf.variable_scope('fc1'):
            sigma_hidden = th.fully_connected(features, encoding_size, tf.nn.relu)
        with tf.variable_scope('fc2'):
            sigma = th.fully_connected(sigma_hidden, encoding_size, tf.nn.relu)

    return mu, sigma
def make_decoder(z):
    """Build the convolutional decoder and return ``(mu_x, sigma_x)``.

    ``z`` is passed through a ReLU ``th.fully_connected`` layer, reshaped to
    ``[-1, 21, 21, 64]``, and passed through one shared
    ``th.up_convolution`` layer (scope ``d1``). Two separate
    ``th.up_convolution`` heads follow: ``d2_mu`` with a sigmoid activation
    and ``d2_sigma`` with a ReLU activation.
    """
    with tf.variable_scope('fc1'):
        projected = th.fully_connected(z, 21*21*64, tf.nn.relu)
        feature_map = tf.reshape(projected, [-1, 21, 21, 64])

    with tf.variable_scope('d1'):
        upsampled = th.up_convolution(feature_map, 5, 64, 32, tf.nn.relu)

    # Both output heads read the same 'd1' activations.
    with tf.variable_scope('d2_mu'):
        mu_x = th.up_convolution(upsampled, 5, 32, 1, tf.nn.sigmoid)
    with tf.variable_scope('d2_sigma'):
        sigma_x = th.up_convolution(upsampled, 5, 32, 1, tf.nn.relu)

    return mu_x, sigma_x
Example #11
0
 def construct_q_network(self, input):
     """Build the convolutional Q-network for ``input``.

     ``input`` is converted to ``tf.float32`` and passed through three
     ``th.down_convolution`` layers (scopes ``c1``-``c3``). The result is
     flattened and passed through a ReLU ``th.fully_connected`` layer
     (``fc1``) and an identity-activated output layer (``fc2``). The output
     layer is ``th.fully_connected_shared_bias`` when ``self.shared_bias``
     is truthy and ``th.fully_connected`` otherwise.
     """
     frames = tf.image.convert_image_dtype(input, tf.float32)

     with tf.variable_scope('c1'):
         conv1 = th.down_convolution(
             frames, 8, 4, self.frame_history, 32, tf.nn.relu)
     with tf.variable_scope('c2'):
         conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
     with tf.variable_scope('c3'):
         conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)
         # Product of every static dimension except the batch axis.
         flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
         features = tf.reshape(conv3, [-1, flat_size])

     with tf.variable_scope('fc1'):
         hidden = th.fully_connected(features, 512, tf.nn.relu)

     with tf.variable_scope('fc2'):
         # Pick the output-layer builder once, then call it.
         output_layer = (th.fully_connected_shared_bias
                         if self.shared_bias else th.fully_connected)
         q_values = output_layer(hidden, self.num_actions, lambda t: t)

     return q_values
def construct_dqn_with_embedding(input, abs_state1, abs_state2, frame_history, num_actions):
    """Build a DQN whose output layer is generated per example.

    ``construct_embedding_network`` produces a weight vector of length
    ``512 * num_actions + 1`` for each example. The first
    ``512 * num_actions`` entries are reshaped into a ``[-1, 512,
    num_actions]`` matrix and the final entry is the shared bias. The usual
    three ``th.down_convolution`` layers and the ``fc1`` layer produce
    features, which are multiplied by that matrix and offset by the bias to
    give the returned Q-values.
    """
    hidden_units = 512
    weight_count = hidden_units * num_actions

    # One extra output slot carries the shared bias.
    embedding, weights = construct_embedding_network(
        abs_state1, abs_state2, 50, 50, weight_count + 1)
    w = tf.reshape(weights[:, 0:weight_count], [-1, hidden_units, num_actions])
    b = tf.reshape(weights[:, weight_count:weight_count + 1], [-1, 1])

    frames = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)
        # Product of every static dimension except the batch axis.
        flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
        features = tf.reshape(conv3, [-1, flat_size])

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(features, hidden_units, tf.nn.relu)

    with tf.variable_scope('fc2'):
        # Batched matmul: [-1, 1, 512] x [-1, 512, num_actions].
        hidden_rows = tf.reshape(hidden, [-1, 1, hidden_units])
        projected = tf.matmul(hidden_rows, w)
        q_values = tf.reshape(projected, [-1, num_actions]) + b

    return q_values
def construct_q_network_weights_only_final(input, dqn_numbers, dqn_max_number,
                                           frame_history, num_actions):
    """Build a Q-network whose final layer is ``th.fully_connected_weights``.

    ``input`` is converted to ``tf.float32`` and passed through three
    ``th.down_convolution`` layers (``c1``-``c3``). The result is flattened
    and passed through a ReLU ``th.fully_connected`` layer (``fc1``). The
    final layer (``fc2``) is ``th.fully_connected_weights``, called with
    ``dqn_numbers``, ``dqn_max_number`` and ``num_actions``.

    Returns:
        ``(q_values, None)``. The second element is always ``None``.
        NOTE(review): presumably a placeholder for an exploration head that
        is currently disabled -- confirm with callers.
    """
    frames = tf.image.convert_image_dtype(input, tf.float32)

    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)
        # Flat size is taken from the static shape (all axes but the batch).
        flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
        features = tf.reshape(conv3, [-1, flat_size])

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(features, 512, tf.nn.relu)

    with tf.variable_scope('fc2'):
        q_values = th.fully_connected_weights(
            hidden, dqn_numbers, dqn_max_number, num_actions, lambda t: t)

    return q_values, None
def construct_dqn_with_embedding_2_layer(input, abs_state1, abs_state2, frame_history, num_actions):
    """Build a DQN with an explicitly created output weight matrix and bias.

    The output weights ``w`` (shape ``[512, num_actions]``, Xavier
    initialised) and bias ``b`` (shape ``[1]``, initialised to 0) are created
    in variable scope ``'moop'``. ``input`` is converted to ``tf.float32``
    and passed through three ``th.down_convolution`` layers (``c1``-``c3``),
    then flattened and passed through a ReLU ``th.fully_connected`` layer
    (``fc1``). The returned Q-values are ``matmul(fc1, w) + b``.

    ``abs_state1`` and ``abs_state2`` are accepted but not used by the
    current body. The variant that generated ``w`` and ``b`` from
    ``construct_embedding_network`` is disabled.
    """
    with tf.variable_scope('moop'):
        out_weights = tf.get_variable(
            'w',
            shape=[512, num_actions],
            initializer=tf.contrib.layers.xavier_initializer())
        out_bias = tf.get_variable(
            'b',
            shape=[1],
            initializer=tf.constant_initializer(0))

    frames = tf.image.convert_image_dtype(input, tf.float32)
    with tf.variable_scope('c1'):
        conv1 = th.down_convolution(frames, 8, 4, frame_history, 32, tf.nn.relu)
    with tf.variable_scope('c2'):
        conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
    with tf.variable_scope('c3'):
        conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)
        # Product of every static dimension except the batch axis.
        flat_size = np.prod([dim.value for dim in conv3.get_shape()[1:]])
        features = tf.reshape(conv3, [-1, flat_size])

    with tf.variable_scope('fc1'):
        hidden = th.fully_connected(features, 512, tf.nn.relu)

    with tf.variable_scope('fc2'):
        # Plain 2-D matmul; the single bias value is broadcast over the result.
        q_values = tf.matmul(hidden, out_weights) + out_bias

    return q_values
 def shared_abs(inp, neurons):
     """Apply two stacked ReLU ``th.fully_connected`` layers to ``inp``.

     The first layer is built in scope ``fc1`` and the second in scope
     ``fc2``. Both receive ``neurons`` as their size argument. The output of
     the second layer is returned.
     """
     with tf.variable_scope('fc1'):
         first = th.fully_connected(inp, neurons, tf.nn.relu)

     with tf.variable_scope('fc2'):
         second = th.fully_connected(first, neurons, tf.nn.relu)

     return second
Example #16
0
def hook_l1(inp_abstracted, num_abstract_actions):
    """Build a single identity-activated layer over ``inp_abstracted``.

    The layer is created with ``th.fully_connected`` in variable scope
    ``fc1``, with ``num_abstract_actions`` as its size argument. Its output
    is returned as the Q-values.
    """
    with tf.variable_scope('fc1'):
        return th.fully_connected(
            inp_abstracted, num_abstract_actions, lambda t: t)
Example #17
0
    def construct_q_network(self, input):
        """Build a Q-network with a relation-network stage after the convs.

        ``input`` is converted to ``tf.float32`` and passed through three
        ``th.down_convolution`` layers (scopes ``c1``-``c3``). Inside scope
        ``rn``:

        1. Every spatial cell of the ``c3`` output becomes an "object": its
           depth column, with the cell's normalised ``(i / x_dim,
           j / y_dim)`` position appended.
        2. ``rn.init_g`` is called with twice the object width.
        3. Every unordered pair of distinct objects
           (``itertools.combinations``) is concatenated and passed through
           ``rn.mlp_g``.
        4. The pair outputs are stacked on a new last axis and summed over
           it, then passed to ``rn.mlp_f`` with ``0.5``.

        The final layer (scope ``fc2``) is ``th.fully_connected_shared_bias``
        when ``self.shared_bias`` is truthy and ``th.fully_connected``
        otherwise, with an identity activation.
        """
        # Standard CNN layers, building on the standard Q-network.
        frames = tf.image.convert_image_dtype(input, tf.float32)
        with tf.variable_scope('c1'):
            conv1 = th.down_convolution(
                frames, 8, 4, self.frame_history, 32, tf.nn.relu)
        with tf.variable_scope('c2'):
            conv2 = th.down_convolution(conv1, 4, 2, 32, 64, tf.nn.relu)
        with tf.variable_scope('c3'):
            conv3 = th.down_convolution(conv2, 3, 1, 64, 64, tf.nn.relu)

        with tf.variable_scope('rn'):
            conv_shape = conv3.get_shape()
            x_dim = conv_shape[1].value
            y_dim = conv_shape[2].value

            # One object per spatial cell, visited in row-major order.
            objects = []
            for row, col in it.product(range(x_dim), range(y_dim)):
                cell = tf.slice(conv3, [0, row, col, 0], [-1, 1, 1, -1])
                # Drop the two singleton spatial axes: [batch, channels].
                column = tf.reshape(cell, [-1, cell.get_shape()[3].value])
                # Normalised location tag, repeated for every batch row.
                position = tf.constant(
                    [[float(row) / x_dim, float(col) / y_dim]])
                batch_rows = tf.shape(column)[0]
                tiled_position = tf.tile(position, [batch_rows, 1])
                # Resulting width is channels + 2.
                objects.append(tf.concat([column, tiled_position], 1))

            # Initialise the shared g variables with the width of a pair.
            rn.init_g(objects[0].get_shape()[1] * 2)

            # Apply g to each unordered pair of distinct objects.
            relations = [
                rn.mlp_g(tf.concat([first, second], 1))
                for first, second in it.combinations(objects, 2)
            ]

            # Sum the pair outputs elementwise. The stacked tensor has one
            # entry per pair on its last axis.
            # NOTE: is this the best way to combine results, overall?
            stacked = tf.stack(relations, axis=2)
            pooled = tf.reduce_sum(stacked, axis=2)

            # TODO: does dropout make sense here? should we have different
            # dropout val for the target network?
            relation_features = rn.mlp_f(pooled, 0.5)

        # Final linear output layer, called with self.num_actions.
        with tf.variable_scope('fc2'):
            output_layer = (th.fully_connected_shared_bias
                            if self.shared_bias else th.fully_connected)
            q_values = output_layer(
                relation_features, self.num_actions, lambda t: t)
        return q_values