Example #1
    def Planner(self, training_input, testing_input, label_status, length,
                mask):
        with tf.variable_scope('planner'):
            batch_size = self.batch_size // self.gpu_num  # integer division: reshape below needs an int

            rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)

            w_status = tf.get_variable(
                'w_status', [self.n_hidden, 2],
                initializer=tf.contrib.layers.xavier_initializer())
            b_status = tf.get_variable(
                'b_status', [2],
                initializer=tf.contrib.layers.xavier_initializer())

            # training
            training_input_dropout = tf.nn.dropout(training_input,
                                                   self.keep_prob)  # b*l, h
            shape = training_input_dropout.get_shape().as_list()
            training_input_reshape = tf.reshape(
                training_input_dropout,
                [batch_size, self.max_step, shape[1]])  # b, l, h
            rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                              training_input_reshape,
                                              sequence_length=length,
                                              dtype=tf.float32)  # b, l, h
            rnn_output_dropout = tf.nn.dropout(rnn_output, self.keep_prob)
            rnn_output_reshape = tf.reshape(rnn_output_dropout,
                                            [-1, self.n_hidden])  # b*l, h
            logits = tf.matmul(rnn_output_reshape, w_status) + b_status  # b*l, 2

            label_status_reshape = tf.reshape(label_status, [-1])
            loss_status = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=label_status_reshape, logits=logits)

            loss_status_scalar = tf.reduce_sum(loss_status * mask)

            # testing
            prev_state = []
            for l in range(self.n_layers):
                prev_state.append(
                    LSTMStateTuple(
                        tf.placeholder(tf.float32,
                                       shape=[None, self.n_hidden],
                                       name='initial_state{0}.c'.format(l)),
                        tf.placeholder(tf.float32,
                                       shape=[None, self.n_hidden],
                                       name='initial_state{0}.h'.format(l))))
            if self.n_layers == 1:
                prev_state = prev_state[0]

            rnn_output_test, state = rnn_cell(testing_input,
                                              prev_state)  # b, h
            prob = tf.reshape(
                tf.nn.softmax(tf.matmul(rnn_output_test, w_status) + b_status),
                [-1, 2])
            # pred_status_test = tf.argmax(prob, axis=1)
            return loss_status_scalar, prob, state, prev_state
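
A minimal sketch of how the testing path above might be driven, assuming n_layers == 1 (so prev_state is a single LSTMStateTuple) and that model, sess, train_in, test_in, labels, length, mask, and episode come from the surrounding script (all illustrative names):

import numpy as np

# graph construction (elsewhere) produced these tensors:
loss, prob, state, prev_state = model.Planner(train_in, test_in, labels,
                                              length, mask)

zero = np.zeros((1, model.n_hidden), dtype=np.float32)
state_feed = {prev_state.c: zero, prev_state.h: zero}  # start from all zeros
for obs in episode:  # one feature vector per control step
    feed = {test_in: obs[None], **state_feed}
    p, s = sess.run([prob, state], feed_dict=feed)
    # thread the returned LSTMStateTuple back in for the next step
    state_feed = {prev_state.c: s.c, prev_state.h: s.h}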
Example #2
    def Model(self, inputs):
        input_depth, input_cmd, input_prev_a, rnn_h_in = inputs
        # encode depth image
        conv1 = model_utils.conv2d(input_depth,
                                   4,
                                   5,
                                   4,
                                   scope='conv1',
                                   max_pool=False)
        conv2 = model_utils.conv2d(conv1,
                                   16,
                                   5,
                                   4,
                                   scope='conv2',
                                   max_pool=False)
        conv3 = model_utils.conv2d(conv2,
                                   32,
                                   3,
                                   2,
                                   scope='conv3',
                                   max_pool=False)
        shape = conv3.get_shape().as_list()
        depth_vect = tf.reshape(conv3,
                                shape=[-1,
                                       shape[1] * shape[2] * shape[3]])  # b,d
        # encode cmd
        embedding_cmd = tf.get_variable('cmd_embedding',
                                        [self.n_cmd_type, self.dim_emb])
        cmd_vect = tf.reshape(tf.nn.embedding_lookup(embedding_cmd, input_cmd),
                              [-1, self.dim_emb])
        # encode prev action
        embedding_w_action = tf.get_variable('embedding_w_action',
                                             [self.dim_action, self.dim_emb])
        embedding_b_action = tf.get_variable('embedding_b_action',
                                             [self.dim_emb])
        prev_a_vect = tf.matmul(input_prev_a,
                                embedding_w_action) + embedding_b_action

        input_vect = tf.concat([depth_vect, cmd_vect, prev_a_vect], axis=1)

        # rnn
        if self.rnn_type == 'lstm':
            rnn_cell = model_utils._lstm_cell(self.n_hidden,
                                              1,
                                              name='rnn_cell')
        else:
            rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn_cell')
        rnn_output, rnn_h_out = rnn_cell(input_vect, rnn_h_in)  # use the state unpacked from inputs
        # action
        a_linear = model_utils.dense_layer(
            rnn_output, 1, 'a_linear',
            activation=tf.nn.sigmoid) * self.action_range[0]
        a_angular = model_utils.dense_layer(
            rnn_output, 1, 'a_angular',
            activation=tf.nn.tanh) * self.action_range[1]
        pred_action = tf.concat([a_linear, a_angular], axis=1)

        return pred_action, rnn_h_out
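
A sketch of wiring this model up in TF 1.x, assuming rnn_type == 'gru' (so the recurrent state is a single tensor); the agent handle and attribute names follow the code above, while the placeholder shapes are assumptions:

import tensorflow as tf

input_depth = tf.placeholder(tf.float32, [None, 128, 160, 1])  # assumed resolution
input_cmd = tf.placeholder(tf.int32, [None, 1])
input_prev_a = tf.placeholder(tf.float32, [None, agent.dim_action])
rnn_h_in = tf.placeholder(tf.float32, [None, agent.n_hidden])  # GRU state

pred_action, rnn_h_out = agent.Model(
    (input_depth, input_cmd, input_prev_a, rnn_h_in))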
Example #3
    def Model(self, inputs):
        laser, cmd, cmd_next, cmd_skip, prev_action, obj_goal, prev_state_2 = inputs
        with tf.variable_scope('encoder'):
            embedding_w_goal = tf.get_variable('embedding_w_goal',
                                               [self.dim_action, self.dim_emb])
            embedding_b_goal = tf.get_variable('embedding_b_goal',
                                               [self.dim_emb])
            embedding_status = tf.get_variable(
                'embedding_status', [self.n_cmd_type**2, self.dim_emb])
            embedding_w_action = tf.get_variable(
                'embedding_w_action', [self.dim_action, self.dim_emb])
            embedding_b_action = tf.get_variable('embedding_b_action',
                                                 [self.dim_emb])
            embedding_w_status = tf.get_variable('embedding_w_status',
                                                 [self.dim_cmd, self.dim_emb])
            embedding_b_status = tf.get_variable('embedding_b_status',
                                                 [self.dim_emb])

            # training input
            conv1 = model_utils.Conv1D(laser, 2, 5, 4, scope='conv1')
            conv2 = model_utils.Conv1D(conv1, 4, 5, 4, scope='conv2')
            conv3 = model_utils.Conv1D(conv2, 8, 5, 4, scope='conv3')
            shape = conv3.get_shape().as_list()
            vector_laser = tf.reshape(conv3, (-1, shape[1] * shape[2]))

            curr_status = cmd * self.n_cmd_type + cmd_next
            next_status = cmd_next * self.n_cmd_type + cmd_skip
            vector_curr_status = tf.reshape(
                tf.nn.embedding_lookup(embedding_status, curr_status),
                (-1, self.dim_emb))

            vector_prev_action = tf.matmul(
                prev_action, embedding_w_action) + embedding_b_action

            vector_obj_goal = tf.matmul(obj_goal,
                                        embedding_w_goal) + embedding_b_goal

            input_vector = tf.concat([
                vector_laser, vector_curr_status, vector_prev_action,
                vector_obj_goal
            ],
                                     axis=1)

        with tf.variable_scope('controller'):
            shape = input_vector.get_shape().as_list()
            w_hidden = tf.get_variable(
                'w_hidden', [shape[1], self.n_hidden],
                initializer=tf.contrib.layers.xavier_initializer())
            b_hidden = tf.get_variable(
                'b_hidden', [self.n_hidden],
                initializer=tf.contrib.layers.xavier_initializer())

            w_action_linear = tf.get_variable(
                'w_action_linear', [self.n_hidden, self.dim_action // 2],
                initializer=tf.contrib.layers.xavier_initializer())
            b_action_linear = tf.get_variable(
                'b_action_linear', [self.dim_action // 2],
                initializer=tf.contrib.layers.xavier_initializer())
            w_action_angular = tf.get_variable(
                'w_action_angular', [self.n_hidden, self.dim_action // 2],
                initializer=tf.contrib.layers.xavier_initializer())
            b_action_angular = tf.get_variable(
                'b_action_angular', [self.dim_action // 2],
                initializer=tf.contrib.layers.xavier_initializer())

            hidden = tf.nn.leaky_relu(
                tf.matmul(input_vector, w_hidden) + b_hidden)
            a_linear = tf.nn.sigmoid(
                tf.matmul(hidden, w_action_linear) +
                b_action_linear) * self.action_range[0]
            a_angular = tf.nn.tanh(
                tf.matmul(hidden, w_action_angular) +
                b_action_angular) * self.action_range[1]
            pred_action = tf.concat([a_linear, a_angular], axis=1)

        with tf.variable_scope('planner'):
            rnn_cell_2 = model_utils._lstm_cell(self.n_hidden,
                                                self.n_layers,
                                                name='rnn/basic_lstm_cell')

            w_status_matrix = tf.get_variable(
                'w_status_matrix', [self.n_cmd_type**2, self.n_hidden],
                initializer=tf.contrib.layers.xavier_initializer())
            b_status_matrix = tf.get_variable(
                'b_status_matrix', [self.n_cmd_type**2],
                initializer=tf.contrib.layers.xavier_initializer())
            status_curr = tf.reshape(cmd * self.n_cmd_type + cmd_next,
                                     [-1])  # b, 1 -> (b,)
            status_next = tf.reshape(cmd_next * self.n_cmd_type + cmd_skip,
                                     [-1])
            w_status_curr = tf.reshape(tf.gather(w_status_matrix, status_curr),
                                       [-1, self.n_hidden, 1])  # b, h, 1
            w_status_next = tf.reshape(tf.gather(w_status_matrix, status_next),
                                       [-1, self.n_hidden, 1])
            b_status_curr = tf.reshape(tf.gather(b_status_matrix, status_curr),
                                       [-1, 1])  # b, 1
            b_status_next = tf.reshape(tf.gather(b_status_matrix, status_next),
                                       [-1, 1])
            w_status = tf.concat([w_status_curr, w_status_next],
                                 axis=2)  # b, h, 2
            b_status = tf.concat([b_status_curr, b_status_next],
                                 axis=1)  # b, 2

            rnn_output_2, state_2 = rnn_cell_2(input_vector, prev_state_2)
            rnn_output_expand = tf.expand_dims(rnn_output_2, 1)  # b, 1, h
            logits = tf.reshape(tf.matmul(rnn_output_expand, w_status),
                                [-1, 2]) + b_status

        return pred_action, logits, state_2
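
The planner head in this variant builds a separate two-way classifier per example: tf.gather pulls the current- and next-status rows out of w_status_matrix, and the batched matmul multiplies [b, 1, h] by [b, h, 2]. A quick shape check with illustrative sizes:

import tensorflow as tf

b, h = 4, 8
rnn_out = tf.ones([b, 1, h])   # stands in for rnn_output_expand
w = tf.ones([b, h, 2])         # stands in for the per-example w_status
print(tf.matmul(rnn_out, w).shape)  # (4, 1, 2), reshaped above to (b, 2)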
Example #4
    def ControllerLSTM(self, training_input, testing_input, label_action,
                       length, mask):
        with tf.variable_scope('controller'):
            batch_size = self.batch_size // self.gpu_num  # integer division: reshape below needs an int

            rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)

            w_action_linear = tf.get_variable(
                'w_action_linear', [self.n_hidden, self.dim_action // 2],
                initializer=tf.contrib.layers.xavier_initializer())
            b_action_linear = tf.get_variable(
                'b_action_linear', [self.dim_action // 2],
                initializer=tf.contrib.layers.xavier_initializer())
            w_action_angular = tf.get_variable(
                'w_action_angular', [self.n_hidden, self.dim_action // 2],
                initializer=tf.contrib.layers.xavier_initializer())
            b_action_angular = tf.get_variable(
                'b_action_angular', [self.dim_action // 2],
                initializer=tf.contrib.layers.xavier_initializer())

            training_input_dropout = tf.nn.dropout(training_input,
                                                   self.keep_prob)  # b*l, h
            shape = training_input_dropout.get_shape().as_list()
            training_input_reshape = tf.reshape(
                training_input_dropout,
                [batch_size, self.max_step, shape[1]])  # b, l, h
            rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                              training_input_reshape,
                                              sequence_length=length,
                                              dtype=tf.float32)  # b, l, h
            rnn_output_dropout = tf.nn.dropout(rnn_output, self.keep_prob)
            rnn_output_reshape = tf.reshape(rnn_output_dropout,
                                            [-1, self.n_hidden])  # b*l, h

            a_linear = tf.nn.sigmoid(
                tf.matmul(rnn_output_reshape, w_action_linear) +
                b_action_linear) * self.action_range[0]
            a_angular = tf.nn.tanh(
                tf.matmul(rnn_output_reshape, w_action_angular) +
                b_action_angular) * self.action_range[1]
            pred_action = tf.concat([a_linear, a_angular], axis=1)

            # calculate the mean and variance of the masked error
            mask_reshape = tf.reshape(
                mask, [batch_size * self.max_step, 1])  # b*l, 1
            mask_tile = tf.tile(mask_reshape, [1, 2])  # b*l, 2
            masked_error = (pred_action - label_action) * mask_tile  # b*l, 2
            mean = tf.reduce_sum(masked_error, axis=0) / tf.cast(
                tf.reduce_sum(length), tf.float32)  # 2
            mean_expand = tf.expand_dims(mean, axis=0)  # 1, 2
            mean_tile = tf.tile(mean_expand,
                                [batch_size * self.max_step, 1])  # b*l, 2
            variance = tf.reduce_sum(
                tf.square((masked_error - mean_tile) * mask_tile),
                axis=0) / tf.cast(tf.reduce_sum(length),
                                  tf.float32)  # sum of squared deviations / n

            loss_action = tf.losses.mean_squared_error(
                labels=label_action,
                predictions=pred_action,
                reduction=tf.losses.Reduction.NONE)
            loss_action = tf.reduce_sum(loss_action, axis=1)
            loss_action_scalar = tf.reduce_sum(loss_action * mask)

            # testing
            prev_state = []
            for l in range(self.n_layers):
                prev_state.append(
                    LSTMStateTuple(
                        tf.placeholder(tf.float32,
                                       shape=[None, self.n_hidden],
                                       name='initial_state{0}.c'.format(l)),
                        tf.placeholder(tf.float32,
                                       shape=[None, self.n_hidden],
                                       name='initial_state{0}.h'.format(l))))
            if self.n_layers == 1:
                prev_state = prev_state[0]

            rnn_output_test, state = rnn_cell(testing_input,
                                              prev_state)  # b, h
            a_linear_test = tf.nn.sigmoid(
                tf.matmul(rnn_output_test, w_action_linear) +
                b_action_linear) * self.action_range[0]
            a_angular_test = tf.nn.tanh(
                tf.matmul(rnn_output_test, w_action_angular) +
                b_action_angular) * self.action_range[1]
            pred_action_test = tf.concat([a_linear_test, a_angular_test],
                                         axis=1)

            return loss_action_scalar, pred_action_test, state, prev_state, mean, variance
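
The masked mean and variance above (with the square taken inside the sum) amount to the following NumPy computation; the toy values are illustrative:

import numpy as np

err = np.array([[1., 2.], [3., 4.], [0., 0.]])  # b*l, 2; last row is padding
mask = np.array([1., 1., 0.])                   # valid-step mask
n = mask.sum()                                  # equals sum(length)
masked = err * mask[:, None]
mean = masked.sum(axis=0) / n                   # per-dimension mean error
var = (((masked - mean) * mask[:, None]) ** 2).sum(axis=0) / n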
Example #5
    def Model(self, inputs):
        laser, cmd, cmd_next, prev_action, obj_goal, action = inputs
        with tf.variable_scope('encoder'):

            embedding_w_goal = tf.get_variable('embedding_w_goal',
                                               [self.dim_action, self.dim_emb])
            embedding_b_goal = tf.get_variable('embedding_b_goal',
                                               [self.dim_emb])
            embedding_status = tf.get_variable(
                'embedding_status', [self.n_cmd_type**2, self.dim_emb])
            embedding_w_action = tf.get_variable(
                'embedding_w_action', [self.dim_action, self.dim_emb])
            embedding_b_action = tf.get_variable('embedding_b_action',
                                                 [self.dim_emb])
            embedding_w_status = tf.get_variable('embedding_w_status',
                                                 [self.dim_cmd, self.dim_emb])
            embedding_b_status = tf.get_variable('embedding_b_status',
                                                 [self.dim_emb])

            conv1 = model_utils.Conv1D(laser, 2, 5, 4,
                                       scope='conv1')  # laser is unpacked from inputs above
            conv2 = model_utils.Conv1D(conv1, 4, 5, 4, scope='conv2')
            conv3 = model_utils.Conv1D(conv2, 8, 5, 4, scope='conv3')
            shape = conv3.get_shape().as_list()
            vector_laser = tf.reshape(conv3, (-1, shape[1] * shape[2]))
            curr_status = cmd * self.n_cmd_type + cmd_next
            vector_curr_status = tf.reshape(
                tf.nn.embedding_lookup(embedding_status, curr_status),
                (-1, self.dim_emb))
            vector_prev_action = tf.matmul(
                prev_action, embedding_w_action) + embedding_b_action
            vector_obj_goal = tf.matmul(obj_goal,
                                        embedding_w_goal) + embedding_b_goal
            vector_action = tf.matmul(action,
                                      embedding_w_action) + embedding_b_action

            input_vector = tf.concat([
                vector_laser, vector_curr_status, vector_prev_action,
                vector_obj_goal, vector_action
            ],
                                     axis=1)

        with tf.variable_scope('q'):
            rnn_cell = model_utils._lstm_cell(self.n_hidden,
                                              self.n_layers,
                                              name='rnn/basic_lstm_cell')
            w_q = tf.get_variable('w_q', [self.n_hidden, 1],
                                  initializer=tf.initializers.random_uniform(
                                      -0.003, 0.003))
            b_q = tf.get_variable('b_q', [1],
                                  initializer=tf.initializers.random_uniform(
                                      -0.003, 0.003))

            shape = input_vector.get_shape().as_list()
            input_vector_reshape = tf.reshape(
                input_vector, [self.batch_size, self.max_step, shape[1]])

            rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                              input_vector_reshape,
                                              sequence_length=self.length,
                                              dtype=tf.float32)  # b, l, h
            rnn_output_reshape = tf.reshape(rnn_output,
                                            [-1, self.n_hidden])  # b*l, h
            q = tf.matmul(rnn_output_reshape, w_q) + b_q

        return q
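
tf.nn.dynamic_rnn stops the recurrence at each sequence's length and zero-fills the remaining outputs, so padded steps contribute nothing to q. A small standalone TF 1.x check (cell size and shapes are arbitrary):

import numpy as np
import tensorflow as tf

cell = tf.nn.rnn_cell.BasicLSTMCell(3)
x = tf.placeholder(tf.float32, [2, 4, 5])
out, _ = tf.nn.dynamic_rnn(cell, x, sequence_length=[2, 4], dtype=tf.float32)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    o = sess.run(out, {x: np.random.randn(2, 4, 5).astype(np.float32)})
    print(o[0, 2:])  # all zeros: steps past length 2 were never computed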
Example #6
    def Planner(self, training_input, testing_input, input_cmd, input_cmd_next,
                input_cmd_skip, label_status, length, mask):
        with tf.variable_scope('planner'):
            batch_size = self.batch_size // self.gpu_num  # integer division: reshape below needs an int

            rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)

            w_status_matrix = tf.get_variable(
                'w_status_matrix', [self.n_cmd_type**2, self.n_hidden],
                initializer=tf.contrib.layers.xavier_initializer())
            b_status_matrix = tf.get_variable(
                'b_status_matrix', [self.n_cmd_type**2],
                initializer=tf.contrib.layers.xavier_initializer())
            status_curr = tf.reshape(input_cmd * self.n_cmd_type +
                                     input_cmd_next, [-1])  # b*l, 1 -> (b*l,)
            status_next = tf.reshape(
                input_cmd_next * self.n_cmd_type + input_cmd_skip, [-1])
            w_status_curr = tf.reshape(tf.gather(w_status_matrix, status_curr),
                                       [-1, self.n_hidden, 1])  # b*l, h, 1
            w_status_next = tf.reshape(tf.gather(w_status_matrix, status_next),
                                       [-1, self.n_hidden, 1])
            b_status_curr = tf.reshape(tf.gather(b_status_matrix, status_curr),
                                       [-1, 1])  # b*l, 1
            b_status_next = tf.reshape(tf.gather(b_status_matrix, status_next),
                                       [-1, 1])
            w_status = tf.concat([w_status_curr, w_status_next],
                                 axis=2)  # b*l, h, 2
            b_status = tf.concat([b_status_curr, b_status_next],
                                 axis=1)  # b*l, 2

            # training
            training_input_dropout = tf.nn.dropout(training_input,
                                                   self.keep_prob)  # b*l, h
            shape = training_input_dropout.get_shape().as_list()
            training_input_reshape = tf.reshape(
                training_input_dropout,
                [batch_size, self.max_step, shape[1]])  # b, l, h
            rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                              training_input_reshape,
                                              sequence_length=length,
                                              dtype=tf.float32)  # b, l, h
            rnn_output_dropout = tf.nn.dropout(rnn_output, self.keep_prob)
            rnn_output_reshape = tf.reshape(rnn_output_dropout,
                                            [-1, self.n_hidden])  # b*l, h
            rnn_output_expand = tf.expand_dims(rnn_output_reshape,
                                               1)  # b*l, 1, h

            # 1. dot product distance
            logits = tf.reshape(tf.matmul(rnn_output_expand, w_status),
                                [-1, 2]) + b_status  # b*l, 2
            self.training_logits = logits
            self.training_pred = tf.argmax(logits, axis=1)
            # # 2. eucl distance
            # w_status_curr_reshape = tf.reshape(w_status_curr, [-1, self.n_hidden]) # b*l, h
            # w_status_next_reshape = tf.reshape(w_status_next, [-1, self.n_hidden]) # b*l, h
            # squared_dist_curr = tf.reduce_sum(tf.square(rnn_output_reshape - w_status_curr_reshape), axis=1, keepdims=True) # b*l
            # squared_dist_next = tf.reduce_sum(tf.square(rnn_output_reshape - w_status_next_reshape), axis=1, keepdims=True) # b*l
            # logits = tf.concat([squared_dist_curr, squared_dist_next], axis=1)

            # # 3. binary prediction
            # w_binary = tf.get_variable('w_status', [self.n_hidden, 2], initializer=tf.contrib.layers.xavier_initializer())
            # b_binary = tf.get_variable('b_status', [2], initializer=tf.contrib.layers.xavier_initializer())
            # logits = tf.matmul(rnn_output_reshape, w_binary) + b_binary

            # # 4. n^2 prediction
            # w_square = tf.get_variable('w_square', [self.n_hidden, self.n_cmd_type**2 * 2], initializer=tf.contrib.layers.xavier_initializer())
            # b_square = tf.get_variable('b_square', [self.n_cmd_type**2 * 2], initializer=tf.contrib.layers.xavier_initializer())
            # logits = tf.matmul(rnn_output_reshape, w_square) + b_square
            # label_status_reshape = tf.reshape(label_status, [-1])
            # label_status = (1 - label_status_reshape) * status_curr + label_status_reshape * status_next

            label_status_reshape = tf.reshape(label_status, [-1])
            loss_status = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=label_status_reshape, logits=logits)

            loss_status_scalar = tf.reduce_sum(loss_status * mask)

            # testing
            prev_state = []
            for l in range(self.n_layers):
                prev_state.append(
                    LSTMStateTuple(
                        tf.placeholder(tf.float32,
                                       shape=[None, self.n_hidden],
                                       name='initial_state{0}.c'.format(l)),
                        tf.placeholder(tf.float32,
                                       shape=[None, self.n_hidden],
                                       name='initial_state{0}.h'.format(l))))
            if self.n_layers == 1:
                prev_state = prev_state[0]

            rnn_output_test, state = rnn_cell(testing_input,
                                              prev_state)  # b, h
            rnn_output_test_expand = tf.expand_dims(rnn_output_test,
                                                    1)  # b, 1, h

            w_status_curr = tf.reshape(
                tf.gather(w_status_matrix, self.test_status),
                [-1, self.n_hidden, 1])  # b, h, 1
            w_status_next = tf.reshape(
                tf.gather(w_status_matrix, self.test_status_next),
                [-1, self.n_hidden, 1])
            b_status_curr = tf.reshape(
                tf.gather(b_status_matrix, self.test_status), [-1, 1])  # b, 1
            b_status_next = tf.reshape(
                tf.gather(b_status_matrix, self.test_status_next), [-1, 1])
            w_status = tf.concat([w_status_curr, w_status_next],
                                 axis=2)  # b, h, 2
            b_status = tf.concat([b_status_curr, b_status_next], axis=1)
            logits = tf.reshape(tf.matmul(rnn_output_test_expand, w_status),
                                [-1, 2]) + b_status

            pred_done = tf.argmax(logits, axis=1)

            return loss_status_scalar, pred_done, logits, state, prev_state
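
The index cmd * n_cmd_type + cmd_next used throughout these snippets packs a (cmd, cmd_next) pair into a single row of the n_cmd_type**2 tables, which is what the tf.gather lookups above rely on; for example with n_cmd_type = 4:

n_cmd_type = 4
cmd, cmd_next = 2, 3
status = cmd * n_cmd_type + cmd_next  # row 11 of a 16-row table
assert (status // n_cmd_type, status % n_cmd_type) == (cmd, cmd_next)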