Example #1
    def Model(self, inputs):
        input_depth, input_cmd, input_prev_a, input_goal, input_action = inputs
        # encode depth image
        conv1 = model_utils.conv2d(input_depth,
                                   4,
                                   5,
                                   4,
                                   scope='conv1',
                                   max_pool=False)
        conv2 = model_utils.conv2d(conv1,
                                   16,
                                   5,
                                   4,
                                   scope='conv2',
                                   max_pool=False)
        conv3 = model_utils.conv2d(conv2,
                                   32,
                                   3,
                                   2,
                                   scope='conv3',
                                   max_pool=False)
        shape = conv3.get_shape().as_list()
        depth_vect = tf.reshape(conv3,
                                shape=[-1,
                                       shape[1] * shape[2] * shape[3]])  # b,d
        # encode cmd
        embedding_cmd = tf.get_variable('cmd_embedding',
                                        [self.n_cmd_type, self.dim_emb])
        cmd_vect = tf.reshape(tf.nn.embedding_lookup(embedding_cmd, input_cmd),
                              [-1, self.dim_emb])
        # encode action
        embedding_w_action = tf.get_variable('embedding_w_action',
                                             [self.dim_action, self.dim_emb])
        embedding_b_action = tf.get_variable('embedding_b_action',
                                             [self.dim_emb])
        prev_a_vect = tf.matmul(input_prev_a,
                                embedding_w_action) + embedding_b_action
        action_vect = tf.matmul(
            input_action, embedding_w_action) + embedding_b_action  # b, d
        # encode goal
        embedding_w_goal = tf.get_variable('embedding_w_goal',
                                           [self.dim_action, self.dim_emb])
        embedding_b_goal = tf.get_variable('embedding_b_goal', [self.dim_emb])
        goal_vect = tf.matmul(input_goal, embedding_w_goal) + embedding_b_goal
        input_vect = tf.concat(
            [depth_vect, cmd_vect, prev_a_vect, goal_vect, action_vect],
            axis=1)

        hidden_1 = model_utils.dense_layer(input_vect, self.n_hidden,
                                           'hidden_1')
        hidden_2 = model_utils.dense_layer(hidden_1, self.n_hidden // 2,
                                           'hidden_2')
        q = model_utils.dense_layer(
            hidden_2,
            1,
            'q',
            activation=None,
            w_init=tf.initializers.random_uniform(-0.003, 0.003),
            b_init=tf.initializers.random_uniform(-0.003, 0.003))
        return q
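
A minimal standalone sketch of the learned affine action embedding used above, assuming TensorFlow 1.x; the sizes and placeholder names here are illustrative, not from the source.

import tensorflow as tf

dim_action, dim_emb = 2, 32  # illustrative sizes
input_action = tf.placeholder(tf.float32, [None, dim_action])
# a continuous action is "embedded" with a learned affine map, mirroring
# embedding_w_action / embedding_b_action above
w = tf.get_variable('embedding_w_action', [dim_action, dim_emb])
b = tf.get_variable('embedding_b_action', [dim_emb])
action_vect = tf.matmul(input_action, w) + b  # (batch, dim_emb)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(action_vect, {input_action: [[0.3, 0.1]]}).shape)  # (1, 32)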
Example #2
    def Model(self, inputs):
        input_depth, input_cmd, input_prev_a, rnn_h_in = inputs
        # encode depth image
        conv1 = model_utils.conv2d(input_depth,
                                   4,
                                   5,
                                   4,
                                   scope='conv1',
                                   max_pool=False)
        conv2 = model_utils.conv2d(conv1,
                                   16,
                                   5,
                                   4,
                                   scope='conv2',
                                   max_pool=False)
        conv3 = model_utils.conv2d(conv2,
                                   32,
                                   3,
                                   2,
                                   scope='conv3',
                                   max_pool=False)
        shape = conv3.get_shape().as_list()
        depth_vect = tf.reshape(conv3,
                                shape=[-1,
                                       shape[1] * shape[2] * shape[3]])  # b,d
        # encode cmd
        embedding_cmd = tf.get_variable('cmd_embedding',
                                        [self.n_cmd_type, self.dim_emb])
        cmd_vect = tf.reshape(tf.nn.embedding_lookup(embedding_cmd, input_cmd),
                              [-1, self.dim_emb])
        # encode prev action
        embedding_w_action = tf.get_variable('embedding_w_action',
                                             [self.dim_action, self.dim_emb])
        embedding_b_action = tf.get_variable('embedding_b_action',
                                             [self.dim_emb])
        prev_a_vect = tf.matmul(input_prev_a,
                                embedding_w_action) + embedding_b_action

        input_vect = tf.concat([depth_vect, cmd_vect, prev_a_vect], axis=1)

        # rnn
        if self.rnn_type == 'lstm':
            rnn_cell = model_utils._lstm_cell(self.n_hidden,
                                              1,
                                              name='rnn_cell')
        else:
            rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn_cell')
        rnn_output, rnn_h_out = rnn_cell(input_vect, rnn_h_in)
        # action
        a_linear = model_utils.dense_layer(
            rnn_output, 1, 'a_linear',
            activation=tf.nn.sigmoid) * self.action_range[0]
        a_angular = model_utils.dense_layer(
            rnn_output, 1, 'a_angular',
            activation=tf.nn.tanh) * self.action_range[1]
        pred_action = tf.concat([a_linear, a_angular], axis=1)

        return pred_action, rnn_h_out
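
A sketch of the single-step recurrent call pattern above, assuming TensorFlow 1.x and a plain tf.nn.rnn_cell.GRUCell in place of the model_utils helper; the caller carries the hidden state across environment steps by feeding rnn_h_out back in as rnn_h_in.

import numpy as np
import tensorflow as tf

n_hidden, dim_in = 8, 4  # illustrative sizes
x = tf.placeholder(tf.float32, [None, dim_in])
h_in = tf.placeholder(tf.float32, [None, n_hidden])
cell = tf.nn.rnn_cell.GRUCell(n_hidden, name='rnn_cell')
output, h_out = cell(x, h_in)  # one recurrent step

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    h = np.zeros((1, n_hidden), np.float32)
    for _ in range(3):  # the step loop lives outside the graph
        o, h = sess.run([output, h_out],
                        feed_dict={x: np.ones((1, dim_in), np.float32),
                                   h_in: h})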
Example #3
    def process_demo_sum(self, input_demo_img, input_demo_cmd, demo_len):
        # process demo
        input_demo_img = tf.reshape(input_demo_img,
                                    [-1] + self.dim_img)  # b * n, h, w, c
        input_demo_cmd = tf.reshape(input_demo_cmd,
                                    [-1, self.dim_cmd])  # b * n, dim_cmd
        demo_img_vect = self.encode_image(
            input_demo_img)  # b * n, dim_img_feat
        demo_cmd_vect = tf.reshape(
            tf.nn.embedding_lookup(self.embedding_cmd, input_demo_cmd),
            [-1, self.dim_emb])  # b * n, dim_emb
        demo_vect = tf.concat([demo_img_vect, demo_cmd_vect],
                              axis=1)  # b * n, dim_img_feat+dim_emb
        # 1. sum
        shape = demo_vect.get_shape().as_list()
        demo_vect_seq = tf.reshape(
            demo_vect,
            [-1, self.max_n_demo, shape[-1]])  # b, n, dim_img_feat+dim_emb
        demo_mask = tf.expand_dims(tf.sequence_mask(demo_len,
                                                    maxlen=self.max_n_demo,
                                                    dtype=tf.float32),
                                   axis=2)  # b, n, 1
        demo_mask = tf.tile(demo_mask,
                            [1, 1, shape[-1]])  # b, n, dim_img_feat+dim_emb
        demo_vect_sum = tf.reduce_sum(demo_vect_seq * demo_mask,
                                      axis=1)  # b, dim_img_feat+dim_emb
        demo_dense = model_utils.dense_layer(demo_vect_sum,
                                             self.n_hidden,
                                             scope='demo_dense')  # b, n_hidden
        demo_dense_seq = tf.tile(tf.expand_dims(demo_dense, axis=1),
                                 [1, self.max_step, 1])  # b, l, n_hidden
        demo_dense_seq = tf.reshape(demo_dense_seq,
                                    [-1, self.n_hidden])  # b*l, n_hidden

        return demo_dense_seq, demo_dense
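
A standalone sketch of the masked-sum pooling above, assuming TensorFlow 1.x: tf.sequence_mask zeros out the padded demo slots before the reduce_sum. Broadcasting makes the explicit tf.tile of the mask optional.

import tensorflow as tf

max_n_demo, feat_dim = 4, 3  # illustrative sizes
demo_vect_seq = tf.placeholder(tf.float32, [None, max_n_demo, feat_dim])
demo_len = tf.placeholder(tf.int32, [None])
demo_mask = tf.expand_dims(
    tf.sequence_mask(demo_len, maxlen=max_n_demo, dtype=tf.float32),
    axis=2)  # b, n, 1 -- broadcasts against the feature axis
demo_vect_sum = tf.reduce_sum(demo_vect_seq * demo_mask, axis=1)  # b, feat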
Example #4
    def process_demo_hard_att(self, input_demo_img, input_demo_cmd, img_vect,
                              test_flag, demo_len):
        input_demo_img = tf.reshape(input_demo_img,
                                    [-1] + self.dim_img)  # b * n, h, w, c
        demo_img_vect = self.encode_image(
            input_demo_img)  # b * n, dim_img_feat
        shape = demo_img_vect.get_shape().as_list()
        demo_img_vect = tf.reshape(
            demo_img_vect,
            [-1, self.max_n_demo, shape[-1]])  # b, n, dim_img_feat
        if not test_flag:
            demo_img_vect = tf.tile(
                tf.expand_dims(demo_img_vect, axis=1),
                [1, self.max_step, 1, 1])  # b, l, n, dim_img_feat
            demo_img_vect = tf.reshape(
                demo_img_vect,
                [-1, self.max_n_demo, shape[-1]])  # b*l, n, dim_img_feat
        img_vect = tf.tile(tf.expand_dims(img_vect, axis=1),
                           [1, self.max_n_demo, 1])  # b*l, n, dim_img_feat

        l2_norm = safe_norm(demo_img_vect - img_vect, axis=2)  # b*l, n
        norm_mask = tf.sequence_mask(demo_len,
                                     maxlen=self.max_n_demo,
                                     dtype=tf.float32)  # b, n
        if not test_flag:
            norm_mask = tf.reshape(
                tf.tile(tf.expand_dims(norm_mask, axis=1),
                        [1, self.max_step, 1]),
                [-1, self.max_n_demo])  # b*l, n

        masked_prob = tf.exp(-l2_norm) * norm_mask / tf.tile(
            tf.reduce_sum(tf.exp(-l2_norm) * norm_mask, axis=1, keepdims=True),
            [1, self.max_n_demo])  # b*l, n
        logits = tf.log(masked_prob + 1e-12)  # b*l, n
        att_pos = tf.argmax(logits, axis=1)  # b*l

        shape = tf.shape(img_vect)
        coords = tf.stack(
            [tf.range(shape[0]),
             tf.cast(att_pos, dtype=tf.int32)], axis=1)  # b*l, 2
        attended_demo_img_vect = tf.gather_nd(demo_img_vect,
                                              coords)  # b*l,  dim_img_feat

        demo_cmd_vect = tf.reshape(
            tf.nn.embedding_lookup(self.embedding_cmd, input_demo_cmd),
            [-1, self.max_n_demo, self.dim_emb])  # b, n, dim_emb
        if not test_flag:
            demo_cmd_vect = tf.tile(
                tf.expand_dims(demo_cmd_vect, axis=1),
                [1, self.max_step, 1, 1])  # b, l, n, dim_emb
            demo_cmd_vect = tf.reshape(
                demo_cmd_vect,
                [-1, self.max_n_demo, self.dim_emb])  # b*l, n, dim_emb
            l2_norm = l2_norm + (1. - norm_mask) * 100.
        attended_demo_cmd_vect = tf.gather_nd(demo_cmd_vect,
                                              coords)  # b*l, dim_emb

        demo_vect = tf.concat([attended_demo_img_vect, attended_demo_cmd_vect],
                              axis=1)  # b*l, dim_img_feat+dim_emb
        demo_dense = model_utils.dense_layer(
            demo_vect, self.n_hidden, scope='demo_dense')  # b*l, n_hidden

        return demo_dense, att_pos
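
A sketch of the hard-attention selection step above, assuming TensorFlow 1.x and tf.norm in place of the safe_norm helper: each row picks the demo feature nearest to the current image feature, and tf.gather_nd pulls out that one vector per row.

import tensorflow as tf

b, n, d = 2, 5, 7  # illustrative sizes
demo_img_vect = tf.placeholder(tf.float32, [b, n, d])
img_vect = tf.placeholder(tf.float32, [b, d])
img_tiled = tf.tile(tf.expand_dims(img_vect, axis=1), [1, n, 1])  # b, n, d
l2_norm = tf.norm(demo_img_vect - img_tiled, axis=2)              # b, n
att_pos = tf.argmax(-l2_norm, axis=1, output_type=tf.int32)       # b
coords = tf.stack([tf.range(b), att_pos], axis=1)                 # b, 2
attended = tf.gather_nd(demo_img_vect, coords)                    # b, d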
Example #5
    def model(self, inputs):
        [
            input_demo_img, input_demo_cmd, input_img, input_depth,
            input_prev_action, input_img_test, input_depth_test,
            input_prev_action_test, rnn_h_in, demo_len, seq_len
        ] = inputs

        # training
        input_img = tf.reshape(input_img, [-1] + self.dim_img)  # b*l, dim_img
        img_vect = self.encode_image(input_img)  # b*l, dim_img_feat
        input_depth = tf.reshape(input_depth,
                                 [-1] + self.dim_depth)  # b*l, dim_depth
        depth_vect = self.encode_image(input_depth,
                                       scope='depth')  # b*l, dim_depth_feat
        input_prev_action = tf.reshape(input_prev_action,
                                       [-1, self.dim_action])  # b*l, 1
        prev_action_idx = tf.argmax(input_prev_action, axis=1)
        prev_a_vect = tf.reshape(
            tf.nn.embedding_lookup(self.embedding_a, prev_action_idx),
            [-1, self.dim_emb])  # b*l, dim_emb
        if self.att_mode == 'hard':
            demo_dense_seq, att_pos = self.process_demo_hard_att(
                input_demo_img, input_demo_cmd, img_vect, False, demo_len)
        elif self.att_mode == 'sum':
            demo_dense_seq, _ = self.process_demo_sum(input_demo_img,
                                                      input_demo_cmd, demo_len)
        all_inputs = tf.concat(
            [demo_dense_seq, img_vect, depth_vect, prev_a_vect],
            axis=1)  # b*l, n_hidden+dim_img_feat+dim_emb
        inputs_dense = model_utils.dense_layer(
            all_inputs, self.n_hidden, scope='inputs_dense')  # b*l, n_hidden
        rnn_input = tf.reshape(inputs_dense,
                               [-1, self.max_step, self.n_hidden])
        rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn_cell')
        rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                          rnn_input,
                                          sequence_length=seq_len,
                                          dtype=tf.float32)  # b, l, dim_emb
        rnn_output = tf.reshape(rnn_output,
                                [-1, self.n_hidden])  # b*l, dim_emb
        q = model_utils.dense_layer(rnn_output,
                                    self.dim_action,
                                    scope='q',
                                    activation=None)  # b*l, dim_action

        # testing
        input_img_test = tf.reshape(input_img_test,
                                    [-1] + self.dim_img)  # b, dim_img
        img_vect_test = self.encode_image(input_img_test)  # b, dim_img_feat
        input_depth_test = tf.reshape(input_depth_test,
                                      [-1] + self.dim_depth)  # b, dim_depth
        depth_vect_test = self.encode_image(input_depth_test,
                                            scope='depth')  # b, dim_depth_feat
        input_prev_action_test = tf.reshape(input_prev_action_test,
                                            [-1, self.dim_action])  # b, 1
        prev_action_idx_test = tf.argmax(input_prev_action_test, axis=1)
        prev_a_vect_test = tf.reshape(
            tf.nn.embedding_lookup(self.embedding_a, prev_action_idx_test),
            [-1, self.dim_emb])  # b, dim_emb
        if self.att_mode == 'hard':
            demo_dense, att_pos = self.process_demo_hard_att(
                input_demo_img, input_demo_cmd, img_vect_test, True, demo_len)
        elif self.att_mode == 'sum':
            _, demo_dense = self.process_demo_sum(input_demo_img,
                                                  input_demo_cmd, demo_len)
            att_pos = tf.zeros([1], dtype=tf.int32)
        all_inputs_test = tf.concat(
            [demo_dense, img_vect_test, depth_vect_test, prev_a_vect_test],
            axis=1)
        inputs_dense_test = model_utils.dense_layer(all_inputs_test,
                                                    self.n_hidden,
                                                    scope='inputs_dense')
        rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn/rnn_cell')
        rnn_output, rnn_h_out = rnn_cell(inputs_dense_test,
                                         rnn_h_in)  # b, n_hidden | b, n_hidden
        q_test = model_utils.dense_layer(rnn_output,
                                         self.dim_action,
                                         scope='q_test',
                                         activation=None)  # b, dim_action
        return q, q_test, rnn_h_out, att_pos
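
A sketch of the length-masked tf.nn.dynamic_rnn pattern above, assuming TensorFlow 1.x; steps past sequence_length emit zero outputs and stop updating the state, which is why the flattened rnn_output can be fed straight into the shared q layer.

import tensorflow as tf

max_step, n_hidden = 6, 8  # illustrative sizes
rnn_input = tf.placeholder(tf.float32, [None, max_step, n_hidden])
seq_len = tf.placeholder(tf.int32, [None])
rnn_cell = tf.nn.rnn_cell.GRUCell(n_hidden, name='rnn_cell')
rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                  rnn_input,
                                  sequence_length=seq_len,
                                  dtype=tf.float32)  # b, l, n_hidden
flat_output = tf.reshape(rnn_output, [-1, n_hidden])  # b*l, n_hidden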
Example #6
    def __init__(self,
                 sess,
                 batch_size,
                 max_step,
                 demo_len,
                 n_layers,
                 n_hidden,
                 dim_a=2,
                 dim_img=[64, 64, 3],
                 action_range=[0.3, np.pi / 6],
                 learning_rate=1e-3,
                 test_only=False,
                 use_demo_action=False,
                 use_demo_image=False):
        self.sess = sess
        self.batch_size = batch_size
        self.max_step = max_step
        self.demo_len = demo_len
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.dim_a = dim_a
        self.dim_img = dim_img
        self.action_range = action_range
        self.learning_rate = learning_rate
        self.test_only = test_only
        self.use_demo_action = use_demo_action

        with tf.variable_scope('network', reuse=tf.AUTO_REUSE):
            # training input
            self.input_demo_img = tf.placeholder(
                tf.float32,
                shape=[None, demo_len] + dim_img,
                name='input_demo_img')  #b,l of demo,h,d,c
            self.input_demo_a = tf.placeholder(
                tf.float32, shape=[None, demo_len,
                                   dim_a], name='input_demo_a')  #b,l of demo,2
            self.input_eta = tf.placeholder(tf.float32,
                                            shape=[None],
                                            name='input_eta')  #b

            self.input_img = tf.placeholder(
                tf.float32,
                shape=[None, max_step, dim_img[0], dim_img[1], dim_img[2]],
                name='input_img')  #b,l,h,d,c
            self.label_a = tf.placeholder(tf.float32,
                                          shape=[None, max_step, dim_a],
                                          name='label_a')  #b,l,2
            self.gru_h_in = tf.placeholder(tf.float32,
                                           shape=[None, n_hidden],
                                           name='gru_h_in')  #b,n_hidden

            # testing input
            self.input_demo_img_test = tf.placeholder(
                tf.float32, shape=[None] + dim_img,
                name='input_demo_img_test')  #l of demo,h,d,c
            self.input_demo_a_test = tf.placeholder(
                tf.float32, shape=[None, dim_a],
                name='input_demo_a_test')  #l of demo,2
            self.input_img_test = tf.placeholder(tf.float32,
                                                 shape=[1] + dim_img,
                                                 name='input_img_test')  #h,d,c
            self.input_eta_test = tf.placeholder(tf.float32,
                                                 shape=[],
                                                 name='input_eta_test')

            # create gru cell
            gru_cell = model_utils._gru_cell(n_hidden, 1, name='gru_cell')

            # training
            if not test_only:
                # process demo seq
                input_demo_img_reshape = tf.reshape(
                    self.input_demo_img, [-1] + dim_img)  # b *l of demo,h,d,c
                input_demo_a_reshape = tf.reshape(
                    self.input_demo_a, [-1, dim_a])  #b * l of demo, 2
                demo_img_vect = self.encode_image(
                    input_demo_img_reshape)  #b * l of demo, -1
                assert use_demo_action or use_demo_image, 'use demo image or action or both!'
                if use_demo_action and use_demo_image:
                    print('use demo action and image')
                    demo_vect = tf.concat(
                        [demo_img_vect, input_demo_a_reshape],
                        axis=1)  #b * l of demo, -1
                elif use_demo_image:
                    print('only use demo image')
                    demo_vect = demo_img_vect
                elif use_demo_action:
                    print('only use demo action')
                    demo_vect = input_demo_a_reshape

                hidden1 = model_utils.dense_layer(demo_vect,
                                                  n_hidden,
                                                  scope='dense1_demo')
                demo_feat = model_utils.dense_layer(
                    hidden1, n_hidden,
                    scope='dense2_demo')  #b * l of demo, n_hidden
                demo_feat_reshape = tf.reshape(
                    demo_feat,
                    [-1, demo_len, n_hidden])  #b, l of demo, n_hidden
                demo_feat_list = tf.unstack(demo_feat_reshape,
                                            axis=1)  # l of demo [b, n_hidden]

                # process observation seq
                input_img_reshape = tf.reshape(self.input_img,
                                               [-1] + dim_img)  #b * l, h, d, c
                img_vect = self.encode_image(input_img_reshape)  # b * l, -1
                shape = img_vect.get_shape().as_list()
                img_vect_reshape = tf.reshape(
                    img_vect, [-1, max_step, shape[1]])  # b, l, -1
                img_vect_list = tf.unstack(img_vect_reshape,
                                           axis=1)  # l [b, n_hidden]

                action_list = []
                eta = tf.identity(self.input_eta, name='init_eta')
                eta_list = []
                gru_h_in = self.gru_h_in
                for t, img_vect in enumerate(img_vect_list):
                    mu_t_list = []
                    for j, demo_feat in enumerate(demo_feat_list):
                        w_j = tf.exp(-tf.abs(eta - j))  #b
                        w_j_expand = tf.expand_dims(w_j, axis=1)  #b, 1
                        w_j_tile = tf.tile(w_j_expand,
                                           multiples=[1,
                                                      n_hidden])  #b, n_hidden
                        mu_t_list.append(demo_feat * w_j_tile)
                    mu_t = tf.add_n(mu_t_list)
                    input_t = tf.concat([mu_t, img_vect],
                                        axis=1)  #b, n_hidden + dim of img vect
                    gru_output, self.gru_h_out = gru_cell(input_t, gru_h_in)
                    gru_h_in = self.gru_h_out
                    increment = 1. + model_utils.dense_layer(
                        gru_output,
                        1,
                        activation=tf.nn.tanh,
                        scope='dense_increment')  #b, 1
                    increment = tf.squeeze(increment, axis=[1])  #b
                    eta = tf.identity(eta + increment, name='eta_{}'.format(t))
                    eta_list.append(eta)
                    action_linear = model_utils.dense_layer(
                        gru_output,
                        dim_a // 2,
                        activation=tf.nn.sigmoid,
                        scope='dense_a_linear') * action_range[0]  #b,1
                    action_angular = model_utils.dense_layer(
                        gru_output,
                        dim_a // 2,
                        activation=tf.nn.tanh,
                        scope='dense_a_angular') * action_range[1]  #b,1
                    action_list.append(
                        tf.concat([action_linear, action_angular],
                                  axis=1))  #l[b,2]
                self.action_seq = tf.stack(action_list, axis=1)  #b, l, 2
                self.eta_array = tf.stack(eta_list, axis=1)  #b, l
                self.loss = tf.losses.mean_squared_error(
                    labels=self.label_a, predictions=self.action_seq)
                start_time = time.time()
                self.opt = tf.train.AdamOptimizer(
                    learning_rate=learning_rate).minimize(self.loss)
                print('construct opt time: {:.3f}'.format(time.time() -
                                                          start_time))

            # testing
            # process demo seq
            demo_img_vect = self.encode_image(
                self.input_demo_img_test)  # l of demo, -1
            if use_demo_action and use_demo_image:
                demo_vect = tf.concat([demo_img_vect, self.input_demo_a_test],
                                      axis=1)  # l of demo, -1
            elif use_demo_image:
                demo_vect = demo_img_vect
            elif use_demo_action:
                demo_vect = self.input_demo_a_test
            hidden1 = model_utils.dense_layer(demo_vect,
                                              n_hidden,
                                              scope='dense1_demo')
            demo_feat = model_utils.dense_layer(
                hidden1, n_hidden, scope='dense2_demo')  # l of demo, n_hidden

            tensor_array = tf.TensorArray(tf.float32,
                                          0,
                                          dynamic_size=True,
                                          infer_shape=True,
                                          element_shape=[n_hidden])
            demo_feat_array = tensor_array.unstack(demo_feat)
            seq_len = tf.shape(demo_feat)[0]

            mu_t = tf.zeros([n_hidden], name='mu_t')
            # demo_feat_list= tf.unstack(demo_feat_reshape, axis=1) # l of demo [1, n_hidden]

            # process observation
            img_vect = self.encode_image(self.input_img_test)  # 1, -1
            eta = tf.identity(self.input_eta_test, name='eta_in')
            gru_h_in = self.gru_h_in

            # mu_t_list = []
            # for j, demo_feat in enumerate(demo_feat_list):
            #     w_j = tf.exp(-tf.abs(eta - j)) #1
            #     w_j_expand = tf.expand_dims(w_j, axis=1) #1, 1
            #     w_j_tile = tf.tile(w_j_expand, multiples=[1, n_hidden]) #1, n_hidden
            #     mu_t_list.append(demo_feat * w_j_tile)
            # mu_t = tf.add_n(mu_t_list)

            def body(demo_idx, mu_t_in):
                w_j = tf.exp(-tf.abs(eta - tf.cast(demo_idx, tf.float32)))
                w_j_expand = tf.expand_dims(w_j, axis=0)  #1
                w_j_tile = tf.tile(w_j_expand, multiples=[n_hidden])  #n_hidden
                demo_feat_t = demo_feat_array.read(demo_idx)  # n_hidden
                return (demo_idx + 1, mu_t_in + demo_feat_t * w_j_tile)

            def condition(demo_idx, output):
                return demo_idx < seq_len

            demo_idx = 0
            t_final, mu_t_final = tf.while_loop(cond=condition,
                                                body=body,
                                                loop_vars=[demo_idx, mu_t])
            mu_t_expand = tf.expand_dims(mu_t_final, axis=0)  # 1, n_hidden

            input_t = tf.concat([mu_t_expand, img_vect],
                                axis=1)  #1, n_hidden*2
            gru_output, self.gru_h_out = gru_cell(input_t, gru_h_in)
            gru_h_in = self.gru_h_out
            increment = 1. + model_utils.dense_layer(
                gru_output, 1, activation=tf.nn.tanh,
                scope='dense_increment')  #b, 1
            increment = tf.squeeze(increment, axis=[1])  #1
            self.eta = eta + increment
            action_linear = model_utils.dense_layer(
                gru_output,
                dim_a // 2,
                activation=tf.nn.sigmoid,
                scope='dense_a_linear') * action_range[0]  #b,1
            action_angular = model_utils.dense_layer(
                gru_output,
                dim_a // 2,
                activation=tf.nn.tanh,
                scope='dense_a_angular') * action_range[1]  #b,1
            self.action = tf.concat([action_linear, action_angular], axis=1)
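
A standalone sketch of the soft demo pointer used in both branches above, assuming TensorFlow 1.x: demo feature j is weighted by exp(-|eta - j|), so features near the fractional pointer eta dominate the mixture mu_t. This vectorized form is equivalent to the per-j Python loop in the training branch and to the tf.while_loop in the testing branch.

import tensorflow as tf

n_demo, n_hidden = 5, 8  # illustrative sizes
demo_feat = tf.placeholder(tf.float32, [None, n_demo, n_hidden])
eta = tf.placeholder(tf.float32, [None])  # fractional demo index, shape (b,)
j = tf.range(n_demo, dtype=tf.float32)                # (n,)
w = tf.exp(-tf.abs(tf.expand_dims(eta, axis=1) - j))  # (b, n)
mu_t = tf.reduce_sum(demo_feat * tf.expand_dims(w, axis=2),
                     axis=1)                          # (b, n_hidden)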
Example #7
    def process_demo_hard_att(self, input_demo_img, input_demo_cmd, img_vect,
                              test_flag, demo_len):

        input_demo_img = tf.reshape(input_demo_img,
                                    [-1] + self.dim_img)  # b * n, h, w, c
        demo_img_vect = self.encode_image(
            input_demo_img)  # b * n, dim_img_feat
        self.demo_img_vect = demo_img_vect
        shape = demo_img_vect.get_shape().as_list()
        demo_img_vect = tf.reshape(
            demo_img_vect,
            [-1, self.max_n_demo, shape[-1]])  # b, n, dim_img_feat
        if not test_flag:
            demo_img_vect = tf.tile(
                tf.expand_dims(demo_img_vect, axis=1),
                [1, self.max_step, 1, 1])  # b, l, n, dim_img_feat
            demo_img_vect = tf.reshape(
                demo_img_vect,
                [-1, self.max_n_demo, shape[-1]])  # b*l, n, dim_img_feat
        img_vect = tf.tile(tf.expand_dims(img_vect, axis=1),
                           [1, self.max_n_demo, 1])  # b*l, n, dim_img_feat

        print('attention mode: argmax hard')
        l2_norm = safe_norm(demo_img_vect - img_vect, axis=2)  # b*l, n
        norm_mask = tf.sequence_mask(demo_len,
                                     maxlen=self.max_n_demo,
                                     dtype=tf.float32)  # b, n
        if not test_flag:
            norm_mask = tf.reshape(
                tf.tile(tf.expand_dims(norm_mask, axis=1),
                        [1, self.max_step, 1]),
                [-1, self.max_n_demo])  # b*l, n
        # logits = tf.log(tf.nn.softmax(-l2_norm)) # b*l, n
        # masked_prob = tf.nn.softmax(-l2_norm)*norm_mask

        # if test_flag:
        #     x = tf.squeeze(self.prev_att_pos)
        #     y = tf.squeeze(demo_len) - 1
        #     def f0(): return self.prev_att_pos, 0  # 1
        #     def f1(): return tf.concat([self.prev_att_pos, self.prev_att_pos+1], axis=0), 0 # 2
        #     indicies, shift = tf.case({tf.equal(x, y): f0},
        #                                default=f1, exclusive=True)
        #     l2_norm = tf.gather(tf.squeeze(l2_norm), indicies) # 2~3
        #     self.part_l2_norm = l2_norm
        #     self.indicies = indicies
        #     masked_prob = tf.nn.softmax(-l2_norm) # 2~3
        #     self.l2_norm = masked_prob
        #     logits = tf.log(masked_prob + 1e-12) # 2~3
        #     def f5(): return tf.expand_dims(tf.argmax(logits, output_type=tf.int32) + x + shift, axis=0)
        #     def f6(): return self.prev_att_pos
        #     att_pos = tf.cond(tf.reduce_max(masked_prob) > 0.99, f5, f6)
        # else:
        #     masked_prob = tf.exp(-l2_norm)*norm_mask / tf.tile(tf.reduce_sum(tf.exp(-l2_norm)*norm_mask,
        #                                                                      axis=1,
        #                                                                      keepdims=True),
        #                                                        [1, self.max_n_demo]) # b*l, n
        #     logits = tf.log(masked_prob + 1e-12) # b*l, n
        #     att_pos = tf.argmax(logits, axis=1) # b*l
        masked_prob = tf.exp(-l2_norm) * norm_mask / tf.tile(
            tf.reduce_sum(tf.exp(-l2_norm) * norm_mask, axis=1, keepdims=True),
            [1, self.max_n_demo])  # b*l, n
        logits = tf.log(masked_prob + 1e-12)  # b*l, n
        att_pos = tf.argmax(logits, axis=1)  # b*l
        self.prob = masked_prob
        self.l2_norm = l2_norm

        shape = tf.shape(img_vect)
        coords = tf.stack(
            [tf.range(shape[0]),
             tf.cast(att_pos, dtype=tf.int32)], axis=1)  # b*l, 2
        attended_demo_img_vect = tf.gather_nd(demo_img_vect,
                                              coords)  # b*l,  dim_img_feat

        demo_cmd_vect = tf.reshape(
            tf.nn.embedding_lookup(self.embedding_cmd, input_demo_cmd),
            [-1, self.max_n_demo, self.dim_emb])  # b, n, dim_emb
        if not test_flag:
            demo_cmd_vect = tf.tile(
                tf.expand_dims(demo_cmd_vect, axis=1),
                [1, self.max_step, 1, 1])  # b, l, n, dim_emb
            demo_cmd_vect = tf.reshape(
                demo_cmd_vect,
                [-1, self.max_n_demo, self.dim_emb])  # b*l, n, dim_emb
            l2_norm = l2_norm + (1. - norm_mask) * 100.
        attended_demo_cmd_vect = tf.gather_nd(demo_cmd_vect,
                                              coords)  # b*l, dim_emb

        if self.inputs_num == 1:
            demo_vect = attended_demo_cmd_vect  # b*l, dim_emb
        else:
            demo_vect = tf.concat(
                [attended_demo_img_vect, attended_demo_cmd_vect],
                axis=1)  # b*l, dim_img_feat+dim_emb
        demo_dense = model_utils.dense_layer(
            demo_vect, self.n_hidden, scope='demo_dense')  # b*l, n_hidden

        return demo_dense, att_pos, logits, masked_prob, l2_norm
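
A sketch of the masked softmax over negative distances above, assuming TensorFlow 1.x; padded demo slots get zero probability before renormalization, and broadcasting makes the explicit tf.tile of the denominator optional.

import tensorflow as tf

max_n_demo = 5  # illustrative size
l2_norm = tf.placeholder(tf.float32, [None, max_n_demo])
norm_mask = tf.placeholder(tf.float32, [None, max_n_demo])
unnorm = tf.exp(-l2_norm) * norm_mask
masked_prob = unnorm / tf.reduce_sum(unnorm, axis=1, keepdims=True)
logits = tf.log(masked_prob + 1e-12)  # epsilon avoids log(0) on masked slots
att_pos = tf.argmax(logits, axis=1)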
Example #8
    def __init__(self,
                 sess,
                 batch_size,
                 max_step,
                 demo_len,
                 n_layers,
                 n_hidden,
                 dim_a=2,
                 dim_img=[64, 64, 3],
                 action_range=[0.3, np.pi/6],
                 learning_rate=1e-3,
                 test_only=False,
                 use_demo_action=False,
                 use_demo_image=False,
                 use_flownet=False):
        self.sess = sess
        self.batch_size = batch_size
        self.max_step = max_step
        self.demo_len = demo_len
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.dim_a = dim_a
        self.dim_img = dim_img
        self.action_range = action_range
        self.learning_rate = learning_rate
        self.test_only = test_only
        self.use_demo_action = use_demo_action

          
        # training input
        self.input_ob = tf.placeholder(tf.float32, 
                                       shape=[None, max_step]+dim_img, 
                                       name='input_observation') #b,l,h,d,c
        self.input_demo = tf.placeholder(tf.float32, 
                                         shape=[None, max_step]+dim_img, 
                                         name='input_demo') #b,l,h,d,c
        self.label_a = tf.placeholder(tf.float32, 
                                      shape=[None, max_step, dim_a], 
                                      name='label_a') #b,l,2
        self.gru_h_in = tf.placeholder(tf.float32, 
                                       shape=[None, n_hidden], 
                                       name='gru_h_in') #b,n_hidden
        seq_lens = tf.constant(max_step, dtype=tf.int32,
                               shape=[batch_size])  # every padded sequence runs max_step steps
        # create gru cell
        gru_cell = model_utils._gru_cell(n_hidden, 1, name='gru_cell')


        # process demo seq
        # input_img_pair = tf.concat([self.input_ob, self.input_demo], axis=4)# b,l,h,d,c*2
        # input_img_pair_reshape = tf.reshape(input_img_pair, [-1, dim_img[0], dim_img[1], dim_img[2]*2])
        # img_vector = self.encode_image(input_img_pair_reshape) # b*l, d
        input_ob_reshape = tf.reshape(self.input_ob, [-1]+dim_img)
        input_demo_reshape = tf.reshape(self.input_demo, [-1]+dim_img)
        concat_inputs = tf.concat([input_ob_reshape, input_demo_reshape], axis=3) #b*l,h,w,c*2
        if use_flownet:
            img_vector = get_flownet_feature(concat_inputs) # b*l, d
        else:
            img_vector = self.encode_image(concat_inputs) # b*l, d
        
        with tf.variable_scope('memory', reuse=tf.AUTO_REUSE):  
            shape = img_vector.get_shape().as_list()
            img_vector_seqs = tf.reshape(img_vector, [-1, max_step, shape[-1]])
            gru_outputs, gru_state = tf.nn.dynamic_rnn(gru_cell, 
                                                       img_vector_seqs, 
                                                       initial_state=self.gru_h_in, 
                                                       sequence_length=seq_lens)
            gru_outputs_reshape = tf.reshape(gru_outputs, [-1, n_hidden])
            action_linear = model_utils.dense_layer(gru_outputs_reshape, dim_a // 2,
                                                    activation=tf.nn.sigmoid,
                                                    scope='dense_a_linear') * action_range[0] #b*l,1
            action_angular = model_utils.dense_layer(gru_outputs_reshape, dim_a // 2,
                                                     activation=tf.nn.tanh,
                                                     scope='dense_a_angular') * action_range[1] #b*l,1
            action = tf.concat([action_linear, action_angular], axis=1) #b*l,2
            self.action_seq = tf.reshape(action, [-1, max_step, 2]) # b,l,2
            self.loss = tf.losses.mean_squared_error(labels=self.label_a, 
                                                    predictions=self.action_seq)
            self.opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss)
Example #9
    def testing_model(self, inputs):
        input_demo_img, input_demo_cmd, input_img, input_prev_cmd, input_prev_action, rnn_h_in, demo_len = inputs
        # process observation
        input_img = tf.reshape(input_img, [-1] + self.dim_img)  # b, dim_img
        img_vect = self.encode_image(input_img)  # b, dim_img_feat
        prev_cmd_vect = tf.reshape(
            tf.nn.embedding_lookup(self.embedding_cmd, input_prev_cmd),
            [-1, self.dim_emb])  # b, dim_emb
        input_prev_action = tf.reshape(input_prev_action,
                                       [-1, self.dim_a])  # b, dim_a
        prev_a_vect = model_utils.dense_layer(input_prev_action,
                                              self.dim_emb,
                                              scope='a_embedding',
                                              activation=None)  # b, dim_emb

        # process demo
        if self.demo_mode == 'sum':
            _, demo_dense = self.process_demo_sum(input_demo_img,
                                                  input_demo_cmd,
                                                  demo_len)  # b, n_hidden
            att_pos = tf.zeros([1, 1], dtype=tf.int32)
            prob = tf.zeros([1, self.max_n_demo], dtype=tf.float32)
            l2_norm = tf.zeros([1, self.max_n_demo], dtype=tf.float32)
        elif self.demo_mode == 'hard':
            demo_dense, att_pos, att_logits, prob, l2_norm = self.process_demo_hard_att(
                input_demo_img, input_demo_cmd, img_vect, True, demo_len)
        if self.inputs_num <= 2:
            all_inputs = demo_dense
        elif self.inputs_num == 3:
            all_inputs = tf.concat([demo_dense, img_vect],
                                   axis=1)  # b, n_hidden+dim_img_feat
        elif self.inputs_num == 4:
            all_inputs = tf.concat([demo_dense, img_vect, prev_cmd_vect],
                                   axis=1)  # b, n_hidden+dim_img_feat
        elif self.inputs_num == 5:
            all_inputs = tf.concat(
                [demo_dense, img_vect, prev_cmd_vect, prev_a_vect],
                axis=1)  # b, n_hidden+dim_img_feat+dim_emb*2
        inputs_dense = model_utils.dense_layer(
            all_inputs, self.n_hidden, scope='inputs_dense')  # b, n_hidden

        if self.post_att_model == 'gru':
            rnn_cell = model_utils._gru_cell(self.n_hidden,
                                             1,
                                             name='rnn/rnn_cell')
            rnn_output, rnn_h_out = rnn_cell(
                inputs_dense, rnn_h_in)  # b, n_hidden | b, n_hidden
            logits = model_utils.dense_layer(rnn_output,
                                             self.n_cmd_type,
                                             scope='logits',
                                             activation=None)  # b, n_cmd_type
        elif self.post_att_model == 'dense':
            dense = model_utils.dense_layer(inputs_dense,
                                            self.n_hidden // 2,
                                            scope='dense')  # b, n_hidden/2
            logits = model_utils.dense_layer(dense,
                                             self.n_cmd_type,
                                             scope='logits',
                                             activation=None)  # b, n_cmd_type
            rnn_h_out = rnn_h_in
        predict = tf.argmax(logits, axis=1)  # b

        max_prob = tf.reduce_max(prob)  # b
        min_norm = tf.reduce_min(l2_norm)
        return predict, rnn_h_out, att_pos, max_prob, min_norm
Example #10
    def training_model(self, inputs):
        input_demo_img, input_demo_cmd, input_img, input_prev_cmd, input_prev_action, label_cmd, demo_len, seq_len = inputs

        # process observation
        input_img = tf.reshape(input_img, [-1] + self.dim_img)  # b*l, dim_img
        img_vect = self.encode_image(input_img)  # b*l, dim_img_feat
        prev_cmd_vect = tf.reshape(
            tf.nn.embedding_lookup(self.embedding_cmd, input_prev_cmd),
            [-1, self.dim_emb])  # b*l, dim_emb
        input_prev_action = tf.reshape(input_prev_action,
                                       [-1, self.dim_a])  # b*l, dim_a
        prev_a_vect = model_utils.dense_layer(input_prev_action,
                                              self.dim_emb,
                                              scope='a_embedding',
                                              activation=None)  # b*l, dim_emb

        # process demo
        if self.demo_mode == 'sum':
            demo_dense_seq, _ = self.process_demo_sum(
                input_demo_img, input_demo_cmd, demo_len)  # b*l, n_hidden
            att_pos = tf.zeros([self.batch_size, self.max_step],
                               dtype=tf.int32)
            att_loss = tf.zeros([], dtype=tf.float32)
        elif self.demo_mode == 'hard':
            demo_dense_seq, att_pos, att_logits, prob, _ = self.process_demo_hard_att(
                input_demo_img, input_demo_cmd, img_vect, False, demo_len)
        # post-attention inputs
        # dropouts
        if not self.test:
            demo_dense_seq = tf.nn.dropout(demo_dense_seq,
                                           rate=1. - self.keep_prob)
            img_vect = tf.nn.dropout(img_vect, rate=1. - self.keep_prob)
            prev_cmd_vect = tf.nn.dropout(prev_cmd_vect,
                                          rate=1. - self.keep_prob)
            prev_a_vect = tf.nn.dropout(prev_a_vect, rate=1. - self.keep_prob)

        if self.inputs_num <= 2:
            all_inputs = demo_dense_seq
        elif self.inputs_num == 3:
            all_inputs = tf.concat([demo_dense_seq, img_vect],
                                   axis=1)  # b*l, n_hidden+dim_img_feat
        elif self.inputs_num == 4:
            all_inputs = tf.concat([demo_dense_seq, img_vect, prev_cmd_vect],
                                   axis=1)  # b*l, n_hidden+dim_img_feat
        elif self.inputs_num == 5:
            all_inputs = tf.concat(
                [demo_dense_seq, img_vect, prev_cmd_vect, prev_a_vect],
                axis=1)  # b*l, n_hidden+dim_img_feat+dim_emb*2
        inputs_dense = model_utils.dense_layer(
            all_inputs, self.n_hidden, scope='inputs_dense')  # b*l, n_hidden

        # post-attention model
        if self.post_att_model == 'gru':
            print('post attention model: gru')
            # rnn
            rnn_input = tf.reshape(inputs_dense,
                                   [-1, self.max_step, self.n_hidden])
            rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn_cell')
            rnn_output, _ = tf.nn.dynamic_rnn(
                rnn_cell, rnn_input, sequence_length=seq_len,
                dtype=tf.float32)  # b, l, dim_emb
            # output = tf.reshape(rnn_output, [-1, 1, self.n_hidden]) # b*l, 1, dim_emb
            rnn_output = tf.reshape(rnn_output,
                                    [-1, self.n_hidden])  # b*l, dim_emb
            logits = model_utils.dense_layer(
                rnn_output, self.n_cmd_type, scope='logits',
                activation=None)  # b*l, n_cmd_type
        elif self.post_att_model == 'dense':
            print('post attention model: dense')
            dense_output = model_utils.dense_layer(
                inputs_dense, self.n_hidden, scope='dense')  # b*l, n_hidden
            # output = tf.reshape(dense_output, [-1, 1, self.n_hidden]) # b*l, 1, dim_emb
            logits = model_utils.dense_layer(
                dense_output, self.n_cmd_type, scope='logits',
                activation=None)  # b*l, n_cmd_type

        # predict
        pred_mask = tf.sequence_mask(seq_len,
                                     maxlen=self.max_step,
                                     dtype=tf.int32)  # b, l
        pred = tf.argmax(tf.reshape(logits,
                                    [-1, self.max_step, self.n_cmd_type]),
                         axis=2,
                         output_type=tf.int32) * pred_mask  # b, l

        # cmd_loss
        label_cmd = tf.reshape(label_cmd, [-1])  # b*l
        loss_mask = tf.reshape(
            tf.sequence_mask(seq_len, maxlen=self.max_step, dtype=tf.float32),
            [-1])  # b*l
        cmd_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_cmd, logits=logits) * loss_mask  # b*l
        cmd_loss = tf.reduce_sum(cmd_loss) / tf.cast(tf.reduce_sum(seq_len),
                                                     tf.float32)

        # accuracy
        correct_pred = tf.equal(pred, tf.reshape(label_cmd,
                                                 [-1, self.max_step]))  # b, l
        batch_correct_num = tf.reduce_sum(tf.cast(correct_pred, tf.int32),
                                          axis=1)  # b
        batch_accuracy = tf.cast(
            (batch_correct_num - tf.reduce_sum(1 - pred_mask, axis=1)),
            tf.float32) / tf.cast(tf.reduce_sum(pred_mask, axis=1),
                                  tf.float32)  # b
        all_correct_num = tf.reduce_sum(tf.cast(correct_pred,
                                                tf.int32))  # scalar
        all_accuracy = tf.cast(
            (all_correct_num - tf.reduce_sum(1 - pred_mask)),
            tf.float32) / tf.cast(tf.reduce_sum(pred_mask), tf.float32)

        if self.demo_mode == 'hard':
            # reinforce
            sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=att_logits, labels=att_pos) * loss_mask  # b*l
            sample_loss = tf.reduce_sum(sample_loss) / tf.reduce_sum(loss_mask)
            reward_estimate = model_utils.reward_estimate(
                all_inputs, all_accuracy) * loss_mask  # b*l
            select_loss = sample_loss * tf.stop_gradient(
                reward_estimate)  # b*l
            select_loss = tf.reduce_sum(select_loss /
                                        tf.reduce_sum(loss_mask))  # scalar
            baseline_loss = tf.reduce_sum(
                tf.square(reward_estimate)) / tf.reduce_sum(loss_mask)
            att_loss = select_loss + baseline_loss
            att_mask = tf.sequence_mask(seq_len,
                                        maxlen=self.max_step,
                                        dtype=att_pos.dtype)  # b, l
            att_pos = tf.reshape(att_pos,
                                 [-1, self.max_step]) * att_mask  # b, l

        return [all_accuracy, cmd_loss, att_loss, pred, att_pos]
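
A sketch of the length-masked sequence cross-entropy above, assuming TensorFlow 1.x; per-step losses on padded positions are zeroed by the mask, and the sum is normalized by the number of real steps.

import tensorflow as tf

max_step, n_cmd_type = 6, 4  # illustrative sizes
logits = tf.placeholder(tf.float32, [None, n_cmd_type])  # b*l rows
label_cmd = tf.placeholder(tf.int32, [None])             # b*l
seq_len = tf.placeholder(tf.int32, [None])               # b
loss_mask = tf.reshape(
    tf.sequence_mask(seq_len, maxlen=max_step, dtype=tf.float32), [-1])
cmd_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=label_cmd, logits=logits) * loss_mask         # b*l
cmd_loss = tf.reduce_sum(cmd_loss) / tf.cast(tf.reduce_sum(seq_len),
                                             tf.float32)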
Example #11
    def Model(self, inputs):
        input_depth, input_cmd, input_prev_a, input_action, gru_h_in, length = inputs
        # encode depth image
        conv1 = model_utils.conv2d(input_depth,
                                   4,
                                   5,
                                   4,
                                   scope='conv1',
                                   max_pool=False)
        conv2 = model_utils.conv2d(conv1,
                                   16,
                                   5,
                                   4,
                                   scope='conv2',
                                   max_pool=False)
        conv3 = model_utils.conv2d(conv2,
                                   32,
                                   3,
                                   2,
                                   scope='conv3',
                                   max_pool=False)
        shape = conv3.get_shape().as_list()
        depth_vect = tf.reshape(conv3,
                                shape=[-1, shape[1] * shape[2] * shape[3]
                                       ])  # b*l,d
        # encode cmd
        embedding_cmd = tf.get_variable('cmd_embedding',
                                        [self.n_cmd_type, self.dim_emb])
        cmd_vect = tf.reshape(tf.nn.embedding_lookup(embedding_cmd, input_cmd),
                              [-1, self.dim_emb])
        # encode prev action and action
        embedding_w_action = tf.get_variable('embedding_w_action',
                                             [self.dim_action, self.dim_emb])
        embedding_b_action = tf.get_variable('embedding_b_action',
                                             [self.dim_emb])
        prev_a_vect = tf.matmul(input_prev_a,
                                embedding_w_action) + embedding_b_action
        action_vect = tf.matmul(input_action,
                                embedding_w_action) + embedding_b_action

        input_vect = tf.concat(
            [depth_vect, cmd_vect, prev_a_vect, action_vect], axis=1)
        rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='gru_cell')
        shape = input_vect.get_shape().as_list()
        input_vect_reshape = tf.reshape(input_vect,
                                        [-1, self.max_step, shape[-1]])
        rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                          input_vect_reshape,
                                          sequence_length=length,
                                          dtype=tf.float32)  # b, l, h
        rnn_output_reshape = tf.reshape(rnn_output,
                                        [-1, self.n_hidden])  # b*l, h
        # q
        q = model_utils.dense_layer(
            rnn_output_reshape,
            1,
            'q',
            activation=None,
            w_init=tf.initializers.random_uniform(-0.003, 0.003),
            b_init=tf.initializers.random_uniform(-0.003, 0.003))

        return [q]
Example #12
    def Model(self, inputs):
        input_depth, input_cmd, input_prev_a, gru_h_in, length = inputs
        # encode depth image
        conv1 = model_utils.conv2d(input_depth,
                                   4,
                                   5,
                                   4,
                                   scope='conv1',
                                   max_pool=False)
        conv2 = model_utils.conv2d(conv1,
                                   16,
                                   5,
                                   4,
                                   scope='conv2',
                                   max_pool=False)
        conv3 = model_utils.conv2d(conv2,
                                   32,
                                   3,
                                   2,
                                   scope='conv3',
                                   max_pool=False)
        shape = conv3.get_shape().as_list()
        depth_vect = tf.reshape(conv3,
                                shape=[-1,
                                       shape[1] * shape[2] * shape[3]])  # b,d
        # encode cmd
        embedding_cmd = tf.get_variable('cmd_embedding',
                                        [self.n_cmd_type, self.dim_emb])
        cmd_vect = tf.reshape(tf.nn.embedding_lookup(embedding_cmd, input_cmd),
                              [-1, self.dim_emb])
        # encode prev action
        embedding_w_action = tf.get_variable('embedding_w_action',
                                             [self.dim_action, self.dim_emb])
        embedding_b_action = tf.get_variable('embedding_b_action',
                                             [self.dim_emb])
        prev_a_vect = tf.matmul(input_prev_a,
                                embedding_w_action) + embedding_b_action

        input_vect = tf.concat([depth_vect, cmd_vect, prev_a_vect], axis=1)
        gru_cell = model_utils._gru_cell(self.n_hidden, 1, name='gru_cell')

        # training
        shape = input_vect.get_shape().as_list()
        input_vect_reshape = tf.reshape(input_vect,
                                        [-1, self.max_step, shape[-1]])
        gru_output, _ = tf.nn.dynamic_rnn(gru_cell,
                                          input_vect_reshape,
                                          sequence_length=length,
                                          dtype=tf.float32)  # b, l, h
        gru_output_reshape = tf.reshape(gru_output,
                                        [-1, self.n_hidden])  # b*l, h
        # action
        a_linear = model_utils.dense_layer(
            gru_output_reshape, 1, 'a_linear',
            activation=tf.nn.sigmoid) * self.action_range[0]
        a_angular = model_utils.dense_layer(
            gru_output_reshape, 1, 'a_angular',
            activation=tf.nn.tanh) * self.action_range[1]
        action = tf.concat([a_linear, a_angular], axis=1)

        # testing
        gru_output, gru_h_out = gru_cell(input_vect, gru_h_in)
        # action
        a_linear = model_utils.dense_layer(
            gru_output, 1, 'a_linear',
            activation=tf.nn.sigmoid) * self.action_range[0]
        a_angular = model_utils.dense_layer(
            gru_output, 1, 'a_angular',
            activation=tf.nn.tanh) * self.action_range[1]
        action_test = tf.concat([a_linear, a_angular], axis=1)
        return [action, action_test, gru_h_out]
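
A sketch of the shared train/test recurrence above, assuming TensorFlow 1.x and a plain tf.nn.rnn_cell.GRUCell in place of the model_utils helper: the same cell object builds both the full-sequence training graph and the single-step testing graph, so the two paths share one set of weights.

import tensorflow as tf

max_step, dim_in, n_hidden = 5, 4, 8  # illustrative sizes
seq = tf.placeholder(tf.float32, [None, max_step, dim_in])
length = tf.placeholder(tf.int32, [None])
step_in = tf.placeholder(tf.float32, [None, dim_in])
h_in = tf.placeholder(tf.float32, [None, n_hidden])

gru_cell = tf.nn.rnn_cell.GRUCell(n_hidden, name='gru_cell')
train_out, _ = tf.nn.dynamic_rnn(gru_cell, seq, sequence_length=length,
                                 dtype=tf.float32)  # creates the variables
test_out, h_out = gru_cell(step_in, h_in)           # reuses the same weights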
Example #13
    def Model(self, inputs):
        input_depth, input_cmd, input_prev_a = inputs
        # encode depth image
        conv1 = model_utils.conv2d(input_depth,
                                   4,
                                   5,
                                   4,
                                   scope='conv1',
                                   max_pool=False)
        conv2 = model_utils.conv2d(conv1,
                                   16,
                                   5,
                                   4,
                                   scope='conv2',
                                   max_pool=False)
        conv3 = model_utils.conv2d(conv2,
                                   32,
                                   3,
                                   2,
                                   scope='conv3',
                                   max_pool=False)
        shape = conv3.get_shape().as_list()
        depth_vect = tf.reshape(conv3,
                                shape=[-1,
                                       shape[1] * shape[2] * shape[3]])  # b,d
        # encode cmd
        embedding_cmd = tf.get_variable('cmd_embedding',
                                        [self.n_cmd_type, self.dim_emb])
        cmd_vect = tf.reshape(tf.nn.embedding_lookup(embedding_cmd, input_cmd),
                              [-1, self.dim_emb])
        # encode prev action
        embedding_w_action = tf.get_variable('embedding_w_action',
                                             [self.dim_action, self.dim_emb])
        embedding_b_action = tf.get_variable('embedding_b_action',
                                             [self.dim_emb])
        prev_a_vect = tf.matmul(input_prev_a,
                                embedding_w_action) + embedding_b_action

        input_vect = tf.concat([depth_vect, cmd_vect, prev_a_vect], axis=1)

        hidden_1 = model_utils.dense_layer(input_vect, self.n_hidden,
                                           'hidden_1')
        hidden_2 = model_utils.dense_layer(hidden_1, self.n_hidden // 2,
                                           'hidden_2')
        a_linear = model_utils.dense_layer(
            hidden_2,
            1,
            'a_linear',
            activation=tf.nn.sigmoid,
            w_init=tf.initializers.random_uniform(-0.003, 0.003),
            b_init=tf.initializers.random_uniform(
                -0.003, 0.003)) * self.action_range[0]
        a_angular = model_utils.dense_layer(
            hidden_2,
            1,
            'a_angular',
            activation=tf.nn.tanh,
            w_init=tf.initializers.random_uniform(-0.003, 0.003),
            b_init=tf.initializers.random_uniform(
                -0.003, 0.003)) * self.action_range[1]
        pred_action = tf.concat([a_linear, a_angular], axis=1)

        return pred_action
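
A sketch of the small-uniform output initialization above, assuming TensorFlow 1.x with tf.layers.dense standing in for model_utils.dense_layer; initializing the final layer in [-0.003, 0.003] (a DDPG-style convention) keeps the initial bounded actions near the middle of their range. The 0.3 scale is illustrative, matching the linear action_range used elsewhere in these examples.

import tensorflow as tf

hidden = tf.placeholder(tf.float32, [None, 128])  # illustrative size
init = tf.initializers.random_uniform(-0.003, 0.003)
a_linear = tf.layers.dense(hidden, 1, activation=tf.nn.sigmoid,
                           kernel_initializer=init,
                           bias_initializer=init) * 0.3  # linear range
a_angular = tf.layers.dense(hidden, 1, activation=tf.nn.tanh,
                            kernel_initializer=init,
                            bias_initializer=init)  # scale by angular range
pred_action = tf.concat([a_linear, a_angular], axis=1)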