Ejemplo n.º 1
0
 def predict(self, obs):
     """Run the two-branch network on ``obs`` and return the output of ``fc3``.

     ``obs`` is split along axis 1 into two pieces: the columns
     ``[0, obs_dim - vel_obs_dim)`` and the trailing ``vel_obs_dim`` columns.
     The first piece goes through ``fc0 -> fc1``, the second through
     ``vel_fc0 -> vel_fc1``; both results are concatenated on axis 1 and
     passed through ``fc2 -> fc3``.

     Args:
         obs: input variable; assumed to have ``self.obs_dim`` columns on
             axis 1 -- TODO confirm against the caller.

     Returns:
         The output of ``self.fc3`` (called ``means`` by the original code).
     """
     split_at = self.obs_dim - self.vel_obs_dim
     main_part = layers.slice(obs, axes=[1], starts=[0], ends=[split_at])
     vel_part = layers.slice(
         obs, axes=[1], starts=[-self.vel_obs_dim], ends=[self.obs_dim])

     # Each branch is applied in the same order as before so that the
     # operators are appended to the program in an unchanged sequence.
     main_feat = self.fc1(self.fc0(main_part))
     vel_feat = self.vel_fc1(self.vel_fc0(vel_part))

     merged = layers.concat([main_feat, vel_feat], axis=1)
     return self.fc3(self.fc2(merged))
Ejemplo n.º 2
0
 def predict(self, obs, action):
     """Evaluate an observation/action pair and return the squeezed ``fc3`` output.

     ``obs`` is split along axis 1 into the columns
     ``[0, obs_dim - vel_obs_dim)`` and the trailing ``vel_obs_dim`` columns.
     The two pieces go through ``fc0 -> fc1`` and ``vel_fc0 -> vel_fc1``
     respectively, ``action`` goes through ``act_fc0``, and the three results
     are concatenated on axis 1 before ``fc2 -> fc3``.

     Args:
         obs: observation variable; assumed to have ``self.obs_dim`` columns
             on axis 1 -- TODO confirm against the caller.
         action: action variable fed to ``self.act_fc0``.

     Returns:
         The output of ``self.fc3`` with axis 1 squeezed away.
     """
     split_at = self.obs_dim - self.vel_obs_dim
     main_part = layers.slice(obs, axes=[1], starts=[0], ends=[split_at])
     vel_part = layers.slice(
         obs, axes=[1], starts=[-self.vel_obs_dim], ends=[self.obs_dim])

     main_feat = self.fc1(self.fc0(main_part))
     vel_feat = self.vel_fc1(self.vel_fc0(vel_part))
     act_feat = self.act_fc0(action)

     # Concatenation order is (observation, action, velocity).
     merged = layers.concat([main_feat, act_feat, vel_feat], axis=1)
     out = self.fc3(self.fc2(merged))
     return layers.squeeze(out, axes=[1])
Ejemplo n.º 3
0
    def dynamic_rnn(self, item_fc, h_0, output_type=None, double_type=None, double_id=None):
        """Build a DynamicRNN that applies ``simple_step_rnn`` step by step.

        At every step the current slice of ``item_fc`` and the recurrent memory
        (initialised from ``h_0``) are passed to ``self.simple_step_rnn``; what
        is emitted per step depends on ``output_type``.

        Args:
            item_fc: sequence input fed step by step; for 'max_Q' / 'double_Q'
                it is additionally registered as a static input.
            h_0: initial value of the recurrent memory.
            output_type: one of
                'c_Q'      - output ``out_Q_fc2_op(out_Q_fc1_op(next_h_0))``;
                'max_Q'    - output the max-pooled masked Q over the sequence;
                'double_Q' - output depends on ``double_type``;
                'hidden'   - output the new hidden state.
            double_type: only read when ``output_type == 'double_Q'``:
                'max_id'   - output ``eps_greedy_sampling(expand_Q, mask, eps=0)``;
                'double_Q' - output ``fluid_sequence_index(expand_Q, cur_double_id)``.
            double_id: sequence input stepped through when
                ``double_type == 'double_Q'``.

        Returns:
            The result of calling the DynamicRNN after its block is closed.

        Raises:
            NotImplementedError: if ``output_type`` is not supported, or if
                ``output_type == 'double_Q'`` and ``double_type`` is not
                supported.
        """
        drnn = fluid.layers.DynamicRNN()
        # Computed outside the block so it can be registered as a static input.
        pos = fluid_sequence_get_pos(item_fc)
        with drnn.block():
            cur_item_fc = drnn.step_input(item_fc)
            cur_h_0 = drnn.memory(init=h_0, need_reorder=True)

            # Re-attach LoD to the step slice, taking it from the memory.
            cur_item_fc = layers.lod_reset(cur_item_fc, cur_h_0)
            next_h_0 = self.simple_step_rnn(cur_item_fc, h_0=cur_h_0)

            if output_type == 'c_Q':
                Q = self.out_Q_fc2_op(self.out_Q_fc1_op(next_h_0))
                drnn.output(Q)

            elif output_type in ['max_Q', 'double_Q']:
                # Worked example from the original author:
                # batch_size = 2
                # item_fc: lod = [0,4,7]
                # cur_h_0: lod = [0,1,2]
                item_fc = drnn.static_input(item_fc)
                pos = drnn.static_input(pos)
                # Step counter kept as RNN memory, starting at 0.
                cur_step = drnn.memory(shape=[1], dtype='int64', value=0)

                # Pair the current hidden state with every item of its sequence.
                expand_h_0 = layers.sequence_expand(cur_h_0, item_fc)               # lod = [0,1,2,3,4,5,6,7]
                new_item_fc = layers.lod_reset(item_fc, expand_h_0)                 # lod = [0,1,2,3,4,5,6,7]
                next_expand_h_0 = self.simple_step_rnn(new_item_fc, expand_h_0)     # lod = [0,1,2,3,4,5,6,7]
                next_expand_h_0 = layers.lod_reset(next_expand_h_0, item_fc)        # lod = [0,4,7]

                expand_Q = self.out_Q_fc2_op(self.out_Q_fc1_op(next_expand_h_0))
                cur_step_id = layers.slice(cur_step, axes=[0, 1], starts=[0, 0], ends=[1, 1])
                # 1.0 where pos >= current step, 0.0 elsewhere.
                mask = layers.cast(pos >= cur_step_id, 'float32')
                # NOTE(review): masked entries become 0 rather than a very
                # negative value, so they can still win the max below when all
                # unmasked Q values are negative -- confirm this is intended.
                expand_Q = expand_Q * mask

                if output_type == 'max_Q':
                    max_Q = layers.sequence_pool(expand_Q, 'max')                       # lod = [0,1,2]
                    drnn.output(max_Q)
                elif output_type == 'double_Q':
                    if double_type == 'max_id':
                        max_id = self.eps_greedy_sampling(expand_Q, mask, eps=0)
                        drnn.output(max_id)
                    elif double_type == 'double_Q':
                        cur_double_id = drnn.step_input(double_id)

                        double_Q = fluid_sequence_index(expand_Q, cur_double_id)
                        drnn.output(double_Q)
                    else:
                        # Previously an unknown double_type fell through without
                        # registering any output; fail loudly like the
                        # unsupported output_type case does.
                        raise NotImplementedError(double_type)

                # update
                next_step = cur_step + 1
                drnn.update_memory(cur_step, next_step)

            elif output_type == 'hidden':
                drnn.output(next_h_0)

            else:
                raise NotImplementedError(output_type)

            # update
            drnn.update_memory(cur_h_0, next_h_0)

        drnn_output = drnn()
        return drnn_output
Ejemplo n.º 4
0
    def value(self, obs, action):
        """Compute the Q output for an observation/action pair.

        ``obs`` is split along axis 1: everything except the last
        ``vel_obs_dim`` columns goes through ``fc0 -> fc1`` and the last
        ``vel_obs_dim`` columns go through ``vel_fc0 -> vel_fc1``. ``action``
        goes through ``act_fc0``. The three results are concatenated on axis 1
        and passed through ``fc2 -> fc3``.

        Args:
            obs: observation variable; assumed to have ``self.obs_dim``
                columns on axis 1 -- TODO confirm against the caller.
            action: action variable fed to ``self.act_fc0``.

        Returns:
            The output of ``self.fc3`` with axis 1 squeezed away.
        """
        vel_width = self.vel_obs_dim
        # A negative end index leaves off the trailing velocity columns.
        main_part = layers.slice(obs, axes=[1], starts=[0], ends=[-vel_width])
        vel_part = layers.slice(
            obs, axes=[1], starts=[-vel_width], ends=[self.obs_dim])

        main_feat = self.fc1(self.fc0(main_part))
        vel_feat = self.vel_fc1(self.vel_fc0(vel_part))
        act_feat = self.act_fc0(action)

        merged = layers.concat([main_feat, act_feat, vel_feat], axis=1)
        q_out = self.fc3(self.fc2(merged))
        return layers.squeeze(q_out, axes=[1])
Ejemplo n.º 5
0
 def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                          independent_pos_embed):
     """Advance the recurrent state by one step and score the result.

     The item features and position embedding are concatenated on axis 1,
     passed through ``item_gru_fc_op``, and fed to ``item_gru_op`` together
     with ``independent_hidden`` as ``h_0``. The new state then goes through
     ``out_fc1_op -> out_fc2_op``.

     Returns:
         A pair ``(new_hidden, scores)`` where ``new_hidden`` is the output
         of ``item_gru_op`` and ``scores`` is column 1 (kept as a width-1
         slice on axis 1) of the ``out_fc2_op`` output.
     """
     step_features = layers.concat(
         [independent_item_fc, independent_pos_embed], 1)
     projected = self.item_gru_fc_op(step_features)
     new_hidden = self.item_gru_op(projected, h_0=independent_hidden)

     probs = self.out_fc2_op(self.out_fc1_op(new_hidden))
     # Keep only index 1 on axis 1, i.e. the half-open range [1, 2).
     column_one = layers.slice(probs, axes=[1], starts=[1], ends=[2])
     return new_hidden, column_one
Ejemplo n.º 6
0
 def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                          independent_pos_embed):
     """Score one step from item features, position embedding and hidden state.

     The three inputs are concatenated on axis 1 and passed through
     ``item_concat_fc_op``, then ``out_fc1_op -> out_fc2_op``.

     Returns:
         A pair ``(independent_hidden, scores)``. The hidden state is handed
         back unchanged; ``scores`` is column 1 (kept as a width-1 slice on
         axis 1) of the ``out_fc2_op`` output.
     """
     pieces = [independent_item_fc, independent_pos_embed, independent_hidden]
     joined = layers.concat(pieces, 1)
     joined_fc = self.item_concat_fc_op(joined)

     probs = self.out_fc2_op(self.out_fc1_op(joined_fc))
     # Keep only index 1 on axis 1, i.e. the half-open range [1, 2).
     column_one = layers.slice(probs, axes=[1], starts=[1], ends=[2])
     return independent_hidden, column_one
Ejemplo n.º 7
0
    def train_rnn(self,
                  item_fc,
                  atten_item_fc,
                  h_0,
                  pos,
                  pos_embed,
                  output_type=''):
        """Build a DynamicRNN that applies ``sampling_rnn_forward`` step by step.

        Args:
            item_fc: sequence input; it is first passed through
                ``fluid_sequence_advance(item_fc, OOV=0)`` and the result is
                fed to the RNN one step at a time.
            atten_item_fc: sequence input combined with the step's
                ``hidden_fc`` to produce Q. Fed per step for 'c_Q', registered
                as a static input for 'max_Q'.
            h_0: initial value of the recurrent memory.
            pos: sequence input used to build the mask ('max_Q' only).
            pos_embed: sequence input fed one step at a time.
            output_type: 'c_Q' or 'max_Q'.

        Returns:
            The result of calling the DynamicRNN after its block is closed.

        Raises:
            NotImplementedError: for any other ``output_type``, including the
                default ``''``.
        """
        # presumably shifts each sequence by one step, filling with OOV --
        # verify against fluid_sequence_advance
        shifted_item_fc = fluid_sequence_advance(item_fc, OOV=0)
        drnn = fluid.layers.DynamicRNN()
        with drnn.block():
            cur_item_fc = drnn.step_input(shifted_item_fc)
            cur_pos_embed = drnn.step_input(pos_embed)
            cur_h_0 = drnn.memory(init=h_0, need_reorder=True)

            # step_input will remove lod info
            cur_item_fc = layers.lod_reset(cur_item_fc, cur_h_0)
            cur_pos_embed = layers.lod_reset(cur_pos_embed, cur_h_0)

            next_h_0, hidden_fc = self.sampling_rnn_forward(
                cur_item_fc, cur_h_0, cur_pos_embed)

            if output_type == 'c_Q':
                cur_atten_item_fc = drnn.step_input(atten_item_fc)
                cur_atten_item_fc = layers.lod_reset(cur_atten_item_fc,
                                                     cur_h_0)

                # Row-wise dot product of hidden_fc and the current item.
                Q = layers.reduce_sum(hidden_fc * cur_atten_item_fc,
                                      dim=1,
                                      keep_dim=True)
                drnn.output(Q)

            elif output_type == 'max_Q':
                # The per-step slice of pos must be taken before the name
                # `pos` is rebound to the static (whole-sequence) input.
                cur_pos = drnn.step_input(pos)
                pos = drnn.static_input(pos)
                atten_item_fc = drnn.static_input(atten_item_fc)

                expand_Q = self._dot_attention(hidden_fc, atten_item_fc)

                # Take the [0:1, 0:1] element of the current step's positions.
                cur_step_id = layers.slice(cur_pos,
                                           axes=[0, 1],
                                           starts=[0, 0],
                                           ends=[1, 1])
                # 1.0 where pos >= current step id, 0.0 elsewhere.
                mask = layers.cast(pos >= cur_step_id, 'float32')
                # NOTE(review): masked entries become 0 rather than a very
                # negative value, so they can still win the max below when all
                # unmasked Q values are negative -- confirm this is intended.
                expand_Q = expand_Q * mask
                max_Q = layers.sequence_pool(expand_Q, 'max')
                drnn.output(max_Q)

            else:
                raise NotImplementedError(output_type)

            # update
            drnn.update_memory(cur_h_0, next_h_0)

        drnn_output = drnn()
        return drnn_output
Ejemplo n.º 8
0
    def train_rnn(self, item_fc, h_0, pos, pos_embed, output_type=''):
        """Build a DynamicRNN that applies ``sampling_rnn_forward`` step by step.

        Args:
            item_fc: sequence input fed one step at a time; for 'max_Q' it is
                also registered as a static input.
            h_0: initial value of the recurrent memory.
            pos: sequence input used to build the mask ('max_Q' only).
            pos_embed: sequence input fed one step at a time.
            output_type: 'c_Q' outputs the second return value of
                ``sampling_rnn_forward`` for the current step; 'max_Q' outputs
                the max-pooled masked scores over each sequence.

        Returns:
            The result of calling the DynamicRNN after its block is closed.

        Raises:
            NotImplementedError: for any other ``output_type``, including the
                default ``''``.
        """
        drnn = fluid.layers.DynamicRNN()
        with drnn.block():
            cur_item_fc = drnn.step_input(item_fc)
            cur_pos_embed = drnn.step_input(pos_embed)
            cur_h_0 = drnn.memory(init=h_0, need_reorder=True)

            # step_input will remove lod info
            cur_item_fc = layers.lod_reset(cur_item_fc, cur_h_0)
            cur_pos_embed = layers.lod_reset(cur_pos_embed, cur_h_0)

            next_h_0, Q = self.sampling_rnn_forward(cur_item_fc, cur_h_0,
                                                    cur_pos_embed)

            if output_type == 'c_Q':
                drnn.output(Q)

            elif output_type == 'max_Q':
                # e.g. batch_size = 2
                # cur_h_0: lod = [0,1,2]
                # The per-step slice of pos must be taken before the name
                # `pos` is rebound to the static (whole-sequence) input.
                cur_pos = drnn.step_input(pos)
                pos = drnn.static_input(pos)  # lod = [0,4,7]
                item_fc = drnn.static_input(item_fc)  # lod = [0,4,7]

                # expand
                expand_h_0 = layers.sequence_expand(
                    cur_h_0, item_fc)  # lod = [0,1,2,3,4,5,6,7]
                expand_pos_embed = layers.sequence_expand(
                    cur_pos_embed, item_fc)  # lod = [0,1,2,3,4,5,6,7]
                expand_item_fc = layers.lod_reset(item_fc, expand_h_0)
                # forward
                _, expand_scores = self.sampling_rnn_forward(
                    expand_item_fc, expand_h_0, expand_pos_embed)
                # reset result lod
                expand_Q = layers.lod_reset(expand_scores,
                                            item_fc)  # lod = [0,4,7]

                # Take the [0:1, 0:1] element of the current step's positions.
                cur_step_id = layers.slice(cur_pos,
                                           axes=[0, 1],
                                           starts=[0, 0],
                                           ends=[1, 1])
                # 1.0 where pos >= current step id, 0.0 elsewhere.
                mask = layers.cast(pos >= cur_step_id, 'float32')
                # NOTE(review): masked entries become 0 rather than a very
                # negative value, so they can still win the max below when all
                # unmasked scores are negative -- confirm this is intended.
                expand_Q = expand_Q * mask
                max_Q = layers.sequence_pool(expand_Q, 'max')  # lod = [0,1,2]
                drnn.output(max_Q)

            else:
                raise NotImplementedError(output_type)

            # update
            drnn.update_memory(cur_h_0, next_h_0)

        drnn_output = drnn()
        return drnn_output