Ejemplo n.º 1
0
    def _ensemble_predict(self, obs):
        """Pick one action out of the ensemble's proposals using critic scores.

        Each of the ``self.ensemble_num`` actors proposes an action for
        ``obs``. Each critic then scores every proposal. Scores are divided by
        the per-critic sum over proposals, averaged across critics, and the
        proposal with the highest mean normalized score is returned.

        NOTE(review): the expand / gather / squeeze pattern looks like it
        expects ``obs`` to hold a single observation (leading dim of 1) --
        confirm against the caller.
        """
        num_models = self.ensemble_num

        # One proposal per actor, stacked along axis 0.
        candidate_actions = layers.concat(
            [self.actors[k].predict(obs) for k in range(num_models)], axis=0)
        # Repeat the observation so every proposal gets a matching row.
        tiled_obs = layers.expand(obs, expand_times=[num_models, 1])

        # Each critic contributes one column (new axis 1) to the score matrix.
        per_critic_scores = []
        for k in range(num_models):
            score = self.critics[k].predict(tiled_obs, candidate_actions)
            per_critic_scores.append(layers.unsqueeze(score, axes=[1]))
        scores = layers.concat(per_critic_scores, axis=1)

        # Normalize scores given by each critic: divide by the sum over axis 0.
        column_sum = layers.reduce_sum(scores, dim=0, keep_dim=True)
        column_sum = layers.expand(column_sum, expand_times=[num_models, 1])
        normalized = scores / column_sum

        # Average over critics, then take the best-scoring proposal.
        mean_score = layers.reduce_mean(normalized, dim=1, keep_dim=True)
        winner = layers.cast(layers.argmax(mean_score, axis=0), dtype='int32')
        chosen = layers.gather(candidate_actions, winner)
        return layers.squeeze(chosen, axes=[0])
Ejemplo n.º 2
0
    def value(self, obs, act):
        """Evaluate ``obs`` and ``act`` through three branches and a merge head.

        Branches (three layers each): observation only (``obs_fc*``), action
        only (``act_fc*``), and the axis-1 concatenation of both
        (``total_fc*``). Their outputs are concatenated along axis 1 and fed
        through ``re_fc1`` .. ``re_fc4``; axis 1 of the result is squeezed.
        """
        joint = layers.concat([obs, act], axis=1)

        # Observation-only branch.
        obs_feat = self.obs_fc3(self.obs_fc2(self.obs_fc1(obs)))
        # Action-only branch.
        act_feat = self.act_fc3(self.act_fc2(self.act_fc1(act)))
        # Branch over the concatenated (obs, act) input.
        joint_feat = self.total_fc3(self.total_fc2(self.total_fc1(joint)))

        merged = layers.concat([obs_feat, act_feat, joint_feat], axis=1)
        head = self.re_fc1(merged)
        head = self.re_fc2(head)
        head = self.re_fc3(head)
        head = self.re_fc4(head)
        return layers.squeeze(head, axes=[1])
Ejemplo n.º 3
0
    def value(self, obs, act):
        """Return two value outputs ``(Q1, Q2)`` for ``obs`` and ``act``.

        Two separate layer stacks are used (``fc1``-``fc3`` and
        ``fc4``-``fc6``). In each, the action is concatenated along axis 1
        after the first layer, and axis 1 of the final output is squeezed.
        """
        # First head: fc1 -> concat(act) -> fc2 -> fc3.
        obs_hidden_1 = self.fc1(obs)
        joined_1 = layers.concat([obs_hidden_1, act], axis=1)
        q1 = layers.squeeze(self.fc3(self.fc2(joined_1)), axes=[1])

        # Second head: fc4 -> concat(act) -> fc5 -> fc6.
        obs_hidden_2 = self.fc4(obs)
        joined_2 = layers.concat([obs_hidden_2, act], axis=1)
        q2 = layers.squeeze(self.fc6(self.fc5(joined_2)), axes=[1])
        return q1, q2
Ejemplo n.º 4
0
    def value(self, obs, act):
        """Return two value outputs ``(Q1, Q2)`` from two independent heads.

        Head 1 uses ``fc1``/``fc2``/``fc3`` and head 2 uses
        ``fc4``/``fc5``/``fc6``. Both follow the same recipe: first layer on
        ``obs``, concatenate ``act`` along axis 1, two more layers, then
        squeeze axis 1.
        """

        def _head(first, second, last):
            # Shared recipe for both heads.
            hidden = first(obs)
            joined = layers.concat([hidden, act], axis=1)
            return layers.squeeze(last(second(joined)), axes=[1])

        q1 = _head(self.fc1, self.fc2, self.fc3)
        q2 = _head(self.fc4, self.fc5, self.fc6)
        return q1, q2
Ejemplo n.º 5
0
 def value(self, obs, act):
     """Take a state and an action and output the corresponding Q(s, a).

     ``obs`` and ``act`` are concatenated along axis 1, passed through
     ``fc1`` then ``fc2``, and axis 1 of the result is squeezed away.
     """
     state_action = layers.concat([obs, act], axis=1)
     q_value = self.fc2(self.fc1(state_action))
     return layers.squeeze(q_value, axes=[1])
Ejemplo n.º 6
0
 def value(self, obs_n, act_n):
     """Compute a value from the combined observations and actions.

     ``obs_n + act_n`` is handed to ``layers.concat`` along axis 1
     (presumably both are lists of tensors -- confirm with the caller).
     The result goes through ``fc1`` -> ``fc2`` -> ``fc3`` and axis 1 of
     the output is squeezed.
     """
     joint_input = layers.concat(obs_n + act_n, axis=1)
     features = self.fc2(self.fc1(joint_input))
     q_value = self.fc3(features)
     return layers.squeeze(q_value, axes=[1])
Ejemplo n.º 7
0
 def value(self, obs, act):
     """Return ``(Q1, Q2)`` computed by ``self.q1`` and ``self.q2``.

     Both sub-networks receive the same axis-1 concatenation of ``obs`` and
     ``act``; axis 1 of each output is squeezed.
     """
     state_action = layers.concat([obs, act], axis=1)
     first = layers.squeeze(self.q1(state_action), axes=[1])
     second = layers.squeeze(self.q2(state_action), axes=[1])
     return first, second
Ejemplo n.º 8
0
    def forward(self, inputs, mode):
        """Run the transformer model and return ``click_prob``.

        User slots are embedded and projected to a user feature. Item slots
        are embedded and projected, then concatenated along axis 1 with a
        position embedding and the user feature expanded to the item
        sequence. The result goes through ``input_embed_fc_op``, the
        transformer decoder and two output layers.

        ``mode`` values ``'test'`` and ``'inference'`` put the decoder in
        test mode.

        Original author's note: don't use sequence_expand for backward
        (``sequence_expand_as`` is used here).
        """
        is_test = mode in ['test', 'inference']

        # User side: embeddings -> fc.
        user_feature = self.user_feature_fc_op(
            self._build_embeddings(inputs, self.user_slot_names))

        # Item side: embeddings -> fc, plus a position embedding.
        item_fc = self.item_fc_op(
            self._build_embeddings(inputs, self.item_slot_names))
        positions = fluid_sequence_get_pos(item_fc)
        pos_embed = self.dict_data_embed_op['pos'](positions)
        expanded_user = layers.sequence_expand_as(user_feature, item_fc)
        input_embed = layers.concat([item_fc, pos_embed, expanded_user], 1)

        # Transformer decoder followed by the two-layer output head.
        trans_in = self.input_embed_fc_op(input_embed)
        decoded = self.transformer_decode(is_test, trans_in)
        hidden = self.output_fc1_op(decoded)
        return self.output_fc2_op(hidden)
Ejemplo n.º 9
0
 def value(self, obs, act):
     """Compute a value output for ``obs`` and ``act``.

     ``obs`` goes through ``fc1``, the result is concatenated with ``act``
     along axis 1, then passed through ``fc2`` and ``fc3``; axis 1 of the
     output is squeezed.
     """
     obs_hidden = self.fc1(obs)
     joined = layers.concat([obs_hidden, act], axis=1)
     q_value = self.fc3(self.fc2(joined))
     return layers.squeeze(q_value, axes=[1])
Ejemplo n.º 10
0
    def sampling(self, inputs, sampling_type):
        """Sample item ids with ``sampling_rnn`` and cut them by decode length.

        ``inputs`` must provide ``'decode_len'``; it must also provide
        ``'eps'`` when ``sampling_type == 'eps_greedy'`` and ``'eta'`` when
        ``sampling_type == 'softmax'``. In any other case the corresponding
        argument is passed as ``None``.

        The initial hidden state is the user feature, or -- when
        ``self._candidate_encode`` is truthy -- a projection of the user
        feature concatenated with the candidate encoding.
        """
        decode_len = inputs['decode_len']
        user_feature = self.user_encode(inputs)
        item_fc = self.item_fc_op(
            self._build_embeddings(inputs, self.item_slot_names))
        positions = fluid_sequence_get_pos(item_fc)
        pos_embed = self.dict_data_embed_op['pos'](positions)

        # Choose the initial hidden state for the sampling RNN.
        init_hidden = user_feature
        if self._candidate_encode:
            cand_encoding = self.candidate_encode(item_fc)
            init_hidden = self.candidate_encode_fc_op(
                layers.concat([user_feature, cand_encoding], 1))

        # Only read the hyper-parameter that the chosen strategy needs.
        eps = None
        if sampling_type == 'eps_greedy':
            eps = inputs['eps']
        eta = None
        if sampling_type == 'softmax':
            eta = inputs['eta']

        raw_ids = self.sampling_rnn(
            item_fc,
            h_0=init_hidden,
            pos_embed=pos_embed,
            forward_func=self.sampling_rnn_forward,
            sampling_type=sampling_type,
            eps=eps,
            eta=eta)
        # Reset the LoD of the sampled ids from item_fc before cutting.
        realigned = layers.lod_reset(raw_ids, item_fc)
        return self._cut_by_decode_len(realigned, decode_len)
Ejemplo n.º 11
0
 def value(self, obs, act):
     """Compute a value output for ``obs`` and ``act``.

     The two inputs are concatenated along axis 1 and passed through
     ``fc1`` -> ``fc2`` -> ``fc3``; axis 1 of the output is squeezed.
     """
     state_action = layers.concat([obs, act], axis=1)
     features = self.fc2(self.fc1(state_action))
     q_value = self.fc3(features)
     return layers.squeeze(q_value, axes=[1])
Ejemplo n.º 12
0
 def Q1(self, obs, act):
     """Compute the output of the ``fc1``/``fc2``/``fc3`` stack only.

     ``obs`` goes through ``fc1``, is concatenated with ``act`` along
     axis 1, then through ``fc2`` and ``fc3``; axis 1 is squeezed.
     """
     obs_hidden = self.fc1(obs)
     joined = layers.concat([obs_hidden, act], axis=1)
     q_value = self.fc3(self.fc2(joined))
     return layers.squeeze(q_value, axes=[1])
Ejemplo n.º 13
0
 def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                          independent_pos_embed):
     """Run one GRU step and return ``(new_hidden, hidden_fc)``.

     The item features and position embedding are concatenated along
     axis 1, projected by ``item_gru_fc_op`` and fed to ``item_gru_op``
     with ``independent_hidden`` as ``h_0``. The GRU output is returned
     together with its ``hidden_fc_op`` projection.
     """
     step_input = layers.concat(
         [independent_item_fc, independent_pos_embed], 1)
     new_hidden = self.item_gru_op(
         self.item_gru_fc_op(step_input), h_0=independent_hidden)
     return new_hidden, self.hidden_fc_op(new_hidden)
Ejemplo n.º 14
0
 def value(self, obs, act):
     """Compute a value output for ``obs`` and ``act``.

     ``fc1`` is applied to ``obs`` alone; its output is concatenated with
     ``act`` along axis 1 and passed through ``fc2`` and ``fc3``. Axis 1
     of the result is squeezed.
     """
     obs_features = self.fc1(obs)
     merged = layers.concat([obs_features, act], axis=1)
     merged_features = self.fc2(merged)
     return layers.squeeze(self.fc3(merged_features), axes=[1])
Ejemplo n.º 15
0
 def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                          independent_pos_embed):
     """Run one GRU step and return ``(new_hidden, scores)``.

     The item features and position embedding are concatenated along
     axis 1, projected by ``item_gru_fc_op`` and fed to ``item_gru_op``
     with ``independent_hidden`` as ``h_0``. The scores are the GRU output
     passed through ``out_Q_fc1_op`` then ``out_Q_fc2_op``.
     """
     step_input = layers.concat(
         [independent_item_fc, independent_pos_embed], 1)
     new_hidden = self.item_gru_op(
         self.item_gru_fc_op(step_input), h_0=independent_hidden)
     q_hidden = self.out_Q_fc1_op(new_hidden)
     return new_hidden, self.out_Q_fc2_op(q_hidden)
Ejemplo n.º 16
0
 def simple_step_rnn(self, item_fc, last_click_embedding, h_0):
     """Advance the item GRU by one step and return the next hidden state.

     The same as self.train_rnn(item_fc, h_0, output_type='hidden') for a
     single step: ``item_fc`` and ``last_click_embedding`` are concatenated
     along axis 1, projected by ``item_gru_fc_op`` and fed to
     ``item_gru_op`` with ``h_0`` as the initial hidden state.
     """
     step_input = layers.concat([item_fc, last_click_embedding], 1)
     projected = self.item_gru_fc_op(step_input)
     return self.item_gru_op(projected, h_0=h_0)
Ejemplo n.º 17
0
 def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                          independent_pos_embed):
     """Run one GRU step and return ``(new_hidden, scores)``.

     The GRU output is passed through ``out_fc1_op`` and ``out_fc2_op`` to
     get ``click_prob``; the scores are the slice ``[1:2]`` of that tensor
     along axis 1.
     """
     step_input = layers.concat(
         [independent_item_fc, independent_pos_embed], 1)
     new_hidden = self.item_gru_op(
         self.item_gru_fc_op(step_input), h_0=independent_hidden)
     click_prob = self.out_fc2_op(self.out_fc1_op(new_hidden))
     # Keep only index 1 of axis 1 as the score.
     click_score = layers.slice(click_prob, axes=[1], starts=[1], ends=[2])
     return new_hidden, click_score
Ejemplo n.º 18
0
 def value(self, obs, act):
     """Compute Q for a state ``obs`` and an action ``act``.

     Both s and a are inputs; a network can take several input vectors by
     concatenating them, so they are joined along axis 1 first. The result
     goes through ``fc1`` and ``fc2``, and axis 1 of the output is
     squeezed.
     """
     # Join state and action into a single input.
     state_action = layers.concat([obs, act], axis=1)
     hidden = self.fc1(state_action)
     return layers.squeeze(self.fc2(hidden), axes=[1])
Ejemplo n.º 19
0
 def _build_embeddings(self, inputs, list_names):
     """Embed each named input, concatenate along axis 1, apply softsign.

     For every ``name`` in ``list_names`` the embedding op is looked up in
     ``self.dict_data_embed_op`` under ``self._get_embed_name(name)`` and
     applied to ``inputs[name]``.
     """
     # Per the original notes each embedding is (batch*num_items, 16).
     embeds = [
         self.dict_data_embed_op[self._get_embed_name(name)](inputs[name])
         for name in list_names
     ]
     # Original note: result is (batch*num_items, concat_dim).
     merged = layers.concat(input=embeds, axis=1)
     return layers.softsign(merged)
Ejemplo n.º 20
0
 def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                          independent_pos_embed):
     """Score one step without updating the hidden state.

     Item features, position embedding and hidden state are concatenated
     along axis 1 and passed through ``item_concat_fc_op``, ``out_fc1_op``
     and ``out_fc2_op``. The scores are the slice ``[1:2]`` of the result
     along axis 1. ``independent_hidden`` is returned unchanged as the
     first element.
     """
     step_features = [
         independent_item_fc, independent_pos_embed, independent_hidden
     ]
     projected = self.item_concat_fc_op(layers.concat(step_features, 1))
     click_prob = self.out_fc2_op(self.out_fc1_op(projected))
     click_score = layers.slice(click_prob, axes=[1], starts=[1], ends=[2])
     return independent_hidden, click_score
Ejemplo n.º 21
0
    def sampling_rnn_forward(self, independent_item_fc, independent_hidden,
                             independent_pos_embed):
        """Single sampling step; this version always raises.

        The statements that used to follow the ``raise`` were unreachable, so
        they are kept here as documentation only. They show an implementation
        returning ``(next_hidden, scores)``.

        Example::

            gru_input = self.item_gru_fc_op(
                layers.concat([independent_item_fc, independent_pos_embed], 1))
            next_hidden = self.item_gru_op(gru_input, independent_hidden)
            scores = self.out_Q_fc2_op(self.out_Q_fc1_op(next_hidden))
            return next_hidden, scores

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError()
    def value(self, obs, act):
        """Compute a value output for ``obs`` and ``act``.

        The inputs are concatenated along axis 1 and passed through
        ``fc1`` -> ``fc2`` -> ``fc3`` -> ``fc4``; axis 1 of the output is
        squeezed.
        """
        state_action = layers.concat([obs, act], axis=1)
        hidden = self.fc1(state_action)
        q_value = self.fc4(self.fc3(self.fc2(hidden)))
        return layers.squeeze(q_value, axes=[1])
Ejemplo n.º 23
0
 def predict(self, obs):
     """Compute ``means`` from an observation split into two parts.

     Along axis 1, ``obs`` is sliced into ``[0, obs_dim - vel_obs_dim)``
     and ``[-vel_obs_dim, obs_dim)``. If ``obs`` has ``obs_dim`` columns
     the second slice is the trailing ``vel_obs_dim`` columns -- confirm.
     Each part goes through its own two layers (``fc0``/``fc1`` and
     ``vel_fc0``/``vel_fc1``); the results are concatenated along axis 1
     and passed through ``fc2`` and ``fc3``.
     """
     split_at = self.obs_dim - self.vel_obs_dim
     main_part = layers.slice(obs, axes=[1], starts=[0], ends=[split_at])
     vel_part = layers.slice(
         obs, axes=[1], starts=[-self.vel_obs_dim], ends=[self.obs_dim])

     main_features = self.fc1(self.fc0(main_part))
     vel_features = self.vel_fc1(self.vel_fc0(vel_part))

     merged = layers.concat([main_features, vel_features], axis=1)
     return self.fc3(self.fc2(merged))
Ejemplo n.º 24
0
 def _build_embeddings(self, inputs, list_names):
     """Embed each named input, concatenate along axis 1, apply softsign.

     The embedding op for ``name`` is looked up in
     ``self.dict_data_embed_op`` under ``self._get_embed_name(name)``.
     A rank-3 embedding is summed over axis 1 before concatenation.
     """
     embeds = []
     for slot in list_names:
         embed_op = self.dict_data_embed_op[self._get_embed_name(slot)]
         slot_embed = embed_op(inputs[slot])
         # Original note: collapse (batch*num_items, None, 16) by summing.
         if len(slot_embed.shape) == 3:
             slot_embed = layers.reduce_sum(slot_embed, dim=1)
         embeds.append(slot_embed)
     # Original note: result is (batch*num_items, concat_dim).
     merged = layers.concat(input=embeds, axis=1)
     return layers.softsign(merged)
Ejemplo n.º 25
0
 def _build_embeddings(self, inputs, list_names):
     """Embed each named input, concatenate along axis 1, apply softsign.

     The embedding op for each ``name`` is ``self.dict_data_embed_op[name]``
     and is applied to ``inputs[name]``.
     """
     # Per the original notes each embedding is (batch*seq_lens, 16).
     embeds = [
         self.dict_data_embed_op[name](inputs[name]) for name in list_names
     ]
     # Original note: result is (batch*seq_lens, concat_dim).
     merged = layers.concat(input=embeds, axis=1)
     return layers.softsign(merged)
Ejemplo n.º 26
0
    def forward(self, inputs, mode):
        """Run the two-GRU model and return ``click_prob``.

        Item features and a position embedding are concatenated along axis 1
        and projected by ``item_gru_fc_op``. The projection is fed to both
        ``item_gru_forward_op`` and ``item_gru_backward_op``, each started
        from the user feature. Their outputs are concatenated along axis 1
        and passed through ``out_click_fc1_op`` then ``out_click_fc2_op``.

        ``mode`` is accepted but not used in this method.
        """
        # User side: embeddings -> fc.
        user_feature = self.user_feature_fc_op(
            self._build_embeddings(inputs, self.user_slot_names))

        # Item side: embeddings -> fc, plus a position embedding.
        item_fc = self.item_fc_op(
            self._build_embeddings(inputs, self.item_slot_names))
        positions = fluid_sequence_get_pos(item_fc)
        pos_embed = self.dict_data_embed_op['pos'](positions)

        # Both GRU ops share one input and one initial hidden state.
        gru_input = self.item_gru_fc_op(layers.concat([item_fc, pos_embed], 1))
        forward_out = self.item_gru_forward_op(gru_input, h_0=user_feature)
        backward_out = self.item_gru_backward_op(gru_input, h_0=user_feature)
        both_directions = layers.concat([forward_out, backward_out], axis=1)

        hidden = self.out_click_fc1_op(both_directions)
        return self.out_click_fc2_op(hidden)
Ejemplo n.º 27
0
    def forward(self, inputs):
        """Run the GRU model and return ``click_prob``.

        Item features and a position embedding are concatenated along axis 1,
        projected by ``item_gru_fc_op`` and fed to ``item_gru_op`` with the
        encoded user feature as ``h_0``. The GRU output is passed through
        ``out_fc1_op`` then ``out_fc2_op``.
        """
        user_feature = self.user_encode(inputs)
        item_fc = self.item_fc_op(
            self._build_embeddings(inputs, self.item_slot_names))
        positions = fluid_sequence_get_pos(item_fc)
        pos_embed = self.dict_data_embed_op['pos'](positions)

        step_input = layers.concat([item_fc, pos_embed], 1)
        gru_out = self.item_gru_op(
            self.item_gru_fc_op(step_input), h_0=user_feature)
        hidden = self.out_fc1_op(gru_out)
        return self.out_fc2_op(hidden)
Ejemplo n.º 28
0
 def value(self, obs, act):
     """Take a state and an action and output the corresponding Q(s, a).

     ``obs`` and ``act`` are concatenated along axis 1 and passed through
     ``fc1`` -> ``fc2`` -> ``fc3``; axis 1 of the output is squeezed.
     """
     state_action = layers.concat([obs, act], axis=1)
     features = self.fc2(self.fc1(state_action))
     q_value = self.fc3(features)
     return layers.squeeze(q_value, axes=[1])
Ejemplo n.º 29
0
 def predict(self, obs, action):
     """Compute ``V`` for an observation (split in two parts) and an action.

     Along axis 1, ``obs`` is sliced into ``[0, obs_dim - vel_obs_dim)``
     and ``[-vel_obs_dim, obs_dim)``. If ``obs`` has ``obs_dim`` columns
     the second slice is the trailing ``vel_obs_dim`` columns -- confirm.
     The first part goes through ``fc0``/``fc1``, the second through
     ``vel_fc0``/``vel_fc1``, and the action through ``act_fc0``. The three
     results are concatenated along axis 1 and passed through ``fc2`` and
     ``fc3``; axis 1 of the output is squeezed.
     """
     split_at = self.obs_dim - self.vel_obs_dim
     main_part = layers.slice(obs, axes=[1], starts=[0], ends=[split_at])
     vel_part = layers.slice(
         obs, axes=[1], starts=[-self.vel_obs_dim], ends=[self.obs_dim])

     main_features = self.fc1(self.fc0(main_part))
     vel_features = self.vel_fc1(self.vel_fc0(vel_part))
     action_features = self.act_fc0(action)

     merged = layers.concat(
         [main_features, action_features, vel_features], axis=1)
     value_out = self.fc3(self.fc2(merged))
     return layers.squeeze(value_out, axes=[1])
Ejemplo n.º 30
0
    def value(self, hidden, act):
        """Take a state and an action and output the corresponding Q(s, a).

        ``hidden`` is flattened with ``layers.flatten(..., axis=1)``,
        concatenated with ``act`` along axis 1 and passed through ``fc1`` and
        ``fc2``; axis 1 of the output is squeezed.
        """
        # Exercise step 5: assemble the Q network.
        flat_state = layers.flatten(hidden, axis=1)
        state_action = layers.concat([flat_state, act], axis=1)
        q_value = self.fc2(self.fc1(state_action))
        return layers.squeeze(q_value, axes=[1])