def _build_online_policy(self):
     """Construct and return the BCQ policy used for online interaction.

     The policy is assembled from the agent module's networks: ``p_net`` is
     passed as ``a_network``, ``b_net`` as ``b_network``, and the first entry
     of the first element of ``q_nets`` as ``q_network``.

     Returns:
       A ``policies.BCQPolicy`` instance.
     """
     agent = self._agent_module
     # Only q_nets[0][0] is handed to the policy; the remaining entries of
     # q_nets are not used here.
     # NOTE(review): `n` presumably is the number of candidate actions the
     # policy samples per decision -- confirm against policies.BCQPolicy.
     return policies.BCQPolicy(
         a_network=agent.p_net,
         q_network=agent.q_nets[0][0],
         b_network=agent.b_net,
         n=self._n_action_samples,
     )
 def _build_test_policies(self):
     """Create the BCQ policy and register it as the 'main' test policy.

     The policy is built from the agent module's ``p_net`` (as ``a_network``),
     ``q_nets[0][0]`` (as ``q_network``) and ``b_net`` (as ``b_network``), and
     is stored in ``self._test_policies`` under the key ``'main'``. Nothing is
     returned.
     """
     agent = self._agent_module
     # Keyword arguments for the policy constructor, gathered in one place.
     # NOTE(review): `n` presumably is the number of candidate actions the
     # policy samples per decision -- confirm against policies.BCQPolicy.
     policy_kwargs = dict(
         a_network=agent.p_net,
         q_network=agent.q_nets[0][0],
         b_network=agent.b_net,
         n=self._n_action_samples,
     )
     self._test_policies['main'] = policies.BCQPolicy(**policy_kwargs)