Example #1
    def get_trajectory(self, last_pred=None):
        """Get trajectory"""
        # Need copy, when run with explore time > 1,
        # if not, will clear trajectory before sent.
        # trajectory = message(self.trajectory.copy())
        trajectory = message(deepcopy(self.trajectory))
        set_msg_info(trajectory, agent_id=self.id)

        return trajectory
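
The comment above carries the reasoning behind this example. Below is a minimal, standard-library sketch of the shallow-copy pitfall it describes; the dict and key names are illustrative and not taken from the repo.

    # Why dict.copy() is not enough: the copy shares the nested list buffers,
    # so clearing them after "sending" also empties the copied message, while
    # deepcopy keeps an independent snapshot.
    from copy import deepcopy

    trajectory = {"cur_state": [1, 2], "action": [0, 1]}
    shallow = trajectory.copy()
    deep = deepcopy(trajectory)

    for key in trajectory:  # simulate the agent clearing its buffers
        trajectory[key].clear()

    print(shallow)  # {'cur_state': [], 'action': []}        -- data lost
    print(deep)     # {'cur_state': [1, 2], 'action': [0, 1]} -- data preserved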
Example #2
    def get_trajectory(self, last_pred=None):
        """Get trajectory."""
        # Convert the list buffers into numpy arrays before wrapping them.
        for _data_key in ("cur_state", "logit", "action"):
            self.trajectory[_data_key] = np.asarray(self.trajectory[_data_key])

        # astype returns a new array, so the result must be assigned back.
        self.trajectory["action"] = self.trajectory["action"].astype(np.int32)

        trajectory = message(self.trajectory)
        set_msg_info(trajectory, agent_id=self.id)
        return trajectory
Example #3
    def transfer_to_broker(self):
        """Send train data to learner."""
        while True:
            data = self.recv_agent.recv()
            info_cmd = get_msg_info(data, "cmd")

            # Append the learner postfix to the command before forwarding.
            new_cmd = info_cmd + self.learner_postfix
            set_msg_info(data,
                         broker_id=self.broker_id,
                         explorer_id=self.explorer_id,
                         cmd=new_cmd)

            self.send_broker.send(data)
Example #4
    def transfer_to_broker(self):
        """Send train data to learner."""
        while True:
            data = self.recv_agent.recv()

            info_cmd = get_msg_info(data, "cmd")
            # Keep "buf_reduce" commands as their own type; everything else is train data.
            data_type = "buf_reduce" if info_cmd == "buf_reduce" else "data"
            set_msg_info(data,
                         broker_id=self.broker_id,
                         explorer_id=self.explorer_id)

            self.send_broker.send(data, data_type=data_type)
Example #5
    def get_trajectory(self, last_pred=None):
        """Get trajectory."""
        # Gather per-environment samples into the shared trajectory buffers.
        for env_id in range(self.vector_env_size):
            for _data_key in ("cur_state", "logit", "action", "reward", "done",
                              "info"):
                self.trajectory[_data_key].extend(
                    self.sample_vector[env_id][_data_key])

        # merge data into env_num * seq_len
        for _data_key in self.trajectory:
            self.trajectory[_data_key] = np.stack(self.trajectory[_data_key])

        self.trajectory["action"].astype(np.int32)

        trajectory = message(self.trajectory.copy())
        set_msg_info(trajectory, agent_id=self.id)
        return trajectory
Example #6
    def predict(self):
        """Predict action."""
        while True:

            # Time spent waiting for the next prediction request.
            start_t0 = time()
            data = self.request_q.recv()
            state = get_msg_data(data)
            self._stats.obs_wait_time += time() - start_t0

            # Time spent on the prediction itself.
            start_t1 = time()
            with self.lock:
                action = self.alg.predict(state)
            self._stats.inference_time += time() - start_t1

            set_msg_info(data, cmd="predict_reply")
            set_msg_data(data, action)

            self.reply_q.send(data)

            self._stats.iters += 1
            # Deliver the collected timing stats once enough iterations have passed.
            if self._stats.iters > self._report_period:
                _report = self._stats.get()
                self.stats_deliver.send(_report, block=True)
Example #7
    def get_trajectory(self):
        trajectory = message(self.trajectory.copy())
        set_msg_info(trajectory, agent_id=self.id)
        return trajectory
Example #8
    def get_trajectory(self):
        transition = self.batch.data.transition_data
        transition.update(self._info.copy())  # record the win rate during training
        trajectory = message(transition)
        set_msg_info(trajectory, agent_id=self.id)
        return trajectory
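
All of the snippets above call the same small messaging helpers: message, set_msg_info, get_msg_info, set_msg_data and get_msg_data. The stand-ins below only mirror the call sites in these examples so the snippets can be exercised in isolation; they are an assumption, not the framework's actual implementation.

    # Hypothetical stand-ins -- shapes inferred from the call sites above,
    # NOT the framework's real message helpers.
    def message(data, **info):
        """Wrap payload data together with a routing-info dict."""
        return {"data": data, "info": dict(info)}

    def set_msg_info(msg, **kwargs):
        """Attach routing fields such as agent_id, broker_id or cmd."""
        msg["info"].update(kwargs)

    def get_msg_info(msg, key):
        return msg["info"].get(key)

    def set_msg_data(msg, data):
        msg["data"] = data

    def get_msg_data(msg):
        return msg["data"]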