コード例 #1
0
    def act(self, state, explore=True):
        """Select a 2-D policy action for ``state`` and lift it to 3-D.

        Args:
            state: Observation dict with numpy arrays under
                "low_dim_states" and "social_vehicles"
                (schema assumed from usage — TODO confirm against caller).
            explore: When True, add exploration noise to both action
                dimensions.

        Returns:
            The result of ``to_3d_action`` applied to the clipped action.
        """
        # Deep-copy so the caller's observation dict is never mutated.
        state = copy.deepcopy(state)
        state["low_dim_states"] = np.float32(
            np.append(state["low_dim_states"], self.prev_action))
        state["social_vehicles"] = (torch.from_numpy(
            state["social_vehicles"]).unsqueeze(0).to(self.device))
        state["low_dim_states"] = (torch.from_numpy(
            state["low_dim_states"]).unsqueeze(0).to(self.device))

        self.actor.eval()
        # Inference only: disable autograd so no graph is built for this
        # forward pass (the original tracked gradients needlessly).
        with torch.no_grad():
            action = self.actor(state).cpu().numpy().flatten()
        self.actor.train()

        if explore:
            # Sample only when exploring; the original advanced the noise
            # process even in evaluation mode.
            action[0] += self.noise[0].sample()
            action[1] += self.noise[1].sample()

        action_low, action_high = (
            self.action_low.data.cpu().numpy(),
            self.action_high.data.cpu().numpy(),
        )
        # Clip to the valid action box; [0] drops the batch dimension.
        action = np.clip(action, action_low, action_high)[0]

        return to_3d_action(action)
コード例 #2
0
    def act(self, state, explore=True):
        """Select a 2-D policy action for ``state`` and lift it to 3-D.

        Args:
            state: Raw observation passed through
                ``self.state_preprocessor`` before inference.
            explore: When True, add exploration noise to both action
                dimensions.

        Returns:
            The result of ``to_3d_action`` applied to the clipped action.
        """
        self.actor.eval()
        state = self.state_preprocessor(
            state=state,
            normalize=True,
            unsqueeze=True,
            device=self.device,
            social_capacity=self.social_capacity,
            observation_num_lookahead=self.observation_num_lookahead,
            social_vehicle_config=self.social_vehicle_config,
            prev_action=self.prev_action,
        )
        # Inference only: disable autograd so no graph is built for this
        # forward pass (the original tracked gradients needlessly).
        with torch.no_grad():
            action = self.actor(state).cpu().numpy().flatten()
        self.actor.train()

        if explore:
            # Sample only when exploring; the original advanced the noise
            # process even in evaluation mode.
            action[0] += self.noise[0].sample()
            action[1] += self.noise[1].sample()

        action_low, action_high = (
            self.action_low.data.cpu().numpy(),
            self.action_high.data.cpu().numpy(),
        )
        # Clip to the valid action box; [0] drops the batch dimension.
        action = np.clip(action, action_low, action_high)[0]

        return to_3d_action(action)
コード例 #3
0
ファイル: policy.py プロジェクト: huzhejie/SMARTS
    def act(self, state, explore=True):
        """Act with the PPO policy: sample while training, use the
        distribution mean while testing, and return a 3-D action."""
        state = self.state_preprocessor(
            state=state,
            normalize=True,
            unsqueeze=True,
            device=self.device,
            social_capacity=self.social_capacity,
            observation_num_lookahead=self.observation_num_lookahead,
            social_vehicle_config=self.social_vehicle_config,
            prev_action=self.prev_action,
        )

        # Forward pass without building an autograd graph.
        with torch.no_grad():
            dist, value = self.ppo_net(state)

        if not explore:  # testing mode
            # Deterministic: act at the mean of the policy distribution.
            action = torch.squeeze(dist.loc).data.cpu().numpy()
        else:  # training mode
            # Stochastic sample; stash the quantities the PPO update
            # will need later.
            sampled = dist.sample()
            self.current_log_prob = dist.log_prob(sampled)
            self.current_value = value
            action = torch.squeeze(sampled).data.cpu().numpy()

        self.step_count += 1
        return to_3d_action(action)
コード例 #4
0
ファイル: policy.py プロジェクト: qyshen815/SMARTS
    def act(self, state, explore=True):
        """Act with the SAC policy and return a 3-D action.

        Uses the stochastic sample while exploring (training) and the
        distribution mean otherwise (testing).
        """
        # Copy first so the caller's observation dict stays untouched.
        obs = copy.deepcopy(state)
        obs["low_dim_states"] = np.float32(
            np.append(obs["low_dim_states"], self.prev_action))
        # Batch each array (unsqueeze) and move it to the policy device.
        for key in ("social_vehicles", "low_dim_states"):
            obs[key] = torch.from_numpy(obs[key]).unsqueeze(0).to(self.device)

        action, _, mean = self.sac_net.sample(obs)

        chosen = action if explore else mean
        chosen = torch.squeeze(chosen, 0).detach().cpu().numpy()
        return to_3d_action(chosen)
コード例 #5
0
    def act(self, state, explore=True):
        """Act with the SAC policy and return a 3-D action.

        Uses the stochastic sample while exploring (training) and the
        distribution mean otherwise (testing).
        """
        processed = self.state_preprocessor(
            state=state,
            normalize=True,
            unsqueeze=True,
            device=self.device_name,
            social_capacity=self.social_capacity,
            observation_num_lookahead=self.observation_num_lookahead,
            social_vehicle_config=self.social_vehicle_config,
            prev_action=self.prev_action,
        )

        action, _, mean = self.sac_net.sample(processed)

        picked = action if explore else mean
        # Drop the batch dimension and hand back a plain numpy array.
        return to_3d_action(torch.squeeze(picked, 0).detach().cpu().numpy())
コード例 #6
0
    def act(self, state, explore=True):
        """Act with the PPO policy and return a 3-D action.

        Args:
            state: Observation dict with numpy arrays under
                "low_dim_states" and "social_vehicles"
                (schema assumed from usage — TODO confirm against caller).
            explore: When True, sample stochastically and record the
                log-prob and value needed by the PPO update; otherwise
                act at the distribution mean.

        Returns:
            The result of ``to_3d_action`` applied to the chosen action.
        """
        # Shallow-copy so the caller's observation dict is not mutated
        # (the original wrote the tensor conversions back into the
        # caller's dict). A shallow copy suffices because every key
        # below is rebound, never modified in place.
        state = dict(state)
        state["low_dim_states"] = np.float32(
            np.append(state["low_dim_states"], self.prev_action))
        state["social_vehicles"] = (torch.from_numpy(
            state["social_vehicles"]).unsqueeze(0).to(self.device))
        state["low_dim_states"] = (torch.from_numpy(
            state["low_dim_states"]).unsqueeze(0).to(self.device))

        # Forward pass without building an autograd graph.
        with torch.no_grad():
            dist, value = self.ppo_net(x=state)
        if explore:  # training mode
            action = dist.sample()
            # Stash what the PPO update will need later.
            self.current_log_prob = dist.log_prob(action)
            self.current_value = value
            action = torch.squeeze(action).data.cpu().numpy()
        else:  # testing mode
            # Deterministic: act at the mean of the policy distribution.
            action = torch.squeeze(dist.loc).data.cpu().numpy()
        self.step_count += 1
        return to_3d_action(action)