Example #1
def make_env(env_id, rank, seed=0):
    def _init():
        env = gym.make(env_id)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init


if __name__ == "__main__":
    envs = SubprocVecEnv([make_env(env_name, i) for i in range(num_envs)])
    env = gym.make(env_name)

    num_inputs = envs.observation_space.shape
    num_outputs = envs.action_space.shape

    model = ActorCritic(num_inputs[0], num_outputs[0]).to(device)
    if os.path.isfile(modelpath):
        model.load_state_dict(torch.load(modelpath))

    ppo = PPO(model=model,
              envs=envs,
              device=device,
              lr=lr,
              modelpath=modelpath)
    if not play_mode:
        ppo.ppo_train(num_steps,
                      mini_batch_size,
                      ppo_epochs,
                      max_frames,
                      max_pol_updates,
                      save_interval)
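
For reference, the snippet above relies on the factory/vectorized-env pattern: each make_env(env_id, rank) call returns a thunk, and SubprocVecEnv runs one copy of the environment per worker process, batching reset() and step() across workers. Below is a minimal, self-contained sketch of that behavior; the CartPole-v1 id, the worker count, and the stable-baselines-style import paths are illustrative assumptions, not part of the original snippet.

import gym
import numpy as np
# assumed stable-baselines (v2) helpers; the original may import these from elsewhere
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env import SubprocVecEnv


def make_env(env_id, rank, seed=0):
    # Factory returning a thunk so each worker process builds its own env instance.
    def _init():
        env = gym.make(env_id)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init


if __name__ == "__main__":
    env_name, num_envs = "CartPole-v1", 4   # illustrative values only

    envs = SubprocVecEnv([make_env(env_name, i) for i in range(num_envs)])

    obs = envs.reset()                       # batched: (num_envs, *obs_shape)
    for _ in range(8):
        # Random actions just to show the batched step interface;
        # a PPO training loop would query the policy here instead.
        actions = np.array([envs.action_space.sample() for _ in range(num_envs)])
        obs, rewards, dones, infos = envs.step(actions)
    envs.close()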
Example #2
class Agent:
    def __init__(self, cell_nb, lr=4e-3, nb_blocks=5, gamma=0.99):
        self.cell_nb = cell_nb
        self.gamma = gamma
        self.ActorCritic = ActorCritic(lr, cell_nb**2, nb_blocks)
        self.log_probs = None

    def choose_action(self, state):  # here state is simply the current f_map
        state_tensor = torch.tensor([state], dtype=torch.float).to(
            self.ActorCritic.device)

        (mu, sigma), _ = self.ActorCritic.forward(state_tensor)

        actions = np.zeros((self.cell_nb, self.cell_nb))
        log_probs = []
        for ir, (mu_r, sig_r) in enumerate(zip(mu, sigma)):
            for ic, (mu_c, sig_c) in enumerate(zip(mu_r, sig_r)):
                # mu_c and sig_c are the mu and (log-)sigma parameters of the
                # gaussian distribution for the current cell
                sig_c = torch.exp(sig_c)
                dist = torch.distributions.Normal(mu_c, sig_c)
                action = dist.sample()
                log_prob = dist.log_prob(action)
                # bound the normalized transmit power between 0 and 1
                actions[ir, ic] = torch.sigmoid(action).item()
                # keep the log prob as a tensor for the actor loss later
                log_probs.append(log_prob)
        self.log_probs = torch.stack(log_probs)  # flat tensor of per-cell log probs
        return actions

    def learn(self, episode):

        self.ActorCritic.optimizer.zero_grad()

        #s is the state, in the most simple case it is the f_map
        f_map = torch.tensor(episode["s"]).to(
            self.ActorCritic.device)  # current f_map
        r = torch.tensor(episode["r"]).to(
            self.ActorCritic.device
        )  # the embedded objective function (sum-rate, capacity, SINR...)
        d = torch.tensor(episode["d"]).to(
            self.ActorCritic.device)  # done flag, not strictly necessary
        f_map_ = torch.tensor(episode["s_"]).to(
            self.ActorCritic.device)  # new f_map
        # log_probs kept as a tensor by choose_action so the actor loss below
        # can backpropagate into the policy parameters
        lg_p = self.log_probs

        #get critic values for current and next state
        _, val = self.ActorCritic.forward(f_map)
        _, val_ = self.ActorCritic.forward(f_map_)

        #set the values for the next state to 0 if done
        val_[d] = 0.0

        #compute the delta
        delta = r + self.gamma * val_ - val

        # delta serves as the advantage estimate; detach it in the actor loss so
        # the policy gradient does not also flow through the critic values
        actor_loss = -torch.mean(lg_p.flatten() * delta.detach())
        critic_loss = torch.mean(delta**2)

        (actor_loss + critic_loss).backward()
        self.ActorCritic.optimizer.step()

    def compute_loss(self, gains, policy):

        # negative sum of per-device rate terms (higher sum-rate -> lower loss)
        ps = np.array([d.getPowerFromPolicy(policy) for d in self.S.dList()])
        H = gains
        rate = [
            np.log(1 + (H[i, i]**2 * p_ /
                        sum([H[i, j]**2 * p for j, p in enumerate(ps)])))
            for i, p_ in enumerate(ps)
        ]
        return -np.sum(rate)
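
Below is a hedged sketch of how the Agent above might be driven step by step. PowerControlEnv is a hypothetical stand-in for whatever environment produces the f_map observations (only a gym-style reset()/step() interface is assumed), and the dict keys match exactly what learn() indexes.

# Hypothetical driver loop for the Agent defined above; PowerControlEnv is a
# placeholder name, not part of the original code.
agent = Agent(cell_nb=4)
env = PowerControlEnv(cell_nb=4)

for episode_idx in range(1000):
    f_map = env.reset()
    done = False
    while not done:
        actions = agent.choose_action(f_map)   # (cell_nb, cell_nb) powers in [0, 1]
        f_map_, reward, done, _ = env.step(actions)
        # learn() reads the keys "s", "r", "d" and "s_" from this dict
        agent.learn({"s": f_map, "r": reward, "d": done, "s_": f_map_})
        f_map = f_map_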
Example #3
    def __init__(self, cell_nb, lr=4e-3, nb_blocks=5, gamma=0.99):
        self.cell_nb = cell_nb
        self.gamma = gamma
        self.ActorCritic = ActorCritic(lr, cell_nb**2, nb_blocks)
        self.log_probs = None
Example #4
def make_env(env_id, rank, seed=0):
    def _init():
        env = gym.make(env_id)
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init


if __name__ == "__main__":

    envs = SubprocVecEnv([make_env(env_name, i) for i in range(num_envs)])
    env = gym.make(env_name)

    img_size = envs.observation_space[0].shape
    sensor_size = envs.observation_space[1].shape
    num_outputs = envs.action_space.shape

    model = ActorCritic([img_size[1], img_size[0]], sensor_size[0],
                        num_outputs[0]).to(device)
    if args.onnx_converter and os.path.isfile(modelpath):
        model.load_state_dict(torch.load(modelpath))

        model.export("gvsets_early_fusion.onnx")
        exit(1)

    if os.path.isfile(modelpath):
        model.load_state_dict(torch.load(modelpath))

    ppo = PPO(model=model,
              envs=envs,
              device=device,
              lr=lr,
              modelpath=modelpath,
              tuple_ob=True)
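
The model.export(...) call in the snippet above is a project-specific helper; such helpers usually wrap torch.onnx.export with dummy inputs shaped like the two observation branches. Below is a rough sketch under that assumption; the channel count, the input/output names, and the two-tensor forward signature are guesses for illustration, not taken from the original code.

import torch

def export_onnx(model, img_size, sensor_size, device, path="gvsets_early_fusion.onnx"):
    # Dummy batch of one: an image-like tensor and a flat sensor vector.
    # 3 channels is an assumption; adjust to the real observation layout.
    dummy_img = torch.zeros(1, 3, img_size[0], img_size[1], device=device)
    dummy_sensor = torch.zeros(1, sensor_size, device=device)
    model.eval()
    torch.onnx.export(model,
                      (dummy_img, dummy_sensor),   # assumes forward(img, sensor)
                      path,
                      input_names=["image", "sensor"],
                      output_names=["output"],
                      opset_version=11)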