Example #1
0
def update():
    """Train two agents with DDPG or MADDPG on the ``avs`` environment.

    Reads module-level configuration (ALGORITHM, RETRAIN, VAR, MIN_VAR,
    DECAY, MAX_EPISODES, MAX_EP_STEPS, IMITATION_EPISODE, MEMORY_CAPACITY,
    RENDER, DEBUG) and the environment object ``avs``.  Early episodes (and
    every 4th episode afterwards) use the ``imitation`` policy to seed the
    replay buffer; otherwise actions come from the learned policy plus
    Gaussian exploration noise that decays over time.
    """
    if ALGORITHM == 'maddpg':
        ddpg = MADDPG(avs.n_actions, avs.n_features, 1, 'maddpg model',
                      RETRAIN)
    else:
        # Fall back to plain DDPG for any other value.  (The original code
        # had an explicit 'ddpg' branch and a default branch that were
        # byte-identical; they are merged here.)
        ddpg = DDPG(avs.n_actions, avs.n_features, 1, 'ddpg model', RETRAIN)
    t1 = time.time()
    rewards1 = 0          # running reward sums, reset every 100 episodes
    rewards2 = 0
    var = VAR             # stddev of exploration noise; decayed below
    collision = 0         # collisions in the current 100-episode window
    avgreward1 = []       # per-100-episode average rewards (agent 1)
    avgreward2 = []       # per-100-episode average rewards (agent 2)
    collision_percentage = []
    for i in range(MAX_EPISODES):
        s1, s2 = avs.reset()
        ep_reward1 = 0
        ep_reward2 = 0
        # Periodically plot training curves once past the imitation phase.
        if i % 100000 == 0 and i > IMITATION_EPISODE:
            plot(avgreward1, avgreward2, collision_percentage, i)
        for j in range(MAX_EP_STEPS):
            if RENDER:
                avs.render()

            # Action selection: imitation policy during warm-up and on every
            # 4th episode afterwards; otherwise learned policy + exploration
            # noise clipped to the valid action range [-1, 1].
            if i < IMITATION_EPISODE or i % 4 == 0:
                a1 = imitation(avs.agent1, avs.agent2, avs.target1)
                a2 = imitation(avs.agent2, avs.agent1, avs.target2)
            else:
                a1 = ddpg.choose_action(s1)
                a1 = [
                    np.clip(np.random.normal(a1[0], var), -1, 1),
                    np.clip(np.random.normal(a1[1], var), -1, 1)
                ]
                a2 = ddpg.choose_action(s2)
                a2 = [
                    np.clip(np.random.normal(a2[0], var), -1, 1),
                    np.clip(np.random.normal(a2[1], var), -1, 1)
                ]

            if DEBUG:
                time.sleep(0.1)
            s_1, r1, s_2, r2, done, info = avs.step(a1, a2)
            # DDPG stores per-agent transitions; MADDPG stores joint
            # transitions (both observations/actions) from each agent's view.
            if ALGORITHM == 'ddpg':
                ddpg.store_transition(s1, a1, r1, s_1)
                ddpg.store_transition(s2, a2, r2, s_2)
            else:
                ddpg.store_transition(s1, s2, a1, a2, r1, s_1, s_2)
                ddpg.store_transition(s2, s1, a2, a1, r2, s_2, s_1)

            s1 = s_1
            s2 = s_2
            ep_reward1 += r1
            ep_reward2 += r2

            if j == MAX_EP_STEPS - 1 or done:
                print("pt:", ddpg.pointer)
                print('Episode:', i,
                      'Step:', j, ' Reward: %i' % int(ep_reward1),
                      int(ep_reward2), 'Explore: %.2f' % var)

                if i >= IMITATION_EPISODE:
                    rewards1 += ep_reward1
                    rewards2 += ep_reward2
                    # NOTE(review): collision is detected from agent 1's
                    # final-step reward only — confirm agent 2 collisions
                    # always produce r1 < -100 as well.
                    if r1 < -100:
                        collision += 1
                    if (i + 1) % 100 == 0:
                        avgreward1.append(rewards1 / 100)
                        avgreward2.append(rewards2 / 100)
                        collision_percentage.append(collision)
                        rewards1 = 0
                        rewards2 = 0
                        collision = 0
                break
        # Two gradient steps per episode once the replay buffer is full;
        # exploration noise decays only after the imitation phase.
        if ddpg.pointer > MEMORY_CAPACITY:
            ddpg.learn()
            ddpg.learn()
            if var > MIN_VAR and i > IMITATION_EPISODE:
                var *= DECAY  # decay the action randomness
        # Checkpoint only on non-imitation episodes where both agents
        # scored well.
        if i % 4 != 0 and ep_reward1 > 100 and ep_reward2 > 100 and i > IMITATION_EPISODE:
            ddpg.save(i)
    print('Running time: ', time.time() - t1)
Example #2
0
File: main.py  Project: fb1n15/maddpg
    # Fragment of a MADDPG training/evaluation loop (enclosing function's
    # header is outside this chunk).  Set evaluate=True to load a saved
    # checkpoint and render instead of training.
    evaluate = False
    best_score = 0

    if evaluate:
        maddpg_agents.load_checkpoint()

    for i in range(N_GAMES):
        obs = env.reset()  # list of per-agent observations
        score = 0
        done = [False] * n_agents
        episode_step = 0
        # Episode ends as soon as ANY agent reports done (or step cap below).
        while not any(done):
            if evaluate:
                env.render()
                #time.sleep(0.1) # to slow down the action for the video
            actions = maddpg_agents.choose_action(obs)
            obs_, reward, done, info = env.step(actions)

            # Flatten the per-agent observations into a single global state
            # vector for the centralized critics.
            state = obs_list_to_state_vector(obs)
            state_ = obs_list_to_state_vector(obs_)

            # Force-terminate the episode at the step cap.
            if episode_step >= MAX_STEPS:
                done = [True] * n_agents

            # NOTE(review): the transition is stored even when forced done —
            # confirm the learner treats time-limit termination correctly.
            memory.store_transition(obs, state, actions, reward, obs_, state_,
                                    done)

            # Learn every 100 environment steps when training.
            if total_steps % 100 == 0 and not evaluate:
                maddpg_agents.learn(memory)

            obs = obs_