return 0
    elif action == 1:
        return 2
    elif action == 2:
        return 3
    elif action == 3:
        return 3


if __name__ == "__main__":
    env = gym.make('Breakout-ramDeterministic-v4')
    env = wrappers.Monitor(env, "./results", force=True)
    state_size = 128
    action_size = 3

    agent = DoubleDQNAgent(state_size, action_size)
    agent.load_model()
    agent.epsilon = -1  # Q value에 의해서만 움직이게끔.
    agent.render = True
    scores, episodes = [], []
    random_success_cnt = 0
    model_success_cnt = 0
    # 랜덤액션 진행시
    for e in range(EPISODES):
        done = False
        score = 0
        life = 5
        env.reset()
        for i in range(5):
            env.step(1)  # 시작 action.
Beispiel #2
0
def change_action(action):
    """Map the agent's action index (0, 1, 2) to the Breakout env action (0, 2, 3).

    Any other value falls through and yields None, matching the original
    if/elif chain with no final else.
    """
    mapping = {0: 0, 1: 2, 2: 3}
    return mapping.get(action)


if __name__ == "__main__":
    env = gym.make(
        'Breakout-ramDeterministic-v4')  #자동으로 frameskip해주는 더 간단한 env.
    state_size = 128
    action_size = 3  # start action 제외.

    agent = DoubleDQNAgent(state_size, action_size)
    agent.load_model()  #@@@@@@@@@@@@@@@@@@@@모델 로드하기.
    #agent.render = True
    scores, episodes, avg_q_max_record = [], [], []

    for e in range(EPISODES):
        done = False
        score = 0
        epi_step = 0
        life = 5
        agent.avg_q_max = 0
        env.reset()
        for i in range(5):  # 시작 action 1을 한번만 실행해주면 자주 무시되는듯함. 정확한 이유 모름.
            state, _, _, _ = env.step(1)  # 시작 action.
        state = np.reshape(state,
                           [1, 128])  # (1,128) => model의 메소드에는 이 형식으로 넣어야함.
# Translate model actions 0, 1, 2 into env actions 0, 2, 3.
def change_action(action):
    """Translate a model action index (0-2) into the Breakout env action (0, 2, 3).

    Indices outside 0-2 produce None, exactly like the original
    if/elif chain without a trailing else.
    """
    env_actions = (0, 2, 3)
    if action in (0, 1, 2):
        return env_actions[action]


if __name__ == "__main__":
    env = gym.make('Breakout-ram-v0')
    state_size = 128
    action_size = 3  # start action 제외.

    agent = DoubleDQNAgent(state_size, action_size)
    #agent.load_model() #@@@@@@@@@@@@@@@@@@@@모델 로드하기.
    scores, episodes, avg_q_max_record = [], [], []

    for e in range(EPISODES):
        done = False
        score = 0
        epi_step = 0
        life = 5
        agent.avg_q_max = 0
        env.reset()
        for i in range(5):  # 시작 action 1을 한번만 실행해주면 자주 무시되는듯함. 정확한 이유 모름.
            state, _, _, _ = env.step(1)  # 시작 action.
        state = np.reshape(state,
                           [1, 128])  # (1,128) => model의 메소드에는 이 형식으로 넣어야함.