Python LinearEpsilonGreedyExplorer 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: malmopy.agent

hotexamples.com에서의 예제들: 2

Python LinearEpsilonGreedyExplorer - 예제 2개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python malmopy.agent.LinearEpsilonGreedyExplorer의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

LinearEpsilonGreedyExplorer(2)

자주 사용되는 메소드들

LinearEpsilonGreedyExplorer (2)

예제 #1

파일 보기

def _challenge_agent_type(agent):
    """Return the environment agent-type constant for ``agent``'s sub-agent.

    Gives ``PigChaseEnvironment.AGENT_TYPE_1`` when ``agent.current_agent``
    is exactly a ``RandomAgent`` and ``PigChaseEnvironment.AGENT_TYPE_2``
    otherwise.
    """
    # Exact type match (not isinstance) is deliberate: it keeps the original
    # classification, where a RandomAgent subclass counts as AGENT_TYPE_2.
    if type(agent.current_agent) is RandomAgent:
        return PigChaseEnvironment.AGENT_TYPE_1
    return PigChaseEnvironment.AGENT_TYPE_2


def _reset_until_observation(env, *reset_args):
    """Reset ``env`` repeatedly until it yields a non-``None`` observation.

    ``reset_args`` are forwarded unchanged to every ``env.reset`` call.
    """
    obs = env.reset(*reset_args)
    while obs is None:
        # this can happen if the episode ended with the first
        # action of the other agent
        print('Warning: received obs == None.')
        obs = env.reset(*reset_args)
    return obs


def agent_factory(name, role, clients, backend, device, max_epochs, logdir,
                  visualizer):
    """Build the agent for ``role`` and drive its interaction loop.

    For ``role == 0`` a ``PigChaseChallengeAgent`` is run against a symbolic
    state builder in a loop that never terminates on its own. For any other
    role a ``PigChaseQLearnerAgent`` is trained for
    ``EPOCH_SIZE * max_epochs`` steps and the model is saved after every
    epoch.

    Args:
        name: Agent name passed to the agent constructor.
        role: Role index forwarded to ``PigChaseEnvironment``; ``0`` selects
            the challenge agent, anything else the Q-learner.
        clients: Client endpoints; at least two are required. They are
            converted with ``parse_clients_args`` before use.
        backend: ``'cntk'`` selects the CNTK Q-network; any other value
            selects the chainer implementation.
        device: Device identifier forwarded to ``QNeuralNetwork``.
        max_epochs: Number of epochs (of ``EPOCH_SIZE`` steps) to train.
        logdir: Unused in this function; kept for interface compatibility.
        visualizer: Visualizer handed to the learner and to
            ``visualize_training``.

    Raises:
        AssertionError: If fewer than two clients are supplied.
    """
    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    if role == 0:
        builder = PigChaseSymbolicStateBuilder()
        env = PigChaseEnvironment(clients,
                                  builder,
                                  role=role,
                                  randomize_positions=True)
        agent = PigChaseChallengeAgent(name)

        obs = env.reset(_challenge_agent_type(agent))
        reward = 0
        agent_done = False

        # Runs until the hosting process/thread is stopped from outside.
        while True:
            if env.done:
                # The agent type is re-read at every episode boundary and
                # reused for all retries of this reset.
                agent_type = _challenge_agent_type(agent)
                obs = _reset_until_observation(env, agent_type)

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)

    else:
        env = PigChaseEnvironment(clients,
                                  MalmoALEStateBuilder(),
                                  role=role,
                                  randomize_positions=True)
        memory = TemporalMemory(100000, (84, 84))
        input_shape = (memory.history_length, 84, 84)

        # Backend modules are imported lazily so only the selected
        # framework has to be installed.
        if backend == 'cntk':
            from malmopy.model.cntk import QNeuralNetwork
            model = QNeuralNetwork(input_shape, env.available_actions, device)
        else:
            from malmopy.model.chainer import QNeuralNetwork, DQNChain
            chain = DQNChain(input_shape, env.available_actions)
            target_chain = DQNChain(input_shape, env.available_actions)
            model = QNeuralNetwork(chain, target_chain, device)

        # NOTE(review): presumably epsilon goes from 1 to 0.1 over 1e6
        # steps -- confirm against LinearEpsilonGreedyExplorer's signature.
        explorer = LinearEpsilonGreedyExplorer(1, 0.1, 1000000)
        # NOTE(review): 0.99 / 32 / 50000 look like discount factor, batch
        # size and warm-up steps -- confirm against PigChaseQLearnerAgent.
        agent = PigChaseQLearnerAgent(name,
                                      env.available_actions,
                                      model,
                                      memory,
                                      0.99,
                                      32,
                                      50000,
                                      explorer=explorer,
                                      visualizer=visualizer)

        obs = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:
                # Flush the finished episode's rewards before starting over.
                visualize_training(visualizer, step, viz_rewards)
                agent.inject_summaries(step)
                viz_rewards = []

                obs = _reset_until_observation(env)

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)

            # Checkpoint at every epoch boundary; floor division keeps the
            # epoch index an integer on Python 3 as well.
            if step % EPOCH_SIZE == 0:
                model.save('pig_chase-dqn_%d.model' % (step // EPOCH_SIZE))

예제 #2

파일 보기

from common import ENV_AGENT_NAMES
from evaluation import PigChaseEvaluator
from malmopy.agent import TemporalMemory, LinearEpsilonGreedyExplorer
from malmopy.environment.malmo import MalmoALEStateBuilder
from agent import PigChaseChallengeAgent, PigChaseQLearnerAgent
from malmopy.visualization import ConsoleVisualizer
from malmopy.model.chainer import QNeuralNetwork, ReducedDQNChain



if __name__ == '__main__':
    # Evaluation script: builds a Q-learning agent on a reduced DQN chain
    # and runs it through PigChaseEvaluator, saving the results as JSON.
    device = -1  # NOTE(review): presumably selects CPU in chainer -- confirm
    nb_actions = 3
    visualizer = ConsoleVisualizer()

    clients = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
    memory = TemporalMemory(100000, (18, 18))

    # Online and target networks share the same input shape.
    input_shape = (memory.history_length, 18, 18)
    chain = ReducedDQNChain(input_shape, nb_actions)
    target_chain = ReducedDQNChain(input_shape, nb_actions)
    model = QNeuralNetwork(chain, target_chain, device)

    explorer = LinearEpsilonGreedyExplorer(0.6, 0.1, 1000000)
    agent = PigChaseQLearnerAgent(ENV_AGENT_NAMES[1], nb_actions,
                                  model, memory, 0.99, 32, 50000,
                                  explorer=explorer, visualizer=visualizer)

    # Alternative state builder: MalmoALEStateBuilder()
    # NOTE(review): PigChaseTopDownStateBuilder is not among the imports
    # visible above this block -- confirm it is imported elsewhere.
    builder = PigChaseTopDownStateBuilder(True)

    # Named `evaluator` rather than `eval` so the builtin is not shadowed.
    # The same agent instance is passed for both agent arguments.
    evaluator = PigChaseEvaluator(clients, agent, agent, builder)
    evaluator.run()
    evaluator.save('qlearner_exp', 'qlearner_results.json')