Example #1
def test_launch_visualizer_canceled(tmpdir):
    agent = MockAgent()
    gymlike_env = MagicMock()
    action_meanings = {0: 'hoge', 1: 'fuga'}
    os.makedirs(os.path.join(tmpdir, 'log_space'))
    with change_execution_dir(tmpdir):
        with patch('chainerrl_visualizer.launcher.input', side_effect='n'), \
                patch('chainerrl_visualizer.launcher.inspect_agent') as inspect_agent:
            launch_visualizer(agent, gymlike_env, action_meanings)
            inspect_agent.assert_not_called()
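Both this test and the next wrap the launcher call in a `change_execution_dir` helper that is not defined in the snippet; a minimal sketch, assuming it is a context manager that temporarily switches the working directory:

import os
from contextlib import contextmanager

@contextmanager
def change_execution_dir(dirpath):
    # Temporarily make `dirpath` the working directory for the enclosed block
    original = os.getcwd()
    os.chdir(str(dirpath))
    try:
        yield
    finally:
        os.chdir(original)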
Example #2
def test_launch_visualizer(tmpdir, outs):
    agent = MockAgent()
    agent.model = MagicMock(side_effect=lambda *args: outs)
    gymlike_env = MagicMock(spec=gym.Env)
    action_meanings = {0: 'hoge', 1: 'fuga'}

    # These assertions check that the patched instances are called correctly.
    # In the target launcher function, the instances are called from a forked
    # process, so `assert_called` and shared values cannot be used. Instead,
    # each one creates a touch file to record that it was called.
    websrv_called_touch = os.path.join(tmpdir, 'websrv_called.log')
    worker_called_touch = os.path.join(tmpdir, 'worker_called.log')

    def assert_server_called(*args):
        assert len(args) == 12
        assert args[0] is agent
        assert args[1] is gymlike_env
        open(websrv_called_touch, 'w').close()

    web_server = MagicMock(side_effect=assert_server_called)

    def assert_worker_called(*args):
        assert len(args) == 6
        assert args[0] is agent
        assert args[1] is gymlike_env
        open(worker_called_touch, 'w').close()

    job_worker = MagicMock(side_effect=assert_worker_called)

    webbrowser = MagicMock()
    webbrowser.open_new_tab = MagicMock()

    with change_execution_dir(tmpdir):
        with patch('chainerrl_visualizer.launcher.modify_gym_env_render') as modify_gymenv, \
                patch('chainerrl_visualizer.launcher.web_server', web_server), \
                patch('chainerrl_visualizer.launcher.job_worker', job_worker), \
                patch('chainerrl_visualizer.launcher.webbrowser', webbrowser):
            launch_visualizer(agent, gymlike_env, action_meanings)
            modify_gymenv.assert_called_once()
            assert os.path.exists(websrv_called_touch)
            assert os.path.exists(worker_called_touch)
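The touch-file trick works because `web_server` and `job_worker` run in forked processes, where a `MagicMock` created in the parent cannot record the call; only the filesystem is shared. A minimal standalone sketch of the same pattern (all names here are illustrative):

import os
from multiprocessing import Process

def child(touch_path):
    # A forked child cannot mutate the parent's mocks, so it leaves
    # a file on disk as evidence that it actually ran
    open(touch_path, 'w').close()

def test_child_ran(tmpdir):
    touch = os.path.join(str(tmpdir), 'child_called.log')
    p = Process(target=child, args=(touch,))
    p.start()
    p.join()
    assert os.path.exists(touch)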
Example #3
    if buy_sell_count >= 1:
        buy_sell_num_flag = [1.0, 0.0, abs(buy_sell_count)]
    else:
        buy_sell_num_flag = [0.0, 1.0, abs(buy_sell_count)]
    agent.stop_episode_and_train(X_train[-1] + buy_sell_num_flag, reward,
                                 True)  # end the episode (one pass over the price data from start to finish)
    # Early in reinforcement learning a small commission fee is used; it is
    # increased gradually in later training (see the sketch below)
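The commission comment above only states the idea; a minimal sketch of such a schedule, where `episode`, `n_episodes`, and `base_fee` are hypothetical names not taken from the original:

def commission(episode, n_episodes, base_fee=0.0005):
    # Hypothetical linear schedule: near-zero fee early in training,
    # ramping up to the full base fee by the final episode
    return base_fee * min(1.0, episode / float(n_episodes))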

print("========Training END============")
print("Passは" + str(pass_count) + "回")
print("終わった後のbuy_sell_count:" + str(buy_sell_count) +
      ("回買いの取引が多い" if buy_sell_count > 0 else "回売りの取引が多い"))
print("Initial MONEY" + str(first_total_money))
print("FINAL MONEY:" + str(total_money))
# Save an agent to the 'agent' directory
agent.save('chainerRLAgentFinal-Dense')
# Launch ChainerRL Visualizer for the trained agent:
ACTION_MEANINGS = {
    0: 'a',
    1: 'b',
    2: 'c'
}

launch_visualizer(
    agent,  # required
    env,  # required
    ACTION_MEANINGS,  # required
    port=5002,  # optional (default: 5002)
    log_dir='log_space',  # optional (default: 'log_space')
    raw_image_input=False,  # optional (default: False)
    contains_rnn=False,  # optional (default: False)
)
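As Example #1 suggests, `log_dir` is resolved relative to the current working directory, and launching while that directory already exists appears to trigger a confirmation prompt (answering 'n' there aborts before the agent is inspected). A minimal sketch of starting from a clean log directory, illustrative rather than part of the library:

import os
import shutil

# Remove any stale log directory so the launcher can start fresh
if os.path.exists('log_space'):
    shutil.rmtree('log_space')

launch_visualizer(agent, env, ACTION_MEANINGS, log_dir='log_space')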
Example #4
    target_update_interval=10**4,
    update_interval=4,
    batch_accumulator="mean",
    phi=phi,
)

agent.load("parameters")

ACTION_MEANINGS = {
    0: "NOOP",
    1: "FIRE",
    2: "UP",
    3: "RIGHT",
    4: "LEFT",
    5: "DOWN",
    6: "UPRIGHT",
    7: "UPLEFT",
    8: "DOWNRIGHT",
    9: "DOWNLEFT",
    10: "UPFIRE",
    11: "RIGHTFIRE",
    12: "LEFTFIRE",
    13: "DOWNFIRE",
    14: "UPRIGHTFIRE",
    15: "UPLEFTFIRE",
    16: "DOWNRIGHTFIRE",
    17: "DOWNLEFTFIRE",
}

launch_visualizer(agent, env, ACTION_MEANINGS, raw_image_input=True)
Example #5
action_space = env.action_space

model = A3CFFGaussian(obs_space.low.size,
                      action_space,
                      bound_mean=False,
                      normalize_obs=False)
opt = chainer.optimizers.Adam(alpha=3e-4, eps=1e-5)
opt.setup(model)

agent = PPO(model,
            opt,
            gpu=-1,
            phi=phi,
            update_interval=2048,
            minibatch_size=64,
            epochs=10,
            clip_eps_vf=None,
            entropy_coef=0.0,
            standardize_advantages=False)

agent.load("parameters")

ACTION_MEANINGS = {
    0: 'Hip1(Torque/Velocity)',
    1: 'Knee1(Torque/Velocity)',
    2: 'Hip2(Torque/Velocity)',
    3: 'Knee2(Torque/Velocity)',
}

launch_visualizer(agent, env, ACTION_MEANINGS)
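This snippet assumes `env`, `obs_space`, and `phi` were created earlier; given the hip/knee action meanings, a plausible reconstruction (an assumption, the original setup is not shown):

import gym
import numpy as np

env = gym.make('BipedalWalker-v2')  # assumed from the hip/knee action names
obs_space = env.observation_space

def phi(obs):
    # Cast observations to float32, the dtype Chainer models expect
    return np.asarray(obs, dtype=np.float32)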