def test_launch_visualizer_canceled(tmpdir):
    agent = MockAgent()
    gymlike_env = MagicMock()
    action_meanings = {0: 'hoge', 1: 'fuga'}

    os.makedirs(os.path.join(tmpdir, 'log_space'))

    with change_execution_dir(tmpdir):
        with patch('chainerrl_visualizer.launcher.input', side_effect='n'), \
                patch('chainerrl_visualizer.launcher.inspect_agent') as inspect_agent:
            launch_visualizer(agent, gymlike_env, action_meanings)
            inspect_agent.assert_not_called()
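# These tests rely on a `change_execution_dir` helper. A minimal sketch of
# such a helper, assuming it is a context manager that temporarily switches
# the working directory (hypothetical; the project's actual helper may differ):

import contextlib
import os

@contextlib.contextmanager
def change_execution_dir(dirname):
    # Remember the current directory, chdir into the target, and restore
    # the original directory even if the body raises.
    prev_dir = os.getcwd()
    os.chdir(str(dirname))
    try:
        yield
    finally:
        os.chdir(prev_dir)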
def test_launch_visualizer(tmpdir, outs):
    agent = MockAgent()
    agent.model = MagicMock(side_effect=lambda *args: outs)
    gymlike_env = MagicMock(spec=gym.Env)
    action_meanings = {0: 'hoge', 1: 'fuga'}

    # These assertions check that the mocked instances are called correctly.
    # In the target launcher function, the instances are called from forked
    # processes internally, so `assert_called` and shared values cannot be
    # used. As a workaround, each mock touches a file so the parent process
    # can check whether it was called.
    websrv_called_touch = os.path.join(tmpdir, 'websrv_called.log')
    worker_called_touch = os.path.join(tmpdir, 'worker_called.log')

    def assert_server_called(*args):
        assert len(args) == 12
        assert id(args[0]) == id(agent)
        assert id(args[1]) == id(gymlike_env)
        open(websrv_called_touch, 'w').close()

    web_server = MagicMock(side_effect=assert_server_called)

    def assert_worker_called(*args):
        assert len(args) == 6
        assert id(args[0]) == id(agent)
        assert id(args[1]) == id(gymlike_env)
        open(worker_called_touch, 'w').close()

    job_worker = MagicMock(side_effect=assert_worker_called)

    webbrowser = MagicMock()
    webbrowser.open_new_tab = MagicMock()

    with change_execution_dir(tmpdir):
        with patch('chainerrl_visualizer.launcher.modify_gym_env_render') as modify_gymenv, \
                patch('chainerrl_visualizer.launcher.web_server', web_server), \
                patch('chainerrl_visualizer.launcher.job_worker', job_worker), \
                patch('chainerrl_visualizer.launcher.webbrowser', webbrowser):
            launch_visualizer(agent, gymlike_env, action_meanings)

    modify_gymenv.assert_called_once()
    assert os.path.exists(websrv_called_touch)
    assert os.path.exists(worker_called_touch)
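# Why touch files instead of `assert_called`: `launch_visualizer` invokes the
# patched callables from forked child processes, and a MagicMock mutated in a
# child is not reflected in the parent. A minimal self-contained sketch of the
# same cross-process assertion pattern (all names here are illustrative):

import multiprocessing
import os
import tempfile

def _child(touch_path):
    # The child records that it ran by creating a file on shared disk.
    open(touch_path, 'w').close()

def demo_touch_file_assertion():
    touch_path = os.path.join(tempfile.mkdtemp(), 'child_called.log')
    proc = multiprocessing.Process(target=_child, args=(touch_path,))
    proc.start()
    proc.join()
    # The parent asserts on the file's existence instead of on shared state.
    assert os.path.exists(touch_path)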
buy_sell_num_flag = [
    1.0, 0.0, abs(buy_sell_count)
] if buy_sell_count >= 1 else [0.0, 1.0, abs(buy_sell_count)]

agent.stop_episode_and_train(X_train[-1] + buy_sell_num_flag, reward, True)
# Stop the episode (one pass over the price data from start to end) here.
# Use a small commission fee early in reinforcement learning, then gradually
# increase it in the later stages of training.

print("========Training END============")
print("Pass: " + str(pass_count) + " times")
print("buy_sell_count after training: " + str(buy_sell_count)
      + (" more buy trades than sell trades" if buy_sell_count > 0
         else " more sell trades than buy trades"))
print("Initial MONEY: " + str(first_total_money))
print("FINAL MONEY: " + str(total_money))

# Save the agent to the 'chainerRLAgentFinal-Dense' directory
agent.save('chainerRLAgentFinal-Dense')

'''
ACTION_MEANINGS = {
    0: 'a',
    1: 'b',
    2: 'c',
}

launch_visualizer(
    agent,                  # required
    env,                    # required
    ACTION_MEANINGS,        # required
    port=5002,              # optional (default: 5002)
    log_dir='log_space',    # optional (default: 'log_space')
    raw_image_input=False,  # optional (default: False)
    contains_rnn=False,     # optional (default: False)
)
    target_update_interval=10**4,
    update_interval=4,
    batch_accumulator="mean",
    phi=phi,
)
agent.load("parameters")

ACTION_MEANINGS = {
    0: "NOOP",
    1: "FIRE",
    2: "UP",
    3: "RIGHT",
    4: "LEFT",
    5: "DOWN",
    6: "UPRIGHT",
    7: "UPLEFT",
    8: "DOWNRIGHT",
    9: "DOWNLEFT",
    10: "UPFIRE",
    11: "RIGHTFIRE",
    12: "LEFTFIRE",
    13: "DOWNFIRE",
    14: "UPRIGHTFIRE",
    15: "UPLEFTFIRE",
    16: "DOWNRIGHTFIRE",
    17: "DOWNLEFTFIRE",
}

launch_visualizer(agent, env, ACTION_MEANINGS, raw_image_input=True)
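# `phi` above is the observation preprocessor handed to the agent. For raw
# Atari image input it is commonly a cast-and-scale step like the sketch
# below (an assumption; the actual `phi` is defined earlier in this script):

import numpy as np

def phi(screen):
    # Convert uint8 pixels to float32 in [0, 1] for the network.
    return np.asarray(screen, dtype=np.float32) / 255.0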
action_space = env.action_space

model = A3CFFGaussian(obs_space.low.size, action_space,
                      bound_mean=False, normalize_obs=False)
opt = chainer.optimizers.Adam(alpha=3e-4, eps=1e-5)
opt.setup(model)
agent = PPO(model, opt,
            gpu=-1,
            phi=phi,
            update_interval=2048,
            minibatch_size=64,
            epochs=10,
            clip_eps_vf=None,
            entropy_coef=0.0,
            standardize_advantages=False)
agent.load("parameters")

ACTION_MEANINGS = {
    0: 'Hip1(Torque/Velocity)',
    1: 'Knee1(Torque/Velocity)',
    2: 'Hip2(Torque/Velocity)',
    3: 'Knee2(Torque/Velocity)',
}

launch_visualizer(agent, env, ACTION_MEANINGS)
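# The `env`, `obs_space`, and `phi` referenced above are assumed to be set up
# earlier in the script; the hip/knee action meanings suggest a
# BipedalWalker-style environment. A sketch of that setup (hypothetical):

import gym
import numpy as np

env = gym.make('BipedalWalker-v2')  # assumed environment id
obs_space = env.observation_space

def phi(obs):
    # Cast observations to float32 for the Chainer model.
    return obs.astype(np.float32)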