# Create the base save directory and pick the next free numeric run directory.
Path(save_path).mkdir(exist_ok=True)
run_number = 0
for p in Path(save_path).iterdir():
    if p.is_dir() and p.name.isnumeric():
        if int(p.name) > run_number:
            run_number = int(p.name)
run_number += 1
save_path += '/' + str(run_number)
Path(save_path).mkdir(exist_ok=True)

# Record a human-readable description of this run alongside its checkpoints.
with open(save_path + '/description.txt', mode='w') as f:
    f.write(description)

agent = TD3Agent(env, save_path=save_path, critic_lr=1e-3, actor_lr=1e-3).to(device)

# Reference call with the full hyperparameter set:
# agent.train(env, 100000, train_freq=1, batch_size=100, policy_noise=0.1, policy_noise_clip=.25,
#             gamma=.99, policy_freq=2, tau=0.005, warmup_steps=10000, buffer_size=50000,
#             exp_noise=.1, eval_freq=1, render=True, eval_episodes=1)

# The original call was truncated after policy_noise_clip; the remaining arguments
# below are assumed to mirror the reference call above.
agent.train(env, 500, train_freq=1, batch_size=800, policy_noise=0.1, policy_noise_clip=.25,
            gamma=.99, policy_freq=2, tau=0.005, warmup_steps=10000, buffer_size=50000,
            exp_noise=.1, eval_freq=1, render=True, eval_episodes=1)
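# Hedged sketch (not part of the original script): the run-numbering logic above could
# be factored into a reusable helper; `next_run_dir` is a hypothetical name.
def next_run_dir(base):
    """Create `base` if needed and return a newly created numbered sub-directory inside it."""
    base_path = Path(base)
    base_path.mkdir(exist_ok=True)
    last = max(
        (int(p.name) for p in base_path.iterdir() if p.is_dir() and p.name.isnumeric()),
        default=0,
    )
    run_dir = base_path / str(last + 1)
    run_dir.mkdir(exist_ok=True)
    return run_dir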
# Dump the agent and environment hyperparameters to a plain-text file for reference.
with open(file_path_txt, 'w') as f:
    f.write('agent_params\n')
    for k in agent_params.keys():
        f.write('\t' + k + ' = ' + str(agent_params[k]) + '\n')
    f.write('env_params\n')
    for k in sandbox_env.env_params.keys():
        f.write('\t' + k + ' = ' + str(sandbox_env.env_params[k]) + '\n')

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(sandbox_env.App(always_render=False))

agent = TD3Agent(
    env,
    save_path=agent_params['save_path'],
    critic_lr=agent_params['critic_lr'],
    actor_lr=agent_params['actor_lr']
).to(device)

# Optionally resume from a previously pickled state dict.
if LOAD_AGENT_FROM is not None:
    with open(LOAD_AGENT_FROM, 'rb') as f:
        state_dict = pickle.load(f)
    agent.load_state_dict(state_dict)
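# Hedged sketch (not in the original script): a save counterpart to the pickle-based
# load above. `save_agent_state` is a hypothetical helper; TD3Agent is assumed to be a
# torch.nn.Module, so state_dict() is available alongside load_state_dict().
def save_agent_state(agent, path):
    """Pickle the agent's state dict so a later run can restore it via LOAD_AGENT_FROM."""
    with open(path, 'wb') as f:
        pickle.dump(agent.state_dict(), f)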
# from pyforce.nn import default_network_components
from pyforce.env import wrap_openai_gym  # assumed import; wrap_openai_gym is used below
from pyforce.agents import TD3Agent
# import gym
# from pyforce import agents
import torch

import sandbox_env

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"
# print(device)

# Wrap the sandbox application as a gym-style environment.
env = wrap_openai_gym(sandbox_env.App(always_render=True, verbose=False))

agent = TD3Agent(env, save_path="./evals/td3_new", critic_lr=1e-3, actor_lr=1e-3).to(device)

# Reference call with the full hyperparameter set:
# agent.train(env, 100000, train_freq=1, batch_size=100, policy_noise=0.1, policy_noise_clip=.25,
#             gamma=.99, policy_freq=2, tau=0.005, warmup_steps=10000, buffer_size=50000,
#             exp_noise=.1, eval_freq=1, render=True, eval_episodes=1)

# The original call was truncated after policy_noise; the remaining arguments below are
# assumed to mirror the reference call above.
agent.train(env, 100000, train_freq=1, batch_size=800, policy_noise=0.1, policy_noise_clip=.25,
            gamma=.99, policy_freq=2, tau=0.005, warmup_steps=10000, buffer_size=50000,
            exp_noise=.1, eval_freq=1, render=True, eval_episodes=1)
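# Hedged sketch (not from the original script): the commented-out device selection above,
# made explicit. Uses only standard torch calls and falls back to the CPU when no GPU is found.
def pick_device():
    if torch.cuda.is_available():
        torch.cuda.set_device(0)
        return "cuda:0"
    return "cpu"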