# Persist the run parameters: a pickle for programmatic reload plus a
# human-readable .txt copy.
file_path = agent_params['save_path'] + '/params'
with open(file_path, 'wb') as f:
    pickle.dump(parm_list, f)

file_path_txt = file_path + '.txt'
with open(file_path_txt, 'w') as f:
    f.write('agent_params\n')
    for k in agent_params.keys():
        f.write('\t' + k + ' = ' + str(agent_params[k]) + '\n')
    f.write('env_params\n')
    for k in sandbox_env.env_params.keys():
        f.write('\t' + k + ' = ' + str(sandbox_env.env_params[k]) + '\n')

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(sandbox_env.App(always_render=False))
agent = TD3Agent(env,
                 save_path=agent_params['save_path'],
                 critic_lr=agent_params['critic_lr'],
                 actor_lr=agent_params['actor_lr']).to(device)

# Optionally resume from a previously pickled state dict.
if LOAD_AGENT_FROM is not None:
    with open(LOAD_AGENT_FROM, 'rb') as f:
        state_dict = pickle.load(f)
    agent.load_state_dict(state_dict)
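# parm_list itself is assembled outside this excerpt. The eval scripts
# below unpack it positionally (parm_list[0] -> agent_params,
# parm_list[1] -> env_params), so the save side presumably builds it as a
# two-element list in exactly that order -- a sketch under that assumption:
# parm_list = [agent_params, sandbox_env.env_params]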
from pyforce.env import wrap_openai_gym
from pyforce.nn import default_network_components
from pyforce.agents import A2CAgent
import gym
import torch

device = "cuda:0" if torch.cuda.is_available() else "cpu"

env = wrap_openai_gym(gym.make("LunarLanderContinuous-v2"))
observation_processor, hidden_layers, action_mapper = default_network_components(env)

agent = A2CAgent(observation_processor,
                 hidden_layers,
                 action_mapper,
                 save_path="./evals/a2c_example",
                 value_lr=1e-3,
                 policy_lr=1e-3).to(device)

agent.train(env,
            episodes=1000,
            train_freq=256,
            eval_freq=50,
            render=True,
            gamma=.99,
            entropy_coef=.01)
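# The same component trio drops into pyforce's other agents. For instance,
# a PPO agent with the constructor signature used by the training scripts
# in this repo (the save_path below is only an illustrative value):
from pyforce.agents import PPOAgent

agent = PPOAgent(observation_processor,
                 hidden_layers,
                 action_mapper,
                 save_path="./evals/ppo_example",
                 value_lr=1e-3,
                 policy_lr=1e-3).to(device)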
file_path = agent_params['save_path'] + '/params'
with open(file_path, 'wb') as f:
    pickle.dump(parm_list, f)

file_path_txt = file_path + '.txt'
with open(file_path_txt, 'w') as f:
    f.write('agent_params\n')
    for k in agent_params.keys():
        f.write('\t' + k + ' = ' + str(agent_params[k]) + '\n')
    f.write('env_params\n')
    for k in gauss_env.env_params.keys():
        f.write('\t' + k + ' = ' + str(gauss_env.env_params[k]) + '\n')

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(gauss_env.App(always_render=False, verbose=False))
observation_processor, hidden_layers, action_mapper = default_network_components(env)
agent = PPOAgent(observation_processor,
                 hidden_layers,
                 action_mapper,
                 save_path=agent_params['save_path'],
                 value_lr=agent_params['value_lr'],
                 policy_lr=agent_params['policy_lr']).to(device)

if LOAD_AGENT_FROM is not None:
    with open(LOAD_AGENT_FROM, 'rb') as f:
        state_dict = pickle.load(f)
    agent.load_state_dict(state_dict)
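# The eval scripts below read a pickled state dict from '<run_dir>/agent'.
# The save side is not shown in these excerpts; a minimal sketch of what it
# presumably looks like (the helper name save_agent is hypothetical):
def save_agent(agent, save_path):
    """Pickle the agent's torch state dict to the file the eval scripts load."""
    with open(save_path + '/agent', 'wb') as f:
        pickle.dump(agent.state_dict(), f)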
LOAD_PATH = './evals/ppo_gauss/4/'

file_path = LOAD_PATH + '/params'
with open(file_path, 'rb') as f:
    parm_list = pickle.load(f)
agent_params = parm_list[0]
env_params = parm_list[1]
gauss_env.N_BOTS = env_params['N_BOTS']

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(gauss_env.App(always_render=True, plot_reward=False, verbose=False))
observation_processor, hidden_layers, action_mapper = default_network_components(env)
agent = PPOAgent(observation_processor,
                 hidden_layers,
                 action_mapper,
                 save_path=agent_params['save_path'],
                 value_lr=agent_params['value_lr'],
                 policy_lr=agent_params['policy_lr']).to(device)

file_path = LOAD_PATH + '/agent'
# file_path = LOAD_PATH + '/agent4903861.0'
with open(file_path, 'rb') as f:
    state_dict = pickle.load(f)
agent.load_state_dict(state_dict)
# LOAD_PATH = './evals/preliminary_minimal/reward_function/distance-True/2/'

file_path = LOAD_PATH + '/params'
with open(file_path, 'rb') as f:
    parm_list = pickle.load(f)
agent_params = parm_list[0]
env_params = parm_list[1]
sandbox_env.N_BOTS = env_params['N_BOTS']

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(sandbox_env.App(traj_savepath=LOAD_PATH))
if PPO:
    observation_processor, hidden_layers, action_mapper = default_network_components(env)
    agent = PPOAgent(observation_processor,
                     hidden_layers,
                     action_mapper,
                     save_path=agent_params['save_path'],
                     value_lr=agent_params['value_lr'],
                     policy_lr=agent_params['policy_lr']).to(device)
else:
    agent = TD3Agent(env,
                     save_path=agent_params['save_path'],
                     critic_lr=agent_params['critic_lr'],
                     actor_lr=agent_params['actor_lr']).to(device)
import torch
from pathlib import Path

from pyforce.env import wrap_openai_gym
import minimal_v4 as envi
# import minimal_v1
# from pyforce import agents

save_path = "./evals/td3_example"
description = ('Minimal 4: environment spanning (-100,-100) to (100,100); '
               'observation space: own coordinates and target position; '
               'start position (0,0); random target position')
# description = 'minimal_v4'

device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
# device = "cpu"
# print(device)

# env = wrap_openai_gym(minimal_v1.App())
env = wrap_openai_gym(envi.App())

# Number this run: use the next integer after the highest existing run directory.
Path(save_path).mkdir(exist_ok=True)
run_number = 0
for p in Path(save_path).iterdir():
    if p.is_dir() and p.name.isnumeric():
        if int(p.name) > run_number:
            run_number = int(p.name)
run_number += 1
save_path += ('/' + str(run_number))
Path(save_path).mkdir(exist_ok=True)

with open(save_path + '/description.txt', mode='w') as f:
    f.write(description)
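# The run-numbering block above recurs in each training script. A sketch of
# a reusable helper using only pathlib (the name next_run_dir is mine, not
# the repo's):
def next_run_dir(base):
    """Create and return base/<n+1>, where n is the highest numeric subdir."""
    base_path = Path(base)
    base_path.mkdir(exist_ok=True)
    last = max((int(p.name) for p in base_path.iterdir()
                if p.is_dir() and p.name.isnumeric()), default=0)
    run_dir = base_path / str(last + 1)
    run_dir.mkdir(exist_ok=True)
    return str(run_dir)

# save_path = next_run_dir("./evals/td3_example")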
LOAD_PATH = './evals/ppo_pycking/4/'

file_path = LOAD_PATH + '/params'
with open(file_path, 'rb') as f:
    parm_list = pickle.load(f)
agent_params = parm_list[0]
env_params = parm_list[1]
pycking_env3.N_BOTS = env_params['N_BOTS']

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(pycking_env3.App(always_render=True, verbose=False))
observation_processor, hidden_layers, action_mapper = default_network_components(env)
agent = PPOAgent(observation_processor,
                 hidden_layers,
                 action_mapper,
                 save_path=agent_params['save_path'],
                 value_lr=agent_params['value_lr'],
                 policy_lr=agent_params['policy_lr']).to(device)

file_path = LOAD_PATH + '/agent'
with open(file_path, 'rb') as f:
    state_dict = pickle.load(f)
agent.load_state_dict(state_dict)
LOAD_PATH = './evals/ppo_sandbox/21'

file_path = LOAD_PATH + '/params'
with open(file_path, 'rb') as f:
    parm_list = pickle.load(f)
agent_params = parm_list[0]
env_params = parm_list[1]
sandbox_env.N_BOTS = env_params['N_BOTS']

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(sandbox_env.App(always_render=True, verbose=False))
if PPO:
    observation_processor, hidden_layers, action_mapper = default_network_components(env)
    agent = PPOAgent(observation_processor,
                     hidden_layers,
                     action_mapper,
                     save_path=agent_params['save_path'],
                     value_lr=agent_params['value_lr'],
                     policy_lr=agent_params['policy_lr']).to(device)
else:
    agent = TD3Agent(env,
                     save_path=agent_params['save_path'],
                     critic_lr=agent_params['critic_lr'],
                     actor_lr=agent_params['actor_lr']).to(device)
file_path = agent_params['save_path'] + '/params'
with open(file_path, 'wb') as f:
    pickle.dump(parm_list, f)

file_path_txt = file_path + '.txt'
with open(file_path_txt, 'w') as f:
    f.write('agent_params\n')
    for k in agent_params.keys():
        f.write('\t' + k + ' = ' + str(agent_params[k]) + '\n')
    f.write('env_params\n')
    for k in preliminary_env.env_params.keys():
        f.write('\t' + k + ' = ' + str(preliminary_env.env_params[k]) + '\n')

device = "cpu"

env = wrap_openai_gym(preliminary_env.App(always_render=False, verbose=False))
# env = wrap_openai_gym(minimal_preliminary.App())

if ag == 'ppo':
    observation_processor, hidden_layers, action_mapper = default_network_components(env)
    agent = PPOAgent(observation_processor,
                     hidden_layers,
                     action_mapper,
                     save_path=agent_params['save_path'],
                     value_lr=agent_params['value_lr'],
                     policy_lr=agent_params['policy_lr']).to(device)
    # The excerpt's train call is cut off after train_freq; any remaining
    # kwargs presumably mirror the A2C example (eval_freq, gamma, ...).
    agent.train(env,
                episodes=agent_params['episodes'],
                train_freq=agent_params['train_freq'])
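# The params-to-.txt dump above appears verbatim in several training
# scripts; a sketch consolidating it (the function name dump_params_txt is
# hypothetical):
def dump_params_txt(path, agent_params, env_params):
    """Write both parameter dicts in the tab-indented format used above."""
    with open(path, 'w') as f:
        for title, params in (('agent_params', agent_params),
                              ('env_params', env_params)):
            f.write(title + '\n')
            for k, v in params.items():
                f.write('\t' + k + ' = ' + str(v) + '\n')

# dump_params_txt(file_path + '.txt', agent_params, preliminary_env.env_params)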
LOAD_PATH = './evals/final/21/'
# LOAD_PATH = './evals/algorithm/ppo/3/'

file_path = LOAD_PATH + '/params'
with open(file_path, 'rb') as f:
    parm_list = pickle.load(f)
agent_params = parm_list[0]
env_params = parm_list[1]
# sandbox_env.N_BOTS = env_params['N_BOTS']

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(sandbox_env.App(always_render=True, verbose=False, traj_savepath=LOAD_PATH))
if PPO:
    observation_processor, hidden_layers, action_mapper = default_network_components(env)
    agent = PPOAgent(observation_processor,
                     hidden_layers,
                     action_mapper,
                     save_path=agent_params['save_path'],
                     value_lr=agent_params['value_lr'],
                     policy_lr=agent_params['policy_lr']).to(device)
else:
    agent = TD3Agent(env,
                     save_path=agent_params['save_path'],
                     critic_lr=agent_params['critic_lr'],
                     actor_lr=agent_params['actor_lr']).to(device)
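# The load-and-rebuild pattern above repeats across the eval scripts; a
# sketch consolidating the PPO branch (load_ppo_agent is a hypothetical
# name; env must already be constructed, since N_BOTS has to be set before
# App() is instantiated):
def load_ppo_agent(load_path, env, device='cpu'):
    """Rebuild a PPOAgent from a run directory and restore its state dict."""
    with open(load_path + '/params', 'rb') as f:
        agent_params = pickle.load(f)[0]
    observation_processor, hidden_layers, action_mapper = default_network_components(env)
    agent = PPOAgent(observation_processor,
                     hidden_layers,
                     action_mapper,
                     save_path=agent_params['save_path'],
                     value_lr=agent_params['value_lr'],
                     policy_lr=agent_params['policy_lr']).to(device)
    with open(load_path + '/agent', 'rb') as f:
        agent.load_state_dict(pickle.load(f))
    return agent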