Esempio n. 1
0
# Store the run configuration under the agent's save directory twice:
# once as a pickle of parm_list and once as a plain-text listing of the
# agent and environment parameter sets.
file_path = agent_params['save_path'] + '/params'
with open(file_path, 'wb') as params_out:
    pickle.dump(parm_list, params_out)

file_path_txt = file_path + '.txt'
with open(file_path_txt, 'w') as report:
    report.write('agent_params\n')
    report.writelines(
        '\t' + name + ' = ' + str(agent_params[name]) + '\n'
        for name in agent_params.keys()
    )
    report.write('env_params\n')
    report.writelines(
        '\t' + name + ' = ' + str(sandbox_env.env_params[name]) + '\n'
        for name in sandbox_env.env_params.keys()
    )

# The run is pinned to the CPU. The CUDA alternative that was left disabled:
#   device = "cuda:0" if torch.cuda.is_available() else "cpu"
device = "cpu"
env = wrap_openai_gym(sandbox_env.App(always_render=False))

# Build the TD3 agent from the stored hyper-parameters and move it to `device`.
td3_settings = {
    key: agent_params[key] for key in ('save_path', 'critic_lr', 'actor_lr')
}
agent = TD3Agent(env, **td3_settings).to(device)

# Optionally resume from a previously pickled state dict.
if LOAD_AGENT_FROM is not None:
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(LOAD_AGENT_FROM, 'rb') as checkpoint:
        state_dict = pickle.load(checkpoint)
    agent.load_state_dict(state_dict)

# agent=TD3Agent(
#     env,
Esempio n. 2
0
from pyforce.env import wrap_openai_gym
from pyforce.nn import default_network_components
from pyforce.agents import A2CAgent
import gym
import torch

# Use the first CUDA device when PyTorch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

env = wrap_openai_gym(gym.make("LunarLanderContinuous-v2"))

# The three network building blocks are derived from the wrapped environment.
(observation_processor,
 hidden_layers,
 action_mapper) = default_network_components(env)

agent = A2CAgent(
    observation_processor,
    hidden_layers,
    action_mapper,
    save_path="./evals/a2c_example",
    value_lr=1e-3,
    policy_lr=1e-3,
).to(device)

# Training options are collected in one place and forwarded as keywords.
train_settings = dict(
    episodes=1000,
    train_freq=256,
    eval_freq=50,
    render=True,
    gamma=.99,
    entropy_coef=.01,
)
agent.train(env, **train_settings)
Esempio n. 3
0
# Store the run configuration twice: a pickle of parm_list at file_path and
# a plain-text listing of the agent and environment parameters next to it.
with open(file_path, 'wb') as params_out:
    pickle.dump(parm_list, params_out)

file_path_txt = file_path + '.txt'
with open(file_path_txt, 'w') as report:
    report.write('agent_params\n')
    report.writelines(
        '\t' + name + ' = ' + str(agent_params[name]) + '\n'
        for name in agent_params.keys()
    )
    report.write('env_params\n')
    report.writelines(
        '\t' + name + ' = ' + str(gauss_env.env_params[name]) + '\n'
        for name in gauss_env.env_params.keys()
    )

# The run is pinned to the CPU. The CUDA alternative that was left disabled:
#   device = "cuda:0" if torch.cuda.is_available() else "cpu"
device = "cpu"

env = wrap_openai_gym(gauss_env.App(always_render=False, verbose=False))

# The three network building blocks are derived from the wrapped environment.
(observation_processor,
 hidden_layers,
 action_mapper) = default_network_components(env)

# Build the PPO agent from the stored hyper-parameters and move it to `device`.
ppo_settings = {
    key: agent_params[key] for key in ('save_path', 'value_lr', 'policy_lr')
}
agent = PPOAgent(
    observation_processor,
    hidden_layers,
    action_mapper,
    **ppo_settings
).to(device)

# Optionally resume from a previously pickled state dict.
if LOAD_AGENT_FROM is not None:
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(LOAD_AGENT_FROM, 'rb') as checkpoint:
        state_dict = pickle.load(checkpoint)
    agent.load_state_dict(state_dict)
Esempio n. 4
0
# Directory of the saved run that is being reloaded.
LOAD_PATH = './evals/ppo_gauss/4/'

# parm_list was pickled as [agent parameters, environment parameters].
# NOTE: unpickling can execute arbitrary code; only load trusted files.
file_path = '/'.join((LOAD_PATH, 'params'))
with open(file_path, 'rb') as params_in:
    parm_list = pickle.load(params_in)

agent_params, env_parms = parm_list[0], parm_list[1]

# Restore the N_BOTS setting stored with the run before the App is created.
gauss_env.N_BOTS = env_parms['N_BOTS']

# The run is pinned to the CPU. The CUDA alternative that was left disabled:
#   device = "cuda:0" if torch.cuda.is_available() else "cpu"
device = "cpu"

env = wrap_openai_gym(
    gauss_env.App(always_render=True, plot_reward=False, verbose=False)
)

# The three network building blocks are derived from the wrapped environment.
(observation_processor,
 hidden_layers,
 action_mapper) = default_network_components(env)

# Rebuild the PPO agent with the stored hyper-parameters.
ppo_settings = {
    key: agent_params[key] for key in ('save_path', 'value_lr', 'policy_lr')
}
agent = PPOAgent(
    observation_processor,
    hidden_layers,
    action_mapper,
    **ppo_settings
).to(device)

file_path = LOAD_PATH + '/agent'
# file_path = LOAD_PATH + '/agent4903861.0'
with open(file_path, 'rb') as f:
# LOAD_PATH = './evals/preliminary_minimal/reward_function/distance-True/2/'

# parm_list was pickled as [agent parameters, environment parameters].
# NOTE: unpickling can execute arbitrary code; only load trusted files.
file_path = '/'.join((LOAD_PATH, 'params'))
with open(file_path, 'rb') as params_in:
    parm_list = pickle.load(params_in)

agent_params, env_parms = parm_list[0], parm_list[1]

# Restore the N_BOTS setting stored with the run before the App is created.
sandbox_env.N_BOTS = env_parms['N_BOTS']

# The run is pinned to the CPU. The CUDA alternative that was left disabled:
#   device = "cuda:0" if torch.cuda.is_available() else "cpu"
device = "cpu"

# The App receives the run directory as its traj_savepath.
sandbox_app = sandbox_env.App(traj_savepath=LOAD_PATH)
env = wrap_openai_gym(sandbox_app)

if PPO:

    observation_processor, hidden_layers, action_mapper = default_network_components(
        env)

    agent = PPOAgent(observation_processor,
                     hidden_layers,
                     action_mapper,
                     save_path=agent_params['save_path'],
                     value_lr=agent_params['value_lr'],
                     policy_lr=agent_params['policy_lr']).to(device)

else:
    agent = TD3Agent(env,
Esempio n. 6
0
import torch
from pathlib import Path

import minimal_v4 as envi
# import minimal_v1
# from pyforce import agents

# Base directory that holds one numbered sub-directory per training run.
save_path = "./evals/td3_example"
# Free-text description stored with the run (kept verbatim).
description = 'Minimal 4: Umgebung im Bereich von (-100,-100) bis (100,100), Observationspace: Eigene Koordinaten und Zielposition, Startposition(0,0), Zufällige Zielposition'

# Use the first CUDA device when PyTorch reports one, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
env = wrap_openai_gym(envi.App())

# parents=True so a missing "./evals" directory does not abort the run with
# FileNotFoundError on a fresh checkout.
Path(save_path).mkdir(parents=True, exist_ok=True)

# The new run gets the highest existing numeric directory name plus one
# (1 when there is none yet). isdecimal() is used instead of isnumeric()
# because isnumeric() also accepts names such as "½" or "²", which int()
# rejects with ValueError.
run_number = 1 + max(
    (
        int(entry.name)
        for entry in Path(save_path).iterdir()
        if entry.is_dir() and entry.name.isdecimal()
    ),
    default=0,
)
save_path += '/' + str(run_number)

Path(save_path).mkdir(exist_ok=True)

# The description contains non-ASCII characters; write it with an explicit
# encoding so the result does not depend on the platform's locale.
with open(save_path + '/description.txt', mode='w', encoding='utf-8') as f:
    f.write(description)
Esempio n. 7
0
# Directory of the saved run that is being reloaded.
LOAD_PATH = './evals/ppo_pycking/4/'

# parm_list was pickled as [agent parameters, environment parameters].
# NOTE: unpickling can execute arbitrary code; only load trusted files.
file_path = '/'.join((LOAD_PATH, 'params'))
with open(file_path, 'rb') as params_in:
    parm_list = pickle.load(params_in)

agent_params, env_parms = parm_list[0], parm_list[1]

# Restore the N_BOTS setting stored with the run before the App is created.
pycking_env3.N_BOTS = env_parms['N_BOTS']

# The run is pinned to the CPU. The CUDA alternative that was left disabled:
#   device = "cuda:0" if torch.cuda.is_available() else "cpu"
device = "cpu"

env = wrap_openai_gym(pycking_env3.App(always_render=True, verbose=False))

# The three network building blocks are derived from the wrapped environment.
(observation_processor,
 hidden_layers,
 action_mapper) = default_network_components(env)

# Rebuild the PPO agent with the stored hyper-parameters.
ppo_settings = {
    key: agent_params[key] for key in ('save_path', 'value_lr', 'policy_lr')
}
agent = PPOAgent(
    observation_processor,
    hidden_layers,
    action_mapper,
    **ppo_settings
).to(device)

# Read the pickled agent state stored next to the parameters.
file_path = '/'.join((LOAD_PATH, 'agent'))
with open(file_path, 'rb') as agent_in:
    state_dict = pickle.load(agent_in)
Esempio n. 8
0
# Directory of the saved run that is being reloaded.
LOAD_PATH = './evals/ppo_sandbox/21'

# parm_list was pickled as [agent parameters, environment parameters].
# NOTE: unpickling can execute arbitrary code; only load trusted files.
file_path = '/'.join((LOAD_PATH, 'params'))
with open(file_path, 'rb') as params_in:
    parm_list = pickle.load(params_in)

agent_params, env_parms = parm_list[0], parm_list[1]

# Restore the N_BOTS setting stored with the run before the App is created.
sandbox_env.N_BOTS = env_parms['N_BOTS']

# The run is pinned to the CPU. The CUDA alternative that was left disabled:
#   device = "cuda:0" if torch.cuda.is_available() else "cpu"
device = "cpu"

sandbox_app = sandbox_env.App(always_render=True, verbose=False)
env = wrap_openai_gym(sandbox_app)

if PPO:

    observation_processor, hidden_layers, action_mapper = default_network_components(
        env)

    agent = PPOAgent(observation_processor,
                     hidden_layers,
                     action_mapper,
                     save_path=agent_params['save_path'],
                     value_lr=agent_params['value_lr'],
                     policy_lr=agent_params['policy_lr']).to(device)

else:
    agent = TD3Agent(env,
        # Store the run configuration under the agent's save directory twice:
        # a pickle of parm_list and a plain-text listing of both parameter sets.
        file_path = agent_params['save_path'] + '/params'
        with open(file_path, 'wb') as params_out:
            pickle.dump(parm_list, params_out)

        file_path_txt = file_path + '.txt'
        with open(file_path_txt, 'w') as report:
            report.write('agent_params\n')
            report.writelines(
                '\t' + name + ' = ' + str(agent_params[name]) + '\n'
                for name in agent_params.keys()
            )
            report.write('env_params\n')
            report.writelines(
                '\t' + name + ' = ' + str(preliminary_env.env_params[name]) + '\n'
                for name in preliminary_env.env_params.keys()
            )

        # The run is pinned to the CPU.
        device = "cpu"

        preliminary_app = preliminary_env.App(always_render=False, verbose=False)
        env = wrap_openai_gym(preliminary_app)
        # env=wrap_openai_gym(minimal_preliminary.App())

        if ag == 'ppo':
            observation_processor, hidden_layers, action_mapper = default_network_components(
                env)
            agent = PPOAgent(observation_processor,
                             hidden_layers,
                             action_mapper,
                             save_path=agent_params['save_path'],
                             value_lr=agent_params['value_lr'],
                             policy_lr=agent_params['policy_lr']).to(device)

            agent.train(env,
                        episodes=agent_params['episodes'],
                        train_freq=agent_params['train_freq'],
# Directory of the saved run that is being reloaded.
# Alternative run that was left disabled: './evals/algorithm/ppo/3/'
LOAD_PATH = './evals/final/21/'

# parm_list was pickled as [agent parameters, environment parameters].
# NOTE: unpickling can execute arbitrary code; only load trusted files.
file_path = '/'.join((LOAD_PATH, 'params'))
with open(file_path, 'rb') as params_in:
    parm_list = pickle.load(params_in)

agent_params, env_parms = parm_list[0], parm_list[1]

# Restoring N_BOTS from the stored parameters is disabled here:
#   sandbox_env.N_BOTS = env_parms['N_BOTS']

# The run is pinned to the CPU. The CUDA alternative that was left disabled:
#   device = "cuda:0" if torch.cuda.is_available() else "cpu"
device = "cpu"

# The App receives the run directory as its traj_savepath.
sandbox_app = sandbox_env.App(
    always_render=True,
    verbose=False,
    traj_savepath=LOAD_PATH,
)
env = wrap_openai_gym(sandbox_app)

if PPO:

    observation_processor,hidden_layers,action_mapper=default_network_components(env)

    agent=PPOAgent(
        observation_processor,
        hidden_layers,
        action_mapper,
        save_path=agent_params['save_path'],
        value_lr=agent_params['value_lr'],
        policy_lr=agent_params['policy_lr']
    ).to(device)

else: