# Train two independent (non-parameter-sharing) DDPG agents on a 2x2 MultiEnv.
from agent import DDPGAgent, DDPGArgs
from trainer import DistributedTrainer
from numeric_env import MultiEnv
import torch

# Limit PyTorch to a single intra-op thread so parallel workers do not
# oversubscribe the CPU.
torch.set_num_threads(1)

env = MultiEnv(2, 2)
args = DDPGArgs(state_dim=env.STATE_DIM, action_dim=2)
agents = [DDPGAgent(args) for _ in range(2)]
trainer = DistributedTrainer(agents, env,
                             parameter_share=False,
                             log_dir='../logs/ddpg_d')
trainer.train(1000000)
# Debug variant: same two-agent setup with the 'wjf' car policy.
# Checkpointing and observation normalization are currently disabled
# (the paths below are defined but None is passed to the trainer).
from agent import DDPGAgent, DDPGArgs
from trainer import DistributedTrainer
from numeric_env import MultiEnv
import torch

torch.set_num_threads(1)

env = MultiEnv(2, 2, car_policy='wjf')
args = DDPGArgs(state_dim=env.STATE_DIM, action_dim=2, action_scale=1.0)
agents = [DDPGAgent(args) for _ in range(2)]

# Checkpoint paths; pass these in place of the None arguments below to enable
# saving/loading of the actor/critic weights and the observation normalizer.
normalizer_path = '../models/debug/norm.pkl'
save_pathlists = [['../models/debug/actor.pkl', '../models/debug/critic.pkl']
                  for _ in range(2)]

trainer = DistributedTrainer(agents, env,
                             parameter_share=False,
                             log_dir='../logs/debug',
                             save_pathlists=None,
                             load_pathlists=None,
                             normalizer_path=None)
trainer.train(-1)  # -1 presumably means no fixed step budget (run until interrupted)