# Report where this run logs to and create the summary writer for it.
print(f"logging to {log_dir}")
writer = SummaryWriter(log_dir=log_dir)

# Collect every run setting on a single attribute-style config object.
config = DefaultMunch()
config.seed = seed
config.n_episodes = 40000
config.max_t = 1000
config.buffer_size = 100000
config.batch_size = 200
config.gamma = 0.99
config.tau = 0.001
config.lr_actor = 0.0001
config.lr_critic = 0.0001
config.n_agents = n_agents
config.state_size = state_size * state_multiplier
config.action_size = action_size
config.learn_start = 10000
config.max_action = 1  # maximum value allowed for each action
config.memory = ExperienceReplayMemory(config.buffer_size, seed)
config.update_every = 2
config.device = torch.device(
    "cuda:0" if torch.cuda.is_available() else "cpu")

# Persist a human-readable snapshot of the config next to the logs.
# The payload is built before the file is opened so that a serialisation
# failure cannot leave a truncated config.json behind, and the `with`
# block guarantees the handle is closed even if the write raises.
config_snapshot = json.loads(
    jsonpickle.encode(config, unpicklable=False, max_depth=1))
config_text = json.dumps(config_snapshot, indent=4, sort_keys=True)
with open(os.path.join(log_dir, "config.json"), "w+") as config_file:
    config_file.write(config_text)

# Per-episode score statistics, filled in by the code that follows.
scores = []
scores_std = []
comment = f"MADDPG Unity Tennis" rand_seed = 0 config = DefaultMunch() config.seed = seed config.n_episodes = 10 config.max_t = 1000 config.buffer_size = 100000 config.batch_size = 200 config.gamma = 0.99 config.tau = 0.001 config.lr_actor = 0.0001 config.lr_critic = 0.001 config.n_agents = n_agents config.state_size = state_size * state_multiplier config.action_size = action_size config.learn_start = 3000 config.max_action = 1 config.memory = ExperienceReplayMemory(config.buffer_size, rand_seed) config.update_every = 2 config.device = device rand_seed = 0 scores = [] scores_std = [] scores_avg = [] scores_window = deque(maxlen=100) agent = MultiAgent(config) agent.load("./save/checkpoint_success.pth") global_steps = 0 noise_scheduler = config.noise_scheduler for i_episode in range(config.n_episodes):