def main():
    """Train or evaluate an agent on the environment built by ``make_env``.

    Parses the command-line arguments, seeds the run, and then either calls
    ``test`` (when ``args.evaluate`` is set) or calls ``train`` and exports
    the logged scalars to ``all_scalars.json`` inside the log directory.

    The environment and the summary writer are closed on every exit path,
    including when training or evaluation raises.
    """
    args = get_args()
    print_args(args)
    log_dir = create_log_dir(args)

    # A summary writer is only created for training runs.
    writer = None if args.evaluate else SummaryWriter(log_dir)
    env = None
    try:
        env = make_env(args)  # "LaserTag-small2-v0" "SlimeVolleyPixel-v0"
        print(env.observation_space, env.action_space)

        set_global_seeds(args.seed)
        env.seed(args.seed)

        if args.evaluate:
            test(env, args)
            return

        train(env, args, writer)
        # Only export scalars after a training run that finished normally.
        writer.export_scalars_to_json(os.path.join(log_dir, "all_scalars.json"))
    finally:
        # Release resources even if training/evaluation fails part-way.
        if writer is not None:
            writer.close()
        if env is not None:
            env.close()
def main():
    """Run training on a two-exit ``PanicEnv`` with a fixed configuration.

    The parsed arguments are overwritten with the hard-coded settings below
    before anything else happens. ``evaluate`` is overridden to ``False``,
    so the ``test`` branch is not expected to run with this configuration.
    """
    args = get_args()

    # Hard-coded configuration applied on top of the parsed arguments
    # (insertion order matches the order the options are assigned in).
    overrides = {
        "noisy": True,
        "double": True,
        "dueling": True,
        "prioritized_replay": True,
        "c51": True,
        "multi_step": 3,
        "load_agents": True,
        "num_agents": 12,
        "read_model": None,
        "evaluate": False,
    }
    for option, value in overrides.items():
        setattr(args, option, value)

    print_args(args)
    log_dir = create_log_dir(args)
    if not args.evaluate:
        writer = SummaryWriter(log_dir)

    env = PanicEnv(
        num_agents=args.num_agents,
        scenario_=Scenario.Two_Exits,
        load_agents=True,
        read_agents=False,
    )
    set_global_seeds(args.seed)

    if args.evaluate:
        test(env, args)
    else:
        train(env, args, writer)
        scalars_path = os.path.join(log_dir, "all_scalars.json")
        writer.export_scalars_to_json(scalars_path)
        writer.close()
def main():
    """Train or evaluate a model stored under ``models/bilateral_dqn/<env>``.

    A single environment is built when ``args.num_envs == 1`` or when
    evaluating; otherwise ``args.num_envs`` copies are wrapped in a
    ``SubprocVectorEnv``. After seeding, the run either calls ``test`` or
    ``train`` with the model directory.

    The environment and the summary writer are closed on every exit path,
    including when training or evaluation raises.
    """
    args = get_args()
    print_args(args)

    model_path = f'models/bilateral_dqn/{args.env}'
    os.makedirs(model_path, exist_ok=True)
    log_dir = create_log_dir(args)

    # A summary writer is only created for training runs.
    writer = None if args.evaluate else SummaryWriter(log_dir)
    env = None
    try:
        if args.num_envs == 1 or args.evaluate:
            # "SlimeVolley-v0", "SlimeVolleyPixel-v0" 'Pong-ram-v0'
            env = make_env(args)
        else:
            # Vectorised environment, see
            # https://github.com/thu-ml/tianshou/blob/master/tianshou/env/venvs.py
            env = SubprocVectorEnv(
                [lambda: make_env(args) for _ in range(args.num_envs)]
            )
        print(env.observation_space, env.action_space)

        set_global_seeds(args.seed)
        env.seed(args.seed)

        if args.evaluate:
            test(env, args, model_path)
            return

        train(env, args, writer, model_path)
    finally:
        # Release resources even if training/evaluation fails part-way.
        if writer is not None:
            writer.close()
        if env is not None:
            env.close()
def main():
    """Build the environment named by ``args.env`` and train or validate on it.

    In evaluation mode the dynamic environments are replaced by their
    ``*_Validation`` variants; the static environments are the same in both
    modes. Hyperparameters are saved together with a start timestamp, which
    is also passed on to ``train``.

    Raises:
        ValueError: if ``args.env`` is not one of the supported names.
    """
    args = get_args()
    print_args(args)

    training_envs = {
        "1DStatic": Env1DStatic,
        "1DDynamic": Env1DDynamic,
        "2DStatic": Env2DStatic,
        "2DDynamic": Env2DDynamic,
        "3DStatic": Env3DStatic,
        "3DDynamic": Env3DDynamic,
    }
    validation_envs = {
        "1DStatic": Env1DStatic,
        "1DDynamic": Env1DDynamic_Validation,
        "2DStatic": Env2DStatic,
        "2DDynamic": Env2DDynamic_Validation,
        "3DStatic": Env3DStatic,
        "3DDynamic": Env3DDynamic_Validation,
    }
    registry = validation_envs if args.evaluate else training_envs

    # Fail early with a clear message instead of hitting an unbound ``env``
    # further down.
    try:
        env_cls = registry[args.env]
    except KeyError:
        raise ValueError(
            f"Unknown environment {args.env!r}; "
            f"expected one of {sorted(registry)}"
        ) from None
    env = env_cls(args)

    writer = None
    try:
        timestamp = time.time()
        save_hyperparameters(args, timestamp)
        log_dir = create_log_dir(args)
        writer = SummaryWriter(log_dir)

        set_global_seeds(args.seed)
        env.seed(args.seed)

        if args.evaluate:
            validate(env, args)
        else:
            train(env, args, writer, timestamp)
        writer.flush()
    finally:
        # Release resources even if training/validation fails part-way.
        if writer is not None:
            writer.close()
        env.close()
def main():
    """Train or evaluate on the environment from ``make_atari``.

    The environment is wrapped with ``wrap_atari_dqn`` and seeded with
    ``args.seed``. In evaluation mode only ``test`` runs; otherwise ``train``
    runs and the logged scalars are exported to ``all_scalars.json`` in the
    log directory.
    """
    args = get_args()
    log_dir = create_log_dir(args)

    # The summary writer is only created for training runs.
    if not args.evaluate:
        writer = SummaryWriter(log_dir)

    env = wrap_atari_dqn(make_atari(args.env), args)
    set_global_seeds(args.seed)
    env.seed(args.seed)

    if not args.evaluate:
        train(env, args, writer)
        scalars_path = os.path.join(log_dir, "all_scalars.json")
        writer.export_scalars_to_json(scalars_path)
        writer.close()
    else:
        test(env, args)

    env.close()
def main():
    """Train or evaluate on the environment created by ``gym.make(args.env)``.

    Prints the parsed arguments and the log directory, seeds the run, then
    either calls ``test`` (evaluation mode) or calls ``train`` and exports
    the logged scalars to ``all_scalars.json`` in the log directory.
    """
    args = get_args()
    print_args(args)

    log_dir = create_log_dir(args)
    print("Log dir is:", log_dir)

    # The summary writer is only created for training runs.
    if not args.evaluate:
        writer = SummaryWriter(log_dir)

    env = gym.make(args.env)
    seed = args.seed
    set_global_seeds(seed)
    env.seed(seed)

    if args.evaluate:
        test(env, args)
    else:
        train(env, args, writer)
        writer.export_scalars_to_json(
            os.path.join(log_dir, "all_scalars.json")
        )
        writer.close()

    env.close()
def main():
    """Train or evaluate on the ``make_atari`` environment, logging to wandb.

    A wandb run is initialised from the ``wandb_*`` arguments with the full
    argument namespace as its config. The environment is wrapped with
    ``wrap_atari_dqn`` and seeded, and then either ``test`` or ``train`` is
    called before the environment is closed.
    """
    args = get_args()
    print_args(args)

    # Called as before; its return value is not used in this entry point.
    create_log_dir(args)

    wandb.init(
        project=args.wandb_project,
        name=args.wandb_name,
        notes=args.wandb_notes,
        config=args,
    )

    env = wrap_atari_dqn(make_atari(args.env), args)
    set_global_seeds(args.seed)
    env.seed(args.seed)

    if args.evaluate:
        test(env, args)
    else:
        train(env, args)

    env.close()
def main():
    """Run ``exploit`` against every saved checkpoint and store the results.

    Checkpoints are discovered in ``models/nash_dqn/<env>/<prefix>/`` as
    files whose name ends in ``dqn``; the text before the first underscore
    is parsed as an integer and used to order them. For each checkpoint the
    evaluated model is loaded, ``exploit`` is called, and its two return
    values are stored under the stringified integer. The accumulated results
    are written to ``data/<env>/eval_data.npy`` (or
    ``fictitious_eval_data.npy`` when ``args.fictitious`` is set) after every
    checkpoint, so partial results survive an interrupted run.

    The environment and the summary writer (created only when not
    evaluating) are closed on every exit path.
    """
    exploiter_type = 'DQN'
    evaluated_model_type = 'NashDQN'

    args = get_args()
    print_args(args)

    env = make_env(args)  # "SlimeVolley-v0", "SlimeVolleyPixel-v0" 'Pong-ram-v0'
    writer = None
    try:
        print(env.observation_space, env.action_space)
        model_prefix = model_metadata[args.env]
        exploiter = load_exploiter(env, exploiter_type, args)
        evaluated_model = load_evaluated_model(env, evaluated_model_type, args)

        model_dir = "models/nash_dqn/{}/{}/".format(args.env, model_prefix)
        exploiter_dir = "models/nash_dqn/{}/{}/exploiter/".format(
            args.env, model_prefix)
        os.makedirs(model_dir, exist_ok=True)
        os.makedirs(exploiter_dir, exist_ok=True)

        log_dir = create_log_dir(args)
        # The writer only exists for non-evaluation runs, so it must be
        # closed conditionally (see the ``finally`` clause).
        if not args.evaluate:
            writer = SummaryWriter(log_dir)

        set_global_seeds(args.seed)
        env.seed(args.seed)

        # Collect (episode, path prefix) pairs for all saved models. Sorting
        # the tuples orders them numerically by episode; sorting the string
        # prefixes alone would order them lexicographically.
        checkpoints = sorted(
            (int(stem), stem + '_')  # prefix keeps the trailing underscore
            for stem in (
                filename.split('_')[0]
                for filename in os.listdir(model_dir)
                if filename.endswith("dqn")
            )
        )
        print([episode for episode, _ in checkpoints])

        # The output location does not depend on the checkpoint.
        result_name = (
            'fictitious_eval_data.npy' if args.fictitious else 'eval_data.npy'
        )
        result_dir = 'data/{}/'.format(args.env)
        os.makedirs(result_dir, exist_ok=True)
        result_path = os.path.join(result_dir, result_name)

        # Evaluate/exploit all models saved during training, in order.
        eval_data = {}
        for episode, prefix in checkpoints:
            print('load model: ', episode, model_dir, prefix)
            # NOTE(review): the device is hard-coded to 'cuda:0'.
            evaluated_model.load_model(model_dir + prefix,
                                       eval=True,
                                       map_location='cuda:0')
            r_val, l_val = exploit(env,
                                   evaluated_model,
                                   exploiter,
                                   args,
                                   exploiter_path=exploiter_dir + prefix)
            eval_data[str(episode)] = [r_val, l_val]
            np.save(result_path, eval_data)
    finally:
        # Release resources even if loading or exploitation fails part-way.
        if writer is not None:
            writer.close()
        env.close()