def main(args): # Set logging if not os.path.exists("./log"): os.makedirs("./log") log = set_log(args) tb_writer = SummaryWriter('./log/tb_{0}'.format(args.log_name)) # Set seed set_seed(args.seed, cudnn=args.make_deterministic) # Set sampler sampler = BatchSampler(args, log) # Set policy policy = CaviaMLPPolicy( input_size=int(np.prod(sampler.observation_space.shape)), output_size=int(np.prod(sampler.action_space.shape)), hidden_sizes=(args.hidden_size, ) * args.num_layers, num_context_params=args.num_context_params, device=args.device) # Initialise baseline baseline = LinearFeatureBaseline( int(np.prod(sampler.observation_space.shape))) # Initialise meta-learner metalearner = MetaLearner(sampler, policy, baseline, args, tb_writer) # Begin train train(sampler, metalearner, args, log, tb_writer)
def main(args):
    if args.central_train:
        raise NotImplementedError("Centralized training is not implemented yet")

    # Create directories
    if not os.path.exists("./logs"):
        os.makedirs("./logs")
    if not os.path.exists("./pytorch_models"):
        os.makedirs("./pytorch_models")

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policies
    agents = [
        set_policy(env, tb_writer, log, args, name="agent", i_agent=i_agent)
        for i_agent in range(args.n_agent)]

    # Start training
    train(agents=agents, env=env, log=log, tb_writer=tb_writer, args=args)
def main(args): # Create dir if not os.path.exists("./logs"): os.makedirs("./logs") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") # Set logs tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name)) log = set_log(args) # Create env env = make_env(args) # Set seeds env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) # Initialize policy student_n = [ set_policy(env, log, args, name="student", i_agent=i_agent) for i_agent in range(args.n_student) ] # Start train train(student_n=student_n, env=env, log=log, tb_writer=tb_writer, args=args) if not os.path.exists("./saved_model"): os.makedirs("./saved_model")
def main(args): # Create directories if not os.path.exists("./logs"): os.makedirs("./logs") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") # Set logging log = set_log(args) tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name)) # Create env env = make_env(args) # Set seeds 0 seed is odd env.seed(args.seed) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) # Initialize policy agent = set_policy(env, args.n_hidden, tb_writer, log, args) # load agent if args.mode == "test": agent.load_weight("pytorch_models/", args.test_model) test(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args) else: train(agent=agent, env=env, log=log, tb_writer=tb_writer, num_samples=args.num_samples, args=args)
def main(args): # Create directories if not os.path.exists("./logs"): os.makedirs("./logs") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") # Set logs log = set_log(args) tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name)) # Create env env = make_env(args) # Set seeds random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) env.seed(args.seed) # Initialize agents agent1 = Agent(env, log, tb_writer, args, name="agent1", i_agent=1) agent2 = Agent(env, log, tb_writer, args, name="agent2", i_agent=2) # Start train train(agent1, agent2, env, log, tb_writer, args)
def main(args): # Create directories if not os.path.exists("./logs"): os.makedirs("./logs") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") # Set logs tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name)) log = set_log(args) # Create env env = make_env(log, args) # Set seeds env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) # Initialize policy agent = set_policy(env, tb_writer, log, args, name=args.algorithm) if args.test: from tester import test test(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args) else: from trainer import train train(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args)
def main(args): # Set logging if not os.path.exists("./log"): os.makedirs("./log") log = set_log(args) tb_writer = SummaryWriter('./log/tb_{0}'.format(args.log_name)) # Set seeds random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if device == torch.device("cuda"): torch.cuda.manual_seed(args.seed) # For GPU, Set start method for multithreading if device == torch.device("cuda"): torch.multiprocessing.set_start_method('spawn') # Initialize shared meta-agent shared_meta_agent = MetaAgent(log, tb_writer, args, name="meta-agent", i_agent=0) shared_meta_agent.share_memory() # Begin either meta-train or meta-test if not args.test_mode: # Start meta-train processes, process_dict = [], mp.Manager().dict() for rank in range(args.n_process): p = mp.Process(target=meta_train, args=(shared_meta_agent, process_dict, rank, log, args)) p.start() processes.append(p) time.sleep(0.1) p = mp.Process(target=meta_val, args=(shared_meta_agent, process_dict, -1, log, args)) p.start() processes.append(p) time.sleep(0.1) for p in processes: time.sleep(0.1) p.join() else: # Start meta-test meta_test(shared_meta_agent, log, tb_writer, args)
def main(args): # Create dir if not os.path.exists("./logs"): os.makedirs("./logs") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") # Set logs tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name)) log = set_log(args) # Create env env = make_env(args) # Set seeds env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) # Initialize policy opponent_n = [ set_policy(env, tb_writer, log, args, name="opponent", i_agent=i_agent) for i_agent in range(1) ] modeler = set_policy(env, tb_writer, log, args, name="modeler", i_agent=0) # Start training if args.train_opponent: train_opponent(opponent_n=opponent_n, env=env, log=log, tb_writer=tb_writer, args=args) else: # Load trained opponent model for opponent in opponent_n: opponent.load_model(filename="opponent0_500", directory="./pytorch_models") train_modeler(modeler=modeler, opponent_n=opponent_n, env=env, log=log, tb_writer=tb_writer, args=args)
def main(args): """ Program entry point Arguments args (argparse.Namespace) command-line arguments """ # Create directories if not os.path.exists("./logs"): os.makedirs("./logs") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") pathlib.Path(args.binary_file).parents[0].mkdir(parents=False, exist_ok=True) # Set logging log = set_log(args) tb_writer = SummaryWriter('./logs/tb_{}'.format(args.log_name)) if args.env == "GridEnv-v0": if args.start is not None: args.start = tuple(args.start) args.target = tuple(args.target) args.exits = Consts.EXITS gui = None if args.render: multiprocessing.set_start_method("spawn") manager = multiprocessing.Manager() queue = manager.Queue() gui = GUI(args.gui_width, args.gui_height, args.rows, args.cols, args.x_rooms, args.y_rooms, args.target, args.exits, queue) train(args, gui, log, tb_writer) if gui: gui.process.join() sys.exit() elif args.env == "Taxi-v4": train(args, None) else: raise ValueError("Environment {} not recognized".format(args.env))
def main(args): # Create directories if not os.path.exists("./logs"): os.makedirs("./logs") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") # Set logs tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name)) log = set_log(args) # Create env env = make_env(args) # Set seeds env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) # Initialize policy # Note that only one teacher is considered in the one box push domain # to transfer knowledge from agent $i$ to agent $k$ (Section 6.1) workers = [ set_policy(env, tb_writer, log, args, name="worker", i_agent=i_agent) for i_agent in range(args.n_worker)] managers = [ set_policy(env, tb_writer, log, args, name="manager", i_agent=i_agent) for i_agent in range(args.n_manager)] temp_managers = [ set_policy(env, tb_writer, log, args, name="temp_manager", i_agent=i_agent) for i_agent in range(args.n_manager)] teacher = set_policy(env, tb_writer, log, args, name="teacher", i_agent=0) assert len(workers) == len(managers), "The two number must be same" assert len(managers) == len(temp_managers), "The two number must be same" # Start train train( workers=workers, managers=managers, temp_managers=temp_managers, teacher=teacher, env=env, log=log, tb_writer=tb_writer, args=args)
def main(args): # Create dir if not os.path.exists("./logs"): os.makedirs("./logs") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") # Set logs tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name)) log = set_log(args) # Set seeds random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if device == torch.device("cuda"): torch.cuda.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Set the gpu learner = MetaLearner(log, tb_writer, args) learner.train()
def main(args): # Create dir if not os.path.exists("./logs"): os.makedirs("./logs") if not os.path.exists("./pytorch_models"): os.makedirs("./pytorch_models") # Set logs tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name)) log = set_log(args) # Create env env = make_env(args) # Set seeds env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) # Initialize policy predator_agents = [ set_policy(env, tb_writer, log, args, name="predator", i_agent=i_agent) for i_agent in range(args.n_predator) ] prey_agents = [ set_policy(env, tb_writer, log, args, name="prey", i_agent=i_agent) for i_agent in range(args.n_prey) ] # Start training train(predator_agents=predator_agents, prey_agents=prey_agents, env=env, log=log, tb_writer=tb_writer, args=args)
def main(args):
    # Check arguments
    assert args.n_agent == 2, "Only two agents are supported"

    # Create directories
    if not os.path.exists("./logs"):
        os.makedirs("./logs")
    if not os.path.exists("./data"):
        os.makedirs("./data")

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(args)

    # Set seeds
    env.seed(args.seed)
    np.random.seed(args.seed)

    # Set agents
    agents = [
        Agent(env=env, tb_writer=tb_writer, log=log, args=args, name="agent", i_agent=i_agent)
        for i_agent in range(args.n_agent)]

    # Get true return by Monte Carlo estimate
    if args.estimate_option == "montecarlo":
        from trainer.montecarlo import train
        table = train(agents=agents, env=env, log=log, tb_writer=tb_writer, args=args)
    elif args.estimate_option == "naive":
        from trainer.naive import train
        table = train(agents=agents, env=env, log=log, tb_writer=tb_writer, args=args)
    elif args.estimate_option == "ours":
        from trainer.ours_mp import train
        # from trainer.ours import train
        table = train(agents=agents, env=env, log=log, tb_writer=tb_writer, args=args)
    else:
        raise ValueError("Invalid option")

    # Save and vis result
    save_name = args.estimate_option + "_" + str(args.decay_max_timesteps)
    if args.estimate_option == "ours":
        save_name += "_" + str(args.future_max_timesteps)
    np.save("./data/" + save_name + ".npy", table)
def main(args):
    # Setup for logging
    tb_writer = SummaryWriter('./logs/tb_{}'.format(args.log_name))  # Tensorboard logging
    log = set_log(args)

    # Setup before meta-train starts
    sampler = BatchSampler(
        env_name=args.env_name, batch_size=args.fast_batch_size,
        num_workers=args.num_workers, args=args)

    # NOTE Observation space is a list with [predator0, predator1, ..., prey]
    # Thus using the index of 0
    policy = NormalMLPPolicy(
        input_size=int(np.prod(sampler.envs.observation_space[0].shape)),
        output_size=int(np.prod(sampler.envs.action_space[0].shape)),
        hidden_sizes=(args.hidden_size,) * args.num_layers)
    baseline = LinearFeatureBaseline(
        input_size=int(np.prod(sampler.envs.observation_space[0].shape)))

    meta_learner = MetaLearner(
        sampler, policy, baseline, gamma=args.gamma, fast_lr=args.fast_lr,
        tau=args.tau, device=args.device, args=args, log=log, tb_writer=tb_writer)
    # meta_learner.load(
    #     filename="theta_200", directory="./pytorch_models")

    meta_tester = MetaTester(
        sampler, policy, baseline, gamma=args.gamma, fast_lr=args.fast_lr,
        tau=args.tau, device=args.device, args=args, log=log, tb_writer=tb_writer)

    prey = Prey(
        env=sampler._env, args=args, log=log, tb_writer=tb_writer,
        name="prey", i_agent=0)

    # Meta-train starts
    iteration = 0
    while True:
        # Sample train and validation episodes
        tasks = sampler.sample_tasks(num_tasks=args.meta_batch_size, test=False)
        episodes = meta_learner.sample(
            tasks, prey, first_order=args.first_order, iteration=iteration)

        # Train meta-policy
        meta_learner.step(episodes=episodes, args=args)

        # Test meta-policy
        if iteration % 10 == 0:
            test_tasks = sampler.sample_tasks(num_tasks=5, test=True)
            meta_tester.few_shot_adaptation(
                meta_policy=meta_learner.policy, tasks=test_tasks,
                first_order=args.first_order, iteration=iteration, prey=prey)

        if iteration % 100 == 0:
            meta_learner.save(iteration)

        iteration += 1
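# The meta-training loop in main() above runs until interrupted. A bounded variant
# (a sketch using a hypothetical `args.train_iterations` flag that the original script
# does not define) would keep the same test/save cadence:
#
#     for iteration in range(args.train_iterations):
#         tasks = sampler.sample_tasks(num_tasks=args.meta_batch_size, test=False)
#         episodes = meta_learner.sample(tasks, prey, first_order=args.first_order,
#                                        iteration=iteration)
#         meta_learner.step(episodes=episodes, args=args)
#         ...
#
# Checkpoints written every 100 iterations via meta_learner.save(iteration) can be
# restored with the commented-out meta_learner.load() call shown near the top of main().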