def launch(args):
    assert not args.env_name.startswith('Residual'), 'Residual envs not allowed'
    # create the ddpg_agent
    env = make_env(args.env_name)
    controller = get_controller(args.env_name)
    # set random seeds for reproducibility
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.cuda:
        torch.cuda.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # Configure logger
    if MPI.COMM_WORLD.Get_rank() == 0:
        if args.log_dir or logger.get_dir() is None:
            logger.configure(
                dir=os.path.join('logs', 'switch_her', args.log_dir),
                format_strs=['tensorboard', 'log', 'csv', 'json', 'stdout'])
        else:
            logger.configure(
                dir=os.path.join('logs', 'switch_her', args.env_name),
                format_strs=['tensorboard', 'log', 'csv', 'json', 'stdout'])
    args.log_dir = logger.get_dir()
    assert args.log_dir is not None
    os.makedirs(args.log_dir, exist_ok=True)
    # TODO: Write code for loading and saving params from/to json files
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    her_trainer = her_switch_agent(args, env, env_params, controller)
    her_trainer.learn()
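# All of these launchers call a get_env_params(env) helper whose definition is not
# shown in this excerpt. The sketch below is a hedged guess at a minimal version,
# assuming the usual goal-conditioned gym interface (dict observations with
# 'observation'/'desired_goal' keys and an _max_episode_steps attribute). Note that
# the last launcher in this file calls get_env_params(args, env), so the real helper
# likely takes extra arguments; the name below is suffixed to mark it as hypothetical.
def get_env_params_sketch(env):
    obs = env.reset()
    params = {
        'obs': obs['observation'].shape[0],       # observation dimension
        'goal': obs['desired_goal'].shape[0],     # goal dimension
        'action': env.action_space.shape[0],      # action dimension (continuous envs)
        'action_max': env.action_space.high[0],   # symmetric action bound
        'max_timesteps': env._max_episode_steps,  # episode horizon
    }
    return params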
def launch(args):
    env = make_env(args.env_name,
                   env_id=args.env_id,
                   discrete=True,
                   reward_type=args.reward_type)
    # set random seeds for reproducibility
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.deterministic:
        env.make_deterministic()
    if args.debug:
        logger.set_level(DEBUG)
    controller = get_controller(args.env_name,
                                env_id=args.env_id,
                                discrete=True,
                                num_expansions=args.num_expansions,
                                reward_type=args.reward_type)
    # Configure logger
    if MPI.COMM_WORLD.Get_rank() == 0 and args.log_dir:
        logger.configure(
            dir=os.path.join('logs', 'rts', args.log_dir),
            format_strs=['tensorboard', 'log', 'csv', 'json', 'stdout'])
    args.log_dir = logger.get_dir()
    assert args.log_dir is not None
    os.makedirs(args.log_dir, exist_ok=True)
    env_params = get_env_params(env)
    rts_trainer = dqn_rts_agent(args, env, env_params, controller)
    rts_trainer.learn()
def launch(args):
    assert args.env_name.startswith('Residual'), 'Only residual envs allowed'
    env = make_env(args.env_name)
    # set random seeds for reproducibility
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # Configure logger
    if MPI.COMM_WORLD.Get_rank() == 0:
        if args.log_dir or logger.get_dir() is None:
            logger.configure(
                dir=os.path.join('logs', 'hardcoded', args.log_dir),
                format_strs=['tensorboard', 'log', 'csv', 'json', 'stdout'])
        else:
            logger.configure(
                dir=os.path.join('logs', 'hardcoded', args.env_name),
                format_strs=['tensorboard', 'log', 'csv', 'json', 'stdout'])
    args.log_dir = logger.get_dir()
    assert args.log_dir is not None
    os.makedirs(args.log_dir, exist_ok=True)
    env_params = get_env_params(env)
    hardcoded_controller = hardcoded_agent(args, env, env_params)
    hardcoded_controller.eval_agent()
def launch(args):
    # create the ddpg_agent
    env = make_env(args.env_name,
                   env_id=args.env_id,
                   reward_type=args.reward_type)
    # set random seeds for reproducibility
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.cuda:
        torch.cuda.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # Configure logger
    if MPI.COMM_WORLD.Get_rank() == 0 and args.log_dir:
        logger.configure(
            dir=os.path.join('logs', 'her', args.log_dir),
            format_strs=['tensorboard', 'log', 'csv', 'json', 'stdout'])
    args.log_dir = logger.get_dir()
    assert args.log_dir is not None
    os.makedirs(args.log_dir, exist_ok=True)
    # TODO: Write code for loading and saving params from/to json files
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    her_trainer = her_agent(args, env, env_params)
    her_trainer.learn()
def launch(args):
    # rospy.init_node('rts_trainer', anonymous=True)
    # Start ray
    ray.init(logging_level=logging.ERROR)
    # Create environments
    env = make_env(env_name=args.env_name,
                   env_id=args.env_id,
                   discrete=True,
                   reward_type=args.reward_type)
    planning_env = make_env(env_name=args.env_name,
                            env_id=args.planning_env_id,
                            discrete=True,
                            reward_type=args.reward_type)
    # Set random seeds
    env.seed(args.seed)
    planning_env.seed(args.seed)
    # Set global seeds
    set_global_seed(args.seed)
    # Make environments deterministic, if requested
    if args.deterministic:
        env.make_deterministic()
        planning_env.make_deterministic()
    # Set logger level to debug, if requested
    if args.debug:
        logger.set_level(logger.DEBUG)
    # Create controller
    controller = get_controller(env_name=args.env_name,
                                env_id=args.planning_env_id,
                                discrete=True,
                                num_expansions=args.n_expansions,
                                reward_type=args.reward_type,
                                seed=args.seed)
    # Configure logger
    if args.log_dir:
        logger.configure(
            dir=osp.join('logs', 'rts', args.log_dir),
            format_strs=['tensorboard', 'log', 'csv', 'json', 'stdout'])
        os.makedirs(logger.get_dir(), exist_ok=True)
    # Configure save dir
    # if args.save_dir:
    #     args.save_dir = osp.join('saved', 'rts', args.save_dir)
    #     os.makedirs(args.save_dir, exist_ok=True)
    # if args.load_dir:
    #     args.load_dir = osp.join('saved', 'rts', args.load_dir)
    #     # TODO: Check if dir exists
    # Get env params
    env_params = get_env_params(args, env)
    # Get agent
    if args.agent in ('rts', 'mbpo', 'mbpo_knn', 'mbpo_gp'):
        fetch_trainer = fetch_rts_agent(args, env_params, env,
                                        planning_env, controller)
    elif args.agent == 'dqn':
        fetch_trainer = fetch_dqn_agent(args, env_params, env, controller)
    # elif args.agent == 'mbpo':
    #     fetch_trainer = fetch_model_agent(args, env_params, env,
    #                                       planning_env, controller)
    # Start
    if args.offline:
        # Train in simulation
        raise Exception('Only online mode is required')
        fetch_trainer.learn_offline_in_model()
    else:
        n_steps = fetch_trainer.learn_online_in_real_world(args.max_timesteps)
        print('REACHED GOAL in', n_steps, 'by agent', args.agent)
    ray.shutdown()
    time.sleep(5)
    return n_steps
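# A hedged sketch of how a launcher like the ones above is typically invoked, and of
# the set_global_seed helper used in the last launcher. The get_args parser, the
# environment-variable settings, and the exact seeding calls are assumptions for
# illustration, not the repository's actual code.
import os
import random

import numpy as np
import torch


def set_global_seed_sketch(seed):
    # Seed every RNG the agents touch so runs are reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


if __name__ == '__main__':
    # Single-threaded math libraries usually play nicer with MPI/ray workers.
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_NUM_THREADS'] = '1'
    args = get_args()  # assumed argparse-based helper defining the flags used above
    launch(args)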