# Environment #
################
# Gym environment selected by its "module:class" level string.
env_params = GymVectorEnvironment(level='TSP_env:TSPMediumEnv')

#########
# Agent #
#########
# Clipped PPO agent, left at its default parameters.
agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################
# Original author's note: frame_skip is set here to make sure the gifs work
# without skipping steps.
env_params.frame_skip = 5

vis_params = VisualizationParameters()
vis_params.dump_gifs = True

# Disabled settings, kept for reference:
# vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
# experiment_name = "TSPEasy"
# experiment_name = logger.get_experiment_name(experiment_name)
# experiment_path = logger.get_experiment_path(experiment_name)
# task_params = TaskParameters(experiment_path=experiment_path)

####################
# Graph Scheduling #
####################
# Environment #
################
# Gym environment selected by its "module:class" level string.
env_params = GymVectorEnvironment(level='VRP_abstract_env:VRPEasyEnv')

#########
# Agent #
#########
# Clipped PPO agent, left at its default parameters.
agent_params = ClippedPPOAgentParameters()

#################
# Visualization #
#################
# Original author's note: frame_skip is set here to make sure the gifs work
# without skipping steps.
env_params.frame_skip = 5

vis_params = VisualizationParameters()
vis_params.dump_gifs = True

# Disabled settings, kept for reference.
# NOTE(review): the disabled experiment name still says "TSPEasy" although
# this preset loads a VRP environment -- confirm before re-enabling.
# vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
# experiment_name = "TSPEasy"
# experiment_name = logger.get_experiment_name(experiment_name)
# experiment_path = logger.get_experiment_path(experiment_name)
# task_params = TaskParameters(experiment_path=experiment_path)

####################
# Graph Scheduling #
####################
bottom_critic.learning_rate = 0.001
bottom_critic.batch_size = 4096

# Per-level agent parameters, listed top level first.
agents_params = [top_agent_params, bottom_agent_params]

###############
# Environment #
###############
time_limit = 1000

env_params = GymVectorEnvironment(
    level="rl_coach.environments.mujoco.pendulum_with_goals:PendulumWithGoals"
)
# polar_coordinates and distance_from_goal_threshold are expected to be
# defined earlier in this file (outside this section).
env_params.additional_simulator_parameters = {
    "time_limit": time_limit,
    "random_goals_instead_of_standing_goal": False,
    "polar_coordinates": polar_coordinates,
    "goal_reaching_thresholds": distance_from_goal_threshold,
}
env_params.frame_skip = 10
# Threshold sits one unit above -time_limit.
env_params.custom_reward_threshold = 1 - time_limit

vis_params = VisualizationParameters()
vis_params.native_rendering = False

graph_manager = HACGraphManager(
    agents_params=agents_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    consecutive_steps_to_run_non_top_levels=EnvironmentSteps(40),
)