# Imports assumed by all three variants below (module paths follow the usual
# rl_coach / ROS conventions; wait_for_checkpoint, wait_for_trainer_ready,
# exit_if_trainer_done, configure_environment_randomizer, ServiceProxyWrapper,
# utils.Profiler and the MIN_EVAL_TRIALS / *_S3_* constants are project-local):
import time

import rospy
from rl_coach.base_parameters import DistributedCoachSynchronizationType
from rl_coach.checkpoint import CheckpointStateReader
from rl_coach.core_types import EnvironmentEpisodes, EnvironmentSteps, RunPhase
from std_srvs.srv import Empty, EmptyRequest


def rollout_worker(graph_manager, data_store, num_workers, task_parameters):
    """ Wait for the first checkpoint, then perform rollouts using the model """
    if not data_store:
        raise AttributeError("None type for data_store object")

    checkpoint_dir = task_parameters.checkpoint_restore_path
    wait_for_checkpoint(checkpoint_dir, data_store)
    wait_for_trainer_ready(checkpoint_dir, data_store)

    # Make the clients that will allow us to pause and unpause the physics
    rospy.wait_for_service('/gazebo/pause_physics')
    rospy.wait_for_service('/gazebo/unpause_physics')
    pause_physics = ServiceProxyWrapper('/gazebo/pause_physics', Empty)
    unpause_physics = ServiceProxyWrapper('/gazebo/unpause_physics', Empty)

    graph_manager.create_graph(task_parameters=task_parameters, stop_physics=pause_physics,
                               start_physics=unpause_physics, empty_service_call=EmptyRequest)

    with graph_manager.phase_context(RunPhase.TRAIN):
        chkpt_state_reader = CheckpointStateReader(checkpoint_dir, checkpoint_state_optional=False)
        last_checkpoint = chkpt_state_reader.get_latest().num

        # Tell each agent's memory backend which checkpoint its experience belongs to
        for level in graph_manager.level_managers:
            for agent in level.agents.values():
                agent.memory.memory_backend.set_current_checkpoint(last_checkpoint)

        # This worker should play a fraction of the total playing steps per rollout
        act_steps = 1
        while True:
            exit_if_trainer_done(checkpoint_dir)

            unpause_physics(EmptyRequest())
            graph_manager.reset_internal_state(True)
            graph_manager.act(EnvironmentSteps(num_steps=act_steps),
                              wait_for_full_episodes=graph_manager.agent_params.algorithm.act_for_full_episodes)
            graph_manager.reset_internal_state(True)
            time.sleep(1)
            pause_physics(EmptyRequest())

            new_checkpoint = data_store.get_latest_checkpoint()
            if new_checkpoint and new_checkpoint > last_checkpoint:
                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type \
                        == DistributedCoachSynchronizationType.SYNC:
                    # SYNC: block on the next sequential checkpoint before acting again
                    exit_if_trainer_done(checkpoint_dir)
                    data_store.load_from_store(expected_checkpoint_number=last_checkpoint + 1)
                    graph_manager.restore_checkpoint()

                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type \
                        == DistributedCoachSynchronizationType.ASYNC:
                    # ASYNC: restore whatever checkpoint is newest without blocking
                    graph_manager.restore_checkpoint()

                last_checkpoint = new_checkpoint
                for level in graph_manager.level_managers:
                    for agent in level.agents.values():
                        agent.memory.memory_backend.set_current_checkpoint(last_checkpoint)
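
# All three variants call Gazebo services through ServiceProxyWrapper. The
# class itself is project-local and not shown in this section; the sketch
# below is a hypothetical stand-in, assuming it is a thin retry wrapper
# around rospy.ServiceProxy (the name, retry count and backoff are invented):
class ServiceProxyWrapperSketch(object):
    """Hypothetical stand-in: retries a ROS service call a few times."""

    def __init__(self, service_name, service_class, max_retries=5):
        self.client = rospy.ServiceProxy(service_name, service_class)
        self.max_retries = max_retries

    def __call__(self, request):
        for _ in range(self.max_retries):
            try:
                return self.client(request)
            except rospy.ServiceException:
                # transient failure: back off briefly and retry
                time.sleep(1)
        raise RuntimeError('service call failed after {} retries'.format(self.max_retries))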
def rollout_worker(graph_manager, num_workers, rollout_idx, task_parameters, s3_writer):
    """ Wait for the first checkpoint, then perform rollouts using the model """
    if not graph_manager.data_store:
        raise AttributeError("None type for data_store object")

    data_store = graph_manager.data_store
    checkpoint_dir = task_parameters.checkpoint_restore_path
    wait_for_checkpoint(checkpoint_dir, data_store)
    wait_for_trainer_ready(checkpoint_dir, data_store)

    # Make the clients that will allow us to pause and unpause the physics
    rospy.wait_for_service('/gazebo/pause_physics')
    rospy.wait_for_service('/gazebo/unpause_physics')
    rospy.wait_for_service('/racecar/save_mp4/subscribe_to_save_mp4')
    rospy.wait_for_service('/racecar/save_mp4/unsubscribe_from_save_mp4')
    pause_physics = ServiceProxyWrapper('/gazebo/pause_physics', Empty)
    unpause_physics = ServiceProxyWrapper('/gazebo/unpause_physics', Empty)
    subscribe_to_save_mp4 = ServiceProxyWrapper('/racecar/save_mp4/subscribe_to_save_mp4', Empty)
    unsubscribe_from_save_mp4 = ServiceProxyWrapper('/racecar/save_mp4/unsubscribe_from_save_mp4', Empty)

    graph_manager.create_graph(task_parameters=task_parameters, stop_physics=pause_physics,
                               start_physics=unpause_physics, empty_service_call=EmptyRequest)

    chkpt_state_reader = CheckpointStateReader(checkpoint_dir, checkpoint_state_optional=False)
    last_checkpoint = chkpt_state_reader.get_latest().num

    # This worker should play a fraction of the total playing steps per rollout;
    # the remainder is spread over the first (episode_steps_per_rollout % num_workers) workers
    episode_steps_per_rollout = graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps
    act_steps = int(episode_steps_per_rollout / num_workers)
    if rollout_idx < episode_steps_per_rollout % num_workers:
        act_steps += 1
    act_steps = EnvironmentEpisodes(act_steps)

    configure_environment_randomizer()

    for _ in range((graph_manager.improve_steps / act_steps.num_steps).num_steps):
        # Collect profiler information only if IS_PROFILER_ON is true
        with utils.Profiler(s3_bucket=PROFILER_S3_BUCKET, s3_prefix=PROFILER_S3_PREFIX,
                            output_local_path=ROLLOUT_WORKER_PROFILER_PATH,
                            enable_profiling=IS_PROFILER_ON):
            graph_manager.phase = RunPhase.TRAIN
            exit_if_trainer_done(checkpoint_dir, s3_writer, rollout_idx)

            unpause_physics(EmptyRequest())
            graph_manager.reset_internal_state(True)
            graph_manager.act(act_steps,
                              wait_for_full_episodes=graph_manager.agent_params.algorithm.act_for_full_episodes)
            graph_manager.reset_internal_state(True)
            time.sleep(1)
            pause_physics(EmptyRequest())
            graph_manager.phase = RunPhase.UNDEFINED

            new_checkpoint = -1
            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type \
                    == DistributedCoachSynchronizationType.SYNC:
                unpause_physics(EmptyRequest())
                # Only the first rollout worker records evaluation videos
                is_save_mp4_enabled = rospy.get_param('MP4_S3_BUCKET', None) and rollout_idx == 0
                if is_save_mp4_enabled:
                    subscribe_to_save_mp4(EmptyRequest())
                if rollout_idx == 0:
                    for _ in range(MIN_EVAL_TRIALS):
                        graph_manager.evaluate(EnvironmentSteps(1))
                # Keep evaluating until the trainer publishes the next checkpoint
                while new_checkpoint < last_checkpoint + 1:
                    exit_if_trainer_done(checkpoint_dir, s3_writer, rollout_idx)
                    if rollout_idx == 0:
                        graph_manager.evaluate(EnvironmentSteps(1))
                    new_checkpoint = data_store.get_chkpoint_num('agent')
                if is_save_mp4_enabled:
                    unsubscribe_from_save_mp4(EmptyRequest())
                    s3_writer.upload_to_s3()
                pause_physics(EmptyRequest())
                data_store.load_from_store(expected_checkpoint_number=last_checkpoint + 1)
                graph_manager.restore_checkpoint()

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type \
                    == DistributedCoachSynchronizationType.ASYNC:
                # NOTE: polling the data store here is an assumed addition that mirrors
                # the SYNC path; without it new_checkpoint would remain -1 and the
                # restore below could never fire.
                new_checkpoint = data_store.get_chkpoint_num('agent')
                if new_checkpoint > last_checkpoint:
                    graph_manager.restore_checkpoint()

            last_checkpoint = new_checkpoint
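
# The variant above splits num_consecutive_playing_steps across the rollout
# workers so each plays a near-equal share of episodes per rollout. The same
# arithmetic in isolation (the function name is illustrative only):
def episodes_for_worker(episode_steps_per_rollout, num_workers, rollout_idx):
    """Return how many episodes the given worker plays per rollout."""
    act_steps = episode_steps_per_rollout // num_workers
    # the first (episode_steps_per_rollout % num_workers) workers take one extra
    if rollout_idx < episode_steps_per_rollout % num_workers:
        act_steps += 1
    return act_steps

# e.g. 20 episodes across 3 workers -> 7, 7, 6
assert [episodes_for_worker(20, 3, i) for i in range(3)] == [7, 7, 6]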
def rollout_worker(graph_manager, num_workers, task_parameters, s3_writer):
    """ Wait for the first checkpoint, then perform rollouts using the model """
    if not graph_manager.data_store:
        raise AttributeError("None type for data_store object")

    data_store = graph_manager.data_store
    checkpoint_dir = task_parameters.checkpoint_restore_path
    wait_for_checkpoint(checkpoint_dir, data_store)
    wait_for_trainer_ready(checkpoint_dir, data_store)

    # Make the clients that will allow us to pause and unpause the physics
    rospy.wait_for_service('/gazebo/pause_physics')
    rospy.wait_for_service('/gazebo/unpause_physics')
    rospy.wait_for_service('/racecar/save_mp4/subscribe_to_save_mp4')
    rospy.wait_for_service('/racecar/save_mp4/unsubscribe_from_save_mp4')
    pause_physics = ServiceProxyWrapper('/gazebo/pause_physics', Empty)
    unpause_physics = ServiceProxyWrapper('/gazebo/unpause_physics', Empty)
    subscribe_to_save_mp4 = ServiceProxyWrapper('/racecar/save_mp4/subscribe_to_save_mp4', Empty)
    unsubscribe_from_save_mp4 = ServiceProxyWrapper('/racecar/save_mp4/unsubscribe_from_save_mp4', Empty)

    graph_manager.create_graph(task_parameters=task_parameters, stop_physics=pause_physics,
                               start_physics=unpause_physics, empty_service_call=EmptyRequest)

    with graph_manager.phase_context(RunPhase.TRAIN):
        chkpt_state_reader = CheckpointStateReader(checkpoint_dir, checkpoint_state_optional=False)
        last_checkpoint = chkpt_state_reader.get_latest().num

        for level in graph_manager.level_managers:
            for agent in level.agents.values():
                agent.memory.memory_backend.set_current_checkpoint(last_checkpoint)

        # This worker should play a fraction of the total playing steps per rollout
        act_steps = 1
        while True:
            graph_manager.phase = RunPhase.TRAIN
            exit_if_trainer_done(checkpoint_dir, s3_writer)

            unpause_physics(EmptyRequest())
            graph_manager.reset_internal_state(True)
            graph_manager.act(EnvironmentSteps(num_steps=act_steps),
                              wait_for_full_episodes=graph_manager.agent_params.algorithm.act_for_full_episodes)
            graph_manager.reset_internal_state(True)
            time.sleep(1)
            pause_physics(EmptyRequest())
            graph_manager.phase = RunPhase.UNDEFINED

            new_checkpoint = data_store.get_chkpoint_num('agent')
            if new_checkpoint and new_checkpoint > last_checkpoint:
                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type \
                        == DistributedCoachSynchronizationType.SYNC:
                    exit_if_trainer_done(checkpoint_dir, s3_writer)
                    unpause_physics(EmptyRequest())
                    # Record evaluation videos only when an MP4 bucket is configured
                    is_save_mp4_enabled = rospy.get_param('MP4_S3_BUCKET', None)
                    if is_save_mp4_enabled:
                        subscribe_to_save_mp4(EmptyRequest())
                    for _ in range(MIN_EVAL_TRIALS):
                        graph_manager.evaluate(EnvironmentSteps(1))
                    if is_save_mp4_enabled:
                        unsubscribe_from_save_mp4(EmptyRequest())
                    s3_writer.upload_to_s3()
                    pause_physics(EmptyRequest())
                    # NOTE: loading and restoring the new checkpoint here is an assumed
                    # addition that mirrors the SYNC path of the other variants; without
                    # it this branch would keep acting on stale weights.
                    data_store.load_from_store(expected_checkpoint_number=last_checkpoint + 1)
                    graph_manager.restore_checkpoint()

                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type \
                        == DistributedCoachSynchronizationType.ASYNC:
                    graph_manager.restore_checkpoint()

                last_checkpoint = new_checkpoint
                for level in graph_manager.level_managers:
                    for agent in level.agents.values():
                        agent.memory.memory_backend.set_current_checkpoint(last_checkpoint)
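
# Condensed view of the two synchronization policies shared by all variants,
# with poll_fn standing in for data_store.get_chkpoint_num('agent') /
# data_store.get_latest_checkpoint() and restore_fn for
# graph_manager.restore_checkpoint() (all names here are hypothetical):
def wait_for_next_checkpoint_sync(poll_fn, last_checkpoint, poll_interval=1.0):
    """SYNC: block until a checkpoint newer than last_checkpoint appears."""
    new_checkpoint = poll_fn()
    while new_checkpoint is None or new_checkpoint < last_checkpoint + 1:
        time.sleep(poll_interval)
        new_checkpoint = poll_fn()
    return new_checkpoint


def maybe_restore_async(poll_fn, last_checkpoint, restore_fn):
    """ASYNC: restore only if something newer showed up; never block rollouts."""
    new_checkpoint = poll_fn()
    if new_checkpoint is not None and new_checkpoint > last_checkpoint:
        restore_fn()
        return new_checkpoint
    return last_checkpoint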