import time

import rospy
from rl_coach.base_parameters import DistributedCoachSynchronizationType
from rl_coach.checkpoint import CheckpointStateReader
from rl_coach.core_types import (EnvironmentEpisodes, EnvironmentSteps,
                                 RunPhase)
from std_srvs.srv import Empty, EmptyRequest

# The remaining helpers (ServiceProxyWrapper, wait_for_checkpoint,
# wait_for_trainer_ready, exit_if_trainer_done,
# configure_environment_randomizer, ``utils``, and project constants such as
# MIN_EVAL_TRIALS and the PROFILER_* / MP4 settings) come from the
# surrounding project; their module paths are assumed and may differ between
# versions, so they are not imported here.


def rollout_worker(graph_manager, data_store, num_workers, task_parameters):
    """
    Wait for the first checkpoint, then perform rollouts using the model.
    """
    if not data_store:
        raise AttributeError("data_store object is None")

    checkpoint_dir = task_parameters.checkpoint_restore_path
    wait_for_checkpoint(checkpoint_dir, data_store)
    wait_for_trainer_ready(checkpoint_dir, data_store)
    # Make the clients that will allow us to pause and unpause the physics
    rospy.wait_for_service('/gazebo/pause_physics')
    rospy.wait_for_service('/gazebo/unpause_physics')
    pause_physics = ServiceProxyWrapper('/gazebo/pause_physics', Empty)
    unpause_physics = ServiceProxyWrapper('/gazebo/unpause_physics', Empty)
    graph_manager.create_graph(task_parameters=task_parameters,
                               stop_physics=pause_physics,
                               start_physics=unpause_physics,
                               empty_service_call=EmptyRequest)

    with graph_manager.phase_context(RunPhase.TRAIN):
        chkpt_state_reader = CheckpointStateReader(
            checkpoint_dir, checkpoint_state_optional=False)
        last_checkpoint = chkpt_state_reader.get_latest().num

        for level in graph_manager.level_managers:
            for agent in level.agents.values():
                agent.memory.memory_backend.set_current_checkpoint(
                    last_checkpoint)
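        # Presumably this tags experiences published through the memory
        # backend with the checkpoint (policy version) that produced them, so
        # the trainer can tell fresh rollouts from stale ones.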

        # act one environment step per loop iteration
        act_steps = 1
        while True:
            exit_if_trainer_done(checkpoint_dir)
            unpause_physics(EmptyRequest())
            graph_manager.reset_internal_state(True)
            graph_manager.act(EnvironmentSteps(num_steps=act_steps),
                              wait_for_full_episodes=graph_manager.
                              agent_params.algorithm.act_for_full_episodes)
            graph_manager.reset_internal_state(True)
            time.sleep(1)
            pause_physics(EmptyRequest())
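            # Physics stays paused while the worker checks for and loads a new
            # checkpoint below, so no simulation time passes during the sync.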

            new_checkpoint = data_store.get_latest_checkpoint()
            if new_checkpoint and new_checkpoint > last_checkpoint:
                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type\
                        == DistributedCoachSynchronizationType.SYNC:
                    exit_if_trainer_done(checkpoint_dir)
                    data_store.load_from_store(
                        expected_checkpoint_number=last_checkpoint + 1)
                    graph_manager.restore_checkpoint()

                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type\
                        == DistributedCoachSynchronizationType.ASYNC:
                    graph_manager.restore_checkpoint()

                last_checkpoint = new_checkpoint
                for level in graph_manager.level_managers:
                    for agent in level.agents.values():
                        agent.memory.memory_backend.set_current_checkpoint(
                            last_checkpoint)
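

# A minimal sketch of how a worker like this might be launched. The wiring is
# hypothetical: the real entry point parses CLI flags and builds the graph
# manager and data store from a Coach preset and the distributed-training
# config.
#
#     from rl_coach.base_parameters import TaskParameters
#
#     task_parameters = TaskParameters()
#     task_parameters.checkpoint_restore_path = '/tmp/checkpoint'
#     graph_manager = ...  # built from the training preset
#     data_store = ...     # e.g. an S3- or Redis-backed store
#     rollout_worker(graph_manager, data_store, num_workers=1,
#                    task_parameters=task_parameters)
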
def rollout_worker(graph_manager, num_workers, rollout_idx, task_parameters,
                   s3_writer):
    """
    Wait for the first checkpoint, then perform rollouts using the model.
    """
    if not graph_manager.data_store:
        raise AttributeError("data_store object is None")

    data_store = graph_manager.data_store

    checkpoint_dir = task_parameters.checkpoint_restore_path
    wait_for_checkpoint(checkpoint_dir, data_store)
    wait_for_trainer_ready(checkpoint_dir, data_store)
    # Make the clients that will allow us to pause and unpause the physics
    rospy.wait_for_service('/gazebo/pause_physics')
    rospy.wait_for_service('/gazebo/unpause_physics')
    rospy.wait_for_service('/racecar/save_mp4/subscribe_to_save_mp4')
    rospy.wait_for_service('/racecar/save_mp4/unsubscribe_from_save_mp4')
    pause_physics = ServiceProxyWrapper('/gazebo/pause_physics', Empty)
    unpause_physics = ServiceProxyWrapper('/gazebo/unpause_physics', Empty)
    subscribe_to_save_mp4 = ServiceProxyWrapper(
        '/racecar/save_mp4/subscribe_to_save_mp4', Empty)
    unsubscribe_from_save_mp4 = ServiceProxyWrapper(
        '/racecar/save_mp4/unsubscribe_from_save_mp4', Empty)
    graph_manager.create_graph(task_parameters=task_parameters,
                               stop_physics=pause_physics,
                               start_physics=unpause_physics,
                               empty_service_call=EmptyRequest)

    chkpt_state_reader = CheckpointStateReader(checkpoint_dir,
                                               checkpoint_state_optional=False)
    last_checkpoint = chkpt_state_reader.get_latest().num

    # this worker should play a fraction of the total playing steps per rollout
    episode_steps_per_rollout = graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps
    act_steps = int(episode_steps_per_rollout / num_workers)
    if rollout_idx < episode_steps_per_rollout % num_workers:
        act_steps += 1
    act_steps = EnvironmentEpisodes(act_steps)
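    # Worked example: with episode_steps_per_rollout = 20 and num_workers = 3,
    # every worker starts from 20 // 3 = 6 episodes and the 20 % 3 = 2
    # leftover episodes go to rollout_idx 0 and 1, giving shares of 7, 7 and
    # 6; the fleet as a whole still plays exactly 20 episodes per rollout.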

    configure_environment_randomizer()

    for _ in range(
        (graph_manager.improve_steps / act_steps.num_steps).num_steps):
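        # improve_steps is the job-wide training budget, so dividing it by
        # this worker's per-rollout share gives the number of rollout/sync
        # cycles this worker runs over the lifetime of the job.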
        # Collect profiler information only if IS_PROFILER_ON is true
        with utils.Profiler(s3_bucket=PROFILER_S3_BUCKET,
                            s3_prefix=PROFILER_S3_PREFIX,
                            output_local_path=ROLLOUT_WORKER_PROFILER_PATH,
                            enable_profiling=IS_PROFILER_ON):
            graph_manager.phase = RunPhase.TRAIN
            exit_if_trainer_done(checkpoint_dir, s3_writer, rollout_idx)
            unpause_physics(EmptyRequest())
            graph_manager.reset_internal_state(True)
            graph_manager.act(act_steps,
                              wait_for_full_episodes=graph_manager.
                              agent_params.algorithm.act_for_full_episodes)
            graph_manager.reset_internal_state(True)
            time.sleep(1)
            pause_physics(EmptyRequest())

            graph_manager.phase = RunPhase.UNDEFINED
            new_checkpoint = -1
            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type\
                    == DistributedCoachSynchronizationType.SYNC:
                unpause_physics(EmptyRequest())
                is_save_mp4_enabled = rospy.get_param(
                    'MP4_S3_BUCKET', None) and rollout_idx == 0
                if is_save_mp4_enabled:
                    subscribe_to_save_mp4(EmptyRequest())
                if rollout_idx == 0:
                    for _ in range(MIN_EVAL_TRIALS):
                        graph_manager.evaluate(EnvironmentSteps(1))
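                # Worker 0 keeps running evaluation episodes while polling the
                # data store, keeping the simulator busy during the sync
                # barrier until the trainer publishes the next checkpoint.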

                while new_checkpoint < last_checkpoint + 1:
                    exit_if_trainer_done(checkpoint_dir, s3_writer,
                                         rollout_idx)
                    if rollout_idx == 0:
                        graph_manager.evaluate(EnvironmentSteps(1))
                    new_checkpoint = data_store.get_chkpoint_num('agent')
                if is_save_mp4_enabled:
                    unsubscribe_from_save_mp4(EmptyRequest())
                s3_writer.upload_to_s3()

                pause_physics(EmptyRequest())
                data_store.load_from_store(
                    expected_checkpoint_number=last_checkpoint + 1)
                graph_manager.restore_checkpoint()

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type\
                    == DistributedCoachSynchronizationType.ASYNC:
                # poll for a newer checkpoint and, if one exists, load it
                # without blocking the next rollout
                new_checkpoint = data_store.get_chkpoint_num('agent')
                if new_checkpoint > last_checkpoint:
                    graph_manager.restore_checkpoint()

            last_checkpoint = new_checkpoint
def rollout_worker(graph_manager, num_workers, task_parameters, s3_writer):
    """
    Wait for the first checkpoint, then perform rollouts using the model.
    """
    if not graph_manager.data_store:
        raise AttributeError("data_store object is None")

    data_store = graph_manager.data_store

    checkpoint_dir = task_parameters.checkpoint_restore_path
    wait_for_checkpoint(checkpoint_dir, data_store)
    wait_for_trainer_ready(checkpoint_dir, data_store)
    # Make the clients that will allow us to pause and unpause the physics
    rospy.wait_for_service('/gazebo/pause_physics')
    rospy.wait_for_service('/gazebo/unpause_physics')
    rospy.wait_for_service('/racecar/save_mp4/subscribe_to_save_mp4')
    rospy.wait_for_service('/racecar/save_mp4/unsubscribe_from_save_mp4')
    pause_physics = ServiceProxyWrapper('/gazebo/pause_physics', Empty)
    unpause_physics = ServiceProxyWrapper('/gazebo/unpause_physics', Empty)
    subscribe_to_save_mp4 = ServiceProxyWrapper('/racecar/save_mp4/subscribe_to_save_mp4', Empty)
    unsubscribe_from_save_mp4 = ServiceProxyWrapper('/racecar/save_mp4/unsubscribe_from_save_mp4', Empty)
    graph_manager.create_graph(task_parameters=task_parameters, stop_physics=pause_physics,
                               start_physics=unpause_physics, empty_service_call=EmptyRequest)

    with graph_manager.phase_context(RunPhase.TRAIN):
        chkpt_state_reader = CheckpointStateReader(checkpoint_dir, checkpoint_state_optional=False)
        last_checkpoint = chkpt_state_reader.get_latest().num

        for level in graph_manager.level_managers:
            for agent in level.agents.values():
                agent.memory.memory_backend.set_current_checkpoint(last_checkpoint)

        # act one environment step per loop iteration
        act_steps = 1
        while True:
            graph_manager.phase = RunPhase.TRAIN
            exit_if_trainer_done(checkpoint_dir, s3_writer)
            unpause_physics(EmptyRequest())
            graph_manager.reset_internal_state(True)
            graph_manager.act(EnvironmentSteps(num_steps=act_steps),
                              wait_for_full_episodes=graph_manager.
                              agent_params.algorithm.act_for_full_episodes)
            graph_manager.reset_internal_state(True)
            time.sleep(1)
            pause_physics(EmptyRequest())

            graph_manager.phase = RunPhase.UNDEFINED
            new_checkpoint = data_store.get_chkpoint_num('agent')
            if new_checkpoint and new_checkpoint > last_checkpoint:
                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type\
                        == DistributedCoachSynchronizationType.SYNC:
                    exit_if_trainer_done(checkpoint_dir, s3_writer)
                    unpause_physics(EmptyRequest())
                    is_save_mp4_enabled = rospy.get_param('MP4_S3_BUCKET', None)
                    if is_save_mp4_enabled:
                        subscribe_to_save_mp4(EmptyRequest())
                    for _ in range(MIN_EVAL_TRIALS):
                        graph_manager.evaluate(EnvironmentSteps(1))
                    if is_save_mp4_enabled:
                        unsubscribe_from_save_mp4(EmptyRequest())
                    s3_writer.upload_to_s3()

                    pause_physics(EmptyRequest())
                    data_store.load_from_store(
                        expected_checkpoint_number=last_checkpoint + 1)
                    graph_manager.restore_checkpoint()
                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type\
                        == DistributedCoachSynchronizationType.ASYNC:
                    graph_manager.restore_checkpoint()

                last_checkpoint = new_checkpoint
                for level in graph_manager.level_managers:
                    for agent in level.agents.values():
                        agent.memory.memory_backend.set_current_checkpoint(last_checkpoint)
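

# For context, a rough sketch of the ``exit_if_trainer_done`` helper used in
# the loops above. This is hypothetical: the real helper lives in the
# surrounding project, and the ``.finished`` marker-file convention assumed
# here is an illustration, not the confirmed protocol.
def exit_if_trainer_done_sketch(checkpoint_dir, s3_writer=None, rollout_idx=0):
    import os
    import sys

    # Assumed convention: the trainer drops a marker file into the shared
    # checkpoint directory once it has exhausted its training budget.
    if os.path.exists(os.path.join(checkpoint_dir, '.finished')):
        if s3_writer is not None and rollout_idx == 0:
            # Flush any pending artifacts (e.g. recorded MP4s) before exiting.
            s3_writer.upload_to_s3()
        sys.exit(0)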