Example #1
0
    def _setup_graph_manager(self, checkpoint, agent_list):
        """Sets up graph manager based on the checkpoint file and agents list.

        Args:
            checkpoint (Checkpoint): The model checkpoints we just downloaded.
            agent_list (list[Agent]): List of agents we want to setup graph manager for.
        """
        sm_hyperparams_dict = {}
        self._current_graph_manager, _ = get_graph_manager(hp_dict=sm_hyperparams_dict, agent_list=agent_list,
                                                           run_phase_subject=self._run_phase_subject,
                                                           enable_domain_randomization=self._enable_domain_randomization,
                                                           done_condition=self._done_condition,
                                                           pause_physics=self._model_updater.pause_physics_service,
                                                           unpause_physics=self._model_updater.unpause_physics_service)
        checkpoint_dict = dict()
        checkpoint_dict[self._agent_name] = checkpoint
        ds_params_instance = S3BotoDataStoreParameters(checkpoint_dict=checkpoint_dict)

        self._current_graph_manager.data_store = S3BotoDataStore(params=ds_params_instance,
                                                                 graph_manager=self._current_graph_manager,
                                                                 ignore_lock=True,
                                                                 log_and_cont=True)
        self._current_graph_manager.env_params.seed = 0

        self._current_graph_manager.data_store.wait_for_checkpoints()
        self._current_graph_manager.data_store.modify_checkpoint_variables()

        task_parameters = TaskParameters()
        task_parameters.checkpoint_restore_path = self._local_model_directory

        self._current_graph_manager.create_graph(task_parameters=task_parameters,
                                                 stop_physics=self._model_updater.pause_physics_service,
                                                 start_physics=self._model_updater.unpause_physics_service,
                                                 empty_service_call=EmptyRequest)
def evaluate(params):
    # file params
    experiment_path = os.path.join(params.output_data_dir)
    logger.experiment_path = os.path.join(experiment_path, 'evaluation')
    params.checkpoint_restore_dir = os.path.join(params.input_data_dir,
                                                 'checkpoint')
    checkpoint_file = os.path.join(params.checkpoint_restore_dir, 'checkpoint')

    inplace_change(checkpoint_file, "/opt/ml/output/data/checkpoint", ".")
    # Note that due to a tensorflow issue (https://github.com/tensorflow/tensorflow/issues/9146) we need to replace
    # the absolute path for the evaluation-from-a-checkpointed-model to work

    vis_params = VisualizationParameters()
    vis_params.dump_gifs = True

    task_params = TaskParameters(evaluate_only=True,
                                 experiment_path=logger.experiment_path)
    task_params.__dict__ = add_items_to_dict(task_params.__dict__,
                                             params.__dict__)

    graph_manager = BasicRLGraphManager(
        agent_params=ClippedPPOAgentParameters(),
        env_params=GymVectorEnvironment(level='TSP_env:TSPEasyEnv'),
        schedule_params=ScheduleParameters(),
        vis_params=vis_params)
    graph_manager = graph_manager.create_graph(task_parameters=task_params)
    graph_manager.evaluate(EnvironmentSteps(5))
def evaluate(params):
    # file params
    experiment_path = os.path.join(params.output_data_dir)
    logger.experiment_path = os.path.join(experiment_path, 'evaluation')
    params.checkpoint_restore_dir = os.path.join(params.input_data_dir, 'checkpoint')
    checkpoint_file = os.path.join(params.checkpoint_restore_dir, 'checkpoint')

    inplace_change(checkpoint_file, "/opt/ml/output/data/checkpoint", ".")
    # Note that due to a tensorflow issue (https://github.com/tensorflow/tensorflow/issues/9146) we need to replace
    # the absolute path for the evaluation-from-a-checkpointed-model to work

    vis_params = VisualizationParameters()
    vis_params.dump_gifs = True

    task_params = TaskParameters(evaluate_only=True, experiment_path=logger.experiment_path)
    task_params.__dict__ = add_items_to_dict(task_params.__dict__, params.__dict__)

    graph_manager = BasicRLGraphManager(
        agent_params=ClippedPPOAgentParameters(),
        env_params=GymVectorEnvironment(level='TSP_env:TSPEasyEnv'),
        schedule_params=ScheduleParameters(),
        vis_params=vis_params
    )
    graph_manager = graph_manager.create_graph(task_parameters=task_params)
    graph_manager.evaluate(EnvironmentSteps(5))
def evaluation_worker(graph_manager, number_of_trials, local_model_directory):
    # Initialize the graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_restore_dir'] = local_model_directory
    graph_manager.create_graph(task_parameters)

    graph_manager.evaluate(EnvironmentEpisodes(number_of_trials))
Example #5
0
def rollout_worker(graph_manager, checkpoint_dir, data_store, num_workers):
    """
    wait for first checkpoint then perform rollouts using the model
    """
    utils.wait_for_checkpoint(checkpoint_dir)

    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_restore_dir'] = checkpoint_dir
    graph_manager.create_graph(task_parameters)
    with graph_manager.phase_context(RunPhase.TRAIN):
        error_compensation = random.randint(0, 5)
        act_steps = math.ceil(
            (graph_manager.agent_params.algorithm.
             num_consecutive_playing_steps.num_steps + error_compensation) /
            num_workers)

        for i in range(int(graph_manager.improve_steps.num_steps / act_steps)):
            graph_manager.act(
                EnvironmentEpisodes(num_steps=act_steps +
                                    random.randint(0, 5)))
            # This waits for the first checkpoint
            last_checkpoint = data_store.get_current_checkpoint_number()
            data_store.load_from_store(
                expected_checkpoint_number=last_checkpoint + 1)
            graph_manager.restore_checkpoint()
Example #6
0
    def solve(self, model, app_args, amc_cfg, services, steps_per_episode):
        msglogger.info("AMC: Using coach")

        # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
        # Coach to create and use our DistillerWrapperEnvironment environment.
        # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
        if amc_cfg.agent_algo == "DDPG":
            from examples.auto_compression.amc.rl_libs.coach.presets.ADC_DDPG import (
                graph_manager, agent_params)
            graph_manager.agent_params.exploration.noise_schedule = ExponentialSchedule(
                amc_cfg.ddpg_cfg.initial_training_noise, 0,
                amc_cfg.ddpg_cfg.training_noise_decay)
            # Number of iterations to train
            graph_manager.agent_params.algorithm.num_consecutive_training_steps = steps_per_episode
            #graph_manager.agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(1)
            graph_manager.agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(
                steps_per_episode)
            # Heatup
            graph_manager.heatup_steps = EnvironmentEpisodes(
                amc_cfg.ddpg_cfg.num_heatup_episodes)
            # Replay buffer size
            graph_manager.agent_params.memory.max_size = (
                MemoryGranularity.Transitions,
                amc_cfg.ddpg_cfg.replay_buffer_size)
            amc_cfg.ddpg_cfg.training_noise_decay = amc_cfg.ddpg_cfg.training_noise_decay**(
                1. / steps_per_episode)
        elif "ClippedPPO" in amc_cfg.agent_algo:
            from examples.auto_compression.amc.rl_libs.coach.presets.ADC_ClippedPPO import graph_manager, agent_params
        elif "TD3" in amc_cfg.agent_algo:
            from examples.auto_compression.amc.rl_libs.coach.presets.ADC_TD3 import graph_manager, agent_params
        else:
            raise ValueError(
                "The agent algorithm you are trying to use (%s) is not supported"
                % amc_cfg.amc_agent_algo)

        # Number of training steps
        n_training_episodes = amc_cfg.ddpg_cfg.num_training_episodes
        graph_manager.improve_steps = EnvironmentEpisodes(n_training_episodes)
        # Don't evaluate until the end
        graph_manager.steps_between_evaluation_periods = EnvironmentEpisodes(
            n_training_episodes)

        # These parameters are passed to the Distiller environment
        env_cfg = {
            'model': model,
            'app_args': app_args,
            'amc_cfg': amc_cfg,
            'services': services
        }
        graph_manager.env_params.additional_simulator_parameters = env_cfg

        coach_logs_dir = os.path.join(msglogger.logdir, 'coach')
        os.mkdir(coach_logs_dir)
        task_parameters = TaskParameters(experiment_path=coach_logs_dir)
        # Set Coach's PRNG seed
        if app_args.seed is not None:
            task_parameters.seed = app_args.seed
        graph_manager.create_graph(task_parameters)
        graph_manager.improve()
Example #7
0
def evaluation_worker(graph_manager, number_of_trials, local_model_directory):
    # initialize graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_restore_dir'] = local_model_directory
    graph_manager.create_graph(task_parameters)

    with graph_manager.phase_context(RunPhase.TEST):
        # reset all the levels before starting to evaluate
        graph_manager.reset_internal_state(force_environment_reset=True)
        graph_manager.act(EnvironmentEpisodes(number_of_trials))
Example #8
0
def rollout_worker(graph_manager, checkpoint_dir, data_store, num_workers):
    """
    wait for first checkpoint then perform rollouts using the model
    """
    wait_for_checkpoint(checkpoint_dir)

    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_restore_dir'] = checkpoint_dir

    graph_manager.create_graph(task_parameters)
    with graph_manager.phase_context(RunPhase.TRAIN):

        last_checkpoint = 0

        act_steps = math.ceil(
            (graph_manager.agent_params.algorithm.
             num_consecutive_playing_steps.num_steps) / num_workers)

        for i in range(int(graph_manager.improve_steps.num_steps / act_steps)):

            if should_stop(checkpoint_dir):
                break

            if type(graph_manager.agent_params.algorithm.
                    num_consecutive_playing_steps) == EnvironmentSteps:
                graph_manager.act(EnvironmentSteps(num_steps=act_steps),
                                  wait_for_full_episodes=graph_manager.
                                  agent_params.algorithm.act_for_full_episodes)
            elif type(graph_manager.agent_params.algorithm.
                      num_consecutive_playing_steps) == EnvironmentEpisodes:
                graph_manager.act(EnvironmentEpisodes(num_steps=act_steps))

            new_checkpoint = get_latest_checkpoint(checkpoint_dir)

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                while new_checkpoint < last_checkpoint + 1:
                    if should_stop(checkpoint_dir):
                        break
                    if data_store:
                        data_store.load_from_store()
                    new_checkpoint = get_latest_checkpoint(checkpoint_dir)

                graph_manager.restore_checkpoint()

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.ASYNC:
                if new_checkpoint > last_checkpoint:
                    graph_manager.restore_checkpoint()

            last_checkpoint = new_checkpoint
Example #9
0
    def create_graph(self, task_parameters: TaskParameters):
        self.task_parameters = task_parameters

        if isinstance(task_parameters, DistributedTaskParameters):
            screen.log_title("Creating graph - name: {} task id: {} type: {}".format(self.__class__.__name__,
                                                                                     task_parameters.task_index,
                                                                                     task_parameters.job_type))
        else:
            screen.log_title("Creating graph - name: {}".format(self.__class__.__name__))

        # "hide" the gpu if necessary
        if task_parameters.use_cpu:
            set_cpu()

        # create a target server for the worker and a device
        if isinstance(task_parameters, DistributedTaskParameters):
            task_parameters.worker_target, task_parameters.device = \
                self.create_worker_or_parameters_server(task_parameters=task_parameters)

        # create the graph modules
        self.level_managers, self.environments = self._create_graph(task_parameters)

        # set self as the parent of all the level managers
        self.top_level_manager = self.level_managers[0]
        for level_manager in self.level_managers:
            level_manager.parent_graph_manager = self

        # create a session (it needs to be created after all the graph ops were created)
        self.sess = None
        self.create_session(task_parameters=task_parameters)

        self._phase = self.phase = RunPhase.UNDEFINED

        self.setup_logger()
Example #10
0
    def run_graph_manager(self, graph_manager: 'GraphManager',
                          args: argparse.Namespace):
        if args.distributed_coach and not graph_manager.agent_params.algorithm.distributed_coach_synchronization_type:
            screen.error(
                "{} algorithm is not supported using distributed Coach.".
                format(graph_manager.agent_params.algorithm))

        if args.distributed_coach and args.checkpoint_save_secs and graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
            screen.warning(
                "The --checkpoint_save_secs or -s argument will be ignored as SYNC distributed coach sync type is used. Checkpoint will be saved every training iteration."
            )

        if args.distributed_coach and not args.checkpoint_save_secs and graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.ASYNC:
            screen.error(
                "Distributed coach with ASYNC distributed coach sync type requires --checkpoint_save_secs or -s."
            )

        # Intel optimized TF seems to run significantly faster when limiting to a single OMP thread.
        # This will not affect GPU runs.
        os.environ["OMP_NUM_THREADS"] = "1"

        # turn TF debug prints off
        if args.framework == Frameworks.tensorflow:
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_verbosity)

        # turn off the summary at the end of the run if necessary
        if not args.no_summary and not args.distributed_coach:
            atexit.register(logger.summarize_experiment)
            screen.change_terminal_title(args.experiment_name)

        task_parameters = TaskParameters(
            framework_type=args.framework,
            evaluate_only=args.evaluate,
            experiment_path=args.experiment_path,
            seed=args.seed,
            use_cpu=args.use_cpu,
            checkpoint_save_secs=args.checkpoint_save_secs,
            checkpoint_restore_dir=args.checkpoint_restore_dir,
            checkpoint_save_dir=args.checkpoint_save_dir,
            export_onnx_graph=args.export_onnx_graph,
            apply_stop_condition=args.apply_stop_condition)

        # open dashboard
        if args.open_dashboard:
            open_dashboard(args.experiment_path)

        if args.distributed_coach and args.distributed_coach_run_type != RunType.ORCHESTRATOR:
            handle_distributed_coach_tasks(graph_manager, args,
                                           task_parameters)
            return

        if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
            handle_distributed_coach_orchestrator(args)
            return

        # Single-threaded runs
        if args.num_workers == 1:
            self.start_single_threaded(task_parameters, graph_manager, args)
        else:
            self.start_multi_threaded(graph_manager, args)
def test_basic_rl_graph_manager_with_cartpole_dqn():
    tf.reset_default_graph()
    from rl_coach.presets.CartPole_DQN import graph_manager
    assert graph_manager
    graph_manager.create_graph(
        task_parameters=TaskParameters(framework_type=Frameworks.tensorflow,
                                       experiment_path="./experiments/test"))
def test_basic_rl_graph_manager_with_lab_acer():
    tf.reset_default_graph()
    from rl_coach.presets.Lab_nav_maze_static_01_ACER import graph_manager
    assert graph_manager
    graph_manager.create_graph(
        task_parameters=TaskParameters(framework_type=Frameworks.tensorflow,
                                       experiment_path="./experiments/test"))
Example #13
0
def test_basic_rl_graph_manager_with_pong_nec():
    tf.reset_default_graph()
    from rl_coach.presets.Atari_NEC import graph_manager
    assert graph_manager
    graph_manager.env_params.level = "PongDeterministic-v4"
    graph_manager.create_graph(task_parameters=TaskParameters(
        framework_type="tensorflow", experiment_path="./experiments/test"))
Example #14
0
def test_get_QActionStateValue_predictions():
    tf.reset_default_graph()
    from rl_coach.presets.CartPole_DQN import graph_manager as cartpole_dqn_graph_manager
    assert cartpole_dqn_graph_manager
    cartpole_dqn_graph_manager.create_graph(task_parameters=TaskParameters(
        framework_type="tensorflow", experiment_path="./experiments/test"))
    cartpole_dqn_graph_manager.improve_steps.num_steps = 1
    cartpole_dqn_graph_manager.steps_between_evaluation_periods.num_steps = 5
Example #15
0
def coach_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
    task_parameters = TaskParameters(framework_type="tensorflow",
                                     experiment_path="./experiments/test")
    extra_params = {'save_checkpoint_secs': None,
                    'render': True}
    task_parameters.__dict__.update(extra_params)

    # Create a dictionary of parameters that Coach will handover to CNNEnvironment
    # Once it creates it.
    if True:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            #'action_range': (0.10, 0.95),
            'action_range': (0.70, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': None,
            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1-top5/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * max(1-top1/100, 0.25) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs/100000)
            #'reward_fn': lambda top1, total_macs:  top1/100 * total_macs/self.dense_model_macs
        }
    else:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            'action_range': (0.10, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': 1.5e8,
            'reward_fn': lambda top1, total_macs: top1/100
            #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75)
        }

    #msglogger.debug('Experiment configuarion:\n' + json.dumps(graph_manager.env_params.additional_simulator_parameters, indent=2))
    steps_per_episode = 13
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([(ConstantSchedule(exploration_noise),
                                                                             EnvironmentSteps(100*steps_per_episode)),
                                                                            (ExponentialSchedule(exploration_noise, 0, exploitation_decay),
                                                                             EnvironmentSteps(300*steps_per_episode))])
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
Example #16
0
    def create_graph(self,
                     task_parameters=TaskParameters(),
                     stop_physics=None,
                     start_physics=None,
                     empty_service_call=None):
        self.graph_creation_time = time.time()
        self.task_parameters = task_parameters

        if isinstance(task_parameters, DistributedTaskParameters):
            screen.log_title(
                "Creating graph - name: {} task id: {} type: {}".format(
                    self.__class__.__name__, task_parameters.task_index,
                    task_parameters.job_type))
        else:
            screen.log_title("Creating graph - name: {}".format(
                self.__class__.__name__))

        # "hide" the gpu if necessary
        if task_parameters.use_cpu:
            set_cpu()

        # create a target server for the worker and a device
        if isinstance(task_parameters, DistributedTaskParameters):
            task_parameters.worker_target, task_parameters.device = \
                self.create_worker_or_parameters_server(task_parameters=task_parameters)
        # If necessary start the physics and then stop it after agent creation
        screen.log_title("Start physics before creating graph")
        if start_physics and empty_service_call:
            start_physics(empty_service_call())
        # create the graph modules
        screen.log_title("Create graph")
        self.level_managers, self.environments = self._create_graph(
            task_parameters)
        screen.log_title("Stop physics after creating graph")
        if stop_physics and empty_service_call:
            stop_physics(empty_service_call())
        # set self as the parent of all the level managers
        self.top_level_manager = self.level_managers[0]
        for level_manager in self.level_managers:
            level_manager.parent_graph_manager = self

        import smdebug.tensorflow as smd
        self.smdebug_hook = smd.SessionHook.create_from_json_file()
        self.smdebug_hook.set_mode(smd.modes.TRAIN)

        # create a session (it needs to be created after all the graph ops were created)
        self.sess = {
            agent_params.name: None
            for agent_params in self.agents_params
        }
        screen.log_title("Creating session")
        self.create_session(task_parameters=task_parameters)
        self._phase = self.phase = RunPhase.UNDEFINED

        self.setup_logger()

        return self
def test_get_QActionStateValue_predictions_lab():
    tf.reset_default_graph()
    from rl_coach.presets.Lab_nav_maze_static_01_ACER import graph_manager
    assert graph_manager
    graph_manager.create_graph(
        task_parameters=TaskParameters(framework_type=Frameworks.tensorflow,
                                       experiment_path="./experiments/test"))
    graph_manager.improve_steps.num_steps = 1
    graph_manager.steps_between_evaluation_periods.num_steps = 5
def test_basic_rl_graph_manager_with_cartpole_dqn_and_repeated_checkpoint_restore(
):
    tf.reset_default_graph()
    from rl_coach.presets.CartPole_DQN import graph_manager
    assert graph_manager
    graph_manager.create_graph(
        task_parameters=TaskParameters(framework_type=Frameworks.tensorflow,
                                       experiment_path="./experiments/test",
                                       apply_stop_condition=True))
Example #19
0
def training_worker(graph_manager, checkpoint_dir):
    """
    restore a checkpoint then perform rollouts using the restored model
    """
    # initialize graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_save_dir'] = checkpoint_dir
    task_parameters.__dict__['checkpoint_save_secs'] = 20
    graph_manager.create_graph(task_parameters)

    # save randomly initialized graph
    graph_manager.save_checkpoint()

    # training loop
    steps = 0

    # evaluation offset
    eval_offset = 1

    graph_manager.setup_memory_backend()

    while (steps < graph_manager.improve_steps.num_steps):

        graph_manager.phase = core_types.RunPhase.TRAIN
        graph_manager.fetch_from_worker(
            graph_manager.agent_params.algorithm.num_consecutive_playing_steps)
        graph_manager.phase = core_types.RunPhase.UNDEFINED

        if graph_manager.should_train():
            steps += 1

            graph_manager.phase = core_types.RunPhase.TRAIN
            graph_manager.train()
            graph_manager.phase = core_types.RunPhase.UNDEFINED

            if steps * graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps > graph_manager.steps_between_evaluation_periods.num_steps * eval_offset:
                eval_offset += 1
                if graph_manager.evaluate(graph_manager.evaluation_steps):
                    break

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                graph_manager.save_checkpoint()
            else:
                graph_manager.occasionally_save_checkpoint()
def evaluation_worker(graph_manager, number_of_trials, local_model_directory):
    # initialize graph
    task_parameters = TaskParameters(evaluate_only=True)
    task_parameters.__dict__['checkpoint_restore_dir'] = local_model_directory
    graph_manager.create_graph(task_parameters)
    graph_manager.reset_internal_state()

    data_store = graph_manager.data_store

    episodes_counter = Counter()

    try:
        # This will only work for DeepRacerRacetrackEnv enviroments
        graph_manager.top_level_manager.environment.env.env.set_allow_servo_step_signals(
            True)
    except Exception as ex:
        print("[ERROR] Method not defined in enviroment class: {}".format(ex))

    while True:
        # Get current checkpoint number
        current_checkpoint = get_latest_checkpoint(local_model_directory)

        # Register the checkpoint with the environment for logging
        graph_manager.top_level_manager.environment.env.env.set_checkpoint_num(
            current_checkpoint)

        while episodes_counter[current_checkpoint] < 15:
            graph_manager.evaluate(EnvironmentEpisodes(1))
            episodes_counter[current_checkpoint] += 1

        latest_checkpoint = data_store.get_latest_checkpoint()
        if latest_checkpoint:
            if latest_checkpoint > current_checkpoint:
                data_store.get_a_particular_model(
                    checkpoint_number=current_checkpoint + 1)
                graph_manager.restore_checkpoint()

        if should_stop(local_model_directory):
            break

    # Close the down the job
    graph_manager.top_level_manager.environment.env.env.cancel_simulation_job()
Example #21
0
def coach_adc(model, dataset, arch, optimizer_data, validate_fn, save_checkpoint_fn, train_fn):
    # task_parameters = TaskParameters(framework_type="tensorflow",
    #                                  experiment_path="./experiments/test")
    # extra_params = {'save_checkpoint_secs': None,
    #                 'render': True}
    # task_parameters.__dict__.update(extra_params)
    task_parameters = TaskParameters(experiment_path=logger.get_experiment_path('adc'))
    conv_cnt = count_conv_layer(model)

    # Create a dictionary of parameters that Coach will handover to CNNEnvironment
    # Once it creates it.
    services = distiller.utils.MutableNamedTuple({
                'validate_fn': validate_fn,
                'save_checkpoint_fn': save_checkpoint_fn,
                'train_fn': train_fn})

    app_args = distiller.utils.MutableNamedTuple({
                'dataset': dataset,
                'arch': arch,
                'optimizer_data': optimizer_data})
    if True:
        amc_cfg = distiller.utils.MutableNamedTuple({
                #'action_range': (0.20, 0.95),
                'action_range': (0.20, 0.80),
                'onehot_encoding': False,
                'normalize_obs': True,
                'desired_reduction': None,
                'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1-top1/100) * math.log(total_macs),
                'conv_cnt': conv_cnt,
                'max_reward': -1000})
    else:
        amc_cfg = distiller.utils.MutableNamedTuple({
                'action_range': (0.10, 0.95),
                'onehot_encoding': False,
                'normalize_obs': True,
                'desired_reduction': 1.5e8,
                'reward_fn': lambda top1, top5, vloss, total_macs: top1/100,
                #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75),
                'conv_cnt': conv_cnt,
                'max_reward': -1000})

    # These parameters are passed to the Distiller environment
    graph_manager.env_params.additional_simulator_parameters = {'model': model,
                                                                'app_args': app_args,
                                                                'amc_cfg': amc_cfg,
                                                                'services': services}
    exploration_noise = 0.5
    exploitation_decay = 0.996
    steps_per_episode = conv_cnt
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
        (ConstantSchedule(exploration_noise), EnvironmentSteps(100*steps_per_episode)),
        (ExponentialSchedule(exploration_noise, 0, exploitation_decay), EnvironmentSteps(300*steps_per_episode))])
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
def evaluation_worker(graph_manager, number_of_trials, local_model_directory):
    # initialize graph
    task_parameters = TaskParameters(evaluate_only=True)
    task_parameters.__dict__['checkpoint_restore_dir'] = local_model_directory
    graph_manager.create_graph(task_parameters)

    try:
        # This will only work for DeepRacerRacetrackEnv enviroments
        graph_manager.top_level_manager.environment.env.env.set_allow_servo_step_signals(True)
    except Exception as ex:
        print("[ERROR] Method not defined in enviroment class: {}".format(ex))

    curr_num_trials = 0

    while curr_num_trials < number_of_trials:
        graph_manager.evaluate(EnvironmentSteps(1))
        curr_num_trials += 1

    # Close the down the job
    graph_manager.top_level_manager.environment.env.env.cancel_simulation_job()
Example #23
0
    def start_single_threaded(self, graph_manager: 'GraphManager', args: argparse.Namespace):
        # Start the training or evaluation
        task_parameters = TaskParameters(
            framework_type=args.framework,
            evaluate_only=args.evaluate,
            experiment_path=args.experiment_path,
            seed=args.seed,
            use_cpu=args.use_cpu,
            checkpoint_save_secs=args.checkpoint_save_secs,
            checkpoint_restore_dir=args.checkpoint_restore_dir,
            checkpoint_save_dir=args.checkpoint_save_dir,
            export_onnx_graph=args.export_onnx_graph
        )

        start_graph(graph_manager=graph_manager, task_parameters=task_parameters)
Example #24
0
def do_adc_internal(model, args, optimizer_data, validate_fn,
                    save_checkpoint_fn, train_fn):
    dataset = args.dataset
    arch = args.arch
    perform_thinning = True  # args.amc_thinning
    num_ft_epochs = args.amc_ft_epochs
    action_range = args.amc_action_range
    np.random.seed()
    conv_cnt = count_conv_layer(model)

    msglogger.info("Executing AMC: RL agent - %s   RL library - %s",
                   args.amc_agent_algo, RLLIB)

    # Create a dictionary of parameters that Coach will handover to DistillerWrapperEnvironment
    # Once it creates it.
    services = distiller.utils.MutableNamedTuple({
        'validate_fn': validate_fn,
        'save_checkpoint_fn': save_checkpoint_fn,
        'train_fn': train_fn
    })

    app_args = distiller.utils.MutableNamedTuple({
        'dataset':
        dataset,
        'arch':
        arch,
        'optimizer_data':
        optimizer_data
    })

    amc_cfg = distiller.utils.MutableNamedTuple({
        'protocol':
        args.amc_protocol,
        'agent_algo':
        args.amc_agent_algo,
        'perform_thinning':
        perform_thinning,
        'num_ft_epochs':
        num_ft_epochs,
        'action_range':
        action_range,
        'conv_cnt':
        conv_cnt,
        'reward_frequency':
        args.amc_reward_frequency
    })

    #net_wrapper = NetworkWrapper(model, app_args, services)
    #return sample_networks(net_wrapper, services)

    if args.amc_protocol == "accuracy-guaranteed":
        amc_cfg.target_density = None
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: -(
            1 - top1 / 100) * math.log(total_macs)
        amc_cfg.action_constrain_fn = None
    elif args.amc_protocol == "mac-constrained":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: top1 / 100  #(90.5 - top1) / 10
        amc_cfg.action_constrain_fn = DistillerWrapperEnvironment.get_action
    elif args.amc_protocol == "mac-constrained-experimental":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = experimental_reward_fn
        amc_cfg.action_constrain_fn = None
    else:
        raise ValueError("{} is not supported currently".format(
            args.amc_protocol))

    steps_per_episode = conv_cnt
    if args.amc_agent_algo == "DDPG":
        amc_cfg.heatup_noise = 0.5
        amc_cfg.initial_training_noise = 0.5
        amc_cfg.training_noise_decay = 0.996  # 0.998
        amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
        amc_cfg.num_training_epochs = args.amc_training_epochs
        training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
        heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode

    if amc_cfg.agent_algo == "Random-policy":
        return random_agent(
            DistillerWrapperEnvironment(model, app_args, amc_cfg, services))

    if RLLIB == "spinup":
        msglogger.info("AMC: Using spinup")
        env1 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        env2 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        ddpg_spinup(env1, env2)
    else:
        msglogger.info("AMC: Using coach")

        # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
        # Coach to create and use our DistillerWrapperEnvironment environment.
        # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
        if args.amc_agent_algo == "DDPG":
            from examples.automated_deep_compression.presets.ADC_DDPG import graph_manager, agent_params
            agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule(
                [(ConstantSchedule(amc_cfg.heatup_noise),
                  EnvironmentSteps(heatup_duration)),
                 (ExponentialSchedule(amc_cfg.initial_training_noise, 0,
                                      amc_cfg.training_noise_decay),
                  EnvironmentSteps(training_noise_duration))])
            # agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0)
        elif "ClippedPPO" in args.amc_agent_algo:
            from examples.automated_deep_compression.presets.ADC_ClippedPPO import graph_manager, agent_params

        # These parameters are passed to the Distiller environment
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'app_args': app_args,
            'amc_cfg': amc_cfg,
            'services': services
        }

        coach_logs_dir = os.path.join(msglogger.logdir, 'coach')
        os.mkdir(coach_logs_dir)
        task_parameters = TaskParameters(experiment_path=coach_logs_dir)
        graph_manager.create_graph(task_parameters)
        graph_manager.improve()
Example #25
0
def training_worker(graph_manager, checkpoint_dir, use_pretrained_model,
                    framework):
    """
    restore a checkpoint then perform rollouts using the restored model
    """
    # initialize graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_save_dir'] = checkpoint_dir
    task_parameters.__dict__['checkpoint_save_secs'] = 20
    task_parameters.__dict__['experiment_path'] = SM_MODEL_OUTPUT_DIR

    if framework.lower() == "mxnet":
        task_parameters.framework_type = Frameworks.mxnet
        if hasattr(graph_manager, 'agent_params'):
            for network_parameters in graph_manager.agent_params.network_wrappers.values(
            ):
                network_parameters.framework = Frameworks.mxnet
        elif hasattr(graph_manager, 'agents_params'):
            for ap in graph_manager.agents_params:
                for network_parameters in ap.network_wrappers.values():
                    network_parameters.framework = Frameworks.mxnet

    if use_pretrained_model:
        task_parameters.__dict__[
            'checkpoint_restore_dir'] = PRETRAINED_MODEL_DIR

    graph_manager.create_graph(task_parameters)

    # save randomly initialized graph
    graph_manager.save_checkpoint()

    # training loop
    steps = 0
    graph_manager.setup_memory_backend()

    # To handle SIGTERM
    door_man = DoorMan()

    try:
        while (steps < graph_manager.improve_steps.num_steps):
            graph_manager.phase = core_types.RunPhase.TRAIN
            graph_manager.fetch_from_worker(
                graph_manager.agent_params.algorithm.
                num_consecutive_playing_steps)
            graph_manager.phase = core_types.RunPhase.UNDEFINED

            if graph_manager.should_train():
                steps += graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps

                graph_manager.phase = core_types.RunPhase.TRAIN
                graph_manager.train()
                graph_manager.phase = core_types.RunPhase.UNDEFINED

                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                    graph_manager.save_checkpoint()
                else:
                    graph_manager.occasionally_save_checkpoint()

            if door_man.terminate_now:
                "Received SIGTERM. Checkpointing before exiting."
                graph_manager.save_checkpoint()
                break

    except Exception as e:
        raise RuntimeError("An error occured while training: %s" % e)
    finally:
        print("Terminating training worker")
        graph_manager.data_store.upload_finished_file()
Example #26
0
def main():
    """ Main function for tournament worker """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--preset',
                        help="(string) Name of a preset to run \
                             (class name from the 'presets' directory.)",
                        type=str,
                        required=False)
    parser.add_argument('--s3_bucket',
                        help='list(string) S3 bucket',
                        type=str,
                        nargs='+',
                        default=rospy.get_param("MODEL_S3_BUCKET",
                                                ["gsaur-test"]))
    parser.add_argument('--s3_prefix',
                        help='list(string) S3 prefix',
                        type=str,
                        nargs='+',
                        default=rospy.get_param("MODEL_S3_PREFIX",
                                                ["sagemaker"]))
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=rospy.get_param("AWS_REGION", "us-east-1"))
    parser.add_argument('--number_of_trials',
                        help='(integer) Number of trials',
                        type=int,
                        default=int(rospy.get_param("NUMBER_OF_TRIALS", 10)))
    parser.add_argument(
        '-c',
        '--local_model_directory',
        help='(string) Path to a folder containing a checkpoint \
                             to restore the model from.',
        type=str,
        default='./checkpoint')
    parser.add_argument('--number_of_resets',
                        help='(integer) Number of resets',
                        type=int,
                        default=int(rospy.get_param("NUMBER_OF_RESETS", 0)))
    parser.add_argument('--penalty_seconds',
                        help='(float) penalty second',
                        type=float,
                        default=float(rospy.get_param("PENALTY_SECONDS", 2.0)))
    parser.add_argument('--job_type',
                        help='(string) job type',
                        type=str,
                        default=rospy.get_param("JOB_TYPE", "EVALUATION"))
    parser.add_argument('--is_continuous',
                        help='(boolean) is continous after lap completion',
                        type=bool,
                        default=utils.str2bool(
                            rospy.get_param("IS_CONTINUOUS", False)))
    parser.add_argument('--race_type',
                        help='(string) Race type',
                        type=str,
                        default=rospy.get_param("RACE_TYPE", "TIME_TRIAL"))
    parser.add_argument('--off_track_penalty',
                        help='(float) off track penalty second',
                        type=float,
                        default=float(rospy.get_param("OFF_TRACK_PENALTY",
                                                      2.0)))
    parser.add_argument('--collision_penalty',
                        help='(float) collision penalty second',
                        type=float,
                        default=float(rospy.get_param("COLLISION_PENALTY",
                                                      5.0)))

    args = parser.parse_args()
    arg_s3_bucket = args.s3_bucket
    arg_s3_prefix = args.s3_prefix
    logger.info("S3 bucket: %s \n S3 prefix: %s", arg_s3_bucket, arg_s3_prefix)

    # tournament_worker: names to be displayed in MP4.
    # This is racer alias in tournament worker case.
    display_names = rospy.get_param('DISPLAY_NAME', "")

    metrics_s3_buckets = rospy.get_param('METRICS_S3_BUCKET')
    metrics_s3_object_keys = rospy.get_param('METRICS_S3_OBJECT_KEY')

    arg_s3_bucket, arg_s3_prefix = utils.force_list(
        arg_s3_bucket), utils.force_list(arg_s3_prefix)
    metrics_s3_buckets = utils.force_list(metrics_s3_buckets)
    metrics_s3_object_keys = utils.force_list(metrics_s3_object_keys)

    validate_list = [
        arg_s3_bucket, arg_s3_prefix, metrics_s3_buckets,
        metrics_s3_object_keys
    ]

    simtrace_s3_bucket = rospy.get_param('SIMTRACE_S3_BUCKET', None)
    mp4_s3_bucket = rospy.get_param('MP4_S3_BUCKET', None)
    if simtrace_s3_bucket:
        simtrace_s3_object_prefix = rospy.get_param('SIMTRACE_S3_PREFIX')
        simtrace_s3_bucket = utils.force_list(simtrace_s3_bucket)
        simtrace_s3_object_prefix = utils.force_list(simtrace_s3_object_prefix)
        validate_list.extend([simtrace_s3_bucket, simtrace_s3_object_prefix])
    if mp4_s3_bucket:
        mp4_s3_object_prefix = rospy.get_param('MP4_S3_OBJECT_PREFIX')
        mp4_s3_bucket = utils.force_list(mp4_s3_bucket)
        mp4_s3_object_prefix = utils.force_list(mp4_s3_object_prefix)
        validate_list.extend([mp4_s3_bucket, mp4_s3_object_prefix])

    if not all([lambda x: len(x) == len(validate_list[0]), validate_list]):
        utils.log_and_exit(
            "Eval worker error: Incorrect arguments passed: {}".format(
                validate_list), utils.SIMAPP_SIMULATION_WORKER_EXCEPTION,
            utils.SIMAPP_EVENT_ERROR_CODE_500)
    if args.number_of_resets != 0 and args.number_of_resets < MIN_RESET_COUNT:
        raise GenericRolloutException(
            "number of resets is less than {}".format(MIN_RESET_COUNT))

    # Instantiate Cameras
    if len(arg_s3_bucket) == 1:
        configure_camera(namespaces=['racecar'])
    else:
        configure_camera(namespaces=[
            'racecar_{}'.format(str(agent_index))
            for agent_index in range(len(arg_s3_bucket))
        ])

    agent_list = list()
    s3_bucket_dict = dict()
    s3_prefix_dict = dict()
    s3_writers = list()

    # tournament_worker: list of required S3 locations
    simtrace_s3_bucket_dict = dict()
    simtrace_s3_prefix_dict = dict()
    metrics_s3_bucket_dict = dict()
    metrics_s3_obect_key_dict = dict()
    mp4_s3_bucket_dict = dict()
    mp4_s3_object_prefix_dict = dict()

    for agent_index, s3_bucket_val in enumerate(arg_s3_bucket):
        agent_name = 'agent' if len(arg_s3_bucket) == 1 else 'agent_{}'.format(
            str(agent_index))
        racecar_name = 'racecar' if len(
            arg_s3_bucket) == 1 else 'racecar_{}'.format(str(agent_index))
        s3_bucket_dict[agent_name] = arg_s3_bucket[agent_index]
        s3_prefix_dict[agent_name] = arg_s3_prefix[agent_index]

        # tournament_worker: remap key with agent_name instead of agent_index for list of S3 locations.
        simtrace_s3_bucket_dict[agent_name] = simtrace_s3_bucket[agent_index]
        simtrace_s3_prefix_dict[agent_name] = simtrace_s3_object_prefix[
            agent_index]
        metrics_s3_bucket_dict[agent_name] = metrics_s3_buckets[agent_index]
        metrics_s3_obect_key_dict[agent_name] = metrics_s3_object_keys[
            agent_index]
        mp4_s3_bucket_dict[agent_name] = mp4_s3_bucket[agent_index]
        mp4_s3_object_prefix_dict[agent_name] = mp4_s3_object_prefix[
            agent_index]

        s3_client = SageS3Client(bucket=arg_s3_bucket[agent_index],
                                 s3_prefix=arg_s3_prefix[agent_index],
                                 aws_region=args.aws_region)

        # Load the model metadata
        if not os.path.exists(os.path.join(CUSTOM_FILES_PATH, agent_name)):
            os.makedirs(os.path.join(CUSTOM_FILES_PATH, agent_name))
        model_metadata_local_path = os.path.join(
            os.path.join(CUSTOM_FILES_PATH, agent_name), 'model_metadata.json')
        utils.load_model_metadata(
            s3_client,
            os.path.normpath("%s/model/model_metadata.json" %
                             arg_s3_prefix[agent_index]),
            model_metadata_local_path)
        # Handle backward compatibility
        _, _, version = parse_model_metadata(model_metadata_local_path)
        if float(version) < float(utils.SIMAPP_VERSION) and \
        not utils.has_current_ckpnt_name(arg_s3_bucket[agent_index], arg_s3_prefix[agent_index], args.aws_region):
            utils.make_compatible(arg_s3_bucket[agent_index],
                                  arg_s3_prefix[agent_index], args.aws_region,
                                  SyncFiles.TRAINER_READY.value)

        # Select the optimal model
        utils.do_model_selection(s3_bucket=arg_s3_bucket[agent_index],
                                 s3_prefix=arg_s3_prefix[agent_index],
                                 region=args.aws_region)

        # Download hyperparameters from SageMaker
        if not os.path.exists(agent_name):
            os.makedirs(agent_name)
        hyperparameters_file_success = False
        hyperparams_s3_key = os.path.normpath(arg_s3_prefix[agent_index] +
                                              "/ip/hyperparameters.json")
        hyperparameters_file_success = s3_client.download_file(
            s3_key=hyperparams_s3_key,
            local_path=os.path.join(agent_name, "hyperparameters.json"))
        sm_hyperparams_dict = {}
        if hyperparameters_file_success:
            logger.info("Received Sagemaker hyperparameters successfully!")
            with open(os.path.join(agent_name,
                                   "hyperparameters.json")) as file:
                sm_hyperparams_dict = json.load(file)
        else:
            logger.info("SageMaker hyperparameters not found.")

        agent_config = {
            'model_metadata': model_metadata_local_path,
            ConfigParams.CAR_CTRL_CONFIG.value: {
                ConfigParams.LINK_NAME_LIST.value: [
                    link_name.replace('racecar', racecar_name)
                    for link_name in LINK_NAMES
                ],
                ConfigParams.VELOCITY_LIST.value: [
                    velocity_topic.replace('racecar', racecar_name)
                    for velocity_topic in VELOCITY_TOPICS
                ],
                ConfigParams.STEERING_LIST.value: [
                    steering_topic.replace('racecar', racecar_name)
                    for steering_topic in STEERING_TOPICS
                ],
                ConfigParams.CHANGE_START.value:
                utils.str2bool(rospy.get_param('CHANGE_START_POSITION',
                                               False)),
                ConfigParams.ALT_DIR.value:
                utils.str2bool(
                    rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)),
                ConfigParams.ACTION_SPACE_PATH.value:
                'custom_files/' + agent_name + '/model_metadata.json',
                ConfigParams.REWARD.value:
                reward_function,
                ConfigParams.AGENT_NAME.value:
                racecar_name,
                ConfigParams.VERSION.value:
                version,
                ConfigParams.NUMBER_OF_RESETS.value:
                args.number_of_resets,
                ConfigParams.PENALTY_SECONDS.value:
                args.penalty_seconds,
                ConfigParams.NUMBER_OF_TRIALS.value:
                args.number_of_trials,
                ConfigParams.IS_CONTINUOUS.value:
                args.is_continuous,
                ConfigParams.RACE_TYPE.value:
                args.race_type,
                ConfigParams.COLLISION_PENALTY.value:
                args.collision_penalty,
                ConfigParams.OFF_TRACK_PENALTY.value:
                args.off_track_penalty
            }
        }

        metrics_s3_config = {
            MetricsS3Keys.METRICS_BUCKET.value:
            metrics_s3_buckets[agent_index],
            MetricsS3Keys.METRICS_KEY.value:
            metrics_s3_object_keys[agent_index],
            # Replaced rospy.get_param('AWS_REGION') to be equal to the argument being passed
            # or default argument set
            MetricsS3Keys.REGION.value:
            args.aws_region,
            # Replaced rospy.get_param('MODEL_S3_BUCKET') to be equal to the argument being passed
            # or default argument set
            MetricsS3Keys.STEP_BUCKET.value:
            arg_s3_bucket[agent_index],
            # Replaced rospy.get_param('MODEL_S3_PREFIX') to be equal to the argument being passed
            # or default argument set
            MetricsS3Keys.STEP_KEY.value:
            os.path.join(arg_s3_prefix[agent_index],
                         EVALUATION_SIMTRACE_DATA_S3_OBJECT_KEY)
        }
        aws_region = rospy.get_param('AWS_REGION', args.aws_region)
        s3_writer_job_info = []
        if simtrace_s3_bucket:
            s3_writer_job_info.append(
                IterationData(
                    'simtrace', simtrace_s3_bucket[agent_index],
                    simtrace_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.
                        SIM_TRACE_EVALUATION_LOCAL_FILE.value)))
        if mp4_s3_bucket:
            s3_writer_job_info.extend([
                IterationData(
                    'pip', mp4_s3_bucket[agent_index],
                    mp4_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.
                        CAMERA_PIP_MP4_VALIDATION_LOCAL_PATH.value)),
                IterationData(
                    '45degree', mp4_s3_bucket[agent_index],
                    mp4_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.
                        CAMERA_45DEGREE_MP4_VALIDATION_LOCAL_PATH.value)),
                IterationData(
                    'topview', mp4_s3_bucket[agent_index],
                    mp4_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.
                        CAMERA_TOPVIEW_MP4_VALIDATION_LOCAL_PATH.value))
            ])

        s3_writers.append(S3Writer(job_info=s3_writer_job_info))
        run_phase_subject = RunPhaseSubject()
        agent_list.append(
            create_rollout_agent(agent_config,
                                 EvalMetrics(agent_name, metrics_s3_config),
                                 run_phase_subject))
    agent_list.append(create_obstacles_agent())
    agent_list.append(create_bot_cars_agent())
    # ROS service to indicate all the robomaker markov packages are ready for consumption
    signal_robomaker_markov_package_ready()

    PhaseObserver('/agent/training_phase', run_phase_subject)

    graph_manager, _ = get_graph_manager(hp_dict=sm_hyperparams_dict,
                                         agent_list=agent_list,
                                         run_phase_subject=run_phase_subject)

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_names=s3_bucket_dict,
        base_checkpoint_dir=args.local_model_directory,
        s3_folders=s3_prefix_dict)

    graph_manager.data_store = S3BotoDataStore(params=ds_params_instance,
                                               graph_manager=graph_manager,
                                               ignore_lock=True)
    graph_manager.env_params.seed = 0

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = args.local_model_directory

    tournament_worker(graph_manager=graph_manager,
                      number_of_trials=args.number_of_trials,
                      task_parameters=task_parameters,
                      s3_writers=s3_writers,
                      is_continuous=args.is_continuous)

    # tournament_worker: write race report to local file.
    write_race_report(graph_manager,
                      model_s3_bucket_map=s3_bucket_dict,
                      model_s3_prefix_map=s3_prefix_dict,
                      metrics_s3_bucket_map=metrics_s3_bucket_dict,
                      metrics_s3_key_map=metrics_s3_obect_key_dict,
                      simtrace_s3_bucket_map=simtrace_s3_bucket_dict,
                      simtrace_s3_prefix_map=simtrace_s3_prefix_dict,
                      mp4_s3_bucket_map=mp4_s3_bucket_dict,
                      mp4_s3_prefix_map=mp4_s3_object_prefix_dict,
                      display_names=display_names)

    # tournament_worker: terminate tournament_race_node.
    terminate_tournament_race()
Example #27
0
def validate(s3_bucket, s3_prefix, aws_region):
    screen.set_use_colors(False)
    screen.log_title(" S3 bucket: {} \n S3 prefix: {}".format(
        s3_bucket, s3_prefix))

    # download model metadata
    model_metadata = ModelMetadata(bucket=s3_bucket,
                                   s3_key=get_s3_key(
                                       s3_prefix, MODEL_METADATA_S3_POSTFIX),
                                   region_name=aws_region,
                                   local_path=MODEL_METADATA_LOCAL_PATH)

    # Create model local path
    os.makedirs(LOCAL_MODEL_DIR)

    try:
        # Handle backward compatibility
        model_metadata_info = model_metadata.get_model_metadata_info()
        observation_list = model_metadata_info[ModelMetadataKeys.SENSOR.value]
        version = model_metadata_info[ModelMetadataKeys.VERSION.value]
    except Exception as ex:
        log_and_exit("Failed to parse model_metadata file: {}".format(ex),
                     SIMAPP_VALIDATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_400)

    # Below get_transition_data function must called before create_training_agent function
    # to avoid 500 in case unsupported Sensor is received.
    # create_training_agent will exit with 500 if unsupported sensor is received,
    # and get_transition_data function below will exit with 400 if unsupported sensor is received.
    # We want to return 400 in model validation case if unsupported sensor is received.
    # Thus, call this get_transition_data function before create_traning_agent function!
    transitions = get_transition_data(observation_list)

    checkpoint = Checkpoint(bucket=s3_bucket,
                            s3_prefix=s3_prefix,
                            region_name=args.aws_region,
                            agent_name='agent',
                            checkpoint_dir=LOCAL_MODEL_DIR)
    # make coach checkpoint compatible
    if version < SIMAPP_VERSION_2 and not checkpoint.rl_coach_checkpoint.is_compatible(
    ):
        checkpoint.rl_coach_checkpoint.make_compatible(
            checkpoint.syncfile_ready)
    # add checkpoint into checkpoint_dict
    checkpoint_dict = {'agent': checkpoint}

    agent_config = {
        'model_metadata': model_metadata,
        ConfigParams.CAR_CTRL_CONFIG.value: {
            ConfigParams.LINK_NAME_LIST.value: [],
            ConfigParams.VELOCITY_LIST.value: {},
            ConfigParams.STEERING_LIST.value: {},
            ConfigParams.CHANGE_START.value: None,
            ConfigParams.ALT_DIR.value: None,
            ConfigParams.MODEL_METADATA.value: model_metadata,
            ConfigParams.REWARD.value: None,
            ConfigParams.AGENT_NAME.value: 'racecar'
        }
    }

    agent_list = list()
    agent_list.append(create_training_agent(agent_config))

    sm_hyperparams_dict = {}
    graph_manager, _ = get_graph_manager(hp_dict=sm_hyperparams_dict,
                                         agent_list=agent_list,
                                         run_phase_subject=None)

    ds_params_instance = S3BotoDataStoreParameters(
        checkpoint_dict=checkpoint_dict)

    graph_manager.data_store = S3BotoDataStore(ds_params_instance,
                                               graph_manager,
                                               ignore_lock=True)

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = LOCAL_MODEL_DIR
    _validate(graph_manager=graph_manager,
              task_parameters=task_parameters,
              transitions=transitions,
              s3_bucket=s3_bucket,
              s3_prefix=s3_prefix,
              aws_region=aws_region)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--markov-preset-file',
                        help="(string) Name of a preset file to run in Markov's preset directory.",
                        type=str,
                        default=os.environ.get("MARKOV_PRESET_FILE", "mars_presets.py"))
    parser.add_argument('-c', '--local_model_directory',
                        help='(string) Path to a folder containing a checkpoint to restore the model from.',
                        type=str,
                        default=os.environ.get("LOCAL_MODEL_DIRECTORY", "./checkpoint"))
    parser.add_argument('-n', '--num_workers',
                        help="(int) Number of workers for multi-process based agents, e.g. A3C",
                        default=1,
                        type=int)
    parser.add_argument('--model-s3-bucket',
                        help='(string) S3 bucket where trained models are stored. It contains model checkpoints.',
                        type=str,
                        default=os.environ.get("MODEL_S3_BUCKET"))
    parser.add_argument('--model-s3-prefix',
                        help='(string) S3 prefix where trained models are stored. It contains model checkpoints.',
                        type=str,
                        default=os.environ.get("MODEL_S3_PREFIX"))
    parser.add_argument('--aws-region',
                        help='(string) AWS region',
                        type=str,
                        default=os.environ.get("ROS_AWS_REGION", "us-west-1"))
    parser.add_argument('--checkpoint-save-secs',
                        help="(int) Time period in second between 2 checkpoints",
                        type=int,
                        default=600)
    parser.add_argument('--save-frozen-graph',
                        help="(bool) True if we need to store the frozen graph",
                        type=bool,
                        default=True)

    args = parser.parse_args()

    if args.markov_preset_file:
        markov_path = imp.find_module("markov")[1]
        preset_location = os.path.join(markov_path, "presets", args.markov_preset_file)
        path_and_module = preset_location + ":graph_manager"
        graph_manager = short_dynamic_import(path_and_module, ignore_module_case=True)

    else:
        raise ValueError("Unable to determine preset file")

    # TODO: support other frameworks
    task_parameters = TaskParameters(framework_type=Frameworks.tensorflow,
                                     checkpoint_save_secs=args.checkpoint_save_secs)
    task_parameters.__dict__['checkpoint_save_dir'] = args.local_model_directory
    task_parameters.__dict__ = add_items_to_dict(task_parameters.__dict__, args.__dict__)

#    data_store_params_instance = S3BotoDataStoreParameters(bucket_name=args.model_s3_bucket,
#                                                           s3_folder=args.model_s3_prefix,
#                                                           checkpoint_dir=args.local_model_directory,
#                                                           aws_region=args.aws_region)


    #data_store = S3BotoDataStore(data_store_params_instance)

    #if args.save_frozen_graph:
    #    data_store.graph_manager = graph_manager

    #graph_manager.data_store_params = data_store_params_instance
    #graph_manager.data_store = data_store
    graph_manager.should_stop = should_stop_training_based_on_evaluation
    start_graph(graph_manager=graph_manager, task_parameters=task_parameters)
def main():
    screen.set_use_colors(False)

    parser = argparse.ArgumentParser()
    parser.add_argument('-pk',
                        '--preset_s3_key',
                        help="(string) Name of a preset to download from S3",
                        type=str,
                        required=False)
    parser.add_argument(
        '-ek',
        '--environment_s3_key',
        help="(string) Name of an environment file to download from S3",
        type=str,
        required=False)
    parser.add_argument('--model_metadata_s3_key',
                        help="(string) Model Metadata File S3 Key",
                        type=str,
                        required=False)
    parser.add_argument(
        '-c',
        '--checkpoint_dir',
        help=
        '(string) Path to a folder containing a checkpoint to write the model to.',
        type=str,
        default='./checkpoint')
    parser.add_argument(
        '--pretrained_checkpoint_dir',
        help='(string) Path to a folder for downloading a pre-trained model',
        type=str,
        default=PRETRAINED_MODEL_DIR)
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=os.environ.get(
                            "SAGEMAKER_SHARED_S3_BUCKET_PATH", "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--framework',
                        help='(string) tensorflow or mxnet',
                        type=str,
                        default='tensorflow')
    parser.add_argument('--pretrained_s3_bucket',
                        help='(string) S3 bucket for pre-trained model',
                        type=str)
    parser.add_argument('--pretrained_s3_prefix',
                        help='(string) S3 prefix for pre-trained model',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=os.environ.get("AWS_REGION", "us-east-1"))

    args, _ = parser.parse_known_args()

    s3_client = S3Client(region_name=args.aws_region, max_retry_attempts=0)

    # download model metadata
    # TODO: replace 'agent' with name of each agent
    model_metadata_download = ModelMetadata(
        bucket=args.s3_bucket,
        s3_key=args.model_metadata_s3_key,
        region_name=args.aws_region,
        local_path=MODEL_METADATA_LOCAL_PATH_FORMAT.format('agent'))
    model_metadata_info = model_metadata_download.get_model_metadata_info()
    network_type = model_metadata_info[ModelMetadataKeys.NEURAL_NETWORK.value]
    version = model_metadata_info[ModelMetadataKeys.VERSION.value]

    # upload model metadata
    model_metadata_upload = ModelMetadata(
        bucket=args.s3_bucket,
        s3_key=get_s3_key(args.s3_prefix, MODEL_METADATA_S3_POSTFIX),
        region_name=args.aws_region,
        local_path=MODEL_METADATA_LOCAL_PATH_FORMAT.format('agent'))
    model_metadata_upload.persist(
        s3_kms_extra_args=utils.get_s3_kms_extra_args())

    shutil.copy2(model_metadata_download.local_path, SM_MODEL_OUTPUT_DIR)

    success_custom_preset = False
    if args.preset_s3_key:
        preset_local_path = "./markov/presets/preset.py"
        try:
            s3_client.download_file(bucket=args.s3_bucket,
                                    s3_key=args.preset_s3_key,
                                    local_path=preset_local_path)
            success_custom_preset = True
        except botocore.exceptions.ClientError:
            pass
        if not success_custom_preset:
            logger.info(
                "Could not download the preset file. Using the default DeepRacer preset."
            )
        else:
            preset_location = "markov.presets.preset:graph_manager"
            graph_manager = short_dynamic_import(preset_location,
                                                 ignore_module_case=True)
            s3_client.upload_file(
                bucket=args.s3_bucket,
                s3_key=os.path.normpath("%s/presets/preset.py" %
                                        args.s3_prefix),
                local_path=preset_local_path,
                s3_kms_extra_args=utils.get_s3_kms_extra_args())
            if success_custom_preset:
                logger.info("Using preset: %s" % args.preset_s3_key)

    if not success_custom_preset:
        params_blob = os.environ.get('SM_TRAINING_ENV', '')
        if params_blob:
            params = json.loads(params_blob)
            sm_hyperparams_dict = params["hyperparameters"]
        else:
            sm_hyperparams_dict = {}

        #! TODO each agent should have own config
        agent_config = {
            'model_metadata': model_metadata_download,
            ConfigParams.CAR_CTRL_CONFIG.value: {
                ConfigParams.LINK_NAME_LIST.value: [],
                ConfigParams.VELOCITY_LIST.value: {},
                ConfigParams.STEERING_LIST.value: {},
                ConfigParams.CHANGE_START.value: None,
                ConfigParams.ALT_DIR.value: None,
                ConfigParams.MODEL_METADATA.value: model_metadata_download,
                ConfigParams.REWARD.value: None,
                ConfigParams.AGENT_NAME.value: 'racecar'
            }
        }

        agent_list = list()
        agent_list.append(create_training_agent(agent_config))

        graph_manager, robomaker_hyperparams_json = get_graph_manager(
            hp_dict=sm_hyperparams_dict,
            agent_list=agent_list,
            run_phase_subject=None,
            run_type=str(RunType.TRAINER))

        # Upload hyperparameters to SageMaker shared s3 bucket
        hyperparameters = Hyperparameters(bucket=args.s3_bucket,
                                          s3_key=get_s3_key(
                                              args.s3_prefix,
                                              HYPERPARAMETER_S3_POSTFIX),
                                          region_name=args.aws_region)
        hyperparameters.persist(
            hyperparams_json=robomaker_hyperparams_json,
            s3_kms_extra_args=utils.get_s3_kms_extra_args())

        # Attach sample collector to graph_manager only if sample count > 0
        max_sample_count = int(sm_hyperparams_dict.get("max_sample_count", 0))
        if max_sample_count > 0:
            sample_collector = SampleCollector(
                bucket=args.s3_bucket,
                s3_prefix=args.s3_prefix,
                region_name=args.aws_region,
                max_sample_count=max_sample_count,
                sampling_frequency=int(
                    sm_hyperparams_dict.get("sampling_frequency", 1)))
            graph_manager.sample_collector = sample_collector

    # persist IP config from sagemaker to s3
    ip_config = IpConfig(bucket=args.s3_bucket,
                         s3_prefix=args.s3_prefix,
                         region_name=args.aws_region)
    ip_config.persist(s3_kms_extra_args=utils.get_s3_kms_extra_args())

    training_algorithm = model_metadata_download.training_algorithm
    output_head_format = FROZEN_HEAD_OUTPUT_GRAPH_FORMAT_MAPPING[
        training_algorithm]

    use_pretrained_model = args.pretrained_s3_bucket and args.pretrained_s3_prefix
    # Handle backward compatibility
    if use_pretrained_model:
        # checkpoint s3 instance for pretrained model
        # TODO: replace 'agent' for multiagent training
        checkpoint = Checkpoint(bucket=args.pretrained_s3_bucket,
                                s3_prefix=args.pretrained_s3_prefix,
                                region_name=args.aws_region,
                                agent_name='agent',
                                checkpoint_dir=args.pretrained_checkpoint_dir,
                                output_head_format=output_head_format)
        # make coach checkpoint compatible
        if version < SIMAPP_VERSION_2 and not checkpoint.rl_coach_checkpoint.is_compatible(
        ):
            checkpoint.rl_coach_checkpoint.make_compatible(
                checkpoint.syncfile_ready)
        # get best model checkpoint string
        model_checkpoint_name = checkpoint.deepracer_checkpoint_json.get_deepracer_best_checkpoint(
        )
        # Select the best checkpoint model by uploading rl coach .coach_checkpoint file
        checkpoint.rl_coach_checkpoint.update(
            model_checkpoint_name=model_checkpoint_name,
            s3_kms_extra_args=utils.get_s3_kms_extra_args())
        # add checkpoint into checkpoint_dict
        checkpoint_dict = {'agent': checkpoint}
        # load pretrained model
        ds_params_instance_pretrained = S3BotoDataStoreParameters(
            checkpoint_dict=checkpoint_dict)
        data_store_pretrained = S3BotoDataStore(ds_params_instance_pretrained,
                                                graph_manager, True)
        data_store_pretrained.load_from_store()

    memory_backend_params = DeepRacerRedisPubSubMemoryBackendParameters(
        redis_address="localhost",
        redis_port=6379,
        run_type=str(RunType.TRAINER),
        channel=args.s3_prefix,
        network_type=network_type)

    graph_manager.memory_backend_params = memory_backend_params

    # checkpoint s3 instance for training model
    checkpoint = Checkpoint(bucket=args.s3_bucket,
                            s3_prefix=args.s3_prefix,
                            region_name=args.aws_region,
                            agent_name='agent',
                            checkpoint_dir=args.checkpoint_dir,
                            output_head_format=output_head_format)
    checkpoint_dict = {'agent': checkpoint}
    ds_params_instance = S3BotoDataStoreParameters(
        checkpoint_dict=checkpoint_dict)

    graph_manager.data_store_params = ds_params_instance

    graph_manager.data_store = S3BotoDataStore(ds_params_instance,
                                               graph_manager)

    task_parameters = TaskParameters()
    task_parameters.experiment_path = SM_MODEL_OUTPUT_DIR
    task_parameters.checkpoint_save_secs = 20
    if use_pretrained_model:
        task_parameters.checkpoint_restore_path = args.pretrained_checkpoint_dir
    task_parameters.checkpoint_save_dir = args.checkpoint_dir

    training_worker(
        graph_manager=graph_manager,
        task_parameters=task_parameters,
        user_batch_size=json.loads(robomaker_hyperparams_json)["batch_size"],
        user_episode_per_rollout=json.loads(
            robomaker_hyperparams_json)["num_episodes_between_training"],
        training_algorithm=training_algorithm)
def main():
    screen.set_use_colors(False)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--checkpoint_dir',
        help=
        '(string) Path to a folder containing a checkpoint to restore the model from.',
        type=str,
        default='./checkpoint')
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=rospy.get_param("SAGEMAKER_SHARED_S3_BUCKET",
                                                "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default=rospy.get_param("SAGEMAKER_SHARED_S3_PREFIX",
                                                "sagemaker"))
    parser.add_argument(
        '--num_workers',
        help="(int) The number of workers started in this pool",
        type=int,
        default=int(rospy.get_param("NUM_WORKERS", 1)))
    parser.add_argument('--rollout_idx',
                        help="(int) The index of current rollout worker",
                        type=int,
                        default=0)
    parser.add_argument('-r',
                        '--redis_ip',
                        help="(string) IP or host for the redis server",
                        default='localhost',
                        type=str)
    parser.add_argument('-rp',
                        '--redis_port',
                        help="(int) Port of the redis server",
                        default=6379,
                        type=int)
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=rospy.get_param("AWS_REGION", "us-east-1"))
    parser.add_argument('--reward_file_s3_key',
                        help='(string) Reward File S3 Key',
                        type=str,
                        default=rospy.get_param("REWARD_FILE_S3_KEY", None))
    parser.add_argument('--model_metadata_s3_key',
                        help='(string) Model Metadata File S3 Key',
                        type=str,
                        default=rospy.get_param("MODEL_METADATA_FILE_S3_KEY",
                                                None))
    # For training job, reset is not allowed. penalty_seconds, off_track_penalty, and
    # collision_penalty will all be 0 be default
    parser.add_argument('--number_of_resets',
                        help='(integer) Number of resets',
                        type=int,
                        default=int(rospy.get_param("NUMBER_OF_RESETS", 0)))
    parser.add_argument('--penalty_seconds',
                        help='(float) penalty second',
                        type=float,
                        default=float(rospy.get_param("PENALTY_SECONDS", 0.0)))
    parser.add_argument('--job_type',
                        help='(string) job type',
                        type=str,
                        default=rospy.get_param("JOB_TYPE", "TRAINING"))
    parser.add_argument('--is_continuous',
                        help='(boolean) is continous after lap completion',
                        type=bool,
                        default=utils.str2bool(
                            rospy.get_param("IS_CONTINUOUS", False)))
    parser.add_argument('--race_type',
                        help='(string) Race type',
                        type=str,
                        default=rospy.get_param("RACE_TYPE", "TIME_TRIAL"))
    parser.add_argument('--off_track_penalty',
                        help='(float) off track penalty second',
                        type=float,
                        default=float(rospy.get_param("OFF_TRACK_PENALTY",
                                                      0.0)))
    parser.add_argument('--collision_penalty',
                        help='(float) collision penalty second',
                        type=float,
                        default=float(rospy.get_param("COLLISION_PENALTY",
                                                      0.0)))

    args = parser.parse_args()

    s3_client = SageS3Client(bucket=args.s3_bucket,
                             s3_prefix=args.s3_prefix,
                             aws_region=args.aws_region)
    logger.info("S3 bucket: %s", args.s3_bucket)
    logger.info("S3 prefix: %s", args.s3_prefix)

    # Load the model metadata
    model_metadata_local_path = os.path.join(CUSTOM_FILES_PATH,
                                             'model_metadata.json')
    utils.load_model_metadata(s3_client, args.model_metadata_s3_key,
                              model_metadata_local_path)

    # Download and import reward function
    if not args.reward_file_s3_key:
        log_and_exit(
            "Reward function code S3 key not available for S3 bucket {} and prefix {}"
            .format(args.s3_bucket, args.s3_prefix),
            SIMAPP_SIMULATION_WORKER_EXCEPTION, SIMAPP_EVENT_ERROR_CODE_500)
    download_customer_reward_function(s3_client, args.reward_file_s3_key)

    try:
        from custom_files.customer_reward_function import reward_function
    except Exception as e:
        log_and_exit("Failed to import user's reward_function: {}".format(e),
                     SIMAPP_SIMULATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_400)

    # Instantiate Cameras
    configure_camera(namespaces=['racecar'])

    preset_file_success, _ = download_custom_files_if_present(
        s3_client, args.s3_prefix)

    #! TODO each agent should have own config
    _, _, version = utils_parse_model_metadata.parse_model_metadata(
        model_metadata_local_path)
    agent_config = {
        'model_metadata': model_metadata_local_path,
        ConfigParams.CAR_CTRL_CONFIG.value: {
            ConfigParams.LINK_NAME_LIST.value:
            LINK_NAMES,
            ConfigParams.VELOCITY_LIST.value:
            VELOCITY_TOPICS,
            ConfigParams.STEERING_LIST.value:
            STEERING_TOPICS,
            ConfigParams.CHANGE_START.value:
            utils.str2bool(rospy.get_param('CHANGE_START_POSITION', True)),
            ConfigParams.ALT_DIR.value:
            utils.str2bool(
                rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)),
            ConfigParams.ACTION_SPACE_PATH.value:
            'custom_files/model_metadata.json',
            ConfigParams.REWARD.value:
            reward_function,
            ConfigParams.AGENT_NAME.value:
            'racecar',
            ConfigParams.VERSION.value:
            version,
            ConfigParams.NUMBER_OF_RESETS.value:
            args.number_of_resets,
            ConfigParams.PENALTY_SECONDS.value:
            args.penalty_seconds,
            ConfigParams.NUMBER_OF_TRIALS.value:
            None,
            ConfigParams.IS_CONTINUOUS.value:
            args.is_continuous,
            ConfigParams.RACE_TYPE.value:
            args.race_type,
            ConfigParams.COLLISION_PENALTY.value:
            args.collision_penalty,
            ConfigParams.OFF_TRACK_PENALTY.value:
            args.off_track_penalty
        }
    }

    #! TODO each agent should have own s3 bucket
    step_metrics_prefix = rospy.get_param('SAGEMAKER_SHARED_S3_PREFIX')
    if args.num_workers > 1:
        step_metrics_prefix = os.path.join(step_metrics_prefix,
                                           str(args.rollout_idx))
    metrics_s3_config = {
        MetricsS3Keys.METRICS_BUCKET.value:
        rospy.get_param('METRICS_S3_BUCKET'),
        MetricsS3Keys.METRICS_KEY.value:
        rospy.get_param('METRICS_S3_OBJECT_KEY'),
        MetricsS3Keys.REGION.value: rospy.get_param('AWS_REGION')
    }
    metrics_s3_model_cfg = {
        MetricsS3Keys.METRICS_BUCKET.value:
        args.s3_bucket,
        MetricsS3Keys.METRICS_KEY.value:
        os.path.join(args.s3_prefix, DEEPRACER_CHKPNT_KEY_SUFFIX),
        MetricsS3Keys.REGION.value:
        args.aws_region
    }
    run_phase_subject = RunPhaseSubject()

    agent_list = list()
    agent_list.append(
        create_rollout_agent(
            agent_config,
            TrainingMetrics(agent_name='agent',
                            s3_dict_metrics=metrics_s3_config,
                            s3_dict_model=metrics_s3_model_cfg,
                            ckpnt_dir=args.checkpoint_dir,
                            run_phase_sink=run_phase_subject,
                            use_model_picker=(args.rollout_idx == 0)),
            run_phase_subject))
    agent_list.append(create_obstacles_agent())
    agent_list.append(create_bot_cars_agent())
    # ROS service to indicate all the robomaker markov packages are ready for consumption
    signal_robomaker_markov_package_ready()

    PhaseObserver('/agent/training_phase', run_phase_subject)

    aws_region = rospy.get_param('AWS_REGION', args.aws_region)
    simtrace_s3_bucket = rospy.get_param('SIMTRACE_S3_BUCKET', None)
    mp4_s3_bucket = rospy.get_param('MP4_S3_BUCKET',
                                    None) if args.rollout_idx == 0 else None
    if simtrace_s3_bucket:
        simtrace_s3_object_prefix = rospy.get_param('SIMTRACE_S3_PREFIX')
        if args.num_workers > 1:
            simtrace_s3_object_prefix = os.path.join(simtrace_s3_object_prefix,
                                                     str(args.rollout_idx))
    if mp4_s3_bucket:
        mp4_s3_object_prefix = rospy.get_param('MP4_S3_OBJECT_PREFIX')

    s3_writer_job_info = []
    if simtrace_s3_bucket:
        s3_writer_job_info.append(
            IterationData(
                'simtrace', simtrace_s3_bucket, simtrace_s3_object_prefix,
                aws_region,
                os.path.join(
                    ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                    IterationDataLocalFileNames.SIM_TRACE_TRAINING_LOCAL_FILE.
                    value)))
    if mp4_s3_bucket:
        s3_writer_job_info.extend([
            IterationData(
                'pip', mp4_s3_bucket, mp4_s3_object_prefix, aws_region,
                os.path.join(
                    ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                    IterationDataLocalFileNames.
                    CAMERA_PIP_MP4_VALIDATION_LOCAL_PATH.value)),
            IterationData(
                '45degree', mp4_s3_bucket, mp4_s3_object_prefix, aws_region,
                os.path.join(
                    ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                    IterationDataLocalFileNames.
                    CAMERA_45DEGREE_MP4_VALIDATION_LOCAL_PATH.value)),
            IterationData(
                'topview', mp4_s3_bucket, mp4_s3_object_prefix, aws_region,
                os.path.join(
                    ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                    IterationDataLocalFileNames.
                    CAMERA_TOPVIEW_MP4_VALIDATION_LOCAL_PATH.value))
        ])

    s3_writer = S3Writer(job_info=s3_writer_job_info)

    redis_ip = s3_client.get_ip()
    logger.info("Received IP from SageMaker successfully: %s", redis_ip)

    # Download hyperparameters from SageMaker
    hyperparameters_file_success = False
    hyperparams_s3_key = os.path.normpath(args.s3_prefix +
                                          "/ip/hyperparameters.json")
    hyperparameters_file_success = s3_client.download_file(
        s3_key=hyperparams_s3_key, local_path="hyperparameters.json")
    sm_hyperparams_dict = {}
    if hyperparameters_file_success:
        logger.info("Received Sagemaker hyperparameters successfully!")
        with open("hyperparameters.json") as filepointer:
            sm_hyperparams_dict = json.load(filepointer)
    else:
        logger.info("SageMaker hyperparameters not found.")

    enable_domain_randomization = utils.str2bool(
        rospy.get_param('ENABLE_DOMAIN_RANDOMIZATION', False))
    if preset_file_success:
        preset_location = os.path.join(CUSTOM_FILES_PATH, "preset.py")
        preset_location += ":graph_manager"
        graph_manager = short_dynamic_import(preset_location,
                                             ignore_module_case=True)
        logger.info("Using custom preset file!")
    else:
        graph_manager, _ = get_graph_manager(
            hp_dict=sm_hyperparams_dict,
            agent_list=agent_list,
            run_phase_subject=run_phase_subject,
            enable_domain_randomization=enable_domain_randomization)

    # If num_episodes_between_training is smaller than num_workers then cancel worker early.
    episode_steps_per_rollout = graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps
    # Reduce number of workers if allocated more than num_episodes_between_training
    if args.num_workers > episode_steps_per_rollout:
        logger.info(
            "Excess worker allocated. Reducing from {} to {}...".format(
                args.num_workers, episode_steps_per_rollout))
        args.num_workers = episode_steps_per_rollout
    if args.rollout_idx >= episode_steps_per_rollout or args.rollout_idx >= args.num_workers:
        err_msg_format = "Exiting excess worker..."
        err_msg_format += "(rollout_idx[{}] >= num_workers[{}] or num_episodes_between_training[{}])"
        logger.info(
            err_msg_format.format(args.rollout_idx, args.num_workers,
                                  episode_steps_per_rollout))
        # Close the down the job
        utils.cancel_simulation_job(
            os.environ.get('AWS_ROBOMAKER_SIMULATION_JOB_ARN'),
            rospy.get_param('AWS_REGION'))

    memory_backend_params = DeepRacerRedisPubSubMemoryBackendParameters(
        redis_address=redis_ip,
        redis_port=6379,
        run_type=str(RunType.ROLLOUT_WORKER),
        channel=args.s3_prefix,
        num_workers=args.num_workers,
        rollout_idx=args.rollout_idx)

    graph_manager.memory_backend_params = memory_backend_params

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_names={'agent': args.s3_bucket},
        base_checkpoint_dir=args.checkpoint_dir,
        s3_folders={'agent': args.s3_prefix})

    graph_manager.data_store = S3BotoDataStore(ds_params_instance,
                                               graph_manager)

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = args.checkpoint_dir

    rollout_worker(graph_manager=graph_manager,
                   num_workers=args.num_workers,
                   rollout_idx=args.rollout_idx,
                   task_parameters=task_parameters,
                   s3_writer=s3_writer)
def training_worker(graph_manager, checkpoint_dir, use_pretrained_model, framework):
    """
    restore a checkpoint then perform rollouts using the restored model
    """
    # initialize graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_save_dir'] = checkpoint_dir
    task_parameters.__dict__['checkpoint_save_secs'] = 20
    task_parameters.__dict__['experiment_path'] = INTERMEDIATE_FOLDER

    if framework.lower() == "mxnet":
        task_parameters.framework_type = Frameworks.mxnet
        if hasattr(graph_manager, 'agent_params'):
            for network_parameters in graph_manager.agent_params.network_wrappers.values():
                network_parameters.framework = Frameworks.mxnet
        elif hasattr(graph_manager, 'agents_params'):
            for ap in graph_manager.agents_params:
                for network_parameters in ap.network_wrappers.values():
                    network_parameters.framework = Frameworks.mxnet

    if use_pretrained_model:
        task_parameters.__dict__['checkpoint_restore_dir'] = PRETRAINED_MODEL_DIR

    graph_manager.create_graph(task_parameters)

    # save randomly initialized graph
    graph_manager.save_checkpoint()

    # training loop
    steps = 0
    graph_manager.setup_memory_backend()

    # To handle SIGTERM
    door_man = DoorMan()

    try:
        while (steps < graph_manager.improve_steps.num_steps):
            graph_manager.phase = core_types.RunPhase.TRAIN
            graph_manager.fetch_from_worker(graph_manager.agent_params.algorithm.num_consecutive_playing_steps)
            graph_manager.phase = core_types.RunPhase.UNDEFINED

            if graph_manager.should_train():
                steps += graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps

                graph_manager.phase = core_types.RunPhase.TRAIN
                graph_manager.train()
                graph_manager.phase = core_types.RunPhase.UNDEFINED

                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                    graph_manager.save_checkpoint()
                else:
                    graph_manager.occasionally_save_checkpoint()

            if door_man.terminate_now:
                "Received SIGTERM. Checkpointing before exiting."
                graph_manager.save_checkpoint()
                break

    except Exception as e:
        raise RuntimeError("An error occured while training: %s" % e)
    finally:
        print("Terminating training worker")
        graph_manager.data_store.upload_finished_file()
Example #32
0
def main():
    screen.set_use_colors(False)

    logger.info("src/training_worker.py - INIZIO MAIN")

    parser = argparse.ArgumentParser()
    parser.add_argument('-pk',
                        '--preset_s3_key',
                        help="(string) Name of a preset to download from S3",
                        type=str,
                        required=False)
    parser.add_argument(
        '-ek',
        '--environment_s3_key',
        help="(string) Name of an environment file to download from S3",
        type=str,
        required=False)
    parser.add_argument('--model_metadata_s3_key',
                        help="(string) Model Metadata File S3 Key",
                        type=str,
                        required=False)
    parser.add_argument(
        '-c',
        '--checkpoint-dir',
        help=
        '(string) Path to a folder containing a checkpoint to write the model to.',
        type=str,
        default='./checkpoint')
    parser.add_argument(
        '--pretrained-checkpoint-dir',
        help='(string) Path to a folder for downloading a pre-trained model',
        type=str,
        default=PRETRAINED_MODEL_DIR)
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=os.environ.get(
                            "SAGEMAKER_SHARED_S3_BUCKET_PATH", "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--s3_endpoint_url',
                        help='(string) S3 endpoint URL',
                        type=str,
                        default=os.environ.get("S3_ENDPOINT_URL", None))
    parser.add_argument('--framework',
                        help='(string) tensorflow or mxnet',
                        type=str,
                        default='tensorflow')
    parser.add_argument('--pretrained_s3_bucket',
                        help='(string) S3 bucket for pre-trained model',
                        type=str)
    parser.add_argument('--pretrained_s3_prefix',
                        help='(string) S3 prefix for pre-trained model',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=os.environ.get("AWS_REGION", "us-east-1"))

    args, _ = parser.parse_known_args()
    logger.info("S3 bucket: %s \n S3 prefix: %s \n S3 endpoint URL: %s",
                args.s3_bucket, args.s3_prefix, args.s3_endpoint_url)

    s3_client = SageS3Client(bucket=args.s3_bucket,
                             s3_prefix=args.s3_prefix,
                             aws_region=args.aws_region,
                             s3_endpoint_url=args.s3_endpoint_url)

    # Load the model metadata
    model_metadata_local_path = os.path.join(CUSTOM_FILES_PATH,
                                             'model_metadata.json')
    utils.load_model_metadata(s3_client, args.model_metadata_s3_key,
                              model_metadata_local_path)
    s3_client.upload_file(
        os.path.normpath("%s/model/model_metadata.json" % args.s3_prefix),
        model_metadata_local_path)
    shutil.copy2(model_metadata_local_path, SM_MODEL_OUTPUT_DIR)

    success_custom_preset = False
    if args.preset_s3_key:
        preset_local_path = "./markov/presets/preset.py"
        success_custom_preset = s3_client.download_file(
            s3_key=args.preset_s3_key, local_path=preset_local_path)
        if not success_custom_preset:
            logger.info(
                "Could not download the preset file. Using the default DeepRacer preset."
            )
        else:
            preset_location = "markov.presets.preset:graph_manager"
            graph_manager = short_dynamic_import(preset_location,
                                                 ignore_module_case=True)
            success_custom_preset = s3_client.upload_file(
                s3_key=os.path.normpath("%s/presets/preset.py" %
                                        args.s3_prefix),
                local_path=preset_local_path)
            if success_custom_preset:
                logger.info("Using preset: %s" % args.preset_s3_key)

    if not success_custom_preset:
        params_blob = os.environ.get('SM_TRAINING_ENV', '')
        if params_blob:
            params = json.loads(params_blob)
            sm_hyperparams_dict = params["hyperparameters"]
        else:
            sm_hyperparams_dict = {}

        #configurazione agente: metadati del modello impostati dall'utente (angolo di sterzo + velocità) + nome

        #! TODO each agent should have own config
        agent_config = {
            'model_metadata': model_metadata_local_path,
            ConfigParams.CAR_CTRL_CONFIG.value: {
                ConfigParams.LINK_NAME_LIST.value: [],
                ConfigParams.VELOCITY_LIST.value: {},
                ConfigParams.STEERING_LIST.value: {},
                ConfigParams.CHANGE_START.value: None,
                ConfigParams.ALT_DIR.value: None,
                ConfigParams.ACTION_SPACE_PATH.value:
                'custom_files/model_metadata.json',
                ConfigParams.REWARD.value: None,
                ConfigParams.AGENT_NAME.value: 'racecar'
            }
        }

        agent_list = list()
        agent_list.append(create_training_agent(agent_config))

        logger.info(
            "src/training_worker.py - ora chiamo la get_graph_manager, che recupera l'agente"
        )

        graph_manager, robomaker_hyperparams_json = get_graph_manager(
            hp_dict=sm_hyperparams_dict,
            agent_list=agent_list,
            run_phase_subject=None)

        logger.info("src/training_worker.py - ho l'agente")

        s3_client.upload_hyperparameters(robomaker_hyperparams_json)
        logger.info("Uploaded hyperparameters.json to S3")

        # Attach sample collector to graph_manager only if sample count > 0
        max_sample_count = int(sm_hyperparams_dict.get("max_sample_count", 0))
        if max_sample_count > 0:
            sample_collector = SampleCollector(
                s3_client=s3_client,
                s3_prefix=args.s3_prefix,
                max_sample_count=max_sample_count,
                sampling_frequency=int(
                    sm_hyperparams_dict.get("sampling_frequency", 1)))
            graph_manager.sample_collector = sample_collector

    host_ip_address = utils.get_ip_from_host()
    s3_client.write_ip_config(host_ip_address)
    logger.info("Uploaded IP address information to S3: %s" % host_ip_address)
    use_pretrained_model = args.pretrained_s3_bucket and args.pretrained_s3_prefix
    # Handle backward compatibility
    _, network_type, version = parse_model_metadata(model_metadata_local_path)
    if use_pretrained_model:
        if float(version) < float(SIMAPP_VERSION) and \
        not utils.has_current_ckpnt_name(args.pretrained_s3_bucket, args.pretrained_s3_prefix, args.aws_region, args.s3_endpoint_url):
            utils.make_compatible(args.pretrained_s3_bucket,
                                  args.pretrained_s3_prefix, args.aws_region,
                                  SyncFiles.TRAINER_READY.value)
        #Select the optimal model for the starting weights
        utils.do_model_selection(s3_bucket=args.s3_bucket,
                                 s3_prefix=args.s3_prefix,
                                 region=args.aws_region,
                                 s3_endpoint_url=args.s3_endpoint_url)

        ds_params_instance_pretrained = S3BotoDataStoreParameters(
            aws_region=args.aws_region,
            bucket_names={'agent': args.pretrained_s3_bucket},
            base_checkpoint_dir=args.pretrained_checkpoint_dir,
            s3_folders={'agent': args.pretrained_s3_prefix},
            s3_endpoint_url=args.s3_endpoint_url)
        data_store_pretrained = S3BotoDataStore(ds_params_instance_pretrained,
                                                graph_manager, True)
        data_store_pretrained.load_from_store()

    memory_backend_params = DeepRacerRedisPubSubMemoryBackendParameters(
        redis_address="localhost",
        redis_port=6379,
        run_type=str(RunType.TRAINER),
        channel=args.s3_prefix,
        network_type=network_type)

    graph_manager.memory_backend_params = memory_backend_params

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_names={'agent': args.s3_bucket},
        base_checkpoint_dir=args.checkpoint_dir,
        s3_folders={'agent': args.s3_prefix},
        s3_endpoint_url=args.s3_endpoint_url)

    graph_manager.data_store_params = ds_params_instance

    graph_manager.data_store = S3BotoDataStore(ds_params_instance,
                                               graph_manager)

    task_parameters = TaskParameters()
    task_parameters.experiment_path = SM_MODEL_OUTPUT_DIR
    task_parameters.checkpoint_save_secs = 20
    if use_pretrained_model:
        task_parameters.checkpoint_restore_path = args.pretrained_checkpoint_dir
    task_parameters.checkpoint_save_dir = args.checkpoint_dir

    #funzione riga 48
    #prende in input:
    #       - il grafo (creato con la get_graph_manager)
    #       - robomaker_hyperparams_json (ritornato dalla get_graph_manager)

    training_worker(
        graph_manager=graph_manager,
        task_parameters=task_parameters,
        user_batch_size=json.loads(robomaker_hyperparams_json)["batch_size"],
        user_episode_per_rollout=json.loads(
            robomaker_hyperparams_json)["num_episodes_between_training"])