def test_parse_model_metadata_exception():
    """Check the error path of parse_model_metadata in utils_parse_model_metadata.py.

    The path "dummy_file.json" is handed to parse_model_metadata and the call is
    expected to raise an exception whose message matches
    "Model metadata does not exist:".
    """
    missing_metadata_pattern = r".*Model metadata does not exist:.*"
    with pytest.raises(Exception, match=missing_metadata_pattern):
        parse_model_metadata("dummy_file.json")
def test_parse_model_metadata(create_model_metadata):
    """This function tests the functionality of parse_model_metadata function
    in utils_parse_model_metadata.py when we pass a model metadata file with
    sensor and neural network information

    Args:
        create_model_metadata (String): Gives the path for model metadata file for testing
    """
    try:
        sensor, network, simapp_version = parse_model_metadata(
            create_model_metadata)
    finally:
        # Remove the generated metadata file even when parsing raises, so a
        # failing run does not leave the file behind.
        os.remove(create_model_metadata)
    assert sensor == ["STEREO_CAMERAS"]
    assert network == "DEEP_CONVOLUTIONAL_NETWORK_SHALLOW"
    assert simapp_version == SIMAPP_VERSION
def test_parse_model_metadata_only_action(create_model_metadata_action_space):
    """This function tests the functionality of parse_model_metadata function
    in utils_parse_model_metadata.py when we pass a model metadata file with only
    action space and no sensor or neural network information.

    Args:
        create_model_metadata_action_space (String): Gives the path for model metadata file for testing
    """
    try:
        sensor, network, simapp_version = parse_model_metadata(
            create_model_metadata_action_space)
    finally:
        # Remove the generated metadata file even when parsing raises, so a
        # failing run does not leave the file behind.
        os.remove(create_model_metadata_action_space)
    # With no sensor/network entries in the file, the parser is expected to
    # report the values asserted below.
    assert sensor == [Input.OBSERVATION.value]
    assert network == NeuralNetwork.DEEP_CONVOLUTIONAL_NETWORK_SHALLOW.value
    assert simapp_version == "1.0"
Beispiel #4
0
def create_training_agent(agent_config):
    '''Builds and returns a training Agent.

       agent_config - Dictionary containing the key specified in ConfigParams;
                      read here for 'model_metadata' and 'car_ctrl_cnfig'
    '''
    parsed_metadata = utils_parse_model_metadata.parse_model_metadata(
        agent_config['model_metadata'])
    observation_list, network, _ = parsed_metadata

    # The network settings are derived from a first sensor instance.
    network_settings = get_network_settings(
        construct_sensor(observation_list, SensorFactory), network)
    # NOTE(review): a second sensor is constructed for the agent itself; this
    # looks redundant but is kept because construct_sensor/get_network_settings
    # are not visible here - confirm before collapsing the two calls.
    agent_sensor = construct_sensor(observation_list, SensorFactory)

    car_ctrl = agent_config['car_ctrl_cnfig']
    agent_name = car_ctrl[ConfigParams.AGENT_NAME.value]
    action_space_path = car_ctrl[ConfigParams.ACTION_SPACE_PATH.value]
    training_ctrl = TrainingCtrl(agent_name, action_space_path)

    return Agent(network_settings, agent_sensor, training_ctrl, None)
Beispiel #5
0
def create_rollout_agent(agent_config, metrics):
    '''Builds and returns a rollout Agent.

       agent_config - Dictionary containing the key specified in ConfigParams;
                      read here for 'model_metadata' and 'car_ctrl_cnfig'
       metrics - Metrics object for the agent
    '''
    metadata_path = agent_config['model_metadata']
    parsed_metadata = utils_parse_model_metadata.parse_model_metadata(metadata_path)
    observation_list, network, _ = parsed_metadata

    agent_sensor = construct_sensor(observation_list, SensorFactory)
    network_settings = get_network_settings(agent_sensor, network)

    # Register the observation list with the Frustum instance.
    Frustum.get_instance().add_cameras(observation_list)

    rollout_ctrl = RolloutCtrl(agent_config['car_ctrl_cnfig'])

    return Agent(network_settings, agent_sensor, rollout_ctrl, metrics)
Beispiel #6
0
def create_rollout_agent(agent_config, metrics, run_phase_subject):
    '''Builds and returns a rollout Agent.

       agent_config - Dictionary containing the key specified in ConfigParams
       metrics - Metrics object for the agent
       run_phase_subject - Subject that notifies observers when the run phase changes
    '''
    metadata_path = agent_config['model_metadata']
    parsed_metadata = utils_parse_model_metadata.parse_model_metadata(metadata_path)
    observation_list, network, _ = parsed_metadata

    # The agent name lives inside the car control section of the config.
    car_ctrl_key = ConfigParams.CAR_CTRL_CONFIG.value
    agent_name = agent_config[car_ctrl_key][ConfigParams.AGENT_NAME.value]

    agent_sensor = construct_sensor(agent_name, observation_list, SensorFactory)
    network_settings = get_network_settings(agent_sensor, network)

    # Register this agent's observation list with the frustum manager.
    frustum_manager = FrustumManager.get_instance()
    frustum_manager.add(agent_name=agent_name,
                        observation_list=observation_list)

    rollout_ctrl = RolloutCtrl(agent_config[car_ctrl_key], run_phase_subject, metrics)

    return Agent(network_settings, agent_sensor, rollout_ctrl)
Beispiel #7
0
def main():
    """Set up and start the training worker.

    Steps performed, in order:
      1. Parse the command-line arguments (unknown arguments are ignored).
      2. Fetch the model metadata, upload it under
         "<s3_prefix>/model/model_metadata.json" and copy it to SM_MODEL_OUTPUT_DIR.
      3. Obtain a graph manager, either from a custom preset downloaded from S3
         or from get_graph_manager using the hyperparameters found in the
         SM_TRAINING_ENV environment variable.
      4. Publish the host IP to S3 and, when a pre-trained bucket and prefix are
         given, load the pre-trained checkpoint.
      5. Configure the memory backend, data store and task parameters, then
         call training_worker.
    """
    screen.set_use_colors(False)

    logger.info("src/training_worker.py - INIZIO MAIN")

    parser = argparse.ArgumentParser()
    parser.add_argument('-pk',
                        '--preset_s3_key',
                        help="(string) Name of a preset to download from S3",
                        type=str,
                        required=False)
    parser.add_argument(
        '-ek',
        '--environment_s3_key',
        help="(string) Name of an environment file to download from S3",
        type=str,
        required=False)
    parser.add_argument('--model_metadata_s3_key',
                        help="(string) Model Metadata File S3 Key",
                        type=str,
                        required=False)
    parser.add_argument(
        '-c',
        '--checkpoint-dir',
        help=
        '(string) Path to a folder containing a checkpoint to write the model to.',
        type=str,
        default='./checkpoint')
    parser.add_argument(
        '--pretrained-checkpoint-dir',
        help='(string) Path to a folder for downloading a pre-trained model',
        type=str,
        default=PRETRAINED_MODEL_DIR)
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=os.environ.get(
                            "SAGEMAKER_SHARED_S3_BUCKET_PATH", "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--s3_endpoint_url',
                        help='(string) S3 endpoint URL',
                        type=str,
                        default=os.environ.get("S3_ENDPOINT_URL", None))
    parser.add_argument('--framework',
                        help='(string) tensorflow or mxnet',
                        type=str,
                        default='tensorflow')
    parser.add_argument('--pretrained_s3_bucket',
                        help='(string) S3 bucket for pre-trained model',
                        type=str)
    parser.add_argument('--pretrained_s3_prefix',
                        help='(string) S3 prefix for pre-trained model',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=os.environ.get("AWS_REGION", "us-east-1"))

    # parse_known_args tolerates extra arguments; the leftovers are discarded.
    args, _ = parser.parse_known_args()
    logger.info("S3 bucket: %s \n S3 prefix: %s \n S3 endpoint URL: %s",
                args.s3_bucket, args.s3_prefix, args.s3_endpoint_url)

    s3_client = SageS3Client(bucket=args.s3_bucket,
                             s3_prefix=args.s3_prefix,
                             aws_region=args.aws_region,
                             s3_endpoint_url=args.s3_endpoint_url)

    # Load the model metadata
    model_metadata_local_path = os.path.join(CUSTOM_FILES_PATH,
                                             'model_metadata.json')
    utils.load_model_metadata(s3_client, args.model_metadata_s3_key,
                              model_metadata_local_path)
    # Store a copy of the metadata next to the model in S3 and in the local
    # model output directory.
    s3_client.upload_file(
        os.path.normpath("%s/model/model_metadata.json" % args.s3_prefix),
        model_metadata_local_path)
    shutil.copy2(model_metadata_local_path, SM_MODEL_OUTPUT_DIR)

    # Try the user-supplied preset first; any failure falls back to the
    # default graph manager built further below.
    success_custom_preset = False
    if args.preset_s3_key:
        preset_local_path = "./markov/presets/preset.py"
        success_custom_preset = s3_client.download_file(
            s3_key=args.preset_s3_key, local_path=preset_local_path)
        if not success_custom_preset:
            logger.info(
                "Could not download the preset file. Using the default DeepRacer preset."
            )
        else:
            preset_location = "markov.presets.preset:graph_manager"
            graph_manager = short_dynamic_import(preset_location,
                                                 ignore_module_case=True)
            # The preset only counts as usable if it can also be uploaded to
            # this run's S3 prefix.
            success_custom_preset = s3_client.upload_file(
                s3_key=os.path.normpath("%s/presets/preset.py" %
                                        args.s3_prefix),
                local_path=preset_local_path)
            if success_custom_preset:
                logger.info("Using preset: %s" % args.preset_s3_key)

    if not success_custom_preset:
        # Hyperparameters come from the SM_TRAINING_ENV JSON blob when it is
        # set; otherwise an empty dict is used.
        params_blob = os.environ.get('SM_TRAINING_ENV', '')
        if params_blob:
            params = json.loads(params_blob)
            sm_hyperparams_dict = params["hyperparameters"]
        else:
            sm_hyperparams_dict = {}

        # Agent configuration: user-defined model metadata (steering angle + speed) + name

        #! TODO each agent should have own config
        agent_config = {
            'model_metadata': model_metadata_local_path,
            ConfigParams.CAR_CTRL_CONFIG.value: {
                ConfigParams.LINK_NAME_LIST.value: [],
                ConfigParams.VELOCITY_LIST.value: {},
                ConfigParams.STEERING_LIST.value: {},
                ConfigParams.CHANGE_START.value: None,
                ConfigParams.ALT_DIR.value: None,
                ConfigParams.ACTION_SPACE_PATH.value:
                'custom_files/model_metadata.json',
                ConfigParams.REWARD.value: None,
                ConfigParams.AGENT_NAME.value: 'racecar'
            }
        }

        agent_list = list()
        agent_list.append(create_training_agent(agent_config))

        logger.info(
            "src/training_worker.py - ora chiamo la get_graph_manager, che recupera l'agente"
        )

        graph_manager, robomaker_hyperparams_json = get_graph_manager(
            hp_dict=sm_hyperparams_dict,
            agent_list=agent_list,
            run_phase_subject=None)

        logger.info("src/training_worker.py - ho l'agente")

        s3_client.upload_hyperparameters(robomaker_hyperparams_json)
        logger.info("Uploaded hyperparameters.json to S3")

        # Attach sample collector to graph_manager only if sample count > 0
        max_sample_count = int(sm_hyperparams_dict.get("max_sample_count", 0))
        if max_sample_count > 0:
            sample_collector = SampleCollector(
                s3_client=s3_client,
                s3_prefix=args.s3_prefix,
                max_sample_count=max_sample_count,
                sampling_frequency=int(
                    sm_hyperparams_dict.get("sampling_frequency", 1)))
            graph_manager.sample_collector = sample_collector

    host_ip_address = utils.get_ip_from_host()
    s3_client.write_ip_config(host_ip_address)
    logger.info("Uploaded IP address information to S3: %s" % host_ip_address)
    # Truthy only when both the pre-trained bucket and prefix are provided.
    use_pretrained_model = args.pretrained_s3_bucket and args.pretrained_s3_prefix
    # Handle backward compatibility
    _, network_type, version = parse_model_metadata(model_metadata_local_path)
    if use_pretrained_model:
        # Pre-trained models from an older SimApp version without the current
        # checkpoint name are converted before use.
        if float(version) < float(SIMAPP_VERSION) and \
        not utils.has_current_ckpnt_name(args.pretrained_s3_bucket, args.pretrained_s3_prefix, args.aws_region, args.s3_endpoint_url):
            utils.make_compatible(args.pretrained_s3_bucket,
                                  args.pretrained_s3_prefix, args.aws_region,
                                  SyncFiles.TRAINER_READY.value)
        #Select the optimal model for the starting weights
        utils.do_model_selection(s3_bucket=args.s3_bucket,
                                 s3_prefix=args.s3_prefix,
                                 region=args.aws_region,
                                 s3_endpoint_url=args.s3_endpoint_url)

        ds_params_instance_pretrained = S3BotoDataStoreParameters(
            aws_region=args.aws_region,
            bucket_names={'agent': args.pretrained_s3_bucket},
            base_checkpoint_dir=args.pretrained_checkpoint_dir,
            s3_folders={'agent': args.pretrained_s3_prefix},
            s3_endpoint_url=args.s3_endpoint_url)
        data_store_pretrained = S3BotoDataStore(ds_params_instance_pretrained,
                                                graph_manager, True)
        data_store_pretrained.load_from_store()

    # The memory backend uses a Redis instance on localhost:6379, with the S3
    # prefix as the channel name.
    memory_backend_params = DeepRacerRedisPubSubMemoryBackendParameters(
        redis_address="localhost",
        redis_port=6379,
        run_type=str(RunType.TRAINER),
        channel=args.s3_prefix,
        network_type=network_type)

    graph_manager.memory_backend_params = memory_backend_params

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_names={'agent': args.s3_bucket},
        base_checkpoint_dir=args.checkpoint_dir,
        s3_folders={'agent': args.s3_prefix},
        s3_endpoint_url=args.s3_endpoint_url)

    graph_manager.data_store_params = ds_params_instance

    graph_manager.data_store = S3BotoDataStore(ds_params_instance,
                                               graph_manager)

    task_parameters = TaskParameters()
    task_parameters.experiment_path = SM_MODEL_OUTPUT_DIR
    task_parameters.checkpoint_save_secs = 20
    if use_pretrained_model:
        task_parameters.checkpoint_restore_path = args.pretrained_checkpoint_dir
    task_parameters.checkpoint_save_dir = args.checkpoint_dir

    # training_worker (the original note places it at line 48 of this file)
    # takes as input:
    #       - the graph (created by get_graph_manager)
    #       - robomaker_hyperparams_json (returned by get_graph_manager)
    # NOTE(review): robomaker_hyperparams_json is only assigned in the
    # default-preset branch above; when a custom preset is used successfully
    # this lookup looks like it would raise NameError unless the name also
    # exists at module level - confirm.

    training_worker(
        graph_manager=graph_manager,
        task_parameters=task_parameters,
        user_batch_size=json.loads(robomaker_hyperparams_json)["batch_size"],
        user_episode_per_rollout=json.loads(
            robomaker_hyperparams_json)["num_episodes_between_training"])
def main():
    """Set up and start the training worker.

    Steps performed, in order:
      1. Declare the command-line arguments, start the Redis server, then parse
         the arguments (unknown arguments are ignored).
      2. Fetch the model metadata, upload it under
         "<s3_prefix>/model/model_metadata.json" and copy it to SM_MODEL_OUTPUT_DIR.
      3. Obtain a graph manager, either from a custom preset downloaded from S3
         or from get_graph_manager using the hyperparameters found in the
         SM_TRAINING_ENV environment variable.
      4. Publish the host IP to S3 and, when a pre-trained bucket and prefix are
         given, load the pre-trained checkpoint.
      5. Configure the memory backend, data store and task parameters, then
         call training_worker.
    """
    screen.set_use_colors(False)

    parser = argparse.ArgumentParser()
    parser.add_argument('-pk', '--preset_s3_key',
                        help="(string) Name of a preset to download from S3",
                        type=str,
                        required=False)
    parser.add_argument('-ek', '--environment_s3_key',
                        help="(string) Name of an environment file to download from S3",
                        type=str,
                        required=False)
    parser.add_argument('--model_metadata_s3_key',
                        help="(string) Model Metadata File S3 Key",
                        type=str,
                        required=False)
    parser.add_argument('-c', '--checkpoint-dir',
                        help='(string) Path to a folder containing a checkpoint to write the model to.',
                        type=str,
                        default='./checkpoint')
    parser.add_argument('--pretrained-checkpoint-dir',
                        help='(string) Path to a folder for downloading a pre-trained model',
                        type=str,
                        default=PRETRAINED_MODEL_DIR)
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=os.environ.get("SAGEMAKER_SHARED_S3_BUCKET_PATH", "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--framework',
                        help='(string) tensorflow or mxnet',
                        type=str,
                        default='tensorflow')
    parser.add_argument('--pretrained_s3_bucket',
                        help='(string) S3 bucket for pre-trained model',
                        type=str)
    parser.add_argument('--pretrained_s3_prefix',
                        help='(string) S3 prefix for pre-trained model',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=os.environ.get("AWS_REGION", "us-east-1"))

    # Redis is started before anything else; the memory backend configured
    # below points at localhost:6379.
    start_redis_server()

    # parse_known_args tolerates extra arguments; the leftovers are discarded.
    args, _ = parser.parse_known_args()

    s3_client = SageS3Client(bucket=args.s3_bucket, s3_prefix=args.s3_prefix, aws_region=args.aws_region)

    # Load the model metadata
    model_metadata_local_path = os.path.join(CUSTOM_FILES_PATH, 'model_metadata.json')
    utils.load_model_metadata(s3_client, args.model_metadata_s3_key, model_metadata_local_path)
    # Store a copy of the metadata next to the model in S3 and in the local
    # model output directory.
    s3_client.upload_file(os.path.normpath("%s/model/model_metadata.json" % args.s3_prefix), model_metadata_local_path)
    shutil.copy2(model_metadata_local_path, SM_MODEL_OUTPUT_DIR)

    # Try the user-supplied preset first; any failure falls back to the
    # default graph manager built further below.
    success_custom_preset = False
    if args.preset_s3_key:
        preset_local_path = "./markov/presets/preset.py"
        success_custom_preset = s3_client.download_file(s3_key=args.preset_s3_key, local_path=preset_local_path)
        if not success_custom_preset:
            logger.info("Could not download the preset file. Using the default DeepRacer preset.")
        else:
            preset_location = "markov.presets.preset:graph_manager"
            graph_manager = short_dynamic_import(preset_location, ignore_module_case=True)
            # The preset only counts as usable if it can also be uploaded to
            # this run's S3 prefix.
            success_custom_preset = s3_client.upload_file(
                s3_key=os.path.normpath("%s/presets/preset.py" % args.s3_prefix), local_path=preset_local_path)
            if success_custom_preset:
                logger.info("Using preset: %s" % args.preset_s3_key)

    if not success_custom_preset:
        # Hyperparameters come from the SM_TRAINING_ENV JSON blob when it is
        # set; otherwise an empty dict is used.
        params_blob = os.environ.get('SM_TRAINING_ENV', '')
        if params_blob:
            params = json.loads(params_blob)
            sm_hyperparams_dict = params["hyperparameters"]
        else:
            sm_hyperparams_dict = {}

        #! TODO each agent should have own config
        agent_config = {'model_metadata': model_metadata_local_path,
                        'car_ctrl_cnfig': {ConfigParams.LINK_NAME_LIST.value: [],
                                           ConfigParams.VELOCITY_LIST.value : {},
                                           ConfigParams.STEERING_LIST.value : {},
                                           ConfigParams.CHANGE_START.value : None,
                                           ConfigParams.ALT_DIR.value : None,
                                           ConfigParams.ACTION_SPACE_PATH.value : 'custom_files/model_metadata.json',
                                           ConfigParams.REWARD.value : None,
                                           ConfigParams.AGENT_NAME.value : 'racecar'}}

        agent_list = list()
        agent_list.append(create_training_agent(agent_config))
        #agent_list.append(create_training_agent(agent_config))

        graph_manager, robomaker_hyperparams_json = get_graph_manager(sm_hyperparams_dict, agent_list)

        s3_client.upload_hyperparameters(robomaker_hyperparams_json)
        logger.info("Uploaded hyperparameters.json to S3")

    host_ip_address = utils.get_ip_from_host()
    s3_client.write_ip_config(host_ip_address)
    logger.info("Uploaded IP address information to S3: %s" % host_ip_address)
    # Truthy only when both the pre-trained bucket and prefix are provided.
    use_pretrained_model = args.pretrained_s3_bucket and args.pretrained_s3_prefix
    if use_pretrained_model:
        # Handle backward compatibility
        _, _, version = parse_model_metadata(model_metadata_local_path)
        # Pre-trained models from an older SimApp version without the current
        # checkpoint name are converted before use.
        if float(version) < float(utils.SIMAPP_VERSION) and \
        not utils.has_current_ckpnt_name(args.pretrained_s3_bucket, args.pretrained_s3_prefix, args.aws_region):
            utils.make_compatible(args.pretrained_s3_bucket, args.pretrained_s3_prefix,
                                  args.aws_region, SyncFiles.TRAINER_READY.value)

        ds_params_instance_pretrained = S3BotoDataStoreParameters(aws_region=args.aws_region,
                                                                  bucket_name=args.pretrained_s3_bucket,
                                                                  checkpoint_dir=args.pretrained_checkpoint_dir,
                                                                  s3_folder=args.pretrained_s3_prefix)
        data_store_pretrained = S3BotoDataStore(ds_params_instance_pretrained)
        data_store_pretrained.load_from_store()

    # The memory backend uses a Redis instance on localhost:6379, with the S3
    # prefix as the channel name.
    memory_backend_params = RedisPubSubMemoryBackendParameters(redis_address="localhost",
                                                               redis_port=6379,
                                                               run_type=str(RunType.TRAINER),
                                                               channel=args.s3_prefix)

    graph_manager.memory_backend_params = memory_backend_params

    ds_params_instance = S3BotoDataStoreParameters(aws_region=args.aws_region,
                                                   bucket_name=args.s3_bucket,
                                                   checkpoint_dir=args.checkpoint_dir,
                                                   s3_folder=args.s3_prefix)
    graph_manager.data_store_params = ds_params_instance

    # The data store and the graph manager reference each other.
    data_store = S3BotoDataStore(ds_params_instance)
    data_store.graph_manager = graph_manager
    graph_manager.data_store = data_store

    task_parameters = TaskParameters()
    task_parameters.experiment_path = SM_MODEL_OUTPUT_DIR
    task_parameters.checkpoint_save_secs = 20
    if use_pretrained_model:
        task_parameters.checkpoint_restore_path = args.pretrained_checkpoint_dir
    task_parameters.checkpoint_save_dir = args.checkpoint_dir

    # NOTE(review): robomaker_hyperparams_json is only assigned in the
    # default-preset branch above; when a custom preset is used successfully
    # this lookup looks like it would raise NameError unless the name also
    # exists at module level - confirm.
    training_worker(
        graph_manager=graph_manager,
        task_parameters=task_parameters,
        user_batch_size=json.loads(robomaker_hyperparams_json)["batch_size"],
        user_episode_per_rollout=json.loads(robomaker_hyperparams_json)["num_episodes_between_training"]
    )
Beispiel #9
0
def validate(s3_bucket, s3_prefix, custom_files_path, aws_region):
    """Download a model's metadata, build a training agent and graph manager
    for it, and hand everything to _validate.

    Args:
        s3_bucket (String): S3 bucket that holds the model
        s3_prefix (String): S3 prefix of the model inside the bucket
        custom_files_path (String): Local working directory; it is created here and
                                    an already existing path is reported as an error
        aws_region (String): AWS region passed to the S3 client and the data store
    """
    screen.set_use_colors(False)
    logger.info("S3 bucket: %s \n S3 prefix: %s", s3_bucket, s3_prefix)

    # The working directory must be fresh.
    if os.path.exists(custom_files_path):
        GenericValidatorException(
            "Custom Files Path already exists!").log_except_and_exit()
    else:
        os.makedirs(custom_files_path)

    sage_s3_client = SageS3Client(bucket=s3_bucket,
                                  s3_prefix=s3_prefix,
                                  aws_region=aws_region)

    # Fetch the model metadata into the working directory.
    metadata_path = os.path.join(custom_files_path, 'model_metadata.json')
    metadata_s3_key = os.path.normpath(
        "%s/model/model_metadata.json" % s3_prefix)
    utils.load_model_metadata(sage_s3_client, metadata_s3_key, metadata_path)

    # Local directory that will receive the checkpoint.
    checkpoint_dir = os.path.join(custom_files_path, 'checkpoint')
    os.makedirs(checkpoint_dir)

    try:
        # Handle backward compatibility
        observation_list, _, version = parse_model_metadata(metadata_path)
    except Exception as ex:
        log_and_exit("Failed to parse model_metadata file: {}".format(ex),
                     SIMAPP_VALIDATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_400)

    transitions = get_transition_data(observation_list)

    # Models from an older SimApp version that lack the current checkpoint
    # name are converted first.
    is_older_simapp = float(version) < float(SIMAPP_VERSION)
    if is_older_simapp and \
            not utils.has_current_ckpnt_name(s3_bucket, s3_prefix, aws_region):
        utils.make_compatible(s3_bucket, s3_prefix, aws_region,
                              SyncFiles.TRAINER_READY.value)

    car_ctrl_config = {
        ConfigParams.LINK_NAME_LIST.value: [],
        ConfigParams.VELOCITY_LIST.value: {},
        ConfigParams.STEERING_LIST.value: {},
        ConfigParams.CHANGE_START.value: None,
        ConfigParams.ALT_DIR.value: None,
        ConfigParams.ACTION_SPACE_PATH.value: metadata_path,
        ConfigParams.REWARD.value: None,
        ConfigParams.AGENT_NAME.value: 'racecar'
    }
    agent_config = {
        'model_metadata': metadata_path,
        ConfigParams.CAR_CTRL_CONFIG.value: car_ctrl_config
    }

    agent_list = [create_training_agent(agent_config)]

    # No hyperparameters are supplied for validation.
    graph_manager, _ = get_graph_manager(hp_dict={},
                                         agent_list=agent_list,
                                         run_phase_subject=None)

    data_store_params = S3BotoDataStoreParameters(
        aws_region=aws_region,
        bucket_names={'agent': s3_bucket},
        s3_folders={'agent': s3_prefix},
        base_checkpoint_dir=checkpoint_dir)

    graph_manager.data_store = S3BotoDataStore(data_store_params,
                                               graph_manager,
                                               ignore_lock=True)

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = checkpoint_dir
    _validate(graph_manager=graph_manager,
              task_parameters=task_parameters,
              transitions=transitions,
              s3_bucket=s3_bucket,
              s3_prefix=s3_prefix,
              aws_region=aws_region)
def main():
    """ Main function for tournament"""
    try:
        # parse argument
        s3_region = sys.argv[1]
        s3_bucket = sys.argv[2]
        s3_prefix = sys.argv[3]
        s3_yaml_name = sys.argv[4]

        # create boto3 session/client and download yaml/json file
        session = boto3.session.Session()
        s3_endpoint_url = os.environ.get("S3_ENDPOINT_URL", None)
        s3_client = session.client('s3',
                                   region_name=s3_region,
                                   endpoint_url=s3_endpoint_url,
                                   config=get_boto_config())

        yaml_key = os.path.normpath(os.path.join(s3_prefix, s3_yaml_name))
        local_yaml_path = os.path.abspath(
            os.path.join(os.getcwd(), s3_yaml_name))
        s3_client.download_file(Bucket=s3_bucket,
                                Key=yaml_key,
                                Filename=local_yaml_path)

        # Intermediate tournament files
        queue_pickle_name = 'tournament_candidate_queue.pkl'
        queue_pickle_s3_key = os.path.normpath(
            os.path.join(s3_prefix, queue_pickle_name))
        local_queue_pickle_path = os.path.abspath(
            os.path.join(os.getcwd(), queue_pickle_name))

        report_pickle_name = 'tournament_report.pkl'
        report_pickle_s3_key = os.path.normpath(
            os.path.join(s3_prefix, report_pickle_name))
        local_report_pickle_path = os.path.abspath(
            os.path.join(os.getcwd(), report_pickle_name))

        final_report_name = 'tournament_report.json'
        final_report_s3_key = os.path.normpath(
            os.path.join(s3_prefix, final_report_name))

        try:
            s3_client.download_file(Bucket=s3_bucket,
                                    Key=queue_pickle_s3_key,
                                    Filename=local_queue_pickle_path)
            s3_client.download_file(Bucket=s3_bucket,
                                    Key=report_pickle_s3_key,
                                    Filename=local_report_pickle_path)
        except:
            pass

        # Get values passed in yaml files. Default values are for backward compatibility and for single racecar racing
        yaml_dict = get_yaml_dict(local_yaml_path)

        # Forcing the yaml parameter to list
        force_list_params = [
            MODEL_S3_BUCKET_YAML_KEY, MODEL_S3_PREFIX_YAML_KEY,
            MODEL_METADATA_FILE_S3_YAML_KEY, METRICS_S3_BUCKET_YAML_KEY,
            METRICS_S3_PREFIX_YAML_KEY, SIMTRACE_S3_BUCKET_YAML_KEY,
            SIMTRACE_S3_PREFIX_YAML_KEY, MP4_S3_BUCKET_YAML_KEY,
            MP4_S3_PREFIX_YAML_KEY, DISPLAY_NAME_YAML_KEY
        ]
        for params in force_list_params:
            yaml_dict[params] = force_list(yaml_dict.get(params, None))

        # Populate the model_metadata_s3_key values to handle both training and evaluation for all race_formats
        if None in yaml_dict[MODEL_METADATA_FILE_S3_YAML_KEY]:
            # MODEL_METADATA_FILE_S3_KEY not passed as part of yaml file ==> This happens during evaluation
            # Assume model_metadata.json is present in the s3_prefix/model/ folder
            yaml_dict[MODEL_METADATA_FILE_S3_YAML_KEY] = list()
            for s3_prefix in yaml_dict[MODEL_S3_PREFIX_YAML_KEY]:
                yaml_dict[MODEL_METADATA_FILE_S3_YAML_KEY].append(
                    os.path.join(s3_prefix, 'model/model_metadata.json'))

        # Validate the yaml values
        validate_yaml_values(yaml_dict)
        if os.path.exists(local_queue_pickle_path):
            with open(local_queue_pickle_path, 'rb') as f:
                tournament_candidate_queue = pickle.load(f)
            with open(local_report_pickle_path, 'rb') as f:
                tournament_report = pickle.load(f)
            logger.info('tournament_candidate_queue loaded from existing file')
        else:
            logger.info('tournament_candidate_queue initialized')
            tournament_candidate_queue = deque()
            for agent_idx, _ in enumerate(yaml_dict[MODEL_S3_BUCKET_YAML_KEY]):
                tournament_candidate_queue.append(
                    (yaml_dict[MODEL_S3_BUCKET_YAML_KEY][agent_idx],
                     yaml_dict[MODEL_S3_PREFIX_YAML_KEY][agent_idx],
                     yaml_dict[MODEL_METADATA_FILE_S3_YAML_KEY][agent_idx],
                     yaml_dict[METRICS_S3_BUCKET_YAML_KEY][agent_idx],
                     yaml_dict[METRICS_S3_PREFIX_YAML_KEY][agent_idx],
                     yaml_dict[SIMTRACE_S3_BUCKET_YAML_KEY][agent_idx],
                     yaml_dict[SIMTRACE_S3_PREFIX_YAML_KEY][agent_idx],
                     yaml_dict[MP4_S3_BUCKET_YAML_KEY][agent_idx],
                     yaml_dict[MP4_S3_PREFIX_YAML_KEY][agent_idx],
                     yaml_dict[DISPLAY_NAME_YAML_KEY][agent_idx]))
            tournament_report = []

        race_idx = len(tournament_report)
        while len(tournament_candidate_queue) > 1:
            car1 = tournament_candidate_queue.popleft()
            car2 = tournament_candidate_queue.popleft()
            (car1_model_s3_bucket, car1_s3_prefix, car1_model_metadata,
             car1_metrics_bucket, car1_metrics_s3_key, car1_simtrace_bucket,
             car1_simtrace_prefix, car1_mp4_bucket, car1_mp4_prefix,
             car1_display_name) = car1
            (car2_model_s3_bucket, car2_s3_prefix, car2_model_metadata,
             car2_metrics_bucket, car2_metrics_s3_key, car2_simtrace_bucket,
             car2_simtrace_prefix, car2_mp4_bucket, car2_mp4_prefix,
             car2_display_name) = car2

            race_yaml_dict = generate_race_yaml(yaml_dict=yaml_dict,
                                                car1=car1,
                                                car2=car2,
                                                race_idx=race_idx)

            race_car_colors = ["Orange", "Purple"]
            race_model_s3_buckets = [
                car1_model_s3_bucket, car2_model_s3_bucket
            ]
            race_model_metadatas = [car1_model_metadata, car2_model_metadata]

            # List of directories created
            dirs_to_delete = list()
            yaml_dir = os.path.abspath(os.path.join(os.getcwd(),
                                                    str(race_idx)))
            os.makedirs(yaml_dir)

            dirs_to_delete.append(yaml_dir)
            race_yaml_path = os.path.abspath(
                os.path.join(yaml_dir, 'evaluation_params.yaml'))
            with open(race_yaml_path, 'w') as race_yaml_file:
                yaml.dump(race_yaml_dict, race_yaml_file)

            # List of racecar names that should include second camera while launching
            racecars_with_stereo_cameras = list()
            # List of racecar names that should include lidar while launching
            racecars_with_lidars = list()
            # List of SimApp versions
            simapp_versions = list()
            for agent_index, model_s3_bucket in enumerate(
                    race_model_s3_buckets):
                racecar_name = 'racecar_' + str(agent_index)
                # Make a local folder with the racecar name to download the model_metadata.json
                os.makedirs(os.path.join(os.getcwd(), racecar_name))
                dirs_to_delete.append(os.path.join(os.getcwd(), racecar_name))
                local_model_metadata_path = os.path.abspath(
                    os.path.join(os.path.join(os.getcwd(), racecar_name),
                                 'model_metadata.json'))
                json_key = race_model_metadatas[agent_index]
                json_key = json_key.replace('s3://{}/'.format(model_s3_bucket),
                                            '')
                s3_client.download_file(Bucket=model_s3_bucket,
                                        Key=json_key,
                                        Filename=local_model_metadata_path)
                sensors, _, simapp_version = utils_parse_model_metadata.parse_model_metadata(
                    local_model_metadata_path)
                simapp_versions.append(simapp_version)
                if Input.STEREO.value in sensors:
                    racecars_with_stereo_cameras.append(racecar_name)
                if Input.LIDAR.value in sensors or Input.SECTOR_LIDAR.value in sensors:
                    racecars_with_lidars.append(racecar_name)

            cmd = [
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "tournament_race_node.py"),
                str(race_idx), race_yaml_path,
                ','.join(racecars_with_stereo_cameras),
                ','.join(racecars_with_lidars), ','.join(race_car_colors),
                ','.join(simapp_versions)
            ]
            try:
                return_code, _, stderr = run_cmd(cmd_args=cmd,
                                                 shell=False,
                                                 stdout=None,
                                                 stderr=None)
            except KeyboardInterrupt:
                logger.info(
                    "KeyboardInterrupt raised, SimApp must be faulted! exiting..."
                )
                return

            # Retrieve winner and append tournament report
            with open('race_report.pkl', 'rb') as f:
                race_report = pickle.load(f)
            race_report['race_idx'] = race_idx
            winner = car1 if race_report[
                'winner'] == car1_display_name else car2
            logger.info("race {}'s winner: {}".format(race_idx,
                                                      race_report['winner']))

            tournament_candidate_queue.append(winner)
            tournament_report.append(race_report)

            # Clean up directories created
            for dir_to_delete in dirs_to_delete:
                shutil.rmtree(dir_to_delete, ignore_errors=True)
            race_idx += 1

            # Persist latest queue and report to use after job restarts.
            with open(local_queue_pickle_path, 'wb') as f:
                pickle.dump(tournament_candidate_queue, f, protocol=2)
            s3_client.upload_file(Filename=local_queue_pickle_path,
                                  Bucket=s3_bucket,
                                  Key=queue_pickle_s3_key)

            with open(local_report_pickle_path, 'wb') as f:
                pickle.dump(tournament_report, f, protocol=2)
            s3_client.upload_file(Filename=local_report_pickle_path,
                                  Bucket=s3_bucket,
                                  Key=report_pickle_s3_key)

            # If there is more than 1 candidates then restart the simulation job otherwise
            # tournament is finished, persists final report and ends the job.
            if len(tournament_candidate_queue) > 1:
                restart_simulation_job(
                    os.environ.get('AWS_ROBOMAKER_SIMULATION_JOB_ARN'),
                    s3_region)
                break
            else:
                # Persist final tournament report in json format
                # and terminate the job by canceling it
                s3_client.put_object(Bucket=s3_bucket,
                                     Key=final_report_s3_key,
                                     Body=json.dumps(tournament_report))

                cancel_simulation_job(
                    os.environ.get('AWS_ROBOMAKER_SIMULATION_JOB_ARN'),
                    s3_region)
    except Exception as e:
        log_and_exit(
            "Tournament node failed: s3_bucket: {}, yaml_key: {}, {}".format(
                s3_bucket, yaml_key, e), SIMAPP_SIMULATION_WORKER_EXCEPTION,
            SIMAPP_EVENT_ERROR_CODE_500)
Beispiel #11
0
def main():
    """ Main function for downloading yaml params

    Expects five positional command line arguments, read from ``sys.argv`` in
    this order: s3_region, s3_bucket, s3_prefix, s3_yaml_name and launch_name.

    The yaml file is downloaded from ``s3_bucket``; the model metadata of each
    racecar it lists is then downloaded and parsed to find out which racecars
    need stereo cameras or lidars and which SimApp version they use. Finally
    ``launch_name`` is started through ``roslaunch`` with those values.
    Failures are reported through ``log_and_exit``.
    """
    # Bound before the try block because every error handler below formats
    # these two names. A failure that happens before they are assigned (for
    # instance a missing command line argument) would otherwise raise an
    # UnboundLocalError from inside the handler and hide the original error.
    s3_bucket = None
    yaml_key = None
    try:
        # parse argument
        s3_region = sys.argv[1]
        s3_bucket = sys.argv[2]
        s3_prefix = sys.argv[3]
        s3_yaml_name = sys.argv[4]
        launch_name = sys.argv[5]

        # create boto3 session/client and download yaml/json file
        session = boto3.session.Session()

        s3_endpoint_url = os.environ.get("S3_ENDPOINT_URL", None)

        if s3_endpoint_url is not None:
            # A custom endpoint is published as a ROS parameter.
            LOG.info('Endpoint URL {}'.format(s3_endpoint_url))
            rospy.set_param('S3_ENDPOINT_URL', s3_endpoint_url)
        else:
            # No custom endpoint: an EC2 describe_vpcs call doubles as the
            # connectivity check.
            ec2_client = session.client('ec2', s3_region)
            LOG.info('Checking internet connection...')
            response = ec2_client.describe_vpcs()
            if not response['Vpcs']:
                log_and_exit("No VPC attached to instance",
                             SIMAPP_SIMULATION_WORKER_EXCEPTION,
                             SIMAPP_EVENT_ERROR_CODE_500)
            LOG.info('Verified internet connection')

        s3_client = session.client('s3',
                                   region_name=s3_region,
                                   endpoint_url=s3_endpoint_url,
                                   config=get_boto_config())

        yaml_key = os.path.normpath(os.path.join(s3_prefix, s3_yaml_name))
        local_yaml_path = os.path.abspath(
            os.path.join(os.getcwd(), s3_yaml_name))
        s3_client.download_file(Bucket=s3_bucket,
                                Key=yaml_key,
                                Filename=local_yaml_path)
        # Get values passed in yaml files. Default values are for backward
        # compatibility and for single racecar racing
        default_yaml_values = {
            RACE_TYPE_YAML_KEY: TIME_TRIAL_RACE_TYPE,
            MODEL_S3_BUCKET_YAML_KEY: s3_bucket,
            MODEL_S3_PREFIX_YAML_KEY: s3_prefix,
            CAR_COLOR_YAML_KEY: DEFAULT_COLOR,
            MODEL_METADATA_FILE_S3_YAML_KEY: None
        }
        yaml_dict = get_yaml_dict(local_yaml_path)
        yaml_values = get_yaml_values(yaml_dict, default_yaml_values)

        # Forcing the yaml parameter to list
        force_list_params = [
            MODEL_METADATA_FILE_S3_YAML_KEY, MODEL_S3_BUCKET_YAML_KEY,
            MODEL_S3_PREFIX_YAML_KEY, CAR_COLOR_YAML_KEY
        ]

        for param in force_list_params:
            yaml_values[param] = force_list(yaml_values[param])

        # Populate the model_metadata_s3_key values to handle both training
        # and evaluation for all race_formats
        if None in yaml_values[MODEL_METADATA_FILE_S3_YAML_KEY]:
            # MODEL_METADATA_FILE_S3_KEY not passed as part of yaml file
            # ==> This happens during evaluation
            # Assume model_metadata.json is present in the s3_prefix/model/ folder
            yaml_values[MODEL_METADATA_FILE_S3_YAML_KEY] = list()
            # A dedicated loop name keeps the s3_prefix command line argument
            # from being rebound by this loop.
            for model_s3_prefix in yaml_values[MODEL_S3_PREFIX_YAML_KEY]:
                yaml_values[MODEL_METADATA_FILE_S3_YAML_KEY].append(
                    os.path.join(model_s3_prefix,
                                 'model/model_metadata.json'))

        # Set multicar value if its a head to model racetype
        multicar = yaml_values[RACE_TYPE_YAML_KEY] == HEAD_TO_MODEL_RACE_TYPE
        # Validate the yaml values
        validate_yaml_values(yaml_values, multicar)
        # List of racecar names that should include second camera while launching
        racecars_with_stereo_cameras = list()

        # List of racecar names that should include lidar while launching
        racecars_with_lidars = list()

        # List of SimApp versions
        simapp_versions = list()

        model_s3_buckets = yaml_values[MODEL_S3_BUCKET_YAML_KEY]
        # A single racecar is named plainly 'racecar'; several racecars get an
        # index suffix.
        has_multiple_racecars = len(model_s3_buckets) > 1

        for agent_index, model_s3_bucket in enumerate(model_s3_buckets):
            if has_multiple_racecars:
                racecar_name = 'racecar_' + str(agent_index)
            else:
                racecar_name = 'racecar'
            # Make a local folder with the racecar name to download the
            # model_metadata.json
            racecar_dir = os.path.join(os.getcwd(), racecar_name)
            if not os.path.exists(racecar_dir):
                os.makedirs(racecar_dir)
            local_model_metadata_path = os.path.abspath(
                os.path.join(racecar_dir, 'model_metadata.json'))
            json_key = yaml_values[MODEL_METADATA_FILE_S3_YAML_KEY][
                agent_index]
            # The yaml may carry a full s3:// URI; download_file needs the
            # bare object key.
            json_key = json_key.replace('s3://{}/'.format(model_s3_bucket), '')
            s3_client.download_file(Bucket=model_s3_bucket,
                                    Key=json_key,
                                    Filename=local_model_metadata_path)
            sensors, _, simapp_version = utils_parse_model_metadata.parse_model_metadata(
                local_model_metadata_path)
            simapp_versions.append(simapp_version)
            if Input.STEREO.value in sensors:
                racecars_with_stereo_cameras.append(racecar_name)
            if Input.LIDAR.value in sensors or Input.SECTOR_LIDAR.value in sensors:
                racecars_with_lidars.append(racecar_name)

        # NOTE(review): the command line is assembled as one shell string from
        # argv and yaml values without quoting - confirm these inputs are
        # trusted.
        cmd = [
            ''.join(("roslaunch deepracer_simulation_environment {} ".format(
                launch_name), "local_yaml_path:={} ".format(local_yaml_path),
                     "racecars_with_stereo_cameras:={} ".format(
                         ','.join(racecars_with_stereo_cameras)),
                     "racecars_with_lidars:={} multicar:={} ".format(
                         ','.join(racecars_with_lidars), multicar),
                     "car_colors:={} simapp_versions:={}".format(
                         ','.join(yaml_values[CAR_COLOR_YAML_KEY]),
                         ','.join(simapp_versions))))
        ]
        # The launched process is started and not waited for.
        Popen(cmd, shell=True, executable="/bin/bash")

    except botocore.exceptions.ClientError as ex:
        log_and_exit(
            "Download params and launch of agent node failed: s3_bucket: {}, yaml_key: {}, {}"
            .format(s3_bucket, yaml_key, ex),
            SIMAPP_SIMULATION_WORKER_EXCEPTION, SIMAPP_EVENT_ERROR_CODE_400)
    except botocore.exceptions.EndpointConnectionError:
        log_and_exit("No Internet connection or s3 service unavailable",
                     SIMAPP_SIMULATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_500)
    except Exception as ex:
        log_and_exit(
            "Download params and launch of agent node failed: s3_bucket: {}, yaml_key: {}, {}"
            .format(s3_bucket, yaml_key, ex),
            SIMAPP_SIMULATION_WORKER_EXCEPTION, SIMAPP_EVENT_ERROR_CODE_500)
def main():
    """Entry point of the evaluation worker.

    Parses the command line (defaults come from ROS parameters), downloads
    the model metadata and the SageMaker hyperparameters from S3, builds the
    rollout, obstacle and bot car agents together with their graph manager
    and S3 data store, and then runs ``evaluation_worker`` for the requested
    number of trials.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-p', '--preset',
        help="(string) Name of a preset to run \
                             (class name from the 'presets' directory.)",
        type=str,
        required=False)
    arg_parser.add_argument(
        '--s3_bucket',
        help='(string) S3 bucket',
        type=str,
        default=rospy.get_param("MODEL_S3_BUCKET", "gsaur-test"))
    arg_parser.add_argument(
        '--s3_prefix',
        help='(string) S3 prefix',
        type=str,
        default=rospy.get_param("MODEL_S3_PREFIX", "sagemaker"))
    arg_parser.add_argument(
        '--aws_region',
        help='(string) AWS region',
        type=str,
        default=rospy.get_param("AWS_REGION", "us-east-1"))
    arg_parser.add_argument(
        '--number_of_trials',
        help='(integer) Number of trials',
        type=int,
        default=int(rospy.get_param("NUMBER_OF_TRIALS", 10)))
    arg_parser.add_argument(
        '-c', '--local_model_directory',
        help='(string) Path to a folder containing a checkpoint \
                             to restore the model from.',
        type=str,
        default='./checkpoint')

    cli_args = arg_parser.parse_args()
    logger.info("S3 bucket: %s \n S3 prefix: %s", cli_args.s3_bucket,
                cli_args.s3_prefix)

    sage_s3_client = SageS3Client(bucket=cli_args.s3_bucket,
                                  s3_prefix=cli_args.s3_prefix,
                                  aws_region=cli_args.aws_region)

    # Fetch the model metadata into the custom files folder.
    metadata_local_path = os.path.join(CUSTOM_FILES_PATH,
                                       'model_metadata.json')
    metadata_s3_key = os.path.normpath(
        "%s/model/model_metadata.json" % cli_args.s3_prefix)
    utils.load_model_metadata(sage_s3_client, metadata_s3_key,
                              metadata_local_path)

    # Backward compatibility: a model with a version below the current
    # SimApp version and no current checkpoint name is converted first.
    _, _, version = parse_model_metadata(metadata_local_path)
    is_older_version = float(version) < float(utils.SIMAPP_VERSION)
    if is_older_version and not utils.has_current_ckpnt_name(
            cli_args.s3_bucket, cli_args.s3_prefix, cli_args.aws_region):
        utils.make_compatible(cli_args.s3_bucket, cli_args.s3_prefix,
                              cli_args.aws_region,
                              SyncFiles.TRAINER_READY.value)

    # Fetch the SageMaker hyperparameters; fall back to an empty dict when
    # the file is not available.
    hyperparams_s3_key = os.path.normpath(cli_args.s3_prefix +
                                          "/ip/hyperparameters.json")
    hyperparams_downloaded = sage_s3_client.download_file(
        s3_key=hyperparams_s3_key, local_path="hyperparameters.json")
    if hyperparams_downloaded:
        logger.info("Received Sagemaker hyperparameters successfully!")
        with open("hyperparameters.json") as hyperparams_file:
            sm_hyperparams_dict = json.load(hyperparams_file)
    else:
        logger.info("SageMaker hyperparameters not found.")
        sm_hyperparams_dict = {}

    #! TODO each agent should have own config
    _, _, version = utils_parse_model_metadata.parse_model_metadata(
        metadata_local_path)
    car_ctrl_config = {
        ConfigParams.LINK_NAME_LIST.value: LINK_NAMES,
        ConfigParams.VELOCITY_LIST.value: VELOCITY_TOPICS,
        ConfigParams.STEERING_LIST.value: STEERING_TOPICS,
        ConfigParams.CHANGE_START.value: utils.str2bool(
            rospy.get_param('CHANGE_START_POSITION', False)),
        ConfigParams.ALT_DIR.value: utils.str2bool(
            rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)),
        ConfigParams.ACTION_SPACE_PATH.value:
            'custom_files/model_metadata.json',
        ConfigParams.REWARD.value: reward_function,
        ConfigParams.AGENT_NAME.value: 'racecar',
        ConfigParams.VERSION.value: version,
    }
    agent_config = {
        'model_metadata': metadata_local_path,
        'car_ctrl_cnfig': car_ctrl_config,
    }

    #! TODO each agent should have own s3 bucket
    metrics_s3_config = {
        MetricsS3Keys.METRICS_BUCKET.value: rospy.get_param(
            'METRICS_S3_BUCKET'),
        MetricsS3Keys.METRICS_KEY.value: rospy.get_param(
            'METRICS_S3_OBJECT_KEY'),
        MetricsS3Keys.REGION.value: rospy.get_param('AWS_REGION'),
        MetricsS3Keys.STEP_BUCKET.value: rospy.get_param('MODEL_S3_BUCKET'),
        MetricsS3Keys.STEP_KEY.value: os.path.join(
            rospy.get_param('MODEL_S3_PREFIX'),
            EVALUATION_SIMTRACE_DATA_S3_OBJECT_KEY),
    }

    # The racecar under evaluation first, then obstacles and bot cars.
    agent_list = [
        create_rollout_agent(agent_config, EvalMetrics(metrics_s3_config)),
        create_obstacles_agent(),
        create_bot_cars_agent(),
    ]

    graph_manager, _ = get_graph_manager(sm_hyperparams_dict, agent_list)

    # Wire the S3 backed data store and the graph manager to each other.
    data_store = S3BotoDataStore(
        S3BotoDataStoreParameters(
            aws_region=cli_args.aws_region,
            bucket_name=cli_args.s3_bucket,
            checkpoint_dir=cli_args.local_model_directory,
            s3_folder=cli_args.s3_prefix))
    data_store.graph_manager = graph_manager
    graph_manager.data_store = data_store
    graph_manager.env_params.seed = 0

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = cli_args.local_model_directory

    evaluation_worker(graph_manager=graph_manager,
                      data_store=data_store,
                      number_of_trials=cli_args.number_of_trials,
                      task_parameters=task_parameters)
def main():
    """Entry point of the rollout worker.

    Parses the command line (several defaults come from ROS parameters),
    downloads the model metadata, the customer reward function, the
    SageMaker hyperparameters and optional custom files from S3, builds the
    rollout, obstacle and bot car agents and their graph manager, attaches
    the Redis memory backend and the S3 data store, and finally runs
    ``rollout_worker``.

    Exits through ``utils.log_and_exit`` when no reward function S3 key is
    given or when the downloaded reward function cannot be imported.
    """
    screen.set_use_colors(False)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--checkpoint_dir',
        help=
        '(string) Path to a folder containing a checkpoint to restore the model from.',
        type=str,
        default='./checkpoint')
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=rospy.get_param("SAGEMAKER_SHARED_S3_BUCKET",
                                                "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default=rospy.get_param("SAGEMAKER_SHARED_S3_PREFIX",
                                                "sagemaker"))
    parser.add_argument(
        '--num-workers',
        help="(int) The number of workers started in this pool",
        type=int,
        default=1)
    parser.add_argument('-r',
                        '--redis_ip',
                        help="(string) IP or host for the redis server",
                        default='localhost',
                        type=str)
    parser.add_argument('-rp',
                        '--redis_port',
                        help="(int) Port of the redis server",
                        default=6379,
                        type=int)
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=rospy.get_param("AWS_REGION", "us-east-1"))
    parser.add_argument('--reward_file_s3_key',
                        help='(string) Reward File S3 Key',
                        type=str,
                        default=rospy.get_param("REWARD_FILE_S3_KEY", None))
    parser.add_argument('--model_metadata_s3_key',
                        help='(string) Model Metadata File S3 Key',
                        type=str,
                        default=rospy.get_param("MODEL_METADATA_FILE_S3_KEY",
                                                None))

    args = parser.parse_args()

    s3_client = SageS3Client(bucket=args.s3_bucket,
                             s3_prefix=args.s3_prefix,
                             aws_region=args.aws_region)
    logger.info("S3 bucket: %s" % args.s3_bucket)
    logger.info("S3 prefix: %s" % args.s3_prefix)

    # Load the model metadata
    model_metadata_local_path = os.path.join(CUSTOM_FILES_PATH,
                                             'model_metadata.json')
    utils.load_model_metadata(s3_client, args.model_metadata_s3_key,
                              model_metadata_local_path)

    # Download and import reward function
    if not args.reward_file_s3_key:
        utils.log_and_exit(
            "Reward function code S3 key not available for S3 bucket {} and prefix {}"
            .format(args.s3_bucket,
                    args.s3_prefix), utils.SIMAPP_SIMULATION_WORKER_EXCEPTION,
            utils.SIMAPP_EVENT_ERROR_CODE_500)
    download_customer_reward_function(s3_client, args.reward_file_s3_key)

    # The import is deferred to this point because the module only exists
    # once the download above has run.
    try:
        from custom_files.customer_reward_function import reward_function
    except Exception as e:
        utils.log_and_exit(
            "Failed to import user's reward_function: {}".format(e),
            utils.SIMAPP_SIMULATION_WORKER_EXCEPTION,
            utils.SIMAPP_EVENT_ERROR_CODE_400)

    # Instantiate Cameras
    configure_camera()

    # The redis host used below is the IP obtained through the S3 client;
    # the --redis_ip argument is parsed but not consulted.
    redis_ip = s3_client.get_ip()
    logger.info("Received IP from SageMaker successfully: %s" % redis_ip)

    # Download hyperparameters from SageMaker
    hyperparams_s3_key = os.path.normpath(args.s3_prefix +
                                          "/ip/hyperparameters.json")
    hyperparameters_file_success = s3_client.download_file(
        s3_key=hyperparams_s3_key, local_path="hyperparameters.json")
    sm_hyperparams_dict = {}
    if hyperparameters_file_success:
        logger.info("Received Sagemaker hyperparameters successfully!")
        with open("hyperparameters.json") as fp:
            sm_hyperparams_dict = json.load(fp)
    else:
        logger.info("SageMaker hyperparameters not found.")

    preset_file_success, _ = download_custom_files_if_present(
        s3_client, args.s3_prefix)

    #! TODO each agent should have own config
    _, _, version = utils_parse_model_metadata.parse_model_metadata(
        model_metadata_local_path)
    agent_config = {
        'model_metadata': model_metadata_local_path,
        'car_ctrl_cnfig': {
            ConfigParams.LINK_NAME_LIST.value:
            LINK_NAMES,
            ConfigParams.VELOCITY_LIST.value:
            VELOCITY_TOPICS,
            ConfigParams.STEERING_LIST.value:
            STEERING_TOPICS,
            ConfigParams.CHANGE_START.value:
            utils.str2bool(rospy.get_param('CHANGE_START_POSITION', True)),
            ConfigParams.ALT_DIR.value:
            utils.str2bool(
                rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)),
            ConfigParams.ACTION_SPACE_PATH.value:
            'custom_files/model_metadata.json',
            ConfigParams.REWARD.value:
            reward_function,
            ConfigParams.AGENT_NAME.value:
            'racecar',
            ConfigParams.VERSION.value:
            version
        }
    }

    #! TODO each agent should have own s3 bucket
    metrics_s3_config = {
        MetricsS3Keys.METRICS_BUCKET.value:
        rospy.get_param('METRICS_S3_BUCKET'),
        MetricsS3Keys.METRICS_KEY.value:
        rospy.get_param('METRICS_S3_OBJECT_KEY'),
        MetricsS3Keys.REGION.value:
        rospy.get_param('AWS_REGION'),
        MetricsS3Keys.STEP_BUCKET.value:
        rospy.get_param('SAGEMAKER_SHARED_S3_BUCKET'),
        MetricsS3Keys.STEP_KEY.value:
        os.path.join(rospy.get_param('SAGEMAKER_SHARED_S3_PREFIX'),
                     TRAINING_SIMTRACE_DATA_S3_OBJECT_KEY)
    }

    agent_list = list()
    agent_list.append(
        create_rollout_agent(agent_config, TrainingMetrics(metrics_s3_config)))
    agent_list.append(create_obstacles_agent())
    agent_list.append(create_bot_cars_agent())

    # A custom preset downloaded from S3 supplies its own graph manager;
    # otherwise one is built from the hyperparameters and the agent list.
    if preset_file_success:
        preset_location = os.path.join(CUSTOM_FILES_PATH, "preset.py")
        preset_location += ":graph_manager"
        graph_manager = short_dynamic_import(preset_location,
                                             ignore_module_case=True)
        logger.info("Using custom preset file!")
    else:
        graph_manager, _ = get_graph_manager(sm_hyperparams_dict, agent_list)

    # Honor --redis_port rather than a hard-coded literal; the argument
    # defaults to 6379, so invocations that omit the flag behave as before.
    memory_backend_params = RedisPubSubMemoryBackendParameters(
        redis_address=redis_ip,
        redis_port=args.redis_port,
        run_type=str(RunType.ROLLOUT_WORKER),
        channel=args.s3_prefix)

    graph_manager.memory_backend_params = memory_backend_params

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_name=args.s3_bucket,
        checkpoint_dir=args.checkpoint_dir,
        s3_folder=args.s3_prefix)

    # The data store and the graph manager hold references to each other.
    data_store = S3BotoDataStore(ds_params_instance)
    data_store.graph_manager = graph_manager
    graph_manager.data_store = data_store

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = args.checkpoint_dir

    rollout_worker(graph_manager=graph_manager,
                   data_store=data_store,
                   num_workers=args.num_workers,
                   task_parameters=task_parameters)
Beispiel #14
0
def validate(s3_bucket, s3_prefix, aws_region):
    """Validate the model stored under the given S3 bucket and prefix.

    Downloads and parses the model metadata, converts models of an older
    SimApp version when needed, builds a training agent with its graph
    manager and S3 data store, and hands everything to ``_validate``.

    Args:
        s3_bucket: S3 bucket the model is read from.
        s3_prefix: S3 prefix of the model inside ``s3_bucket``.
        aws_region: AWS region passed to the S3 client and the data store.
    """
    screen.set_use_colors(False)
    screen.log_title(" S3 bucket: {} \n S3 prefix: {}".format(
        s3_bucket, s3_prefix))

    s3_client = SageS3Client(bucket=s3_bucket,
                             s3_prefix=s3_prefix,
                             aws_region=aws_region)
    # Load the model metadata
    utils.load_model_metadata(
        s3_client,
        os.path.normpath("%s/model/model_metadata.json" % s3_prefix),
        MODEL_METADATA_LOCAL_PATH)

    # Create model local path. os.makedirs raises when the directory already
    # exists, so a folder left behind by an earlier run is tolerated rather
    # than aborting the validation with an unhandled error.
    if not os.path.exists(LOCAL_MODEL_DIR):
        os.makedirs(LOCAL_MODEL_DIR)

    try:
        # Handle backward compatibility
        observation_list, _, version = parse_model_metadata(
            MODEL_METADATA_LOCAL_PATH)
    except Exception as ex:
        log_and_exit("Failed to parse model_metadata file: {}".format(ex),
                     SIMAPP_VALIDATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_400)

    # The get_transition_data call below must happen before the
    # create_training_agent call to avoid a 500 when an unsupported sensor is
    # received: create_training_agent exits with 500 in that case, while
    # get_transition_data exits with 400, which is the code wanted for model
    # validation.
    transitions = get_transition_data(observation_list)

    # Models with a version below the current SimApp version that lack the
    # current checkpoint name are converted first.
    if float(version) < float(SIMAPP_VERSION) and \
            not utils.has_current_ckpnt_name(s3_bucket, s3_prefix, aws_region):
        utils.make_compatible(s3_bucket, s3_prefix, aws_region,
                              SyncFiles.TRAINER_READY.value)

    # The car control values are left empty here; only the metadata path and
    # the agent name are filled in.
    agent_config = {
        'model_metadata': MODEL_METADATA_LOCAL_PATH,
        ConfigParams.CAR_CTRL_CONFIG.value: {
            ConfigParams.LINK_NAME_LIST.value: [],
            ConfigParams.VELOCITY_LIST.value: {},
            ConfigParams.STEERING_LIST.value: {},
            ConfigParams.CHANGE_START.value: None,
            ConfigParams.ALT_DIR.value: None,
            ConfigParams.ACTION_SPACE_PATH.value: MODEL_METADATA_LOCAL_PATH,
            ConfigParams.REWARD.value: None,
            ConfigParams.AGENT_NAME.value: 'racecar'
        }
    }

    agent_list = list()
    agent_list.append(create_training_agent(agent_config))

    # No hyperparameters are supplied for validation.
    sm_hyperparams_dict = {}
    graph_manager, _ = get_graph_manager(hp_dict=sm_hyperparams_dict,
                                         agent_list=agent_list,
                                         run_phase_subject=None)

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=aws_region,
        bucket_names={'agent': s3_bucket},
        s3_folders={'agent': s3_prefix},
        base_checkpoint_dir=LOCAL_MODEL_DIR)

    graph_manager.data_store = S3BotoDataStore(ds_params_instance,
                                               graph_manager,
                                               ignore_lock=True)

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = LOCAL_MODEL_DIR
    _validate(graph_manager=graph_manager,
              task_parameters=task_parameters,
              transitions=transitions,
              s3_bucket=s3_bucket,
              s3_prefix=s3_prefix,
              aws_region=aws_region)
def main():
    """Run a rollout worker: parse arguments, build the agents and start rollouts.

    Argument defaults are read from the ROS parameter server. In order, the
    function:

    1. parses the command-line arguments;
    2. downloads the model metadata and the customer reward function from S3
       and imports the reward function;
    3. configures the camera and creates the racecar, obstacle and bot car
       agents;
    4. assembles the simtrace / MP4 upload jobs for the ``S3Writer``;
    5. downloads the SageMaker hyperparameters and builds the graph manager
       (from the custom preset file when one was downloaded);
    6. trims the worker count, sets the redis memory backend parameters and
       the S3 data store on the graph manager, and calls ``rollout_worker``.
    """
    screen.set_use_colors(False)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--checkpoint_dir',
        help='(string) Path to a folder containing a checkpoint to restore the model from.',
        type=str,
        default='./checkpoint')
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=rospy.get_param("SAGEMAKER_SHARED_S3_BUCKET",
                                                "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default=rospy.get_param("SAGEMAKER_SHARED_S3_PREFIX",
                                                "sagemaker"))
    parser.add_argument(
        '--num_workers',
        help="(int) The number of workers started in this pool",
        type=int,
        default=int(rospy.get_param("NUM_WORKERS", 1)))
    parser.add_argument('--rollout_idx',
                        help="(int) The index of current rollout worker",
                        type=int,
                        default=0)
    # NOTE(review): --redis_ip and --redis_port are parsed but not used below;
    # the redis address comes from s3_client.get_ip() and the port is fixed
    # to 6379 when the memory backend parameters are created.
    parser.add_argument('-r',
                        '--redis_ip',
                        help="(string) IP or host for the redis server",
                        default='localhost',
                        type=str)
    parser.add_argument('-rp',
                        '--redis_port',
                        help="(int) Port of the redis server",
                        default=6379,
                        type=int)
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=rospy.get_param("AWS_REGION", "us-east-1"))
    parser.add_argument('--reward_file_s3_key',
                        help='(string) Reward File S3 Key',
                        type=str,
                        default=rospy.get_param("REWARD_FILE_S3_KEY", None))
    parser.add_argument('--model_metadata_s3_key',
                        help='(string) Model Metadata File S3 Key',
                        type=str,
                        default=rospy.get_param("MODEL_METADATA_FILE_S3_KEY",
                                                None))
    # For a training job, reset is not allowed. penalty_seconds, off_track_penalty, and
    # collision_penalty will all be 0 by default
    parser.add_argument('--number_of_resets',
                        help='(integer) Number of resets',
                        type=int,
                        default=int(rospy.get_param("NUMBER_OF_RESETS", 0)))
    parser.add_argument('--penalty_seconds',
                        help='(float) penalty second',
                        type=float,
                        default=float(rospy.get_param("PENALTY_SECONDS", 0.0)))
    parser.add_argument('--job_type',
                        help='(string) job type',
                        type=str,
                        default=rospy.get_param("JOB_TYPE", "TRAINING"))
    # type=bool would turn every non-empty string (including "False") into
    # True, so the command-line value goes through the same str2bool
    # conversion that is already applied to the ROS parameter default.
    parser.add_argument('--is_continuous',
                        help='(boolean) is continous after lap completion',
                        type=utils.str2bool,
                        default=utils.str2bool(
                            rospy.get_param("IS_CONTINUOUS", False)))
    parser.add_argument('--race_type',
                        help='(string) Race type',
                        type=str,
                        default=rospy.get_param("RACE_TYPE", "TIME_TRIAL"))
    parser.add_argument('--off_track_penalty',
                        help='(float) off track penalty second',
                        type=float,
                        default=float(rospy.get_param("OFF_TRACK_PENALTY",
                                                      0.0)))
    parser.add_argument('--collision_penalty',
                        help='(float) collision penalty second',
                        type=float,
                        default=float(rospy.get_param("COLLISION_PENALTY",
                                                      0.0)))

    args = parser.parse_args()

    s3_client = SageS3Client(bucket=args.s3_bucket,
                             s3_prefix=args.s3_prefix,
                             aws_region=args.aws_region)
    logger.info("S3 bucket: %s", args.s3_bucket)
    logger.info("S3 prefix: %s", args.s3_prefix)

    # Load the model metadata
    model_metadata_local_path = os.path.join(CUSTOM_FILES_PATH,
                                             'model_metadata.json')
    utils.load_model_metadata(s3_client, args.model_metadata_s3_key,
                              model_metadata_local_path)

    # Download and import reward function
    if not args.reward_file_s3_key:
        log_and_exit(
            "Reward function code S3 key not available for S3 bucket {} and prefix {}"
            .format(args.s3_bucket, args.s3_prefix),
            SIMAPP_SIMULATION_WORKER_EXCEPTION, SIMAPP_EVENT_ERROR_CODE_500)
    download_customer_reward_function(s3_client, args.reward_file_s3_key)

    # The reward function is user supplied code, so any failure while
    # importing it is reported through log_and_exit.
    try:
        from custom_files.customer_reward_function import reward_function
    except Exception as e:
        log_and_exit("Failed to import user's reward_function: {}".format(e),
                     SIMAPP_SIMULATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_400)

    # Instantiate Cameras
    configure_camera(namespaces=['racecar'])

    preset_file_success, _ = download_custom_files_if_present(
        s3_client, args.s3_prefix)

    #! TODO each agent should have own config
    _, _, version = utils_parse_model_metadata.parse_model_metadata(
        model_metadata_local_path)
    agent_config = {
        'model_metadata': model_metadata_local_path,
        ConfigParams.CAR_CTRL_CONFIG.value: {
            ConfigParams.LINK_NAME_LIST.value:
            LINK_NAMES,
            ConfigParams.VELOCITY_LIST.value:
            VELOCITY_TOPICS,
            ConfigParams.STEERING_LIST.value:
            STEERING_TOPICS,
            ConfigParams.CHANGE_START.value:
            utils.str2bool(rospy.get_param('CHANGE_START_POSITION', True)),
            ConfigParams.ALT_DIR.value:
            utils.str2bool(
                rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)),
            ConfigParams.ACTION_SPACE_PATH.value:
            'custom_files/model_metadata.json',
            ConfigParams.REWARD.value:
            reward_function,
            ConfigParams.AGENT_NAME.value:
            'racecar',
            ConfigParams.VERSION.value:
            version,
            ConfigParams.NUMBER_OF_RESETS.value:
            args.number_of_resets,
            ConfigParams.PENALTY_SECONDS.value:
            args.penalty_seconds,
            ConfigParams.NUMBER_OF_TRIALS.value:
            None,
            ConfigParams.IS_CONTINUOUS.value:
            args.is_continuous,
            ConfigParams.RACE_TYPE.value:
            args.race_type,
            ConfigParams.COLLISION_PENALTY.value:
            args.collision_penalty,
            ConfigParams.OFF_TRACK_PENALTY.value:
            args.off_track_penalty
        }
    }

    #! TODO each agent should have own s3 bucket
    # NOTE(review): step_metrics_prefix is computed but not used anywhere
    # below; it is kept so the lookup of SAGEMAKER_SHARED_S3_PREFIX (which has
    # no default here) still happens as before.
    step_metrics_prefix = rospy.get_param('SAGEMAKER_SHARED_S3_PREFIX')
    if args.num_workers > 1:
        step_metrics_prefix = os.path.join(step_metrics_prefix,
                                           str(args.rollout_idx))
    metrics_s3_config = {
        MetricsS3Keys.METRICS_BUCKET.value:
        rospy.get_param('METRICS_S3_BUCKET'),
        MetricsS3Keys.METRICS_KEY.value:
        rospy.get_param('METRICS_S3_OBJECT_KEY'),
        MetricsS3Keys.REGION.value: rospy.get_param('AWS_REGION')
    }
    metrics_s3_model_cfg = {
        MetricsS3Keys.METRICS_BUCKET.value:
        args.s3_bucket,
        MetricsS3Keys.METRICS_KEY.value:
        os.path.join(args.s3_prefix, DEEPRACER_CHKPNT_KEY_SUFFIX),
        MetricsS3Keys.REGION.value:
        args.aws_region
    }
    run_phase_subject = RunPhaseSubject()

    agent_list = list()
    agent_list.append(
        create_rollout_agent(
            agent_config,
            TrainingMetrics(agent_name='agent',
                            s3_dict_metrics=metrics_s3_config,
                            s3_dict_model=metrics_s3_model_cfg,
                            ckpnt_dir=args.checkpoint_dir,
                            run_phase_sink=run_phase_subject,
                            use_model_picker=(args.rollout_idx == 0)),
            run_phase_subject))
    agent_list.append(create_obstacles_agent())
    agent_list.append(create_bot_cars_agent())
    # ROS service to indicate all the robomaker markov packages are ready for consumption
    signal_robomaker_markov_package_ready()

    PhaseObserver('/agent/training_phase', run_phase_subject)

    aws_region = rospy.get_param('AWS_REGION', args.aws_region)
    simtrace_s3_bucket = rospy.get_param('SIMTRACE_S3_BUCKET', None)
    # The MP4 bucket is only looked up for the first rollout worker.
    mp4_s3_bucket = rospy.get_param('MP4_S3_BUCKET',
                                    None) if args.rollout_idx == 0 else None
    if simtrace_s3_bucket:
        simtrace_s3_object_prefix = rospy.get_param('SIMTRACE_S3_PREFIX')
        if args.num_workers > 1:
            # With several workers each one gets its own sub-prefix.
            simtrace_s3_object_prefix = os.path.join(simtrace_s3_object_prefix,
                                                     str(args.rollout_idx))
    if mp4_s3_bucket:
        mp4_s3_object_prefix = rospy.get_param('MP4_S3_OBJECT_PREFIX')

    # One IterationData entry per local file that the S3Writer is given.
    s3_writer_job_info = []
    if simtrace_s3_bucket:
        s3_writer_job_info.append(
            IterationData(
                'simtrace', simtrace_s3_bucket, simtrace_s3_object_prefix,
                aws_region,
                os.path.join(
                    ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                    IterationDataLocalFileNames.SIM_TRACE_TRAINING_LOCAL_FILE.
                    value)))
    if mp4_s3_bucket:
        s3_writer_job_info.extend([
            IterationData(
                'pip', mp4_s3_bucket, mp4_s3_object_prefix, aws_region,
                os.path.join(
                    ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                    IterationDataLocalFileNames.
                    CAMERA_PIP_MP4_VALIDATION_LOCAL_PATH.value)),
            IterationData(
                '45degree', mp4_s3_bucket, mp4_s3_object_prefix, aws_region,
                os.path.join(
                    ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                    IterationDataLocalFileNames.
                    CAMERA_45DEGREE_MP4_VALIDATION_LOCAL_PATH.value)),
            IterationData(
                'topview', mp4_s3_bucket, mp4_s3_object_prefix, aws_region,
                os.path.join(
                    ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                    IterationDataLocalFileNames.
                    CAMERA_TOPVIEW_MP4_VALIDATION_LOCAL_PATH.value))
        ])

    s3_writer = S3Writer(job_info=s3_writer_job_info)

    redis_ip = s3_client.get_ip()
    logger.info("Received IP from SageMaker successfully: %s", redis_ip)

    # Download hyperparameters from SageMaker
    hyperparams_s3_key = os.path.normpath(args.s3_prefix +
                                          "/ip/hyperparameters.json")
    hyperparameters_file_success = s3_client.download_file(
        s3_key=hyperparams_s3_key, local_path="hyperparameters.json")
    sm_hyperparams_dict = {}
    if hyperparameters_file_success:
        logger.info("Received Sagemaker hyperparameters successfully!")
        with open("hyperparameters.json") as filepointer:
            sm_hyperparams_dict = json.load(filepointer)
    else:
        logger.info("SageMaker hyperparameters not found.")

    enable_domain_randomization = utils.str2bool(
        rospy.get_param('ENABLE_DOMAIN_RANDOMIZATION', False))
    if preset_file_success:
        # A custom preset file was downloaded: import its graph_manager.
        preset_location = os.path.join(CUSTOM_FILES_PATH, "preset.py")
        preset_location += ":graph_manager"
        graph_manager = short_dynamic_import(preset_location,
                                             ignore_module_case=True)
        logger.info("Using custom preset file!")
    else:
        graph_manager, _ = get_graph_manager(
            hp_dict=sm_hyperparams_dict,
            agent_list=agent_list,
            run_phase_subject=run_phase_subject,
            enable_domain_randomization=enable_domain_randomization)

    # If num_episodes_between_training is smaller than num_workers then cancel worker early.
    episode_steps_per_rollout = graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps
    # Reduce number of workers if allocated more than num_episodes_between_training
    if args.num_workers > episode_steps_per_rollout:
        logger.info("Excess worker allocated. Reducing from %s to %s...",
                    args.num_workers, episode_steps_per_rollout)
        args.num_workers = episode_steps_per_rollout
    if args.rollout_idx >= episode_steps_per_rollout or args.rollout_idx >= args.num_workers:
        logger.info(
            "Exiting excess worker..."
            "(rollout_idx[%s] >= num_workers[%s] or num_episodes_between_training[%s])",
            args.rollout_idx, args.num_workers, episode_steps_per_rollout)
        # Shut down the job
        utils.cancel_simulation_job(
            os.environ.get('AWS_ROBOMAKER_SIMULATION_JOB_ARN'),
            rospy.get_param('AWS_REGION'))

    memory_backend_params = DeepRacerRedisPubSubMemoryBackendParameters(
        redis_address=redis_ip,
        redis_port=6379,
        run_type=str(RunType.ROLLOUT_WORKER),
        channel=args.s3_prefix,
        num_workers=args.num_workers,
        rollout_idx=args.rollout_idx)

    graph_manager.memory_backend_params = memory_backend_params

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_names={'agent': args.s3_bucket},
        base_checkpoint_dir=args.checkpoint_dir,
        s3_folders={'agent': args.s3_prefix})

    graph_manager.data_store = S3BotoDataStore(ds_params_instance,
                                               graph_manager)

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = args.checkpoint_dir

    rollout_worker(graph_manager=graph_manager,
                   num_workers=args.num_workers,
                   rollout_idx=args.rollout_idx,
                   task_parameters=task_parameters,
                   s3_writer=s3_writer)
Beispiel #16
0
def main():
    """ Main function for tournament worker

    Parses the command-line arguments (defaults come from the ROS parameter
    server), checks that all per-agent S3 parameter lists have the same
    length, and then, for every model bucket passed in:

    * downloads the model metadata and applies the backward compatibility
      step for models older than the current SimApp version;
    * runs the model selection and downloads the SageMaker hyperparameters;
    * builds the agent configuration, the metrics configuration and the
      S3 writer for that agent.

    Afterwards it creates the graph manager, runs ``tournament_worker``,
    writes the race report and terminates the tournament race node.

    Raises:
        GenericRolloutException: when ``--number_of_resets`` is non-zero but
            smaller than ``MIN_RESET_COUNT``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--preset',
                        help="(string) Name of a preset to run \
                             (class name from the 'presets' directory.)",
                        type=str,
                        required=False)
    parser.add_argument('--s3_bucket',
                        help='list(string) S3 bucket',
                        type=str,
                        nargs='+',
                        default=rospy.get_param("MODEL_S3_BUCKET",
                                                ["gsaur-test"]))
    parser.add_argument('--s3_prefix',
                        help='list(string) S3 prefix',
                        type=str,
                        nargs='+',
                        default=rospy.get_param("MODEL_S3_PREFIX",
                                                ["sagemaker"]))
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=rospy.get_param("AWS_REGION", "us-east-1"))
    parser.add_argument('--number_of_trials',
                        help='(integer) Number of trials',
                        type=int,
                        default=int(rospy.get_param("NUMBER_OF_TRIALS", 10)))
    parser.add_argument(
        '-c',
        '--local_model_directory',
        help='(string) Path to a folder containing a checkpoint \
                             to restore the model from.',
        type=str,
        default='./checkpoint')
    parser.add_argument('--number_of_resets',
                        help='(integer) Number of resets',
                        type=int,
                        default=int(rospy.get_param("NUMBER_OF_RESETS", 0)))
    parser.add_argument('--penalty_seconds',
                        help='(float) penalty second',
                        type=float,
                        default=float(rospy.get_param("PENALTY_SECONDS", 2.0)))
    parser.add_argument('--job_type',
                        help='(string) job type',
                        type=str,
                        default=rospy.get_param("JOB_TYPE", "EVALUATION"))
    # type=bool would turn every non-empty string (including "False") into
    # True, so the command-line value goes through the same str2bool
    # conversion that is already applied to the ROS parameter default.
    parser.add_argument('--is_continuous',
                        help='(boolean) is continous after lap completion',
                        type=utils.str2bool,
                        default=utils.str2bool(
                            rospy.get_param("IS_CONTINUOUS", False)))
    parser.add_argument('--race_type',
                        help='(string) Race type',
                        type=str,
                        default=rospy.get_param("RACE_TYPE", "TIME_TRIAL"))
    parser.add_argument('--off_track_penalty',
                        help='(float) off track penalty second',
                        type=float,
                        default=float(rospy.get_param("OFF_TRACK_PENALTY",
                                                      2.0)))
    parser.add_argument('--collision_penalty',
                        help='(float) collision penalty second',
                        type=float,
                        default=float(rospy.get_param("COLLISION_PENALTY",
                                                      5.0)))

    args = parser.parse_args()
    arg_s3_bucket = args.s3_bucket
    arg_s3_prefix = args.s3_prefix
    logger.info("S3 bucket: %s \n S3 prefix: %s", arg_s3_bucket, arg_s3_prefix)

    # tournament_worker: names to be displayed in MP4.
    # This is racer alias in tournament worker case.
    display_names = rospy.get_param('DISPLAY_NAME', "")

    metrics_s3_buckets = rospy.get_param('METRICS_S3_BUCKET')
    metrics_s3_object_keys = rospy.get_param('METRICS_S3_OBJECT_KEY')

    arg_s3_bucket, arg_s3_prefix = utils.force_list(
        arg_s3_bucket), utils.force_list(arg_s3_prefix)
    metrics_s3_buckets = utils.force_list(metrics_s3_buckets)
    metrics_s3_object_keys = utils.force_list(metrics_s3_object_keys)

    # Every list collected here is indexed by agent_index further down, so
    # they all have to be the same length.
    validate_list = [
        arg_s3_bucket, arg_s3_prefix, metrics_s3_buckets,
        metrics_s3_object_keys
    ]

    simtrace_s3_bucket = rospy.get_param('SIMTRACE_S3_BUCKET', None)
    mp4_s3_bucket = rospy.get_param('MP4_S3_BUCKET', None)
    if simtrace_s3_bucket:
        simtrace_s3_object_prefix = rospy.get_param('SIMTRACE_S3_PREFIX')
        simtrace_s3_bucket = utils.force_list(simtrace_s3_bucket)
        simtrace_s3_object_prefix = utils.force_list(simtrace_s3_object_prefix)
        validate_list.extend([simtrace_s3_bucket, simtrace_s3_object_prefix])
    if mp4_s3_bucket:
        mp4_s3_object_prefix = rospy.get_param('MP4_S3_OBJECT_PREFIX')
        mp4_s3_bucket = utils.force_list(mp4_s3_bucket)
        mp4_s3_object_prefix = utils.force_list(mp4_s3_object_prefix)
        validate_list.extend([mp4_s3_bucket, mp4_s3_object_prefix])

    # Compare the length of every list against the first one. (Passing the
    # lambda and the list themselves to all() only tested their truthiness,
    # so that form of the check could never fail.)
    expected_length = len(validate_list[0])
    if not all(
            len(param_list) == expected_length
            for param_list in validate_list):
        utils.log_and_exit(
            "Eval worker error: Incorrect arguments passed: {}".format(
                validate_list), utils.SIMAPP_SIMULATION_WORKER_EXCEPTION,
            utils.SIMAPP_EVENT_ERROR_CODE_500)
    if args.number_of_resets != 0 and args.number_of_resets < MIN_RESET_COUNT:
        raise GenericRolloutException(
            "number of resets is less than {}".format(MIN_RESET_COUNT))

    # A single model keeps the plain 'agent' / 'racecar' names; several
    # models get an index suffix.
    is_single_agent = len(arg_s3_bucket) == 1

    # Instantiate Cameras
    if is_single_agent:
        configure_camera(namespaces=['racecar'])
    else:
        configure_camera(namespaces=[
            'racecar_{}'.format(str(agent_index))
            for agent_index in range(len(arg_s3_bucket))
        ])

    agent_list = list()
    s3_bucket_dict = dict()
    s3_prefix_dict = dict()
    s3_writers = list()

    # tournament_worker: list of required S3 locations
    simtrace_s3_bucket_dict = dict()
    simtrace_s3_prefix_dict = dict()
    metrics_s3_bucket_dict = dict()
    metrics_s3_object_key_dict = dict()
    mp4_s3_bucket_dict = dict()
    mp4_s3_object_prefix_dict = dict()

    for agent_index, agent_s3_bucket in enumerate(arg_s3_bucket):
        agent_s3_prefix = arg_s3_prefix[agent_index]
        agent_name = 'agent' if is_single_agent else 'agent_{}'.format(
            str(agent_index))
        racecar_name = 'racecar' if is_single_agent else 'racecar_{}'.format(
            str(agent_index))
        s3_bucket_dict[agent_name] = agent_s3_bucket
        s3_prefix_dict[agent_name] = agent_s3_prefix

        # tournament_worker: remap key with agent_name instead of agent_index for list of S3 locations.
        # NOTE(review): these lookups assume SIMTRACE_S3_BUCKET and
        # MP4_S3_BUCKET are set. When either parameter is missing, the bucket
        # is None and its prefix variable was never assigned, so this raises.
        simtrace_s3_bucket_dict[agent_name] = simtrace_s3_bucket[agent_index]
        simtrace_s3_prefix_dict[agent_name] = simtrace_s3_object_prefix[
            agent_index]
        metrics_s3_bucket_dict[agent_name] = metrics_s3_buckets[agent_index]
        metrics_s3_object_key_dict[agent_name] = metrics_s3_object_keys[
            agent_index]
        mp4_s3_bucket_dict[agent_name] = mp4_s3_bucket[agent_index]
        mp4_s3_object_prefix_dict[agent_name] = mp4_s3_object_prefix[
            agent_index]

        s3_client = SageS3Client(bucket=agent_s3_bucket,
                                 s3_prefix=agent_s3_prefix,
                                 aws_region=args.aws_region)

        # Load the model metadata
        agent_custom_files_dir = os.path.join(CUSTOM_FILES_PATH, agent_name)
        if not os.path.exists(agent_custom_files_dir):
            os.makedirs(agent_custom_files_dir)
        model_metadata_local_path = os.path.join(agent_custom_files_dir,
                                                 'model_metadata.json')
        utils.load_model_metadata(
            s3_client,
            os.path.normpath("%s/model/model_metadata.json" %
                             agent_s3_prefix),
            model_metadata_local_path)
        # Handle backward compatibility
        _, _, version = parse_model_metadata(model_metadata_local_path)
        is_older_model = float(version) < float(utils.SIMAPP_VERSION)
        if is_older_model and not utils.has_current_ckpnt_name(
                agent_s3_bucket, agent_s3_prefix, args.aws_region):
            utils.make_compatible(agent_s3_bucket, agent_s3_prefix,
                                  args.aws_region,
                                  SyncFiles.TRAINER_READY.value)

        # Select the optimal model
        utils.do_model_selection(s3_bucket=agent_s3_bucket,
                                 s3_prefix=agent_s3_prefix,
                                 region=args.aws_region)

        # Download hyperparameters from SageMaker
        if not os.path.exists(agent_name):
            os.makedirs(agent_name)
        hyperparams_local_path = os.path.join(agent_name,
                                              "hyperparameters.json")
        hyperparams_s3_key = os.path.normpath(agent_s3_prefix +
                                              "/ip/hyperparameters.json")
        hyperparameters_file_success = s3_client.download_file(
            s3_key=hyperparams_s3_key, local_path=hyperparams_local_path)
        sm_hyperparams_dict = {}
        if hyperparameters_file_success:
            logger.info("Received Sagemaker hyperparameters successfully!")
            with open(hyperparams_local_path) as filepointer:
                sm_hyperparams_dict = json.load(filepointer)
        else:
            logger.info("SageMaker hyperparameters not found.")

        # Link names and topics are rewritten from the generic 'racecar'
        # name to this agent's racecar name.
        agent_config = {
            'model_metadata': model_metadata_local_path,
            ConfigParams.CAR_CTRL_CONFIG.value: {
                ConfigParams.LINK_NAME_LIST.value: [
                    link_name.replace('racecar', racecar_name)
                    for link_name in LINK_NAMES
                ],
                ConfigParams.VELOCITY_LIST.value: [
                    velocity_topic.replace('racecar', racecar_name)
                    for velocity_topic in VELOCITY_TOPICS
                ],
                ConfigParams.STEERING_LIST.value: [
                    steering_topic.replace('racecar', racecar_name)
                    for steering_topic in STEERING_TOPICS
                ],
                ConfigParams.CHANGE_START.value:
                utils.str2bool(rospy.get_param('CHANGE_START_POSITION',
                                               False)),
                ConfigParams.ALT_DIR.value:
                utils.str2bool(
                    rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)),
                ConfigParams.ACTION_SPACE_PATH.value:
                'custom_files/' + agent_name + '/model_metadata.json',
                ConfigParams.REWARD.value:
                reward_function,
                ConfigParams.AGENT_NAME.value:
                racecar_name,
                ConfigParams.VERSION.value:
                version,
                ConfigParams.NUMBER_OF_RESETS.value:
                args.number_of_resets,
                ConfigParams.PENALTY_SECONDS.value:
                args.penalty_seconds,
                ConfigParams.NUMBER_OF_TRIALS.value:
                args.number_of_trials,
                ConfigParams.IS_CONTINUOUS.value:
                args.is_continuous,
                ConfigParams.RACE_TYPE.value:
                args.race_type,
                ConfigParams.COLLISION_PENALTY.value:
                args.collision_penalty,
                ConfigParams.OFF_TRACK_PENALTY.value:
                args.off_track_penalty
            }
        }

        metrics_s3_config = {
            MetricsS3Keys.METRICS_BUCKET.value:
            metrics_s3_buckets[agent_index],
            MetricsS3Keys.METRICS_KEY.value:
            metrics_s3_object_keys[agent_index],
            # Replaced rospy.get_param('AWS_REGION') to be equal to the argument being passed
            # or default argument set
            MetricsS3Keys.REGION.value:
            args.aws_region,
            # Replaced rospy.get_param('MODEL_S3_BUCKET') to be equal to the argument being passed
            # or default argument set
            MetricsS3Keys.STEP_BUCKET.value:
            agent_s3_bucket,
            # Replaced rospy.get_param('MODEL_S3_PREFIX') to be equal to the argument being passed
            # or default argument set
            MetricsS3Keys.STEP_KEY.value:
            os.path.join(agent_s3_prefix,
                         EVALUATION_SIMTRACE_DATA_S3_OBJECT_KEY)
        }
        aws_region = rospy.get_param('AWS_REGION', args.aws_region)
        # One IterationData entry per local file handed to this agent's
        # S3Writer.
        s3_writer_job_info = []
        if simtrace_s3_bucket:
            s3_writer_job_info.append(
                IterationData(
                    'simtrace', simtrace_s3_bucket[agent_index],
                    simtrace_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.
                        SIM_TRACE_EVALUATION_LOCAL_FILE.value)))
        if mp4_s3_bucket:
            s3_writer_job_info.extend([
                IterationData(
                    'pip', mp4_s3_bucket[agent_index],
                    mp4_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.
                        CAMERA_PIP_MP4_VALIDATION_LOCAL_PATH.value)),
                IterationData(
                    '45degree', mp4_s3_bucket[agent_index],
                    mp4_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.
                        CAMERA_45DEGREE_MP4_VALIDATION_LOCAL_PATH.value)),
                IterationData(
                    'topview', mp4_s3_bucket[agent_index],
                    mp4_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.
                        CAMERA_TOPVIEW_MP4_VALIDATION_LOCAL_PATH.value))
            ])

        s3_writers.append(S3Writer(job_info=s3_writer_job_info))
        run_phase_subject = RunPhaseSubject()
        agent_list.append(
            create_rollout_agent(agent_config,
                                 EvalMetrics(agent_name, metrics_s3_config),
                                 run_phase_subject))
    agent_list.append(create_obstacles_agent())
    agent_list.append(create_bot_cars_agent())
    # ROS service to indicate all the robomaker markov packages are ready for consumption
    signal_robomaker_markov_package_ready()

    # NOTE(review): run_phase_subject and sm_hyperparams_dict are the values
    # left over from the last loop iteration, i.e. those of the last agent.
    PhaseObserver('/agent/training_phase', run_phase_subject)

    graph_manager, _ = get_graph_manager(hp_dict=sm_hyperparams_dict,
                                         agent_list=agent_list,
                                         run_phase_subject=run_phase_subject)

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_names=s3_bucket_dict,
        base_checkpoint_dir=args.local_model_directory,
        s3_folders=s3_prefix_dict)

    graph_manager.data_store = S3BotoDataStore(params=ds_params_instance,
                                               graph_manager=graph_manager,
                                               ignore_lock=True)
    graph_manager.env_params.seed = 0

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = args.local_model_directory

    tournament_worker(graph_manager=graph_manager,
                      number_of_trials=args.number_of_trials,
                      task_parameters=task_parameters,
                      s3_writers=s3_writers,
                      is_continuous=args.is_continuous)

    # tournament_worker: write race report to local file.
    write_race_report(graph_manager,
                      model_s3_bucket_map=s3_bucket_dict,
                      model_s3_prefix_map=s3_prefix_dict,
                      metrics_s3_bucket_map=metrics_s3_bucket_dict,
                      metrics_s3_key_map=metrics_s3_object_key_dict,
                      simtrace_s3_bucket_map=simtrace_s3_bucket_dict,
                      simtrace_s3_prefix_map=simtrace_s3_prefix_dict,
                      mp4_s3_bucket_map=mp4_s3_bucket_dict,
                      mp4_s3_prefix_map=mp4_s3_object_prefix_dict,
                      display_names=display_names)

    # tournament_worker: terminate tournament_race_node.
    terminate_tournament_race()
def main():
    """Download the race yaml and model metadata from S3, then roslaunch.

    Positional command line arguments (read from ``sys.argv``):
        1: S3 region used for the client
        2: S3 bucket holding the yaml file
        3: S3 prefix under which the yaml file lives
        4: name of the yaml file
        5: launch target handed to ``roslaunch``

    The yaml file is saved into the current working directory. For every
    model bucket listed in it, the matching ``model_metadata.json`` is
    downloaded into a per-racecar folder and parsed to find out which
    racecars need stereo cameras or lidars and which SimApp version each
    one uses. A ``roslaunch`` command is then started through bash; the
    child process is not waited on.

    Any failure inside the download/launch sequence is reported through
    ``log_and_exit``. Missing command line arguments raise ``IndexError``
    before that handling is in place.
    """
    # parse argument
    s3_region, s3_bucket, s3_prefix, s3_yaml_name, launch_name = (
        sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
    yaml_key = os.path.normpath(os.path.join(s3_prefix, s3_yaml_name))

    try:
        # create boto3 session/client and download yaml/json file
        session = boto3.session.Session()

        s3_endpoint_url = os.environ.get("S3_ENDPOINT_URL")
        if s3_endpoint_url is not None:
            LOG.info('Endpoint URL {}'.format(s3_endpoint_url))
            rospy.set_param('S3_ENDPOINT_URL', s3_endpoint_url)

        s3_client = session.client('s3',
                                   region_name=s3_region,
                                   endpoint_url=s3_endpoint_url,
                                   config=get_boto_config())

        local_yaml_path = os.path.abspath(
            os.path.join(os.getcwd(), s3_yaml_name))
        s3_client.download_file(Bucket=s3_bucket,
                                Key=yaml_key,
                                Filename=local_yaml_path)

        # Get values passed in yaml files. Default values are for backward
        # compatibility and for single racecar racing
        default_yaml_values = {
            RACE_TYPE_YAML_KEY: TIME_TRIAL_RACE_TYPE,
            MODEL_S3_BUCKET_YAML_KEY: s3_bucket,
            MODEL_S3_PREFIX_YAML_KEY: s3_prefix,
            CAR_COLOR_YAML_KEY: DEFAULT_COLOR,
            BODY_SHELL_TYPE_YAML_KEY: None,
            MODEL_METADATA_FILE_S3_YAML_KEY: None,
            RACER_NAME_YAML_KEY: None
        }
        yaml_dict = get_yaml_dict(local_yaml_path)
        yaml_values = get_yaml_values(yaml_dict, default_yaml_values)

        # Forcing the yaml parameter to list
        for list_key in (MODEL_METADATA_FILE_S3_YAML_KEY,
                         MODEL_S3_BUCKET_YAML_KEY,
                         MODEL_S3_PREFIX_YAML_KEY,
                         CAR_COLOR_YAML_KEY,
                         BODY_SHELL_TYPE_YAML_KEY,
                         RACER_NAME_YAML_KEY):
            yaml_values[list_key] = force_list(yaml_values[list_key])

        # Populate the model_metadata_s3_key values to handle both training
        # and evaluation for all race_formats
        if None in yaml_values[MODEL_METADATA_FILE_S3_YAML_KEY]:
            # MODEL_METADATA_FILE_S3_KEY not passed as part of yaml file
            # ==> This happens during evaluation.
            # Assume model_metadata.json is present in the s3_prefix/model/
            # folder of every model.
            yaml_values[MODEL_METADATA_FILE_S3_YAML_KEY] = [
                os.path.join(model_prefix, 'model/model_metadata.json')
                for model_prefix in yaml_values[MODEL_S3_PREFIX_YAML_KEY]
            ]

        # Multicar when more than one model bucket is listed; f1 when the
        # race type says so.
        multicar = len(yaml_values[MODEL_S3_BUCKET_YAML_KEY]) > 1
        is_f1 = yaml_values[RACE_TYPE_YAML_KEY] == F1_RACE_TYPE

        # Validate the yaml values
        validate_yaml_values(yaml_values, multicar)

        # Read the lists only after validation so that whatever the
        # validator left in yaml_values is what gets used below.
        model_buckets = yaml_values[MODEL_S3_BUCKET_YAML_KEY]
        body_shell_types = yaml_values[BODY_SHELL_TYPE_YAML_KEY]
        racer_names = yaml_values[RACER_NAME_YAML_KEY]

        if None in body_shell_types:
            if None in racer_names:
                # use default shells
                body_shell_types = (
                    [BodyShellType.DEFAULT.value] * len(model_buckets))
            else:
                # use default shells for regular user and f1 shell for
                # users in F1_SHELL_USERS_LIST
                body_shell_types = []
                for racer_alias in racer_names:
                    if racer_alias in F1_SHELL_USERS_LIST:
                        body_shell_types.append(BodyShellType.F1_2021.value)
                    else:
                        body_shell_types.append(BodyShellType.DEFAULT.value)
                yaml_dict[BODY_SHELL_TYPE_YAML_KEY] = body_shell_types
                # override local yaml file with updated BODY_SHELL_TYPE
                with open(local_yaml_path, 'w') as yaml_file:
                    yaml.dump(yaml_dict, yaml_file)

        # Racecar names that need a second camera / a lidar at launch, and
        # the SimApp version reported by each model's metadata.
        racecars_with_stereo_cameras = []
        racecars_with_lidars = []
        simapp_versions = []

        number_racecars = len(model_buckets) > 1
        for agent_index, model_s3_bucket in enumerate(model_buckets):
            if number_racecars:
                racecar_name = 'racecar_' + str(agent_index)
            else:
                racecar_name = 'racecar'

            # Make a local folder with the racecar name to download the
            # model_metadata.json
            racecar_dir = os.path.join(os.getcwd(), racecar_name)
            if not os.path.exists(racecar_dir):
                os.makedirs(racecar_dir)
            local_model_metadata_path = os.path.abspath(
                os.path.join(racecar_dir, 'model_metadata.json'))

            # The yaml may carry a full s3:// URI; strip the bucket part so
            # only the object key remains.
            bucket_uri_prefix = 's3://{}/'.format(model_s3_bucket)
            metadata_entry = yaml_values[MODEL_METADATA_FILE_S3_YAML_KEY][
                agent_index]
            json_key = metadata_entry.replace(bucket_uri_prefix, '')
            s3_client.download_file(Bucket=model_s3_bucket,
                                    Key=json_key,
                                    Filename=local_model_metadata_path)

            sensors, _, simapp_version = \
                utils_parse_model_metadata.parse_model_metadata(
                    local_model_metadata_path)
            simapp_versions.append(simapp_version)
            if Input.STEREO.value in sensors:
                racecars_with_stereo_cameras.append(racecar_name)
            if (Input.LIDAR.value in sensors
                    or Input.SECTOR_LIDAR.value in sensors):
                racecars_with_lidars.append(racecar_name)

        # Assemble the single shell command string, one roslaunch argument
        # per segment.
        command_segments = [
            "roslaunch deepracer_simulation_environment {} ".format(
                launch_name),
            "local_yaml_path:={} ".format(local_yaml_path),
            "racecars_with_stereo_cameras:={} ".format(
                ','.join(racecars_with_stereo_cameras)),
            "racecars_with_lidars:={} ".format(
                ','.join(racecars_with_lidars)),
            "multicar:={} ".format(multicar),
            "body_shell_types:={} ".format(','.join(body_shell_types)),
            "simapp_versions:={} ".format(','.join(simapp_versions)),
            "f1:={}".format(is_f1),
        ]
        cmd = [''.join(command_segments)]
        Popen(cmd, shell=True, executable="/bin/bash")

    except botocore.exceptions.ClientError as ex:
        log_and_exit(
            "Download params and launch of agent node failed: s3_bucket: {}, yaml_key: {}, {}"
            .format(s3_bucket, yaml_key, ex),
            SIMAPP_SIMULATION_WORKER_EXCEPTION,
            SIMAPP_EVENT_ERROR_CODE_400)
    except botocore.exceptions.EndpointConnectionError:
        log_and_exit("No Internet connection or s3 service unavailable",
                     SIMAPP_SIMULATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_500)
    except Exception as ex:
        log_and_exit(
            "Download params and launch of agent node failed: s3_bucket: {}, yaml_key: {}, {}"
            .format(s3_bucket, yaml_key, ex),
            SIMAPP_SIMULATION_WORKER_EXCEPTION,
            SIMAPP_EVENT_ERROR_CODE_500)