def video_job_info(s3_bucket, s3_prefix, aws_region): dir_path = os.path.dirname(os.path.realpath(__file__)) pip_path = "{}/test_data/pip.mp4".format(dir_path) mainview_path = "{}/test_data/45degree.mp4".format(dir_path) topview_path = "{}/test_data/simtrace.mp4".format(dir_path) for filepath in (pip_path, mainview_path, topview_path): with open(filepath, "w") as filepointer: filepointer.write("") return [IterationData('pip', s3_bucket, s3_prefix, aws_region, pip_path), IterationData('45degree', s3_bucket, s3_prefix, aws_region, mainview_path), IterationData('topview', s3_bucket, s3_prefix, aws_region, topview_path)]
def main(): """ Main function for tournament worker """ parser = argparse.ArgumentParser() parser.add_argument('-p', '--preset', help="(string) Name of a preset to run \ (class name from the 'presets' directory.)", type=str, required=False) parser.add_argument('--s3_bucket', help='list(string) S3 bucket', type=str, nargs='+', default=rospy.get_param("MODEL_S3_BUCKET", ["gsaur-test"])) parser.add_argument('--s3_prefix', help='list(string) S3 prefix', type=str, nargs='+', default=rospy.get_param("MODEL_S3_PREFIX", ["sagemaker"])) parser.add_argument('--aws_region', help='(string) AWS region', type=str, default=rospy.get_param("AWS_REGION", "us-east-1")) parser.add_argument('--number_of_trials', help='(integer) Number of trials', type=int, default=int(rospy.get_param("NUMBER_OF_TRIALS", 10))) parser.add_argument( '-c', '--local_model_directory', help='(string) Path to a folder containing a checkpoint \ to restore the model from.', type=str, default='./checkpoint') parser.add_argument('--number_of_resets', help='(integer) Number of resets', type=int, default=int(rospy.get_param("NUMBER_OF_RESETS", 0))) parser.add_argument('--penalty_seconds', help='(float) penalty second', type=float, default=float(rospy.get_param("PENALTY_SECONDS", 2.0))) parser.add_argument('--job_type', help='(string) job type', type=str, default=rospy.get_param("JOB_TYPE", "EVALUATION")) parser.add_argument('--is_continuous', help='(boolean) is continous after lap completion', type=bool, default=utils.str2bool( rospy.get_param("IS_CONTINUOUS", False))) parser.add_argument('--race_type', help='(string) Race type', type=str, default=rospy.get_param("RACE_TYPE", "TIME_TRIAL")) parser.add_argument('--off_track_penalty', help='(float) off track penalty second', type=float, default=float(rospy.get_param("OFF_TRACK_PENALTY", 2.0))) parser.add_argument('--collision_penalty', help='(float) collision penalty second', type=float, default=float(rospy.get_param("COLLISION_PENALTY", 5.0))) args = parser.parse_args() arg_s3_bucket = args.s3_bucket arg_s3_prefix = args.s3_prefix logger.info("S3 bucket: %s \n S3 prefix: %s", arg_s3_bucket, arg_s3_prefix) # tournament_worker: names to be displayed in MP4. # This is racer alias in tournament worker case. display_names = rospy.get_param('DISPLAY_NAME', "") metrics_s3_buckets = rospy.get_param('METRICS_S3_BUCKET') metrics_s3_object_keys = rospy.get_param('METRICS_S3_OBJECT_KEY') arg_s3_bucket, arg_s3_prefix = utils.force_list( arg_s3_bucket), utils.force_list(arg_s3_prefix) metrics_s3_buckets = utils.force_list(metrics_s3_buckets) metrics_s3_object_keys = utils.force_list(metrics_s3_object_keys) validate_list = [ arg_s3_bucket, arg_s3_prefix, metrics_s3_buckets, metrics_s3_object_keys ] simtrace_s3_bucket = rospy.get_param('SIMTRACE_S3_BUCKET', None) mp4_s3_bucket = rospy.get_param('MP4_S3_BUCKET', None) if simtrace_s3_bucket: simtrace_s3_object_prefix = rospy.get_param('SIMTRACE_S3_PREFIX') simtrace_s3_bucket = utils.force_list(simtrace_s3_bucket) simtrace_s3_object_prefix = utils.force_list(simtrace_s3_object_prefix) validate_list.extend([simtrace_s3_bucket, simtrace_s3_object_prefix]) if mp4_s3_bucket: mp4_s3_object_prefix = rospy.get_param('MP4_S3_OBJECT_PREFIX') mp4_s3_bucket = utils.force_list(mp4_s3_bucket) mp4_s3_object_prefix = utils.force_list(mp4_s3_object_prefix) validate_list.extend([mp4_s3_bucket, mp4_s3_object_prefix]) if not all([lambda x: len(x) == len(validate_list[0]), validate_list]): utils.log_and_exit( "Eval worker error: Incorrect arguments passed: {}".format( validate_list), utils.SIMAPP_SIMULATION_WORKER_EXCEPTION, utils.SIMAPP_EVENT_ERROR_CODE_500) if args.number_of_resets != 0 and args.number_of_resets < MIN_RESET_COUNT: raise GenericRolloutException( "number of resets is less than {}".format(MIN_RESET_COUNT)) # Instantiate Cameras if len(arg_s3_bucket) == 1: configure_camera(namespaces=['racecar']) else: configure_camera(namespaces=[ 'racecar_{}'.format(str(agent_index)) for agent_index in range(len(arg_s3_bucket)) ]) agent_list = list() s3_bucket_dict = dict() s3_prefix_dict = dict() s3_writers = list() # tournament_worker: list of required S3 locations simtrace_s3_bucket_dict = dict() simtrace_s3_prefix_dict = dict() metrics_s3_bucket_dict = dict() metrics_s3_obect_key_dict = dict() mp4_s3_bucket_dict = dict() mp4_s3_object_prefix_dict = dict() for agent_index, s3_bucket_val in enumerate(arg_s3_bucket): agent_name = 'agent' if len(arg_s3_bucket) == 1 else 'agent_{}'.format( str(agent_index)) racecar_name = 'racecar' if len( arg_s3_bucket) == 1 else 'racecar_{}'.format(str(agent_index)) s3_bucket_dict[agent_name] = arg_s3_bucket[agent_index] s3_prefix_dict[agent_name] = arg_s3_prefix[agent_index] # tournament_worker: remap key with agent_name instead of agent_index for list of S3 locations. simtrace_s3_bucket_dict[agent_name] = simtrace_s3_bucket[agent_index] simtrace_s3_prefix_dict[agent_name] = simtrace_s3_object_prefix[ agent_index] metrics_s3_bucket_dict[agent_name] = metrics_s3_buckets[agent_index] metrics_s3_obect_key_dict[agent_name] = metrics_s3_object_keys[ agent_index] mp4_s3_bucket_dict[agent_name] = mp4_s3_bucket[agent_index] mp4_s3_object_prefix_dict[agent_name] = mp4_s3_object_prefix[ agent_index] s3_client = SageS3Client(bucket=arg_s3_bucket[agent_index], s3_prefix=arg_s3_prefix[agent_index], aws_region=args.aws_region) # Load the model metadata if not os.path.exists(os.path.join(CUSTOM_FILES_PATH, agent_name)): os.makedirs(os.path.join(CUSTOM_FILES_PATH, agent_name)) model_metadata_local_path = os.path.join( os.path.join(CUSTOM_FILES_PATH, agent_name), 'model_metadata.json') utils.load_model_metadata( s3_client, os.path.normpath("%s/model/model_metadata.json" % arg_s3_prefix[agent_index]), model_metadata_local_path) # Handle backward compatibility _, _, version = parse_model_metadata(model_metadata_local_path) if float(version) < float(utils.SIMAPP_VERSION) and \ not utils.has_current_ckpnt_name(arg_s3_bucket[agent_index], arg_s3_prefix[agent_index], args.aws_region): utils.make_compatible(arg_s3_bucket[agent_index], arg_s3_prefix[agent_index], args.aws_region, SyncFiles.TRAINER_READY.value) # Select the optimal model utils.do_model_selection(s3_bucket=arg_s3_bucket[agent_index], s3_prefix=arg_s3_prefix[agent_index], region=args.aws_region) # Download hyperparameters from SageMaker if not os.path.exists(agent_name): os.makedirs(agent_name) hyperparameters_file_success = False hyperparams_s3_key = os.path.normpath(arg_s3_prefix[agent_index] + "/ip/hyperparameters.json") hyperparameters_file_success = s3_client.download_file( s3_key=hyperparams_s3_key, local_path=os.path.join(agent_name, "hyperparameters.json")) sm_hyperparams_dict = {} if hyperparameters_file_success: logger.info("Received Sagemaker hyperparameters successfully!") with open(os.path.join(agent_name, "hyperparameters.json")) as file: sm_hyperparams_dict = json.load(file) else: logger.info("SageMaker hyperparameters not found.") agent_config = { 'model_metadata': model_metadata_local_path, ConfigParams.CAR_CTRL_CONFIG.value: { ConfigParams.LINK_NAME_LIST.value: [ link_name.replace('racecar', racecar_name) for link_name in LINK_NAMES ], ConfigParams.VELOCITY_LIST.value: [ velocity_topic.replace('racecar', racecar_name) for velocity_topic in VELOCITY_TOPICS ], ConfigParams.STEERING_LIST.value: [ steering_topic.replace('racecar', racecar_name) for steering_topic in STEERING_TOPICS ], ConfigParams.CHANGE_START.value: utils.str2bool(rospy.get_param('CHANGE_START_POSITION', False)), ConfigParams.ALT_DIR.value: utils.str2bool( rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)), ConfigParams.ACTION_SPACE_PATH.value: 'custom_files/' + agent_name + '/model_metadata.json', ConfigParams.REWARD.value: reward_function, ConfigParams.AGENT_NAME.value: racecar_name, ConfigParams.VERSION.value: version, ConfigParams.NUMBER_OF_RESETS.value: args.number_of_resets, ConfigParams.PENALTY_SECONDS.value: args.penalty_seconds, ConfigParams.NUMBER_OF_TRIALS.value: args.number_of_trials, ConfigParams.IS_CONTINUOUS.value: args.is_continuous, ConfigParams.RACE_TYPE.value: args.race_type, ConfigParams.COLLISION_PENALTY.value: args.collision_penalty, ConfigParams.OFF_TRACK_PENALTY.value: args.off_track_penalty } } metrics_s3_config = { MetricsS3Keys.METRICS_BUCKET.value: metrics_s3_buckets[agent_index], MetricsS3Keys.METRICS_KEY.value: metrics_s3_object_keys[agent_index], # Replaced rospy.get_param('AWS_REGION') to be equal to the argument being passed # or default argument set MetricsS3Keys.REGION.value: args.aws_region, # Replaced rospy.get_param('MODEL_S3_BUCKET') to be equal to the argument being passed # or default argument set MetricsS3Keys.STEP_BUCKET.value: arg_s3_bucket[agent_index], # Replaced rospy.get_param('MODEL_S3_PREFIX') to be equal to the argument being passed # or default argument set MetricsS3Keys.STEP_KEY.value: os.path.join(arg_s3_prefix[agent_index], EVALUATION_SIMTRACE_DATA_S3_OBJECT_KEY) } aws_region = rospy.get_param('AWS_REGION', args.aws_region) s3_writer_job_info = [] if simtrace_s3_bucket: s3_writer_job_info.append( IterationData( 'simtrace', simtrace_s3_bucket[agent_index], simtrace_s3_object_prefix[agent_index], aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, agent_name, IterationDataLocalFileNames. SIM_TRACE_EVALUATION_LOCAL_FILE.value))) if mp4_s3_bucket: s3_writer_job_info.extend([ IterationData( 'pip', mp4_s3_bucket[agent_index], mp4_s3_object_prefix[agent_index], aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, agent_name, IterationDataLocalFileNames. CAMERA_PIP_MP4_VALIDATION_LOCAL_PATH.value)), IterationData( '45degree', mp4_s3_bucket[agent_index], mp4_s3_object_prefix[agent_index], aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, agent_name, IterationDataLocalFileNames. CAMERA_45DEGREE_MP4_VALIDATION_LOCAL_PATH.value)), IterationData( 'topview', mp4_s3_bucket[agent_index], mp4_s3_object_prefix[agent_index], aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, agent_name, IterationDataLocalFileNames. CAMERA_TOPVIEW_MP4_VALIDATION_LOCAL_PATH.value)) ]) s3_writers.append(S3Writer(job_info=s3_writer_job_info)) run_phase_subject = RunPhaseSubject() agent_list.append( create_rollout_agent(agent_config, EvalMetrics(agent_name, metrics_s3_config), run_phase_subject)) agent_list.append(create_obstacles_agent()) agent_list.append(create_bot_cars_agent()) # ROS service to indicate all the robomaker markov packages are ready for consumption signal_robomaker_markov_package_ready() PhaseObserver('/agent/training_phase', run_phase_subject) graph_manager, _ = get_graph_manager(hp_dict=sm_hyperparams_dict, agent_list=agent_list, run_phase_subject=run_phase_subject) ds_params_instance = S3BotoDataStoreParameters( aws_region=args.aws_region, bucket_names=s3_bucket_dict, base_checkpoint_dir=args.local_model_directory, s3_folders=s3_prefix_dict) graph_manager.data_store = S3BotoDataStore(params=ds_params_instance, graph_manager=graph_manager, ignore_lock=True) graph_manager.env_params.seed = 0 task_parameters = TaskParameters() task_parameters.checkpoint_restore_path = args.local_model_directory tournament_worker(graph_manager=graph_manager, number_of_trials=args.number_of_trials, task_parameters=task_parameters, s3_writers=s3_writers, is_continuous=args.is_continuous) # tournament_worker: write race report to local file. write_race_report(graph_manager, model_s3_bucket_map=s3_bucket_dict, model_s3_prefix_map=s3_prefix_dict, metrics_s3_bucket_map=metrics_s3_bucket_dict, metrics_s3_key_map=metrics_s3_obect_key_dict, simtrace_s3_bucket_map=simtrace_s3_bucket_dict, simtrace_s3_prefix_map=simtrace_s3_prefix_dict, mp4_s3_bucket_map=mp4_s3_bucket_dict, mp4_s3_prefix_map=mp4_s3_object_prefix_dict, display_names=display_names) # tournament_worker: terminate tournament_race_node. terminate_tournament_race()
def main(): screen.set_use_colors(False) parser = argparse.ArgumentParser() parser.add_argument( '-c', '--checkpoint_dir', help= '(string) Path to a folder containing a checkpoint to restore the model from.', type=str, default='./checkpoint') parser.add_argument('--s3_bucket', help='(string) S3 bucket', type=str, default=rospy.get_param("SAGEMAKER_SHARED_S3_BUCKET", "gsaur-test")) parser.add_argument('--s3_prefix', help='(string) S3 prefix', type=str, default=rospy.get_param("SAGEMAKER_SHARED_S3_PREFIX", "sagemaker")) parser.add_argument( '--num_workers', help="(int) The number of workers started in this pool", type=int, default=int(rospy.get_param("NUM_WORKERS", 1))) parser.add_argument('--rollout_idx', help="(int) The index of current rollout worker", type=int, default=0) parser.add_argument('-r', '--redis_ip', help="(string) IP or host for the redis server", default='localhost', type=str) parser.add_argument('-rp', '--redis_port', help="(int) Port of the redis server", default=6379, type=int) parser.add_argument('--aws_region', help='(string) AWS region', type=str, default=rospy.get_param("AWS_REGION", "us-east-1")) parser.add_argument('--reward_file_s3_key', help='(string) Reward File S3 Key', type=str, default=rospy.get_param("REWARD_FILE_S3_KEY", None)) parser.add_argument('--model_metadata_s3_key', help='(string) Model Metadata File S3 Key', type=str, default=rospy.get_param("MODEL_METADATA_FILE_S3_KEY", None)) # For training job, reset is not allowed. penalty_seconds, off_track_penalty, and # collision_penalty will all be 0 be default parser.add_argument('--number_of_resets', help='(integer) Number of resets', type=int, default=int(rospy.get_param("NUMBER_OF_RESETS", 0))) parser.add_argument('--penalty_seconds', help='(float) penalty second', type=float, default=float(rospy.get_param("PENALTY_SECONDS", 0.0))) parser.add_argument('--job_type', help='(string) job type', type=str, default=rospy.get_param("JOB_TYPE", "TRAINING")) parser.add_argument('--is_continuous', help='(boolean) is continous after lap completion', type=bool, default=utils.str2bool( rospy.get_param("IS_CONTINUOUS", False))) parser.add_argument('--race_type', help='(string) Race type', type=str, default=rospy.get_param("RACE_TYPE", "TIME_TRIAL")) parser.add_argument('--off_track_penalty', help='(float) off track penalty second', type=float, default=float(rospy.get_param("OFF_TRACK_PENALTY", 0.0))) parser.add_argument('--collision_penalty', help='(float) collision penalty second', type=float, default=float(rospy.get_param("COLLISION_PENALTY", 0.0))) args = parser.parse_args() s3_client = SageS3Client(bucket=args.s3_bucket, s3_prefix=args.s3_prefix, aws_region=args.aws_region) logger.info("S3 bucket: %s", args.s3_bucket) logger.info("S3 prefix: %s", args.s3_prefix) # Load the model metadata model_metadata_local_path = os.path.join(CUSTOM_FILES_PATH, 'model_metadata.json') utils.load_model_metadata(s3_client, args.model_metadata_s3_key, model_metadata_local_path) # Download and import reward function if not args.reward_file_s3_key: log_and_exit( "Reward function code S3 key not available for S3 bucket {} and prefix {}" .format(args.s3_bucket, args.s3_prefix), SIMAPP_SIMULATION_WORKER_EXCEPTION, SIMAPP_EVENT_ERROR_CODE_500) download_customer_reward_function(s3_client, args.reward_file_s3_key) try: from custom_files.customer_reward_function import reward_function except Exception as e: log_and_exit("Failed to import user's reward_function: {}".format(e), SIMAPP_SIMULATION_WORKER_EXCEPTION, SIMAPP_EVENT_ERROR_CODE_400) # Instantiate Cameras configure_camera(namespaces=['racecar']) preset_file_success, _ = download_custom_files_if_present( s3_client, args.s3_prefix) #! TODO each agent should have own config _, _, version = utils_parse_model_metadata.parse_model_metadata( model_metadata_local_path) agent_config = { 'model_metadata': model_metadata_local_path, ConfigParams.CAR_CTRL_CONFIG.value: { ConfigParams.LINK_NAME_LIST.value: LINK_NAMES, ConfigParams.VELOCITY_LIST.value: VELOCITY_TOPICS, ConfigParams.STEERING_LIST.value: STEERING_TOPICS, ConfigParams.CHANGE_START.value: utils.str2bool(rospy.get_param('CHANGE_START_POSITION', True)), ConfigParams.ALT_DIR.value: utils.str2bool( rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)), ConfigParams.ACTION_SPACE_PATH.value: 'custom_files/model_metadata.json', ConfigParams.REWARD.value: reward_function, ConfigParams.AGENT_NAME.value: 'racecar', ConfigParams.VERSION.value: version, ConfigParams.NUMBER_OF_RESETS.value: args.number_of_resets, ConfigParams.PENALTY_SECONDS.value: args.penalty_seconds, ConfigParams.NUMBER_OF_TRIALS.value: None, ConfigParams.IS_CONTINUOUS.value: args.is_continuous, ConfigParams.RACE_TYPE.value: args.race_type, ConfigParams.COLLISION_PENALTY.value: args.collision_penalty, ConfigParams.OFF_TRACK_PENALTY.value: args.off_track_penalty } } #! TODO each agent should have own s3 bucket step_metrics_prefix = rospy.get_param('SAGEMAKER_SHARED_S3_PREFIX') if args.num_workers > 1: step_metrics_prefix = os.path.join(step_metrics_prefix, str(args.rollout_idx)) metrics_s3_config = { MetricsS3Keys.METRICS_BUCKET.value: rospy.get_param('METRICS_S3_BUCKET'), MetricsS3Keys.METRICS_KEY.value: rospy.get_param('METRICS_S3_OBJECT_KEY'), MetricsS3Keys.REGION.value: rospy.get_param('AWS_REGION') } metrics_s3_model_cfg = { MetricsS3Keys.METRICS_BUCKET.value: args.s3_bucket, MetricsS3Keys.METRICS_KEY.value: os.path.join(args.s3_prefix, DEEPRACER_CHKPNT_KEY_SUFFIX), MetricsS3Keys.REGION.value: args.aws_region } run_phase_subject = RunPhaseSubject() agent_list = list() agent_list.append( create_rollout_agent( agent_config, TrainingMetrics(agent_name='agent', s3_dict_metrics=metrics_s3_config, s3_dict_model=metrics_s3_model_cfg, ckpnt_dir=args.checkpoint_dir, run_phase_sink=run_phase_subject, use_model_picker=(args.rollout_idx == 0)), run_phase_subject)) agent_list.append(create_obstacles_agent()) agent_list.append(create_bot_cars_agent()) # ROS service to indicate all the robomaker markov packages are ready for consumption signal_robomaker_markov_package_ready() PhaseObserver('/agent/training_phase', run_phase_subject) aws_region = rospy.get_param('AWS_REGION', args.aws_region) simtrace_s3_bucket = rospy.get_param('SIMTRACE_S3_BUCKET', None) mp4_s3_bucket = rospy.get_param('MP4_S3_BUCKET', None) if args.rollout_idx == 0 else None if simtrace_s3_bucket: simtrace_s3_object_prefix = rospy.get_param('SIMTRACE_S3_PREFIX') if args.num_workers > 1: simtrace_s3_object_prefix = os.path.join(simtrace_s3_object_prefix, str(args.rollout_idx)) if mp4_s3_bucket: mp4_s3_object_prefix = rospy.get_param('MP4_S3_OBJECT_PREFIX') s3_writer_job_info = [] if simtrace_s3_bucket: s3_writer_job_info.append( IterationData( 'simtrace', simtrace_s3_bucket, simtrace_s3_object_prefix, aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, 'agent', IterationDataLocalFileNames.SIM_TRACE_TRAINING_LOCAL_FILE. value))) if mp4_s3_bucket: s3_writer_job_info.extend([ IterationData( 'pip', mp4_s3_bucket, mp4_s3_object_prefix, aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, 'agent', IterationDataLocalFileNames. CAMERA_PIP_MP4_VALIDATION_LOCAL_PATH.value)), IterationData( '45degree', mp4_s3_bucket, mp4_s3_object_prefix, aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, 'agent', IterationDataLocalFileNames. CAMERA_45DEGREE_MP4_VALIDATION_LOCAL_PATH.value)), IterationData( 'topview', mp4_s3_bucket, mp4_s3_object_prefix, aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, 'agent', IterationDataLocalFileNames. CAMERA_TOPVIEW_MP4_VALIDATION_LOCAL_PATH.value)) ]) s3_writer = S3Writer(job_info=s3_writer_job_info) redis_ip = s3_client.get_ip() logger.info("Received IP from SageMaker successfully: %s", redis_ip) # Download hyperparameters from SageMaker hyperparameters_file_success = False hyperparams_s3_key = os.path.normpath(args.s3_prefix + "/ip/hyperparameters.json") hyperparameters_file_success = s3_client.download_file( s3_key=hyperparams_s3_key, local_path="hyperparameters.json") sm_hyperparams_dict = {} if hyperparameters_file_success: logger.info("Received Sagemaker hyperparameters successfully!") with open("hyperparameters.json") as filepointer: sm_hyperparams_dict = json.load(filepointer) else: logger.info("SageMaker hyperparameters not found.") enable_domain_randomization = utils.str2bool( rospy.get_param('ENABLE_DOMAIN_RANDOMIZATION', False)) if preset_file_success: preset_location = os.path.join(CUSTOM_FILES_PATH, "preset.py") preset_location += ":graph_manager" graph_manager = short_dynamic_import(preset_location, ignore_module_case=True) logger.info("Using custom preset file!") else: graph_manager, _ = get_graph_manager( hp_dict=sm_hyperparams_dict, agent_list=agent_list, run_phase_subject=run_phase_subject, enable_domain_randomization=enable_domain_randomization) # If num_episodes_between_training is smaller than num_workers then cancel worker early. episode_steps_per_rollout = graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps # Reduce number of workers if allocated more than num_episodes_between_training if args.num_workers > episode_steps_per_rollout: logger.info( "Excess worker allocated. Reducing from {} to {}...".format( args.num_workers, episode_steps_per_rollout)) args.num_workers = episode_steps_per_rollout if args.rollout_idx >= episode_steps_per_rollout or args.rollout_idx >= args.num_workers: err_msg_format = "Exiting excess worker..." err_msg_format += "(rollout_idx[{}] >= num_workers[{}] or num_episodes_between_training[{}])" logger.info( err_msg_format.format(args.rollout_idx, args.num_workers, episode_steps_per_rollout)) # Close the down the job utils.cancel_simulation_job( os.environ.get('AWS_ROBOMAKER_SIMULATION_JOB_ARN'), rospy.get_param('AWS_REGION')) memory_backend_params = DeepRacerRedisPubSubMemoryBackendParameters( redis_address=redis_ip, redis_port=6379, run_type=str(RunType.ROLLOUT_WORKER), channel=args.s3_prefix, num_workers=args.num_workers, rollout_idx=args.rollout_idx) graph_manager.memory_backend_params = memory_backend_params ds_params_instance = S3BotoDataStoreParameters( aws_region=args.aws_region, bucket_names={'agent': args.s3_bucket}, base_checkpoint_dir=args.checkpoint_dir, s3_folders={'agent': args.s3_prefix}) graph_manager.data_store = S3BotoDataStore(ds_params_instance, graph_manager) task_parameters = TaskParameters() task_parameters.checkpoint_restore_path = args.checkpoint_dir rollout_worker(graph_manager=graph_manager, num_workers=args.num_workers, rollout_idx=args.rollout_idx, task_parameters=task_parameters, s3_writer=s3_writer)
def simtrace_job_info(s3_bucket, s3_prefix, aws_region): dir_path = os.path.dirname(os.path.realpath(__file__)) simtrace_path = "{}/test_data/simtrace.csv".format(dir_path) with open(simtrace_path, "w") as filepointer: filepointer.write("") return [IterationData('simtrace', s3_bucket, s3_prefix, aws_region, simtrace_path)]
def main(): """ Main function for evaluation worker """ parser = argparse.ArgumentParser() parser.add_argument('-p', '--preset', help="(string) Name of a preset to run \ (class name from the 'presets' directory.)", type=str, required=False) parser.add_argument('--s3_bucket', help='list(string) S3 bucket', type=str, nargs='+', default=rospy.get_param("MODEL_S3_BUCKET", ["gsaur-test"])) parser.add_argument('--s3_prefix', help='list(string) S3 prefix', type=str, nargs='+', default=rospy.get_param("MODEL_S3_PREFIX", ["sagemaker"])) parser.add_argument('--s3_endpoint_url', help='(string) S3 endpoint URL', type=str, default=rospy.get_param("S3_ENDPOINT_URL", None)) parser.add_argument('--aws_region', help='(string) AWS region', type=str, default=rospy.get_param("AWS_REGION", "us-east-1")) parser.add_argument('--number_of_trials', help='(integer) Number of trials', type=int, default=int(rospy.get_param("NUMBER_OF_TRIALS", 10))) parser.add_argument( '-c', '--local_model_directory', help='(string) Path to a folder containing a checkpoint \ to restore the model from.', type=str, default='./checkpoint') parser.add_argument('--number_of_resets', help='(integer) Number of resets', type=int, default=int(rospy.get_param("NUMBER_OF_RESETS", 0))) parser.add_argument('--penalty_seconds', help='(float) penalty second', type=float, default=float(rospy.get_param("PENALTY_SECONDS", 2.0))) parser.add_argument('--job_type', help='(string) job type', type=str, default=rospy.get_param("JOB_TYPE", "EVALUATION")) parser.add_argument('--is_continuous', help='(boolean) is continous after lap completion', type=bool, default=utils.str2bool( rospy.get_param("IS_CONTINUOUS", False))) parser.add_argument('--race_type', help='(string) Race type', type=str, default=rospy.get_param("RACE_TYPE", "TIME_TRIAL")) parser.add_argument('--off_track_penalty', help='(float) off track penalty second', type=float, default=float(rospy.get_param("OFF_TRACK_PENALTY", 2.0))) parser.add_argument('--collision_penalty', help='(float) collision penalty second', type=float, default=float(rospy.get_param("COLLISION_PENALTY", 5.0))) parser.add_argument('--round_robin_advance_dist', help='(float) round robin distance 0-1', type=float, default=float( rospy.get_param("ROUND_ROBIN_ADVANCE_DIST", 0.05))) parser.add_argument('--start_position_offset', help='(float) offset start 0-1', type=float, default=float( rospy.get_param("START_POSITION_OFFSET", 0.0))) args = parser.parse_args() arg_s3_bucket = args.s3_bucket arg_s3_prefix = args.s3_prefix logger.info("S3 bucket: %s \n S3 prefix: %s \n S3 endpoint URL: %s", args.s3_bucket, args.s3_prefix, args.s3_endpoint_url) metrics_s3_buckets = rospy.get_param('METRICS_S3_BUCKET') metrics_s3_object_keys = rospy.get_param('METRICS_S3_OBJECT_KEY') arg_s3_bucket, arg_s3_prefix = utils.force_list( arg_s3_bucket), utils.force_list(arg_s3_prefix) metrics_s3_buckets = utils.force_list(metrics_s3_buckets) metrics_s3_object_keys = utils.force_list(metrics_s3_object_keys) validate_list = [ arg_s3_bucket, arg_s3_prefix, metrics_s3_buckets, metrics_s3_object_keys ] simtrace_s3_bucket = rospy.get_param('SIMTRACE_S3_BUCKET', None) mp4_s3_bucket = rospy.get_param('MP4_S3_BUCKET', None) if simtrace_s3_bucket: simtrace_s3_object_prefix = rospy.get_param('SIMTRACE_S3_PREFIX') simtrace_s3_bucket = utils.force_list(simtrace_s3_bucket) simtrace_s3_object_prefix = utils.force_list(simtrace_s3_object_prefix) validate_list.extend([simtrace_s3_bucket, simtrace_s3_object_prefix]) if mp4_s3_bucket: mp4_s3_object_prefix = rospy.get_param('MP4_S3_OBJECT_PREFIX') mp4_s3_bucket = utils.force_list(mp4_s3_bucket) mp4_s3_object_prefix = utils.force_list(mp4_s3_object_prefix) validate_list.extend([mp4_s3_bucket, mp4_s3_object_prefix]) if not all([lambda x: len(x) == len(validate_list[0]), validate_list]): log_and_exit( "Eval worker error: Incorrect arguments passed: {}".format( validate_list), SIMAPP_SIMULATION_WORKER_EXCEPTION, SIMAPP_EVENT_ERROR_CODE_500) if args.number_of_resets != 0 and args.number_of_resets < MIN_RESET_COUNT: raise GenericRolloutException( "number of resets is less than {}".format(MIN_RESET_COUNT)) # Instantiate Cameras if len(arg_s3_bucket) == 1: configure_camera(namespaces=['racecar']) else: configure_camera(namespaces=[ 'racecar_{}'.format(str(agent_index)) for agent_index in range(len(arg_s3_bucket)) ]) agent_list = list() s3_bucket_dict = dict() s3_prefix_dict = dict() s3_writers = list() start_positions = get_start_positions(len(arg_s3_bucket)) done_condition = utils.str_to_done_condition( rospy.get_param("DONE_CONDITION", any)) park_positions = utils.pos_2d_str_to_list( rospy.get_param("PARK_POSITIONS", [])) # if not pass in park positions for all done condition case, use default if not park_positions: park_positions = [DEFAULT_PARK_POSITION for _ in arg_s3_bucket] for agent_index, _ in enumerate(arg_s3_bucket): agent_name = 'agent' if len(arg_s3_bucket) == 1 else 'agent_{}'.format( str(agent_index)) racecar_name = 'racecar' if len( arg_s3_bucket) == 1 else 'racecar_{}'.format(str(agent_index)) s3_bucket_dict[agent_name] = arg_s3_bucket[agent_index] s3_prefix_dict[agent_name] = arg_s3_prefix[agent_index] # download model metadata model_metadata = ModelMetadata( bucket=arg_s3_bucket[agent_index], s3_key=get_s3_key(arg_s3_prefix[agent_index], MODEL_METADATA_S3_POSTFIX), region_name=args.aws_region, s3_endpoint_url=args.s3_endpoint_url, local_path=MODEL_METADATA_LOCAL_PATH_FORMAT.format(agent_name)) _, _, version = model_metadata.get_model_metadata_info() # Select the optimal model utils.do_model_selection(s3_bucket=arg_s3_bucket[agent_index], s3_prefix=arg_s3_prefix[agent_index], region=args.aws_region, s3_endpoint_url=args.s3_endpoint_url) agent_config = { 'model_metadata': model_metadata, ConfigParams.CAR_CTRL_CONFIG.value: { ConfigParams.LINK_NAME_LIST.value: [ link_name.replace('racecar', racecar_name) for link_name in LINK_NAMES ], ConfigParams.VELOCITY_LIST.value: [ velocity_topic.replace('racecar', racecar_name) for velocity_topic in VELOCITY_TOPICS ], ConfigParams.STEERING_LIST.value: [ steering_topic.replace('racecar', racecar_name) for steering_topic in STEERING_TOPICS ], ConfigParams.CHANGE_START.value: utils.str2bool(rospy.get_param('CHANGE_START_POSITION', False)), ConfigParams.ALT_DIR.value: utils.str2bool( rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)), ConfigParams.ACTION_SPACE_PATH.value: model_metadata.local_path, ConfigParams.REWARD.value: reward_function, ConfigParams.AGENT_NAME.value: racecar_name, ConfigParams.VERSION.value: version, ConfigParams.NUMBER_OF_RESETS.value: args.number_of_resets, ConfigParams.PENALTY_SECONDS.value: args.penalty_seconds, ConfigParams.NUMBER_OF_TRIALS.value: args.number_of_trials, ConfigParams.IS_CONTINUOUS.value: args.is_continuous, ConfigParams.RACE_TYPE.value: args.race_type, ConfigParams.COLLISION_PENALTY.value: args.collision_penalty, ConfigParams.OFF_TRACK_PENALTY.value: args.off_track_penalty, ConfigParams.START_POSITION.value: start_positions[agent_index], ConfigParams.DONE_CONDITION.value: done_condition, ConfigParams.ROUND_ROBIN_ADVANCE_DIST.value: args.round_robin_advance_dist, ConfigParams.START_POSITION_OFFSET.value: args.start_position_offset } } metrics_s3_config = { MetricsS3Keys.METRICS_BUCKET.value: metrics_s3_buckets[agent_index], MetricsS3Keys.METRICS_KEY.value: metrics_s3_object_keys[agent_index], MetricsS3Keys.ENDPOINT_URL.value: rospy.get_param('S3_ENDPOINT_URL', None), # Replaced rospy.get_param('AWS_REGION') to be equal to the argument being passed # or default argument set MetricsS3Keys.REGION.value: args.aws_region } aws_region = rospy.get_param('AWS_REGION', args.aws_region) s3_writer_job_info = [] if simtrace_s3_bucket: s3_writer_job_info.append( IterationData( 'simtrace', simtrace_s3_bucket[agent_index], simtrace_s3_object_prefix[agent_index], aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, agent_name, IterationDataLocalFileNames. SIM_TRACE_EVALUATION_LOCAL_FILE.value))) if mp4_s3_bucket: s3_writer_job_info.extend([ IterationData( 'pip', mp4_s3_bucket[agent_index], mp4_s3_object_prefix[agent_index], aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, agent_name, IterationDataLocalFileNames. CAMERA_PIP_MP4_VALIDATION_LOCAL_PATH.value)), IterationData( '45degree', mp4_s3_bucket[agent_index], mp4_s3_object_prefix[agent_index], aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, agent_name, IterationDataLocalFileNames. CAMERA_45DEGREE_MP4_VALIDATION_LOCAL_PATH.value)), IterationData( 'topview', mp4_s3_bucket[agent_index], mp4_s3_object_prefix[agent_index], aws_region, os.path.join( ITERATION_DATA_LOCAL_FILE_PATH, agent_name, IterationDataLocalFileNames. CAMERA_TOPVIEW_MP4_VALIDATION_LOCAL_PATH.value)) ]) s3_writers.append( S3Writer(job_info=s3_writer_job_info, s3_endpoint_url=args.s3_endpoint_url)) run_phase_subject = RunPhaseSubject() agent_list.append( create_rollout_agent( agent_config, EvalMetrics(agent_name, metrics_s3_config, args.is_continuous), run_phase_subject)) agent_list.append(create_obstacles_agent()) agent_list.append(create_bot_cars_agent()) # ROS service to indicate all the robomaker markov packages are ready for consumption signal_robomaker_markov_package_ready() PhaseObserver('/agent/training_phase', run_phase_subject) enable_domain_randomization = utils.str2bool( rospy.get_param('ENABLE_DOMAIN_RANDOMIZATION', False)) sm_hyperparams_dict = {} graph_manager, _ = get_graph_manager( hp_dict=sm_hyperparams_dict, agent_list=agent_list, run_phase_subject=run_phase_subject, enable_domain_randomization=enable_domain_randomization, done_condition=done_condition) ds_params_instance = S3BotoDataStoreParameters( aws_region=args.aws_region, bucket_names=s3_bucket_dict, base_checkpoint_dir=args.local_model_directory, s3_folders=s3_prefix_dict, s3_endpoint_url=args.s3_endpoint_url) graph_manager.data_store = S3BotoDataStore(params=ds_params_instance, graph_manager=graph_manager, ignore_lock=True) graph_manager.env_params.seed = 0 task_parameters = TaskParameters() task_parameters.checkpoint_restore_path = args.local_model_directory evaluation_worker(graph_manager=graph_manager, number_of_trials=args.number_of_trials, task_parameters=task_parameters, s3_writers=s3_writers, is_continuous=args.is_continuous, park_positions=park_positions)