def _setup_graph_manager(self, checkpoint, agent_list):
    """Sets up graph manager based on the checkpoint file and agents list.

    Args:
        checkpoint (Checkpoint): The model checkpoint we just downloaded.
        agent_list (list[Agent]): List of agents we want to setup graph manager for.
    """
    sm_hyperparams_dict = {}
    self._current_graph_manager, _ = get_graph_manager(
        hp_dict=sm_hyperparams_dict,
        agent_list=agent_list,
        run_phase_subject=self._run_phase_subject,
        enable_domain_randomization=self._enable_domain_randomization,
        done_condition=self._done_condition,
        pause_physics=self._model_updater.pause_physics_service,
        unpause_physics=self._model_updater.unpause_physics_service)

    checkpoint_dict = dict()
    checkpoint_dict[self._agent_name] = checkpoint
    ds_params_instance = S3BotoDataStoreParameters(checkpoint_dict=checkpoint_dict)
    self._current_graph_manager.data_store = S3BotoDataStore(
        params=ds_params_instance,
        graph_manager=self._current_graph_manager,
        ignore_lock=True,
        log_and_cont=True)
    self._current_graph_manager.env_params.seed = 0

    self._current_graph_manager.data_store.wait_for_checkpoints()
    self._current_graph_manager.data_store.modify_checkpoint_variables()

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = self._local_model_directory
    self._current_graph_manager.create_graph(
        task_parameters=task_parameters,
        stop_physics=self._model_updater.pause_physics_service,
        start_physics=self._model_updater.unpause_physics_service,
        empty_service_call=EmptyRequest)
def evaluate(params):
    # file params
    experiment_path = os.path.join(params.output_data_dir)
    logger.experiment_path = os.path.join(experiment_path, 'evaluation')
    params.checkpoint_restore_dir = os.path.join(params.input_data_dir, 'checkpoint')
    checkpoint_file = os.path.join(params.checkpoint_restore_dir, 'checkpoint')

    # Note that due to a tensorflow issue (https://github.com/tensorflow/tensorflow/issues/9146)
    # we need to replace the absolute path for the evaluation-from-a-checkpointed-model to work
    inplace_change(checkpoint_file, "/opt/ml/output/data/checkpoint", ".")

    vis_params = VisualizationParameters()
    vis_params.dump_gifs = True

    task_params = TaskParameters(evaluate_only=True, experiment_path=logger.experiment_path)
    task_params.__dict__ = add_items_to_dict(task_params.__dict__, params.__dict__)

    graph_manager = BasicRLGraphManager(
        agent_params=ClippedPPOAgentParameters(),
        env_params=GymVectorEnvironment(level='TSP_env:TSPEasyEnv'),
        schedule_params=ScheduleParameters(),
        vis_params=vis_params)
    graph_manager = graph_manager.create_graph(task_parameters=task_params)
    graph_manager.evaluate(EnvironmentSteps(5))
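# `inplace_change` is referenced above but not defined in this snippet. A minimal
# sketch of what it presumably does -- a literal string replacement inside
# TensorFlow's `checkpoint` state file, rewriting the absolute
# model_checkpoint_path recorded at training time; the helper body is an
# assumption, not the original implementation:
def inplace_change(filename, old_string, new_string):
    """Replace every occurrence of old_string with new_string in filename."""
    with open(filename) as f:
        contents = f.read()
    if old_string not in contents:
        return
    with open(filename, 'w') as f:
        f.write(contents.replace(old_string, new_string))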
def evaluation_worker(graph_manager, number_of_trials, local_model_directory):
    # Initialize the graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_restore_dir'] = local_model_directory
    graph_manager.create_graph(task_parameters)
    graph_manager.evaluate(EnvironmentEpisodes(number_of_trials))
def rollout_worker(graph_manager, checkpoint_dir, data_store, num_workers):
    """
    wait for first checkpoint then perform rollouts using the model
    """
    utils.wait_for_checkpoint(checkpoint_dir)

    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_restore_dir'] = checkpoint_dir
    graph_manager.create_graph(task_parameters)

    with graph_manager.phase_context(RunPhase.TRAIN):
        error_compensation = random.randint(0, 5)
        act_steps = math.ceil(
            (graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps
             + error_compensation) / num_workers)
        for i in range(int(graph_manager.improve_steps.num_steps / act_steps)):
            graph_manager.act(
                EnvironmentEpisodes(num_steps=act_steps + random.randint(0, 5)))
            # Wait for the next checkpoint and load it before the next batch of rollouts
            last_checkpoint = data_store.get_current_checkpoint_number()
            data_store.load_from_store(
                expected_checkpoint_number=last_checkpoint + 1)
            graph_manager.restore_checkpoint()
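# `utils.wait_for_checkpoint` is assumed by the worker above but not shown. A
# plausible minimal implementation -- poll the checkpoint directory until
# Coach's checkpoint state file appears, with a timeout; the file names,
# timeout, and poll interval are assumptions:
import os
import time

def wait_for_checkpoint(checkpoint_dir, timeout=600, poll_interval=10):
    """Block until a Coach checkpoint state file appears in checkpoint_dir."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if os.path.exists(os.path.join(checkpoint_dir, 'checkpoint')) or \
           os.path.exists(os.path.join(checkpoint_dir, '.coach_checkpoint')):
            return
        time.sleep(poll_interval)
    raise RuntimeError("No checkpoint found in {} after {}s".format(checkpoint_dir, timeout))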
def solve(self, model, app_args, amc_cfg, services, steps_per_episode):
    msglogger.info("AMC: Using coach")

    # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
    # Coach to create and use our DistillerWrapperEnvironment environment.
    # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
    if amc_cfg.agent_algo == "DDPG":
        from examples.auto_compression.amc.rl_libs.coach.presets.ADC_DDPG import (
            graph_manager, agent_params)
        graph_manager.agent_params.exploration.noise_schedule = ExponentialSchedule(
            amc_cfg.ddpg_cfg.initial_training_noise, 0,
            amc_cfg.ddpg_cfg.training_noise_decay)
        # Number of iterations to train
        graph_manager.agent_params.algorithm.num_consecutive_training_steps = steps_per_episode
        #graph_manager.agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(1)
        graph_manager.agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(
            steps_per_episode)
        # Heatup
        graph_manager.heatup_steps = EnvironmentEpisodes(
            amc_cfg.ddpg_cfg.num_heatup_episodes)
        # Replay buffer size
        graph_manager.agent_params.memory.max_size = (
            MemoryGranularity.Transitions, amc_cfg.ddpg_cfg.replay_buffer_size)
        amc_cfg.ddpg_cfg.training_noise_decay = amc_cfg.ddpg_cfg.training_noise_decay**(
            1. / steps_per_episode)
    elif "ClippedPPO" in amc_cfg.agent_algo:
        from examples.auto_compression.amc.rl_libs.coach.presets.ADC_ClippedPPO import graph_manager, agent_params
    elif "TD3" in amc_cfg.agent_algo:
        from examples.auto_compression.amc.rl_libs.coach.presets.ADC_TD3 import graph_manager, agent_params
    else:
        raise ValueError(
            "The agent algorithm you are trying to use (%s) is not supported"
            % amc_cfg.agent_algo)

    # Number of training steps
    n_training_episodes = amc_cfg.ddpg_cfg.num_training_episodes
    graph_manager.improve_steps = EnvironmentEpisodes(n_training_episodes)
    # Don't evaluate until the end
    graph_manager.steps_between_evaluation_periods = EnvironmentEpisodes(
        n_training_episodes)

    # These parameters are passed to the Distiller environment
    env_cfg = {
        'model': model,
        'app_args': app_args,
        'amc_cfg': amc_cfg,
        'services': services
    }
    graph_manager.env_params.additional_simulator_parameters = env_cfg

    coach_logs_dir = os.path.join(msglogger.logdir, 'coach')
    os.mkdir(coach_logs_dir)
    task_parameters = TaskParameters(experiment_path=coach_logs_dir)
    # Set Coach's PRNG seed
    if app_args.seed is not None:
        task_parameters.seed = app_args.seed
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
def evaluation_worker(graph_manager, number_of_trials, local_model_directory):
    # initialize graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_restore_dir'] = local_model_directory
    graph_manager.create_graph(task_parameters)

    with graph_manager.phase_context(RunPhase.TEST):
        # reset all the levels before starting to evaluate
        graph_manager.reset_internal_state(force_environment_reset=True)
        graph_manager.act(EnvironmentEpisodes(number_of_trials))
def rollout_worker(graph_manager, checkpoint_dir, data_store, num_workers):
    """
    wait for first checkpoint then perform rollouts using the model
    """
    wait_for_checkpoint(checkpoint_dir)

    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_restore_dir'] = checkpoint_dir
    graph_manager.create_graph(task_parameters)

    with graph_manager.phase_context(RunPhase.TRAIN):
        last_checkpoint = 0
        act_steps = math.ceil(
            graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps
            / num_workers)
        for i in range(int(graph_manager.improve_steps.num_steps / act_steps)):
            if should_stop(checkpoint_dir):
                break

            if type(graph_manager.agent_params.algorithm.num_consecutive_playing_steps) == EnvironmentSteps:
                graph_manager.act(EnvironmentSteps(num_steps=act_steps),
                                  wait_for_full_episodes=graph_manager.agent_params.algorithm.act_for_full_episodes)
            elif type(graph_manager.agent_params.algorithm.num_consecutive_playing_steps) == EnvironmentEpisodes:
                graph_manager.act(EnvironmentEpisodes(num_steps=act_steps))

            new_checkpoint = get_latest_checkpoint(checkpoint_dir)

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                while new_checkpoint < last_checkpoint + 1:
                    if should_stop(checkpoint_dir):
                        break
                    if data_store:
                        data_store.load_from_store()
                    new_checkpoint = get_latest_checkpoint(checkpoint_dir)
                graph_manager.restore_checkpoint()

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.ASYNC:
                if new_checkpoint > last_checkpoint:
                    graph_manager.restore_checkpoint()

            last_checkpoint = new_checkpoint
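# `wait_for_checkpoint`, `should_stop`, and `get_latest_checkpoint` are helpers
# this worker assumes. A rough sketch of `get_latest_checkpoint`, under the
# assumption that Coach's checkpoint state file records the newest checkpoint
# as "<number>_Step-..."; the file name and format are assumptions here, not
# the library's documented API:
import os

def get_latest_checkpoint(checkpoint_dir):
    """Return the newest checkpoint number recorded in checkpoint_dir, or -1."""
    state_file = os.path.join(checkpoint_dir, '.coach_checkpoint')
    if not os.path.exists(state_file):
        return -1
    with open(state_file) as f:
        contents = f.read().strip().strip('"')
    try:
        return int(contents.split('_')[0])
    except ValueError:
        return -1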
def create_graph(self, task_parameters: TaskParameters):
    self.task_parameters = task_parameters

    if isinstance(task_parameters, DistributedTaskParameters):
        screen.log_title("Creating graph - name: {} task id: {} type: {}".format(
            self.__class__.__name__, task_parameters.task_index, task_parameters.job_type))
    else:
        screen.log_title("Creating graph - name: {}".format(self.__class__.__name__))

    # "hide" the gpu if necessary
    if task_parameters.use_cpu:
        set_cpu()

    # create a target server for the worker and a device
    if isinstance(task_parameters, DistributedTaskParameters):
        task_parameters.worker_target, task_parameters.device = \
            self.create_worker_or_parameters_server(task_parameters=task_parameters)

    # create the graph modules
    self.level_managers, self.environments = self._create_graph(task_parameters)

    # set self as the parent of all the level managers
    self.top_level_manager = self.level_managers[0]
    for level_manager in self.level_managers:
        level_manager.parent_graph_manager = self

    # create a session (it needs to be created after all the graph ops were created)
    self.sess = None
    self.create_session(task_parameters=task_parameters)

    self._phase = self.phase = RunPhase.UNDEFINED

    self.setup_logger()
def run_graph_manager(self, graph_manager: 'GraphManager', args: argparse.Namespace):
    if args.distributed_coach and not graph_manager.agent_params.algorithm.distributed_coach_synchronization_type:
        screen.error("{} algorithm is not supported using distributed Coach.".format(
            graph_manager.agent_params.algorithm))

    if args.distributed_coach and args.checkpoint_save_secs and \
            graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
        screen.warning("The --checkpoint_save_secs or -s argument will be ignored as SYNC "
                       "distributed coach sync type is used. Checkpoint will be saved every "
                       "training iteration.")

    if args.distributed_coach and not args.checkpoint_save_secs and \
            graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.ASYNC:
        screen.error("Distributed coach with ASYNC distributed coach sync type requires "
                     "--checkpoint_save_secs or -s.")

    # Intel optimized TF seems to run significantly faster when limiting to a single OMP thread.
    # This will not affect GPU runs.
    os.environ["OMP_NUM_THREADS"] = "1"

    # turn TF debug prints off
    if args.framework == Frameworks.tensorflow:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_verbosity)

    # turn off the summary at the end of the run if necessary
    if not args.no_summary and not args.distributed_coach:
        atexit.register(logger.summarize_experiment)
        screen.change_terminal_title(args.experiment_name)

    task_parameters = TaskParameters(
        framework_type=args.framework,
        evaluate_only=args.evaluate,
        experiment_path=args.experiment_path,
        seed=args.seed,
        use_cpu=args.use_cpu,
        checkpoint_save_secs=args.checkpoint_save_secs,
        checkpoint_restore_dir=args.checkpoint_restore_dir,
        checkpoint_save_dir=args.checkpoint_save_dir,
        export_onnx_graph=args.export_onnx_graph,
        apply_stop_condition=args.apply_stop_condition)

    # open dashboard
    if args.open_dashboard:
        open_dashboard(args.experiment_path)

    if args.distributed_coach and args.distributed_coach_run_type != RunType.ORCHESTRATOR:
        handle_distributed_coach_tasks(graph_manager, args, task_parameters)
        return

    if args.distributed_coach and args.distributed_coach_run_type == RunType.ORCHESTRATOR:
        handle_distributed_coach_orchestrator(args)
        return

    # Single-threaded runs
    if args.num_workers == 1:
        self.start_single_threaded(task_parameters, graph_manager, args)
    else:
        self.start_multi_threaded(graph_manager, args)
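# A minimal sketch of driving run_graph_manager programmatically rather than
# through Coach's CLI. The Namespace fields mirror exactly the attributes the
# method reads above; the CartPole preset and the `launcher` instance (an
# instance of whatever class this method lives on) are assumptions made for
# illustration:
import argparse
from rl_coach.base_parameters import Frameworks
from rl_coach.presets.CartPole_DQN import graph_manager

args = argparse.Namespace(
    distributed_coach=False, distributed_coach_run_type=None,
    checkpoint_save_secs=None, framework=Frameworks.tensorflow, tf_verbosity=3,
    no_summary=True, experiment_name='demo', evaluate=False,
    experiment_path='./experiments/demo', seed=0, use_cpu=True,
    checkpoint_restore_dir=None, checkpoint_save_dir=None,
    export_onnx_graph=False, apply_stop_condition=False,
    open_dashboard=False, num_workers=1)
launcher.run_graph_manager(graph_manager, args)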
def test_basic_rl_graph_manager_with_cartpole_dqn():
    tf.reset_default_graph()
    from rl_coach.presets.CartPole_DQN import graph_manager
    assert graph_manager
    graph_manager.create_graph(
        task_parameters=TaskParameters(framework_type=Frameworks.tensorflow,
                                       experiment_path="./experiments/test"))
def test_basic_rl_graph_manager_with_lab_acer():
    tf.reset_default_graph()
    from rl_coach.presets.Lab_nav_maze_static_01_ACER import graph_manager
    assert graph_manager
    graph_manager.create_graph(
        task_parameters=TaskParameters(framework_type=Frameworks.tensorflow,
                                       experiment_path="./experiments/test"))
def test_basic_rl_graph_manager_with_pong_nec():
    tf.reset_default_graph()
    from rl_coach.presets.Atari_NEC import graph_manager
    assert graph_manager
    graph_manager.env_params.level = "PongDeterministic-v4"
    graph_manager.create_graph(task_parameters=TaskParameters(
        framework_type="tensorflow", experiment_path="./experiments/test"))
def test_get_QActionStateValue_predictions():
    tf.reset_default_graph()
    from rl_coach.presets.CartPole_DQN import graph_manager as cartpole_dqn_graph_manager
    assert cartpole_dqn_graph_manager
    cartpole_dqn_graph_manager.create_graph(task_parameters=TaskParameters(
        framework_type="tensorflow", experiment_path="./experiments/test"))
    cartpole_dqn_graph_manager.improve_steps.num_steps = 1
    cartpole_dqn_graph_manager.steps_between_evaluation_periods.num_steps = 5
def coach_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
    task_parameters = TaskParameters(framework_type="tensorflow",
                                     experiment_path="./experiments/test")
    extra_params = {'save_checkpoint_secs': None,
                    'render': True}
    task_parameters.__dict__.update(extra_params)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    if True:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            #'action_range': (0.10, 0.95),
            'action_range': (0.70, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': None,
            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1-top5/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * max(1-top1/100, 0.25) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs/100000)
            #'reward_fn': lambda top1, total_macs: top1/100 * total_macs/self.dense_model_macs
        }
    else:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            'action_range': (0.10, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': 1.5e8,
            'reward_fn': lambda top1, total_macs: top1/100
            #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75)
        }

    #msglogger.debug('Experiment configuration:\n' + json.dumps(graph_manager.env_params.additional_simulator_parameters, indent=2))
    steps_per_episode = 13
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule(
        [(ConstantSchedule(exploration_noise), EnvironmentSteps(100*steps_per_episode)),
         (ExponentialSchedule(exploration_noise, 0, exploitation_decay),
          EnvironmentSteps(300*steps_per_episode))])

    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
def create_graph(self,
                 task_parameters=TaskParameters(),
                 stop_physics=None,
                 start_physics=None,
                 empty_service_call=None):
    self.graph_creation_time = time.time()
    self.task_parameters = task_parameters

    if isinstance(task_parameters, DistributedTaskParameters):
        screen.log_title("Creating graph - name: {} task id: {} type: {}".format(
            self.__class__.__name__, task_parameters.task_index, task_parameters.job_type))
    else:
        screen.log_title("Creating graph - name: {}".format(self.__class__.__name__))

    # "hide" the gpu if necessary
    if task_parameters.use_cpu:
        set_cpu()

    # create a target server for the worker and a device
    if isinstance(task_parameters, DistributedTaskParameters):
        task_parameters.worker_target, task_parameters.device = \
            self.create_worker_or_parameters_server(task_parameters=task_parameters)

    # If necessary start the physics and then stop it after agent creation
    screen.log_title("Start physics before creating graph")
    if start_physics and empty_service_call:
        start_physics(empty_service_call())

    # create the graph modules
    screen.log_title("Create graph")
    self.level_managers, self.environments = self._create_graph(task_parameters)

    screen.log_title("Stop physics after creating graph")
    if stop_physics and empty_service_call:
        stop_physics(empty_service_call())

    # set self as the parent of all the level managers
    self.top_level_manager = self.level_managers[0]
    for level_manager in self.level_managers:
        level_manager.parent_graph_manager = self

    import smdebug.tensorflow as smd
    self.smdebug_hook = smd.SessionHook.create_from_json_file()
    self.smdebug_hook.set_mode(smd.modes.TRAIN)

    # create a session (it needs to be created after all the graph ops were created)
    self.sess = {agent_params.name: None for agent_params in self.agents_params}
    screen.log_title("Creating session")
    self.create_session(task_parameters=task_parameters)

    self._phase = self.phase = RunPhase.UNDEFINED

    self.setup_logger()

    return self
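# The start_physics/stop_physics callables above are ROS service proxies in the
# DeepRacer setup (compare _setup_graph_manager earlier, which passes the model
# updater's pause/unpause services and EmptyRequest). A minimal sketch of wiring
# this up against Gazebo's stock pause/unpause services; assumes a running ROS
# node and that `graph_manager` is an instance of this class:
import rospy
from std_srvs.srv import Empty, EmptyRequest

rospy.wait_for_service('/gazebo/pause_physics')
rospy.wait_for_service('/gazebo/unpause_physics')
pause_physics = rospy.ServiceProxy('/gazebo/pause_physics', Empty)
unpause_physics = rospy.ServiceProxy('/gazebo/unpause_physics', Empty)

graph_manager.create_graph(task_parameters=TaskParameters(),
                           stop_physics=pause_physics,
                           start_physics=unpause_physics,
                           empty_service_call=EmptyRequest)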
def test_get_QActionStateValue_predictions_lab():
    tf.reset_default_graph()
    from rl_coach.presets.Lab_nav_maze_static_01_ACER import graph_manager
    assert graph_manager
    graph_manager.create_graph(
        task_parameters=TaskParameters(framework_type=Frameworks.tensorflow,
                                       experiment_path="./experiments/test"))
    graph_manager.improve_steps.num_steps = 1
    graph_manager.steps_between_evaluation_periods.num_steps = 5
def test_basic_rl_graph_manager_with_cartpole_dqn_and_repeated_checkpoint_restore():
    tf.reset_default_graph()
    from rl_coach.presets.CartPole_DQN import graph_manager
    assert graph_manager
    graph_manager.create_graph(
        task_parameters=TaskParameters(framework_type=Frameworks.tensorflow,
                                       experiment_path="./experiments/test",
                                       apply_stop_condition=True))
def training_worker(graph_manager, checkpoint_dir):
    """
    Run the training loop: fetch experience from rollout workers, train,
    evaluate periodically, and checkpoint.
    """
    # initialize graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_save_dir'] = checkpoint_dir
    task_parameters.__dict__['checkpoint_save_secs'] = 20
    graph_manager.create_graph(task_parameters)

    # save randomly initialized graph
    graph_manager.save_checkpoint()

    # training loop
    steps = 0
    # evaluation offset
    eval_offset = 1

    graph_manager.setup_memory_backend()

    while steps < graph_manager.improve_steps.num_steps:
        graph_manager.phase = core_types.RunPhase.TRAIN
        graph_manager.fetch_from_worker(
            graph_manager.agent_params.algorithm.num_consecutive_playing_steps)
        graph_manager.phase = core_types.RunPhase.UNDEFINED

        if graph_manager.should_train():
            steps += 1

            graph_manager.phase = core_types.RunPhase.TRAIN
            graph_manager.train()
            graph_manager.phase = core_types.RunPhase.UNDEFINED

            if steps * graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps > \
                    graph_manager.steps_between_evaluation_periods.num_steps * eval_offset:
                eval_offset += 1
                if graph_manager.evaluate(graph_manager.evaluation_steps):
                    break

            if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                graph_manager.save_checkpoint()
            else:
                graph_manager.occasionally_save_checkpoint()
def evaluation_worker(graph_manager, number_of_trials, local_model_directory):
    # initialize graph
    task_parameters = TaskParameters(evaluate_only=True)
    task_parameters.__dict__['checkpoint_restore_dir'] = local_model_directory
    graph_manager.create_graph(task_parameters)
    graph_manager.reset_internal_state()

    data_store = graph_manager.data_store
    episodes_counter = Counter()

    try:
        # This will only work for DeepRacerRacetrackEnv environments
        graph_manager.top_level_manager.environment.env.env.set_allow_servo_step_signals(True)
    except Exception as ex:
        print("[ERROR] Method not defined in environment class: {}".format(ex))

    while True:
        # Get current checkpoint number
        current_checkpoint = get_latest_checkpoint(local_model_directory)
        # Register the checkpoint with the environment for logging
        graph_manager.top_level_manager.environment.env.env.set_checkpoint_num(current_checkpoint)
        while episodes_counter[current_checkpoint] < 15:
            graph_manager.evaluate(EnvironmentEpisodes(1))
            episodes_counter[current_checkpoint] += 1

        latest_checkpoint = data_store.get_latest_checkpoint()
        if latest_checkpoint:
            if latest_checkpoint > current_checkpoint:
                data_store.get_a_particular_model(checkpoint_number=current_checkpoint + 1)
                graph_manager.restore_checkpoint()

        if should_stop(local_model_directory):
            break

    # Close down the job
    graph_manager.top_level_manager.environment.env.env.cancel_simulation_job()
def coach_adc(model, dataset, arch, optimizer_data, validate_fn, save_checkpoint_fn, train_fn):
    # task_parameters = TaskParameters(framework_type="tensorflow",
    #                                  experiment_path="./experiments/test")
    # extra_params = {'save_checkpoint_secs': None,
    #                 'render': True}
    # task_parameters.__dict__.update(extra_params)
    task_parameters = TaskParameters(experiment_path=logger.get_experiment_path('adc'))
    conv_cnt = count_conv_layer(model)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    services = distiller.utils.MutableNamedTuple({
        'validate_fn': validate_fn,
        'save_checkpoint_fn': save_checkpoint_fn,
        'train_fn': train_fn})

    app_args = distiller.utils.MutableNamedTuple({
        'dataset': dataset,
        'arch': arch,
        'optimizer_data': optimizer_data})

    if True:
        amc_cfg = distiller.utils.MutableNamedTuple({
            #'action_range': (0.20, 0.95),
            'action_range': (0.20, 0.80),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': None,
            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1-top1/100) * math.log(total_macs),
            'conv_cnt': conv_cnt,
            'max_reward': -1000})
    else:
        amc_cfg = distiller.utils.MutableNamedTuple({
            'action_range': (0.10, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': 1.5e8,
            'reward_fn': lambda top1, top5, vloss, total_macs: top1/100,
            #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75),
            'conv_cnt': conv_cnt,
            'max_reward': -1000})

    # These parameters are passed to the Distiller environment
    graph_manager.env_params.additional_simulator_parameters = {
        'model': model,
        'app_args': app_args,
        'amc_cfg': amc_cfg,
        'services': services}

    exploration_noise = 0.5
    exploitation_decay = 0.996
    steps_per_episode = conv_cnt
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule(
        [(ConstantSchedule(exploration_noise), EnvironmentSteps(100*steps_per_episode)),
         (ExponentialSchedule(exploration_noise, 0, exploitation_decay),
          EnvironmentSteps(300*steps_per_episode))])

    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
def evaluation_worker(graph_manager, number_of_trials, local_model_directory):
    # initialize graph
    task_parameters = TaskParameters(evaluate_only=True)
    task_parameters.__dict__['checkpoint_restore_dir'] = local_model_directory
    graph_manager.create_graph(task_parameters)

    try:
        # This will only work for DeepRacerRacetrackEnv environments
        graph_manager.top_level_manager.environment.env.env.set_allow_servo_step_signals(True)
    except Exception as ex:
        print("[ERROR] Method not defined in environment class: {}".format(ex))

    curr_num_trials = 0
    while curr_num_trials < number_of_trials:
        graph_manager.evaluate(EnvironmentSteps(1))
        curr_num_trials += 1

    # Close down the job
    graph_manager.top_level_manager.environment.env.env.cancel_simulation_job()
def start_single_threaded(self, graph_manager: 'GraphManager', args: argparse.Namespace):
    # Start the training or evaluation
    task_parameters = TaskParameters(
        framework_type=args.framework,
        evaluate_only=args.evaluate,
        experiment_path=args.experiment_path,
        seed=args.seed,
        use_cpu=args.use_cpu,
        checkpoint_save_secs=args.checkpoint_save_secs,
        checkpoint_restore_dir=args.checkpoint_restore_dir,
        checkpoint_save_dir=args.checkpoint_save_dir,
        export_onnx_graph=args.export_onnx_graph)
    start_graph(graph_manager=graph_manager, task_parameters=task_parameters)
def do_adc_internal(model, args, optimizer_data, validate_fn, save_checkpoint_fn, train_fn):
    dataset = args.dataset
    arch = args.arch
    perform_thinning = True  # args.amc_thinning
    num_ft_epochs = args.amc_ft_epochs
    action_range = args.amc_action_range
    np.random.seed()
    conv_cnt = count_conv_layer(model)

    msglogger.info("Executing AMC: RL agent - %s   RL library - %s",
                   args.amc_agent_algo, RLLIB)

    # Create a dictionary of parameters that Coach will hand over to
    # DistillerWrapperEnvironment once it creates it.
    services = distiller.utils.MutableNamedTuple({
        'validate_fn': validate_fn,
        'save_checkpoint_fn': save_checkpoint_fn,
        'train_fn': train_fn})

    app_args = distiller.utils.MutableNamedTuple({
        'dataset': dataset,
        'arch': arch,
        'optimizer_data': optimizer_data})

    amc_cfg = distiller.utils.MutableNamedTuple({
        'protocol': args.amc_protocol,
        'agent_algo': args.amc_agent_algo,
        'perform_thinning': perform_thinning,
        'num_ft_epochs': num_ft_epochs,
        'action_range': action_range,
        'conv_cnt': conv_cnt,
        'reward_frequency': args.amc_reward_frequency})

    #net_wrapper = NetworkWrapper(model, app_args, services)
    #return sample_networks(net_wrapper, services)

    if args.amc_protocol == "accuracy-guaranteed":
        amc_cfg.target_density = None
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: -(1 - top1/100) * math.log(total_macs)
        amc_cfg.action_constrain_fn = None
    elif args.amc_protocol == "mac-constrained":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: top1/100  #(90.5 - top1) / 10
        amc_cfg.action_constrain_fn = DistillerWrapperEnvironment.get_action
    elif args.amc_protocol == "mac-constrained-experimental":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = experimental_reward_fn
        amc_cfg.action_constrain_fn = None
    else:
        raise ValueError("{} is not supported currently".format(args.amc_protocol))

    steps_per_episode = conv_cnt
    if args.amc_agent_algo == "DDPG":
        amc_cfg.heatup_noise = 0.5
        amc_cfg.initial_training_noise = 0.5
        amc_cfg.training_noise_decay = 0.996  # 0.998
        amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
        amc_cfg.num_training_epochs = args.amc_training_epochs
        training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
        heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode

    if amc_cfg.agent_algo == "Random-policy":
        return random_agent(DistillerWrapperEnvironment(model, app_args, amc_cfg, services))

    if RLLIB == "spinup":
        msglogger.info("AMC: Using spinup")
        env1 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        env2 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        ddpg_spinup(env1, env2)
    else:
        msglogger.info("AMC: Using coach")

        # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
        # Coach to create and use our DistillerWrapperEnvironment environment.
        # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
if args.amc_agent_algo == "DDPG": from examples.automated_deep_compression.presets.ADC_DDPG import graph_manager, agent_params agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule( [(ConstantSchedule(amc_cfg.heatup_noise), EnvironmentSteps(heatup_duration)), (ExponentialSchedule(amc_cfg.initial_training_noise, 0, amc_cfg.training_noise_decay), EnvironmentSteps(training_noise_duration))]) # agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0) elif "ClippedPPO" in args.amc_agent_algo: from examples.automated_deep_compression.presets.ADC_ClippedPPO import graph_manager, agent_params # These parameters are passed to the Distiller environment graph_manager.env_params.additional_simulator_parameters = { 'model': model, 'app_args': app_args, 'amc_cfg': amc_cfg, 'services': services } coach_logs_dir = os.path.join(msglogger.logdir, 'coach') os.mkdir(coach_logs_dir) task_parameters = TaskParameters(experiment_path=coach_logs_dir) graph_manager.create_graph(task_parameters) graph_manager.improve()
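# A quick sanity check of the accuracy-guaranteed reward defined above: the
# reward is negative, and moves toward zero as top1 rises or total_macs
# shrinks, which is the direction the agent is pushed in. The numbers below
# are made up for illustration:
import math

reward_fn = lambda env, top1, top5, vloss, total_macs: -(1 - top1/100) * math.log(total_macs)

print(reward_fn(None, 90.0, None, None, 1e9))  # -(0.10) * log(1e9) ~= -2.07
print(reward_fn(None, 90.0, None, None, 5e8))  # fewer MACs  -> ~ -2.00 (better)
print(reward_fn(None, 95.0, None, None, 1e9))  # higher top1 -> ~ -1.04 (better)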
def training_worker(graph_manager, checkpoint_dir, use_pretrained_model, framework):
    """
    Run the training loop: fetch experience from rollout workers, train,
    and checkpoint (optionally starting from a pre-trained model).
    """
    # initialize graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_save_dir'] = checkpoint_dir
    task_parameters.__dict__['checkpoint_save_secs'] = 20
    task_parameters.__dict__['experiment_path'] = SM_MODEL_OUTPUT_DIR

    if framework.lower() == "mxnet":
        task_parameters.framework_type = Frameworks.mxnet
        if hasattr(graph_manager, 'agent_params'):
            for network_parameters in graph_manager.agent_params.network_wrappers.values():
                network_parameters.framework = Frameworks.mxnet
        elif hasattr(graph_manager, 'agents_params'):
            for ap in graph_manager.agents_params:
                for network_parameters in ap.network_wrappers.values():
                    network_parameters.framework = Frameworks.mxnet

    if use_pretrained_model:
        task_parameters.__dict__['checkpoint_restore_dir'] = PRETRAINED_MODEL_DIR

    graph_manager.create_graph(task_parameters)

    # save randomly initialized graph
    graph_manager.save_checkpoint()

    # training loop
    steps = 0
    graph_manager.setup_memory_backend()

    # To handle SIGTERM
    door_man = DoorMan()

    try:
        while steps < graph_manager.improve_steps.num_steps:
            graph_manager.phase = core_types.RunPhase.TRAIN
            graph_manager.fetch_from_worker(
                graph_manager.agent_params.algorithm.num_consecutive_playing_steps)
            graph_manager.phase = core_types.RunPhase.UNDEFINED

            if graph_manager.should_train():
                steps += graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps

                graph_manager.phase = core_types.RunPhase.TRAIN
                graph_manager.train()
                graph_manager.phase = core_types.RunPhase.UNDEFINED

                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                    graph_manager.save_checkpoint()
                else:
                    graph_manager.occasionally_save_checkpoint()

            if door_man.terminate_now:
                print("Received SIGTERM. Checkpointing before exiting.")
                graph_manager.save_checkpoint()
                break

    except Exception as e:
        raise RuntimeError("An error occurred while training: %s" % e)
    finally:
        print("Terminating training worker")
        graph_manager.data_store.upload_finished_file()
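# DoorMan is referenced above but not defined in this snippet. A minimal sketch
# consistent with its use -- a SIGTERM handler that flips a flag so the training
# loop can checkpoint and exit cleanly; the class body is an assumption:
import signal

class DoorMan:
    def __init__(self):
        self.terminate_now = False
        # Register ourselves as the process's SIGTERM handler
        signal.signal(signal.SIGTERM, self.exit_gracefully)

    def exit_gracefully(self, signum, frame):
        self.terminate_now = True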
def main(): """ Main function for tournament worker """ parser = argparse.ArgumentParser() parser.add_argument('-p', '--preset', help="(string) Name of a preset to run \ (class name from the 'presets' directory.)", type=str, required=False) parser.add_argument('--s3_bucket', help='list(string) S3 bucket', type=str, nargs='+', default=rospy.get_param("MODEL_S3_BUCKET", ["gsaur-test"])) parser.add_argument('--s3_prefix', help='list(string) S3 prefix', type=str, nargs='+', default=rospy.get_param("MODEL_S3_PREFIX", ["sagemaker"])) parser.add_argument('--aws_region', help='(string) AWS region', type=str, default=rospy.get_param("AWS_REGION", "us-east-1")) parser.add_argument('--number_of_trials', help='(integer) Number of trials', type=int, default=int(rospy.get_param("NUMBER_OF_TRIALS", 10))) parser.add_argument( '-c', '--local_model_directory', help='(string) Path to a folder containing a checkpoint \ to restore the model from.', type=str, default='./checkpoint') parser.add_argument('--number_of_resets', help='(integer) Number of resets', type=int, default=int(rospy.get_param("NUMBER_OF_RESETS", 0))) parser.add_argument('--penalty_seconds', help='(float) penalty second', type=float, default=float(rospy.get_param("PENALTY_SECONDS", 2.0))) parser.add_argument('--job_type', help='(string) job type', type=str, default=rospy.get_param("JOB_TYPE", "EVALUATION")) parser.add_argument('--is_continuous', help='(boolean) is continous after lap completion', type=bool, default=utils.str2bool( rospy.get_param("IS_CONTINUOUS", False))) parser.add_argument('--race_type', help='(string) Race type', type=str, default=rospy.get_param("RACE_TYPE", "TIME_TRIAL")) parser.add_argument('--off_track_penalty', help='(float) off track penalty second', type=float, default=float(rospy.get_param("OFF_TRACK_PENALTY", 2.0))) parser.add_argument('--collision_penalty', help='(float) collision penalty second', type=float, default=float(rospy.get_param("COLLISION_PENALTY", 5.0))) args = parser.parse_args() arg_s3_bucket = args.s3_bucket arg_s3_prefix = args.s3_prefix logger.info("S3 bucket: %s \n S3 prefix: %s", arg_s3_bucket, arg_s3_prefix) # tournament_worker: names to be displayed in MP4. # This is racer alias in tournament worker case. 
display_names = rospy.get_param('DISPLAY_NAME', "") metrics_s3_buckets = rospy.get_param('METRICS_S3_BUCKET') metrics_s3_object_keys = rospy.get_param('METRICS_S3_OBJECT_KEY') arg_s3_bucket, arg_s3_prefix = utils.force_list( arg_s3_bucket), utils.force_list(arg_s3_prefix) metrics_s3_buckets = utils.force_list(metrics_s3_buckets) metrics_s3_object_keys = utils.force_list(metrics_s3_object_keys) validate_list = [ arg_s3_bucket, arg_s3_prefix, metrics_s3_buckets, metrics_s3_object_keys ] simtrace_s3_bucket = rospy.get_param('SIMTRACE_S3_BUCKET', None) mp4_s3_bucket = rospy.get_param('MP4_S3_BUCKET', None) if simtrace_s3_bucket: simtrace_s3_object_prefix = rospy.get_param('SIMTRACE_S3_PREFIX') simtrace_s3_bucket = utils.force_list(simtrace_s3_bucket) simtrace_s3_object_prefix = utils.force_list(simtrace_s3_object_prefix) validate_list.extend([simtrace_s3_bucket, simtrace_s3_object_prefix]) if mp4_s3_bucket: mp4_s3_object_prefix = rospy.get_param('MP4_S3_OBJECT_PREFIX') mp4_s3_bucket = utils.force_list(mp4_s3_bucket) mp4_s3_object_prefix = utils.force_list(mp4_s3_object_prefix) validate_list.extend([mp4_s3_bucket, mp4_s3_object_prefix]) if not all([lambda x: len(x) == len(validate_list[0]), validate_list]): utils.log_and_exit( "Eval worker error: Incorrect arguments passed: {}".format( validate_list), utils.SIMAPP_SIMULATION_WORKER_EXCEPTION, utils.SIMAPP_EVENT_ERROR_CODE_500) if args.number_of_resets != 0 and args.number_of_resets < MIN_RESET_COUNT: raise GenericRolloutException( "number of resets is less than {}".format(MIN_RESET_COUNT)) # Instantiate Cameras if len(arg_s3_bucket) == 1: configure_camera(namespaces=['racecar']) else: configure_camera(namespaces=[ 'racecar_{}'.format(str(agent_index)) for agent_index in range(len(arg_s3_bucket)) ]) agent_list = list() s3_bucket_dict = dict() s3_prefix_dict = dict() s3_writers = list() # tournament_worker: list of required S3 locations simtrace_s3_bucket_dict = dict() simtrace_s3_prefix_dict = dict() metrics_s3_bucket_dict = dict() metrics_s3_obect_key_dict = dict() mp4_s3_bucket_dict = dict() mp4_s3_object_prefix_dict = dict() for agent_index, s3_bucket_val in enumerate(arg_s3_bucket): agent_name = 'agent' if len(arg_s3_bucket) == 1 else 'agent_{}'.format( str(agent_index)) racecar_name = 'racecar' if len( arg_s3_bucket) == 1 else 'racecar_{}'.format(str(agent_index)) s3_bucket_dict[agent_name] = arg_s3_bucket[agent_index] s3_prefix_dict[agent_name] = arg_s3_prefix[agent_index] # tournament_worker: remap key with agent_name instead of agent_index for list of S3 locations. 
        simtrace_s3_bucket_dict[agent_name] = simtrace_s3_bucket[agent_index]
        simtrace_s3_prefix_dict[agent_name] = simtrace_s3_object_prefix[agent_index]
        metrics_s3_bucket_dict[agent_name] = metrics_s3_buckets[agent_index]
        metrics_s3_object_key_dict[agent_name] = metrics_s3_object_keys[agent_index]
        mp4_s3_bucket_dict[agent_name] = mp4_s3_bucket[agent_index]
        mp4_s3_object_prefix_dict[agent_name] = mp4_s3_object_prefix[agent_index]

        s3_client = SageS3Client(bucket=arg_s3_bucket[agent_index],
                                 s3_prefix=arg_s3_prefix[agent_index],
                                 aws_region=args.aws_region)

        # Load the model metadata
        if not os.path.exists(os.path.join(CUSTOM_FILES_PATH, agent_name)):
            os.makedirs(os.path.join(CUSTOM_FILES_PATH, agent_name))
        model_metadata_local_path = os.path.join(
            os.path.join(CUSTOM_FILES_PATH, agent_name), 'model_metadata.json')
        utils.load_model_metadata(
            s3_client,
            os.path.normpath("%s/model/model_metadata.json" % arg_s3_prefix[agent_index]),
            model_metadata_local_path)

        # Handle backward compatibility
        _, _, version = parse_model_metadata(model_metadata_local_path)
        if float(version) < float(utils.SIMAPP_VERSION) and \
                not utils.has_current_ckpnt_name(arg_s3_bucket[agent_index],
                                                 arg_s3_prefix[agent_index],
                                                 args.aws_region):
            utils.make_compatible(arg_s3_bucket[agent_index],
                                  arg_s3_prefix[agent_index],
                                  args.aws_region,
                                  SyncFiles.TRAINER_READY.value)

        # Select the optimal model
        utils.do_model_selection(s3_bucket=arg_s3_bucket[agent_index],
                                 s3_prefix=arg_s3_prefix[agent_index],
                                 region=args.aws_region)

        # Download hyperparameters from SageMaker
        if not os.path.exists(agent_name):
            os.makedirs(agent_name)
        hyperparameters_file_success = False
        hyperparams_s3_key = os.path.normpath(arg_s3_prefix[agent_index] + "/ip/hyperparameters.json")
        hyperparameters_file_success = s3_client.download_file(
            s3_key=hyperparams_s3_key,
            local_path=os.path.join(agent_name, "hyperparameters.json"))
        sm_hyperparams_dict = {}
        if hyperparameters_file_success:
            logger.info("Received SageMaker hyperparameters successfully!")
            with open(os.path.join(agent_name, "hyperparameters.json")) as file:
                sm_hyperparams_dict = json.load(file)
        else:
            logger.info("SageMaker hyperparameters not found.")

        agent_config = {
            'model_metadata': model_metadata_local_path,
            ConfigParams.CAR_CTRL_CONFIG.value: {
                ConfigParams.LINK_NAME_LIST.value:
                    [link_name.replace('racecar', racecar_name) for link_name in LINK_NAMES],
                ConfigParams.VELOCITY_LIST.value:
                    [velocity_topic.replace('racecar', racecar_name) for velocity_topic in VELOCITY_TOPICS],
                ConfigParams.STEERING_LIST.value:
                    [steering_topic.replace('racecar', racecar_name) for steering_topic in STEERING_TOPICS],
                ConfigParams.CHANGE_START.value:
                    utils.str2bool(rospy.get_param('CHANGE_START_POSITION', False)),
                ConfigParams.ALT_DIR.value:
                    utils.str2bool(rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)),
                ConfigParams.ACTION_SPACE_PATH.value:
                    'custom_files/' + agent_name + '/model_metadata.json',
                ConfigParams.REWARD.value: reward_function,
                ConfigParams.AGENT_NAME.value: racecar_name,
                ConfigParams.VERSION.value: version,
                ConfigParams.NUMBER_OF_RESETS.value: args.number_of_resets,
                ConfigParams.PENALTY_SECONDS.value: args.penalty_seconds,
                ConfigParams.NUMBER_OF_TRIALS.value: args.number_of_trials,
                ConfigParams.IS_CONTINUOUS.value: args.is_continuous,
                ConfigParams.RACE_TYPE.value: args.race_type,
                ConfigParams.COLLISION_PENALTY.value: args.collision_penalty,
                ConfigParams.OFF_TRACK_PENALTY.value: args.off_track_penalty
            }
        }

        metrics_s3_config = {
            MetricsS3Keys.METRICS_BUCKET.value: metrics_s3_buckets[agent_index],
            MetricsS3Keys.METRICS_KEY.value: metrics_s3_object_keys[agent_index],
            # Replaced rospy.get_param('AWS_REGION') to be equal to the argument being passed
            # or the default argument set
            MetricsS3Keys.REGION.value: args.aws_region,
            # Replaced rospy.get_param('MODEL_S3_BUCKET') to be equal to the argument being passed
            # or the default argument set
            MetricsS3Keys.STEP_BUCKET.value: arg_s3_bucket[agent_index],
            # Replaced rospy.get_param('MODEL_S3_PREFIX') to be equal to the argument being passed
            # or the default argument set
            MetricsS3Keys.STEP_KEY.value: os.path.join(arg_s3_prefix[agent_index],
                                                       EVALUATION_SIMTRACE_DATA_S3_OBJECT_KEY)
        }
        aws_region = rospy.get_param('AWS_REGION', args.aws_region)
        s3_writer_job_info = []
        if simtrace_s3_bucket:
            s3_writer_job_info.append(
                IterationData(
                    'simtrace', simtrace_s3_bucket[agent_index],
                    simtrace_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.SIM_TRACE_EVALUATION_LOCAL_FILE.value)))
        if mp4_s3_bucket:
            s3_writer_job_info.extend([
                IterationData(
                    'pip', mp4_s3_bucket[agent_index],
                    mp4_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.CAMERA_PIP_MP4_VALIDATION_LOCAL_PATH.value)),
                IterationData(
                    '45degree', mp4_s3_bucket[agent_index],
                    mp4_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.CAMERA_45DEGREE_MP4_VALIDATION_LOCAL_PATH.value)),
                IterationData(
                    'topview', mp4_s3_bucket[agent_index],
                    mp4_s3_object_prefix[agent_index], aws_region,
                    os.path.join(
                        ITERATION_DATA_LOCAL_FILE_PATH, agent_name,
                        IterationDataLocalFileNames.CAMERA_TOPVIEW_MP4_VALIDATION_LOCAL_PATH.value))
            ])
        s3_writers.append(S3Writer(job_info=s3_writer_job_info))
        run_phase_subject = RunPhaseSubject()
        agent_list.append(create_rollout_agent(agent_config,
                                               EvalMetrics(agent_name, metrics_s3_config),
                                               run_phase_subject))
    agent_list.append(create_obstacles_agent())
    agent_list.append(create_bot_cars_agent())

    # ROS service to indicate all the robomaker markov packages are ready for consumption
    signal_robomaker_markov_package_ready()

    PhaseObserver('/agent/training_phase', run_phase_subject)

    graph_manager, _ = get_graph_manager(hp_dict=sm_hyperparams_dict,
                                         agent_list=agent_list,
                                         run_phase_subject=run_phase_subject)

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_names=s3_bucket_dict,
        base_checkpoint_dir=args.local_model_directory,
        s3_folders=s3_prefix_dict)

    graph_manager.data_store = S3BotoDataStore(params=ds_params_instance,
                                               graph_manager=graph_manager,
                                               ignore_lock=True)
    graph_manager.env_params.seed = 0

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = args.local_model_directory

    tournament_worker(graph_manager=graph_manager,
                      number_of_trials=args.number_of_trials,
                      task_parameters=task_parameters,
                      s3_writers=s3_writers,
                      is_continuous=args.is_continuous)

    # tournament_worker: write race report to local file.
    write_race_report(graph_manager,
                      model_s3_bucket_map=s3_bucket_dict,
                      model_s3_prefix_map=s3_prefix_dict,
                      metrics_s3_bucket_map=metrics_s3_bucket_dict,
                      metrics_s3_key_map=metrics_s3_object_key_dict,
                      simtrace_s3_bucket_map=simtrace_s3_bucket_dict,
                      simtrace_s3_prefix_map=simtrace_s3_prefix_dict,
                      mp4_s3_bucket_map=mp4_s3_bucket_dict,
                      mp4_s3_prefix_map=mp4_s3_object_prefix_dict,
                      display_names=display_names)

    # tournament_worker: terminate tournament_race_node.
    terminate_tournament_race()
def validate(s3_bucket, s3_prefix, aws_region):
    screen.set_use_colors(False)
    screen.log_title(" S3 bucket: {} \n S3 prefix: {}".format(s3_bucket, s3_prefix))

    # download model metadata
    model_metadata = ModelMetadata(bucket=s3_bucket,
                                   s3_key=get_s3_key(s3_prefix, MODEL_METADATA_S3_POSTFIX),
                                   region_name=aws_region,
                                   local_path=MODEL_METADATA_LOCAL_PATH)

    # Create model local path
    os.makedirs(LOCAL_MODEL_DIR)

    try:
        # Handle backward compatibility
        model_metadata_info = model_metadata.get_model_metadata_info()
        observation_list = model_metadata_info[ModelMetadataKeys.SENSOR.value]
        version = model_metadata_info[ModelMetadataKeys.VERSION.value]
    except Exception as ex:
        log_and_exit("Failed to parse model_metadata file: {}".format(ex),
                     SIMAPP_VALIDATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_400)

    # The get_transition_data call below must happen before create_training_agent
    # to avoid returning a 500 when an unsupported sensor is received:
    # create_training_agent exits with 500 on an unsupported sensor, while
    # get_transition_data exits with 400. We want to return 400 in the model
    # validation case, so call get_transition_data first!
    transitions = get_transition_data(observation_list)

    checkpoint = Checkpoint(bucket=s3_bucket,
                            s3_prefix=s3_prefix,
                            region_name=aws_region,
                            agent_name='agent',
                            checkpoint_dir=LOCAL_MODEL_DIR)
    # make coach checkpoint compatible
    if version < SIMAPP_VERSION_2 and not checkpoint.rl_coach_checkpoint.is_compatible():
        checkpoint.rl_coach_checkpoint.make_compatible(checkpoint.syncfile_ready)
    # add checkpoint into checkpoint_dict
    checkpoint_dict = {'agent': checkpoint}

    agent_config = {
        'model_metadata': model_metadata,
        ConfigParams.CAR_CTRL_CONFIG.value: {
            ConfigParams.LINK_NAME_LIST.value: [],
            ConfigParams.VELOCITY_LIST.value: {},
            ConfigParams.STEERING_LIST.value: {},
            ConfigParams.CHANGE_START.value: None,
            ConfigParams.ALT_DIR.value: None,
            ConfigParams.MODEL_METADATA.value: model_metadata,
            ConfigParams.REWARD.value: None,
            ConfigParams.AGENT_NAME.value: 'racecar'
        }
    }

    agent_list = list()
    agent_list.append(create_training_agent(agent_config))

    sm_hyperparams_dict = {}
    graph_manager, _ = get_graph_manager(hp_dict=sm_hyperparams_dict,
                                         agent_list=agent_list,
                                         run_phase_subject=None)

    ds_params_instance = S3BotoDataStoreParameters(checkpoint_dict=checkpoint_dict)
    graph_manager.data_store = S3BotoDataStore(ds_params_instance,
                                               graph_manager,
                                               ignore_lock=True)

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = LOCAL_MODEL_DIR

    _validate(graph_manager=graph_manager,
              task_parameters=task_parameters,
              transitions=transitions,
              s3_bucket=s3_bucket,
              s3_prefix=s3_prefix,
              aws_region=aws_region)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--markov-preset-file',
                        help="(string) Name of a preset file to run in Markov's preset directory.",
                        type=str,
                        default=os.environ.get("MARKOV_PRESET_FILE", "mars_presets.py"))
    parser.add_argument('-c', '--local_model_directory',
                        help='(string) Path to a folder containing a checkpoint to restore the model from.',
                        type=str,
                        default=os.environ.get("LOCAL_MODEL_DIRECTORY", "./checkpoint"))
    parser.add_argument('-n', '--num_workers',
                        help="(int) Number of workers for multi-process based agents, e.g. A3C",
                        default=1,
                        type=int)
    parser.add_argument('--model-s3-bucket',
                        help='(string) S3 bucket where trained models are stored. It contains model checkpoints.',
                        type=str,
                        default=os.environ.get("MODEL_S3_BUCKET"))
    parser.add_argument('--model-s3-prefix',
                        help='(string) S3 prefix where trained models are stored. It contains model checkpoints.',
                        type=str,
                        default=os.environ.get("MODEL_S3_PREFIX"))
    parser.add_argument('--aws-region',
                        help='(string) AWS region',
                        type=str,
                        default=os.environ.get("ROS_AWS_REGION", "us-west-1"))
    parser.add_argument('--checkpoint-save-secs',
                        help="(int) Time period in seconds between 2 checkpoints",
                        type=int,
                        default=600)
    parser.add_argument('--save-frozen-graph',
                        help="(bool) True if we need to store the frozen graph",
                        type=bool,
                        default=True)

    args = parser.parse_args()

    if args.markov_preset_file:
        markov_path = imp.find_module("markov")[1]
        preset_location = os.path.join(markov_path, "presets", args.markov_preset_file)
        path_and_module = preset_location + ":graph_manager"
        graph_manager = short_dynamic_import(path_and_module, ignore_module_case=True)
    else:
        raise ValueError("Unable to determine preset file")

    # TODO: support other frameworks
    task_parameters = TaskParameters(framework_type=Frameworks.tensorflow,
                                     checkpoint_save_secs=args.checkpoint_save_secs)
    task_parameters.__dict__['checkpoint_save_dir'] = args.local_model_directory
    task_parameters.__dict__ = add_items_to_dict(task_parameters.__dict__, args.__dict__)

    # data_store_params_instance = S3BotoDataStoreParameters(bucket_name=args.model_s3_bucket,
    #                                                        s3_folder=args.model_s3_prefix,
    #                                                        checkpoint_dir=args.local_model_directory,
    #                                                        aws_region=args.aws_region)
    # data_store = S3BotoDataStore(data_store_params_instance)
    # if args.save_frozen_graph:
    #     data_store.graph_manager = graph_manager
    # graph_manager.data_store_params = data_store_params_instance
    # graph_manager.data_store = data_store

    graph_manager.should_stop = should_stop_training_based_on_evaluation

    start_graph(graph_manager=graph_manager, task_parameters=task_parameters)
def main():
    screen.set_use_colors(False)
    parser = argparse.ArgumentParser()
    parser.add_argument('-pk', '--preset_s3_key',
                        help="(string) Name of a preset to download from S3",
                        type=str,
                        required=False)
    parser.add_argument('-ek', '--environment_s3_key',
                        help="(string) Name of an environment file to download from S3",
                        type=str,
                        required=False)
    parser.add_argument('--model_metadata_s3_key',
                        help="(string) Model Metadata File S3 Key",
                        type=str,
                        required=False)
    parser.add_argument('-c', '--checkpoint_dir',
                        help='(string) Path to a folder containing a checkpoint to write the model to.',
                        type=str,
                        default='./checkpoint')
    parser.add_argument('--pretrained_checkpoint_dir',
                        help='(string) Path to a folder for downloading a pre-trained model',
                        type=str,
                        default=PRETRAINED_MODEL_DIR)
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=os.environ.get("SAGEMAKER_SHARED_S3_BUCKET_PATH", "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--framework',
                        help='(string) tensorflow or mxnet',
                        type=str,
                        default='tensorflow')
    parser.add_argument('--pretrained_s3_bucket',
                        help='(string) S3 bucket for pre-trained model',
                        type=str)
    parser.add_argument('--pretrained_s3_prefix',
                        help='(string) S3 prefix for pre-trained model',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=os.environ.get("AWS_REGION", "us-east-1"))

    args, _ = parser.parse_known_args()

    s3_client = S3Client(region_name=args.aws_region, max_retry_attempts=0)

    # download model metadata
    # TODO: replace 'agent' with name of each agent
    model_metadata_download = ModelMetadata(
        bucket=args.s3_bucket,
        s3_key=args.model_metadata_s3_key,
        region_name=args.aws_region,
        local_path=MODEL_METADATA_LOCAL_PATH_FORMAT.format('agent'))
    model_metadata_info = model_metadata_download.get_model_metadata_info()
    network_type = model_metadata_info[ModelMetadataKeys.NEURAL_NETWORK.value]
    version = model_metadata_info[ModelMetadataKeys.VERSION.value]

    # upload model metadata
    model_metadata_upload = ModelMetadata(
        bucket=args.s3_bucket,
        s3_key=get_s3_key(args.s3_prefix, MODEL_METADATA_S3_POSTFIX),
        region_name=args.aws_region,
        local_path=MODEL_METADATA_LOCAL_PATH_FORMAT.format('agent'))
    model_metadata_upload.persist(s3_kms_extra_args=utils.get_s3_kms_extra_args())

    shutil.copy2(model_metadata_download.local_path, SM_MODEL_OUTPUT_DIR)

    success_custom_preset = False
    if args.preset_s3_key:
        preset_local_path = "./markov/presets/preset.py"
        try:
            s3_client.download_file(bucket=args.s3_bucket,
                                    s3_key=args.preset_s3_key,
                                    local_path=preset_local_path)
            success_custom_preset = True
        except botocore.exceptions.ClientError:
            pass
        if not success_custom_preset:
            logger.info("Could not download the preset file. Using the default DeepRacer preset.")
        else:
            preset_location = "markov.presets.preset:graph_manager"
            graph_manager = short_dynamic_import(preset_location, ignore_module_case=True)
            s3_client.upload_file(
                bucket=args.s3_bucket,
                s3_key=os.path.normpath("%s/presets/preset.py" % args.s3_prefix),
                local_path=preset_local_path,
                s3_kms_extra_args=utils.get_s3_kms_extra_args())
            if success_custom_preset:
                logger.info("Using preset: %s" % args.preset_s3_key)

    if not success_custom_preset:
        params_blob = os.environ.get('SM_TRAINING_ENV', '')
        if params_blob:
            params = json.loads(params_blob)
            sm_hyperparams_dict = params["hyperparameters"]
        else:
            sm_hyperparams_dict = {}
        #! TODO: each agent should have its own config
        agent_config = {
            'model_metadata': model_metadata_download,
            ConfigParams.CAR_CTRL_CONFIG.value: {
                ConfigParams.LINK_NAME_LIST.value: [],
                ConfigParams.VELOCITY_LIST.value: {},
                ConfigParams.STEERING_LIST.value: {},
                ConfigParams.CHANGE_START.value: None,
                ConfigParams.ALT_DIR.value: None,
                ConfigParams.MODEL_METADATA.value: model_metadata_download,
                ConfigParams.REWARD.value: None,
                ConfigParams.AGENT_NAME.value: 'racecar'
            }
        }

        agent_list = list()
        agent_list.append(create_training_agent(agent_config))

        graph_manager, robomaker_hyperparams_json = get_graph_manager(
            hp_dict=sm_hyperparams_dict,
            agent_list=agent_list,
            run_phase_subject=None,
            run_type=str(RunType.TRAINER))

        # Upload hyperparameters to SageMaker shared s3 bucket
        hyperparameters = Hyperparameters(bucket=args.s3_bucket,
                                          s3_key=get_s3_key(args.s3_prefix, HYPERPARAMETER_S3_POSTFIX),
                                          region_name=args.aws_region)
        hyperparameters.persist(hyperparams_json=robomaker_hyperparams_json,
                                s3_kms_extra_args=utils.get_s3_kms_extra_args())

        # Attach sample collector to graph_manager only if sample count > 0
        max_sample_count = int(sm_hyperparams_dict.get("max_sample_count", 0))
        if max_sample_count > 0:
            sample_collector = SampleCollector(
                bucket=args.s3_bucket,
                s3_prefix=args.s3_prefix,
                region_name=args.aws_region,
                max_sample_count=max_sample_count,
                sampling_frequency=int(sm_hyperparams_dict.get("sampling_frequency", 1)))
            graph_manager.sample_collector = sample_collector

    # persist IP config from sagemaker to s3
    ip_config = IpConfig(bucket=args.s3_bucket,
                         s3_prefix=args.s3_prefix,
                         region_name=args.aws_region)
    ip_config.persist(s3_kms_extra_args=utils.get_s3_kms_extra_args())

    training_algorithm = model_metadata_download.training_algorithm
    output_head_format = FROZEN_HEAD_OUTPUT_GRAPH_FORMAT_MAPPING[training_algorithm]

    use_pretrained_model = args.pretrained_s3_bucket and args.pretrained_s3_prefix
    # Handle backward compatibility
    if use_pretrained_model:
        # checkpoint s3 instance for pretrained model
        # TODO: replace 'agent' for multiagent training
        checkpoint = Checkpoint(bucket=args.pretrained_s3_bucket,
                                s3_prefix=args.pretrained_s3_prefix,
                                region_name=args.aws_region,
                                agent_name='agent',
                                checkpoint_dir=args.pretrained_checkpoint_dir,
                                output_head_format=output_head_format)
        # make coach checkpoint compatible
        if version < SIMAPP_VERSION_2 and not checkpoint.rl_coach_checkpoint.is_compatible():
            checkpoint.rl_coach_checkpoint.make_compatible(checkpoint.syncfile_ready)
        # get best model checkpoint string
        model_checkpoint_name = checkpoint.deepracer_checkpoint_json.get_deepracer_best_checkpoint()
        # Select the best checkpoint model by uploading rl coach .coach_checkpoint file
        checkpoint.rl_coach_checkpoint.update(
            model_checkpoint_name=model_checkpoint_name,
            s3_kms_extra_args=utils.get_s3_kms_extra_args())
        # add checkpoint into checkpoint_dict
        checkpoint_dict = {'agent': checkpoint}

        # load pretrained model
        ds_params_instance_pretrained = S3BotoDataStoreParameters(checkpoint_dict=checkpoint_dict)
        data_store_pretrained = S3BotoDataStore(ds_params_instance_pretrained, graph_manager, True)
        data_store_pretrained.load_from_store()

    memory_backend_params = DeepRacerRedisPubSubMemoryBackendParameters(
        redis_address="localhost",
        redis_port=6379,
        run_type=str(RunType.TRAINER),
        channel=args.s3_prefix,
        network_type=network_type)
    graph_manager.memory_backend_params = memory_backend_params

    # checkpoint s3 instance for training model
    checkpoint = Checkpoint(bucket=args.s3_bucket,
                            s3_prefix=args.s3_prefix,
                            region_name=args.aws_region,
                            agent_name='agent',
                            checkpoint_dir=args.checkpoint_dir,
                            output_head_format=output_head_format)
    checkpoint_dict = {'agent': checkpoint}
    ds_params_instance = S3BotoDataStoreParameters(checkpoint_dict=checkpoint_dict)
    graph_manager.data_store_params = ds_params_instance
    graph_manager.data_store = S3BotoDataStore(ds_params_instance, graph_manager)

    task_parameters = TaskParameters()
    task_parameters.experiment_path = SM_MODEL_OUTPUT_DIR
    task_parameters.checkpoint_save_secs = 20
    if use_pretrained_model:
        task_parameters.checkpoint_restore_path = args.pretrained_checkpoint_dir
    task_parameters.checkpoint_save_dir = args.checkpoint_dir

    training_worker(
        graph_manager=graph_manager,
        task_parameters=task_parameters,
        user_batch_size=json.loads(robomaker_hyperparams_json)["batch_size"],
        user_episode_per_rollout=json.loads(robomaker_hyperparams_json)["num_episodes_between_training"],
        training_algorithm=training_algorithm)
def main():
    screen.set_use_colors(False)

    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--checkpoint_dir',
                        help='(string) Path to a folder containing a checkpoint to restore the model from.',
                        type=str,
                        default='./checkpoint')
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=rospy.get_param("SAGEMAKER_SHARED_S3_BUCKET", "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default=rospy.get_param("SAGEMAKER_SHARED_S3_PREFIX", "sagemaker"))
    parser.add_argument('--num_workers',
                        help="(int) The number of workers started in this pool",
                        type=int,
                        default=int(rospy.get_param("NUM_WORKERS", 1)))
    parser.add_argument('--rollout_idx',
                        help="(int) The index of the current rollout worker",
                        type=int,
                        default=0)
    parser.add_argument('-r', '--redis_ip',
                        help="(string) IP or host for the redis server",
                        default='localhost',
                        type=str)
    parser.add_argument('-rp', '--redis_port',
                        help="(int) Port of the redis server",
                        default=6379,
                        type=int)
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=rospy.get_param("AWS_REGION", "us-east-1"))
    parser.add_argument('--reward_file_s3_key',
                        help='(string) Reward File S3 Key',
                        type=str,
                        default=rospy.get_param("REWARD_FILE_S3_KEY", None))
    parser.add_argument('--model_metadata_s3_key',
                        help='(string) Model Metadata File S3 Key',
                        type=str,
                        default=rospy.get_param("MODEL_METADATA_FILE_S3_KEY", None))
    # For training jobs, reset is not allowed. penalty_seconds, off_track_penalty, and
    # collision_penalty will all be 0 by default.
    parser.add_argument('--number_of_resets',
                        help='(integer) Number of resets',
                        type=int,
                        default=int(rospy.get_param("NUMBER_OF_RESETS", 0)))
    parser.add_argument('--penalty_seconds',
                        help='(float) penalty seconds',
                        type=float,
                        default=float(rospy.get_param("PENALTY_SECONDS", 0.0)))
    parser.add_argument('--job_type',
                        help='(string) job type',
                        type=str,
                        default=rospy.get_param("JOB_TYPE", "TRAINING"))
    parser.add_argument('--is_continuous',
                        help='(boolean) whether to continue racing after lap completion',
                        type=bool,
                        default=utils.str2bool(rospy.get_param("IS_CONTINUOUS", False)))
    parser.add_argument('--race_type',
                        help='(string) Race type',
                        type=str,
                        default=rospy.get_param("RACE_TYPE", "TIME_TRIAL"))
    parser.add_argument('--off_track_penalty',
                        help='(float) off-track penalty seconds',
                        type=float,
                        default=float(rospy.get_param("OFF_TRACK_PENALTY", 0.0)))
    parser.add_argument('--collision_penalty',
                        help='(float) collision penalty seconds',
                        type=float,
                        default=float(rospy.get_param("COLLISION_PENALTY", 0.0)))

    args = parser.parse_args()

    s3_client = SageS3Client(bucket=args.s3_bucket,
                             s3_prefix=args.s3_prefix,
                             aws_region=args.aws_region)
    logger.info("S3 bucket: %s", args.s3_bucket)
    logger.info("S3 prefix: %s", args.s3_prefix)

    # Load the model metadata
    model_metadata_local_path = os.path.join(CUSTOM_FILES_PATH, 'model_metadata.json')
    utils.load_model_metadata(s3_client, args.model_metadata_s3_key, model_metadata_local_path)

    # Download and import the reward function
    if not args.reward_file_s3_key:
        log_and_exit("Reward function code S3 key not available for S3 bucket {} and prefix {}"
                     .format(args.s3_bucket, args.s3_prefix),
                     SIMAPP_SIMULATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_500)
    download_customer_reward_function(s3_client, args.reward_file_s3_key)
    try:
        from custom_files.customer_reward_function import reward_function
    except Exception as e:
        log_and_exit("Failed to import user's reward_function: {}".format(e),
                     SIMAPP_SIMULATION_WORKER_EXCEPTION,
                     SIMAPP_EVENT_ERROR_CODE_400)

    # Instantiate cameras
    configure_camera(namespaces=['racecar'])

    preset_file_success, _ = download_custom_files_if_present(s3_client, args.s3_prefix)

    #! TODO each agent should have own config
    _, _, version = utils_parse_model_metadata.parse_model_metadata(model_metadata_local_path)
    agent_config = {
        'model_metadata': model_metadata_local_path,
        ConfigParams.CAR_CTRL_CONFIG.value: {
            ConfigParams.LINK_NAME_LIST.value: LINK_NAMES,
            ConfigParams.VELOCITY_LIST.value: VELOCITY_TOPICS,
            ConfigParams.STEERING_LIST.value: STEERING_TOPICS,
            ConfigParams.CHANGE_START.value: utils.str2bool(rospy.get_param('CHANGE_START_POSITION', True)),
            ConfigParams.ALT_DIR.value: utils.str2bool(rospy.get_param('ALTERNATE_DRIVING_DIRECTION', False)),
            ConfigParams.ACTION_SPACE_PATH.value: 'custom_files/model_metadata.json',
            ConfigParams.REWARD.value: reward_function,
            ConfigParams.AGENT_NAME.value: 'racecar',
            ConfigParams.VERSION.value: version,
            ConfigParams.NUMBER_OF_RESETS.value: args.number_of_resets,
            ConfigParams.PENALTY_SECONDS.value: args.penalty_seconds,
            ConfigParams.NUMBER_OF_TRIALS.value: None,
            ConfigParams.IS_CONTINUOUS.value: args.is_continuous,
            ConfigParams.RACE_TYPE.value: args.race_type,
            ConfigParams.COLLISION_PENALTY.value: args.collision_penalty,
            ConfigParams.OFF_TRACK_PENALTY.value: args.off_track_penalty
        }
    }

    #! TODO each agent should have own s3 bucket
    step_metrics_prefix = rospy.get_param('SAGEMAKER_SHARED_S3_PREFIX')
    if args.num_workers > 1:
        step_metrics_prefix = os.path.join(step_metrics_prefix, str(args.rollout_idx))
    metrics_s3_config = {
        MetricsS3Keys.METRICS_BUCKET.value: rospy.get_param('METRICS_S3_BUCKET'),
        MetricsS3Keys.METRICS_KEY.value: rospy.get_param('METRICS_S3_OBJECT_KEY'),
        MetricsS3Keys.REGION.value: rospy.get_param('AWS_REGION')
    }
    metrics_s3_model_cfg = {
        MetricsS3Keys.METRICS_BUCKET.value: args.s3_bucket,
        MetricsS3Keys.METRICS_KEY.value: os.path.join(args.s3_prefix, DEEPRACER_CHKPNT_KEY_SUFFIX),
        MetricsS3Keys.REGION.value: args.aws_region
    }
    run_phase_subject = RunPhaseSubject()

    agent_list = list()
    agent_list.append(
        create_rollout_agent(
            agent_config,
            TrainingMetrics(agent_name='agent',
                            s3_dict_metrics=metrics_s3_config,
                            s3_dict_model=metrics_s3_model_cfg,
                            ckpnt_dir=args.checkpoint_dir,
                            run_phase_sink=run_phase_subject,
                            use_model_picker=(args.rollout_idx == 0)),
            run_phase_subject))
    agent_list.append(create_obstacles_agent())
    agent_list.append(create_bot_cars_agent())

    # ROS service to indicate that all the robomaker markov packages are ready for consumption
    signal_robomaker_markov_package_ready()

    PhaseObserver('/agent/training_phase', run_phase_subject)

    aws_region = rospy.get_param('AWS_REGION', args.aws_region)
    simtrace_s3_bucket = rospy.get_param('SIMTRACE_S3_BUCKET', None)
    mp4_s3_bucket = rospy.get_param('MP4_S3_BUCKET', None) if args.rollout_idx == 0 else None
    if simtrace_s3_bucket:
        simtrace_s3_object_prefix = rospy.get_param('SIMTRACE_S3_PREFIX')
        if args.num_workers > 1:
            simtrace_s3_object_prefix = os.path.join(simtrace_s3_object_prefix, str(args.rollout_idx))
    if mp4_s3_bucket:
        mp4_s3_object_prefix = rospy.get_param('MP4_S3_OBJECT_PREFIX')

    s3_writer_job_info = []
    if simtrace_s3_bucket:
        s3_writer_job_info.append(
            IterationData(
                'simtrace', simtrace_s3_bucket, simtrace_s3_object_prefix, aws_region,
                os.path.join(ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                             IterationDataLocalFileNames.SIM_TRACE_TRAINING_LOCAL_FILE.value)))
    if mp4_s3_bucket:
        s3_writer_job_info.extend([
            IterationData(
                'pip', mp4_s3_bucket, mp4_s3_object_prefix, aws_region,
                os.path.join(ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                             IterationDataLocalFileNames.CAMERA_PIP_MP4_VALIDATION_LOCAL_PATH.value)),
            IterationData(
                '45degree', mp4_s3_bucket, mp4_s3_object_prefix, aws_region,
                os.path.join(ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                             IterationDataLocalFileNames.CAMERA_45DEGREE_MP4_VALIDATION_LOCAL_PATH.value)),
            IterationData(
                'topview', mp4_s3_bucket, mp4_s3_object_prefix, aws_region,
                os.path.join(ITERATION_DATA_LOCAL_FILE_PATH, 'agent',
                             IterationDataLocalFileNames.CAMERA_TOPVIEW_MP4_VALIDATION_LOCAL_PATH.value))
        ])
    s3_writer = S3Writer(job_info=s3_writer_job_info)

    redis_ip = s3_client.get_ip()
    logger.info("Received IP from SageMaker successfully: %s", redis_ip)

    # Download hyperparameters from SageMaker
    hyperparams_s3_key = os.path.normpath(args.s3_prefix + "/ip/hyperparameters.json")
    hyperparameters_file_success = s3_client.download_file(s3_key=hyperparams_s3_key,
                                                           local_path="hyperparameters.json")
    sm_hyperparams_dict = {}
    if hyperparameters_file_success:
        logger.info("Received SageMaker hyperparameters successfully!")
        with open("hyperparameters.json") as filepointer:
            sm_hyperparams_dict = json.load(filepointer)
    else:
        logger.info("SageMaker hyperparameters not found.")

    enable_domain_randomization = utils.str2bool(rospy.get_param('ENABLE_DOMAIN_RANDOMIZATION', False))
    if preset_file_success:
        preset_location = os.path.join(CUSTOM_FILES_PATH, "preset.py")
        preset_location += ":graph_manager"
        graph_manager = short_dynamic_import(preset_location, ignore_module_case=True)
        logger.info("Using custom preset file!")
    else:
        graph_manager, _ = get_graph_manager(
            hp_dict=sm_hyperparams_dict,
            agent_list=agent_list,
            run_phase_subject=run_phase_subject,
            enable_domain_randomization=enable_domain_randomization)

    # If num_episodes_between_training is smaller than num_workers, cancel excess workers early.
    episode_steps_per_rollout = graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps
    # Reduce the number of workers if more were allocated than num_episodes_between_training
    if args.num_workers > episode_steps_per_rollout:
        logger.info("Excess workers allocated. Reducing from {} to {}..."
                    .format(args.num_workers, episode_steps_per_rollout))
        args.num_workers = episode_steps_per_rollout
    if args.rollout_idx >= episode_steps_per_rollout or args.rollout_idx >= args.num_workers:
        err_msg_format = "Exiting excess worker..."
        err_msg_format += "(rollout_idx[{}] >= num_workers[{}] or num_episodes_between_training[{}])"
        logger.info(err_msg_format.format(args.rollout_idx, args.num_workers, episode_steps_per_rollout))
        # Shut down the job
        utils.cancel_simulation_job(
            os.environ.get('AWS_ROBOMAKER_SIMULATION_JOB_ARN'),
            rospy.get_param('AWS_REGION'))

    memory_backend_params = DeepRacerRedisPubSubMemoryBackendParameters(
        redis_address=redis_ip,
        redis_port=6379,
        run_type=str(RunType.ROLLOUT_WORKER),
        channel=args.s3_prefix,
        num_workers=args.num_workers,
        rollout_idx=args.rollout_idx)
    graph_manager.memory_backend_params = memory_backend_params

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_names={'agent': args.s3_bucket},
        base_checkpoint_dir=args.checkpoint_dir,
        s3_folders={'agent': args.s3_prefix})
    graph_manager.data_store = S3BotoDataStore(ds_params_instance, graph_manager)

    task_parameters = TaskParameters()
    task_parameters.checkpoint_restore_path = args.checkpoint_dir

    rollout_worker(graph_manager=graph_manager,
                   num_workers=args.num_workers,
                   rollout_idx=args.rollout_idx,
                   task_parameters=task_parameters,
                   s3_writer=s3_writer)
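# The rollout worker above funnels every boolean ROS parameter through utils.str2bool rather
# than bool(), because rospy.get_param can hand back strings like "False" from launch files,
# and bool("False") is True. The real helper lives elsewhere in markov.utils; the following
# is only a plausible minimal sketch of it.
def str2bool_sketch(value):
    """Interpret common truthy strings; pass booleans through unchanged."""
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ('true', '1', 'yes', 'y', 't')


# Example: str2bool_sketch("False") -> False, whereas bool("False") -> True.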
def training_worker(graph_manager, checkpoint_dir, use_pretrained_model, framework):
    """
    Restore a checkpoint, then train the model on experience fetched from the rollout workers.
    """
    # initialize graph
    task_parameters = TaskParameters()
    task_parameters.__dict__['checkpoint_save_dir'] = checkpoint_dir
    task_parameters.__dict__['checkpoint_save_secs'] = 20
    task_parameters.__dict__['experiment_path'] = INTERMEDIATE_FOLDER

    if framework.lower() == "mxnet":
        task_parameters.framework_type = Frameworks.mxnet
        if hasattr(graph_manager, 'agent_params'):
            for network_parameters in graph_manager.agent_params.network_wrappers.values():
                network_parameters.framework = Frameworks.mxnet
        elif hasattr(graph_manager, 'agents_params'):
            for ap in graph_manager.agents_params:
                for network_parameters in ap.network_wrappers.values():
                    network_parameters.framework = Frameworks.mxnet

    if use_pretrained_model:
        task_parameters.__dict__['checkpoint_restore_dir'] = PRETRAINED_MODEL_DIR

    graph_manager.create_graph(task_parameters)

    # save the randomly initialized graph
    graph_manager.save_checkpoint()

    # training loop
    steps = 0
    graph_manager.setup_memory_backend()

    # To handle SIGTERM
    door_man = DoorMan()

    try:
        while steps < graph_manager.improve_steps.num_steps:
            graph_manager.phase = core_types.RunPhase.TRAIN
            graph_manager.fetch_from_worker(graph_manager.agent_params.algorithm.num_consecutive_playing_steps)
            graph_manager.phase = core_types.RunPhase.UNDEFINED

            if graph_manager.should_train():
                steps += graph_manager.agent_params.algorithm.num_consecutive_playing_steps.num_steps

                graph_manager.phase = core_types.RunPhase.TRAIN
                graph_manager.train()
                graph_manager.phase = core_types.RunPhase.UNDEFINED

                if graph_manager.agent_params.algorithm.distributed_coach_synchronization_type == DistributedCoachSynchronizationType.SYNC:
                    graph_manager.save_checkpoint()
                else:
                    graph_manager.occasionally_save_checkpoint()

            if door_man.terminate_now:
                print("Received SIGTERM. Checkpointing before exiting.")
                graph_manager.save_checkpoint()
                break

    except Exception as e:
        raise RuntimeError("An error occurred while training: %s" % e)
    finally:
        print("Terminating training worker")
        graph_manager.data_store.upload_finished_file()
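# DoorMan above is the SIGTERM trap that lets the training loop checkpoint before the managed
# job is torn down: SageMaker/RoboMaker send SIGTERM ahead of termination, and the loop polls
# terminate_now once per iteration. The class below is a minimal sketch of that pattern (the
# name DoorManSketch is made up); it is not the project's actual DoorMan implementation.
import signal


class DoorManSketch(object):
    """Flip a flag when SIGTERM arrives so the main loop can exit cleanly."""

    def __init__(self):
        self.terminate_now = False
        # Register the handler; from then on SIGTERM only sets the flag.
        signal.signal(signal.SIGTERM, self._exit_gracefully)

    def _exit_gracefully(self, signum, frame):
        self.terminate_now = True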
def main():
    screen.set_use_colors(False)
    logger.info("src/training_worker.py - starting main")

    parser = argparse.ArgumentParser()
    parser.add_argument('-pk', '--preset_s3_key',
                        help="(string) Name of a preset to download from S3",
                        type=str,
                        required=False)
    parser.add_argument('-ek', '--environment_s3_key',
                        help="(string) Name of an environment file to download from S3",
                        type=str,
                        required=False)
    parser.add_argument('--model_metadata_s3_key',
                        help="(string) Model Metadata File S3 Key",
                        type=str,
                        required=False)
    parser.add_argument('-c', '--checkpoint-dir',
                        help='(string) Path to a folder containing a checkpoint to write the model to.',
                        type=str,
                        default='./checkpoint')
    parser.add_argument('--pretrained-checkpoint-dir',
                        help='(string) Path to a folder for downloading a pre-trained model',
                        type=str,
                        default=PRETRAINED_MODEL_DIR)
    parser.add_argument('--s3_bucket',
                        help='(string) S3 bucket',
                        type=str,
                        default=os.environ.get("SAGEMAKER_SHARED_S3_BUCKET_PATH", "gsaur-test"))
    parser.add_argument('--s3_prefix',
                        help='(string) S3 prefix',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--s3_endpoint_url',
                        help='(string) S3 endpoint URL',
                        type=str,
                        default=os.environ.get("S3_ENDPOINT_URL", None))
    parser.add_argument('--framework',
                        help='(string) tensorflow or mxnet',
                        type=str,
                        default='tensorflow')
    parser.add_argument('--pretrained_s3_bucket',
                        help='(string) S3 bucket for pre-trained model',
                        type=str)
    parser.add_argument('--pretrained_s3_prefix',
                        help='(string) S3 prefix for pre-trained model',
                        type=str,
                        default='sagemaker')
    parser.add_argument('--aws_region',
                        help='(string) AWS region',
                        type=str,
                        default=os.environ.get("AWS_REGION", "us-east-1"))

    args, _ = parser.parse_known_args()
    logger.info("S3 bucket: %s \n S3 prefix: %s \n S3 endpoint URL: %s",
                args.s3_bucket, args.s3_prefix, args.s3_endpoint_url)

    s3_client = SageS3Client(bucket=args.s3_bucket,
                             s3_prefix=args.s3_prefix,
                             aws_region=args.aws_region,
                             s3_endpoint_url=args.s3_endpoint_url)

    # Load the model metadata
    model_metadata_local_path = os.path.join(CUSTOM_FILES_PATH, 'model_metadata.json')
    utils.load_model_metadata(s3_client, args.model_metadata_s3_key, model_metadata_local_path)
    s3_client.upload_file(
        os.path.normpath("%s/model/model_metadata.json" % args.s3_prefix),
        model_metadata_local_path)
    shutil.copy2(model_metadata_local_path, SM_MODEL_OUTPUT_DIR)

    success_custom_preset = False
    if args.preset_s3_key:
        preset_local_path = "./markov/presets/preset.py"
        success_custom_preset = s3_client.download_file(s3_key=args.preset_s3_key,
                                                        local_path=preset_local_path)
        if not success_custom_preset:
            logger.info("Could not download the preset file. Using the default DeepRacer preset.")
        else:
            preset_location = "markov.presets.preset:graph_manager"
            graph_manager = short_dynamic_import(preset_location, ignore_module_case=True)
            success_custom_preset = s3_client.upload_file(
                s3_key=os.path.normpath("%s/presets/preset.py" % args.s3_prefix),
                local_path=preset_local_path)
            if success_custom_preset:
                logger.info("Using preset: %s" % args.preset_s3_key)

    if not success_custom_preset:
        params_blob = os.environ.get('SM_TRAINING_ENV', '')
        if params_blob:
            params = json.loads(params_blob)
            sm_hyperparams_dict = params["hyperparameters"]
        else:
            sm_hyperparams_dict = {}

        # Agent configuration: user-set model metadata (steering angle + speed) + agent name
        #! TODO each agent should have own config
        agent_config = {
            'model_metadata': model_metadata_local_path,
            ConfigParams.CAR_CTRL_CONFIG.value: {
                ConfigParams.LINK_NAME_LIST.value: [],
                ConfigParams.VELOCITY_LIST.value: {},
                ConfigParams.STEERING_LIST.value: {},
                ConfigParams.CHANGE_START.value: None,
                ConfigParams.ALT_DIR.value: None,
                ConfigParams.ACTION_SPACE_PATH.value: 'custom_files/model_metadata.json',
                ConfigParams.REWARD.value: None,
                ConfigParams.AGENT_NAME.value: 'racecar'
            }
        }

        agent_list = list()
        agent_list.append(create_training_agent(agent_config))

        logger.info("src/training_worker.py - calling get_graph_manager, which retrieves the agent")
        graph_manager, robomaker_hyperparams_json = get_graph_manager(
            hp_dict=sm_hyperparams_dict,
            agent_list=agent_list,
            run_phase_subject=None)
        logger.info("src/training_worker.py - got the agent")

        s3_client.upload_hyperparameters(robomaker_hyperparams_json)
        logger.info("Uploaded hyperparameters.json to S3")

        # Attach a sample collector to graph_manager only if the sample count > 0
        max_sample_count = int(sm_hyperparams_dict.get("max_sample_count", 0))
        if max_sample_count > 0:
            sample_collector = SampleCollector(
                s3_client=s3_client,
                s3_prefix=args.s3_prefix,
                max_sample_count=max_sample_count,
                sampling_frequency=int(sm_hyperparams_dict.get("sampling_frequency", 1)))
            graph_manager.sample_collector = sample_collector

    host_ip_address = utils.get_ip_from_host()
    s3_client.write_ip_config(host_ip_address)
    logger.info("Uploaded IP address information to S3: %s" % host_ip_address)

    use_pretrained_model = args.pretrained_s3_bucket and args.pretrained_s3_prefix
    # Handle backward compatibility
    _, network_type, version = parse_model_metadata(model_metadata_local_path)
    if use_pretrained_model:
        if float(version) < float(SIMAPP_VERSION) and \
           not utils.has_current_ckpnt_name(args.pretrained_s3_bucket,
                                            args.pretrained_s3_prefix,
                                            args.aws_region,
                                            args.s3_endpoint_url):
            utils.make_compatible(args.pretrained_s3_bucket,
                                  args.pretrained_s3_prefix,
                                  args.aws_region,
                                  SyncFiles.TRAINER_READY.value)
        # Select the optimal model for the starting weights
        utils.do_model_selection(s3_bucket=args.s3_bucket,
                                 s3_prefix=args.s3_prefix,
                                 region=args.aws_region,
                                 s3_endpoint_url=args.s3_endpoint_url)

        ds_params_instance_pretrained = S3BotoDataStoreParameters(
            aws_region=args.aws_region,
            bucket_names={'agent': args.pretrained_s3_bucket},
            base_checkpoint_dir=args.pretrained_checkpoint_dir,
            s3_folders={'agent': args.pretrained_s3_prefix},
            s3_endpoint_url=args.s3_endpoint_url)
        data_store_pretrained = S3BotoDataStore(ds_params_instance_pretrained,
                                                graph_manager,
                                                True)
        data_store_pretrained.load_from_store()

    memory_backend_params = DeepRacerRedisPubSubMemoryBackendParameters(
        redis_address="localhost",
        redis_port=6379,
        run_type=str(RunType.TRAINER),
        channel=args.s3_prefix,
        network_type=network_type)
    graph_manager.memory_backend_params = memory_backend_params

    ds_params_instance = S3BotoDataStoreParameters(
        aws_region=args.aws_region,
        bucket_names={'agent': args.s3_bucket},
        base_checkpoint_dir=args.checkpoint_dir,
        s3_folders={'agent': args.s3_prefix},
        s3_endpoint_url=args.s3_endpoint_url)
    graph_manager.data_store_params = ds_params_instance
    graph_manager.data_store = S3BotoDataStore(ds_params_instance, graph_manager)

    task_parameters = TaskParameters()
    task_parameters.experiment_path = SM_MODEL_OUTPUT_DIR
    task_parameters.checkpoint_save_secs = 20
    if use_pretrained_model:
        task_parameters.checkpoint_restore_path = args.pretrained_checkpoint_dir
    task_parameters.checkpoint_save_dir = args.checkpoint_dir

    # training_worker (defined above) takes as input:
    # - the graph manager (created by get_graph_manager)
    # - robomaker_hyperparams_json (returned by get_graph_manager)
    training_worker(
        graph_manager=graph_manager,
        task_parameters=task_parameters,
        user_batch_size=json.loads(robomaker_hyperparams_json)["batch_size"],
        user_episode_per_rollout=json.loads(robomaker_hyperparams_json)["num_episodes_between_training"])
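# main() above uses parse_known_args() rather than parse_args(), so that extra command-line
# flags injected by the launcher (SageMaker script mode, for instance, passes hyperparameters
# as CLI arguments the worker doesn't declare) are ignored instead of crashing argument
# parsing. A self-contained illustration of that behavior, with made-up flag names:
import argparse

_parser = argparse.ArgumentParser()
_parser.add_argument('--framework', type=str, default='tensorflow')

# parse_known_args returns (known namespace, leftover argv) instead of erroring out.
_known, _unknown = _parser.parse_known_args(['--framework', 'mxnet', '--not_mine', '1'])
assert _known.framework == 'mxnet'
assert _unknown == ['--not_mine', '1']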