def get_modular_sarsa_trainer_exporter(
    self,
    environment,
    reward_shape,
    dueling,
    categorical,
    quantile,
    use_gpu=False,
    use_all_avail_gpus=False,
    clip_grad_norm=None,
):
    """Build a (trainer, exporter) pair for a modular SARSA model.

    The trainer is constructed via the reward-boost helper; the exporter
    wraps the trainer's Q-network with a feature extractor built from the
    environment's normalization parameters and a discrete-action output
    transformer.
    """
    # SARSA parameters are needed only to know the discrete action set
    # used by the output transformer below.
    sarsa_params = self.get_sarsa_parameters(
        environment, reward_shape, dueling, categorical, quantile, clip_grad_norm
    )

    trainer = self.get_modular_sarsa_trainer_reward_boost(
        environment,
        reward_shape,
        dueling=dueling,
        categorical=categorical,
        quantile=quantile,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
        clip_grad_norm=clip_grad_norm,
    )

    exporter = DQNExporter(
        trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization
        ),
        DiscreteActionOutputTransformer(sarsa_params.actions),
    )
    return (trainer, exporter)
def main(params):
    """Train a discrete-action DQN from JSON datasets and export the result.

    Expects `params` to carry the rl/training/rainbow sections, action
    names, normalization and dataset paths, epoch count, and GPU flags.
    Returns the result of `export_trainer_and_predictor`.
    """
    # Scale the minibatch by the number of training devices before the
    # TrainingParameters object is built from it.
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    model_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=RLParameters(**params["rl"]),
        training=TrainingParameters(**params["training"]),
        rainbow=RainbowDQNParameters(**params["rainbow"]),
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = DqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        np.array(model_params.actions),
        PandasSparseToDenseProcessor(),
    )

    workflow = DqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=model_params.training.minibatch_size,
    )
    # Evaluation deliberately uses a small fixed batch size.
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = DQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization
        ),
        DiscreteActionOutputTransformer(model_params.actions),
    )
    return export_trainer_and_predictor(
        workflow.trainer, params["model_output_path"], exporter=exporter
    )  # noqa
def single_process_main(gpu_index, *args):
    """Per-process entry point for (possibly distributed) DQN training.

    `gpu_index` identifies this worker's device; `args[0]` is the params
    dict. Only the rank-0 process on node 0 exports the trained model.
    """
    params = args[0]
    action_names = params["actions"]

    # Scale the minibatch by the number of training devices before the
    # TrainingParameters object is built from it.
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    model_params = DiscreteActionModelParameters(
        actions=action_names,
        rl=RLParameters(**params["rl"]),
        training=TrainingParameters(**params["training"]),
        rainbow=RainbowDQNParameters(**params["rainbow"]),
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    # Multi-GPU runs need the process group initialized before any
    # workflow construction touches the devices.
    if params["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = DqnWorkflow(
        model_params,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    sorted_features, _ = sort_features_by_normalization(state_normalization)
    preprocess_handler = DiscreteDqnPreprocessHandler(
        action_names, PandasSparseToDenseProcessor(sorted_features)
    )

    batch_size = model_params.training.minibatch_size
    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=batch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"],
        batch_size=batch_size,
        preprocess_handler=preprocess_handler,
    )

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = DQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(state_normalization_parameters=state_normalization),
        DiscreteActionOutputTransformer(model_params.actions),
    )

    # Export once: only the first GPU of the first node writes the model.
    if int(params["node_index"]) == 0 and gpu_index == 0:
        export_trainer_and_predictor(
            workflow.trainer, params["model_output_path"], exporter=exporter
        )  # noqa