def main(params):
    # Set minibatch size based on # of devices being used to train
    params["shared_training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = DDPGTrainingParameters(**params["shared_training"])
    actor_parameters = DDPGNetworkParameters(**params["actor_training"])
    critic_parameters = DDPGNetworkParameters(**params["critic_training"])

    model_params = DDPGModelParameters(
        rl=rl_parameters,
        shared_training=training_parameters,
        actor_training=actor_parameters,
        critic_training=critic_parameters,
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ContinuousPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )

    workflow = ContinuousWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))
    return export_trainer_and_predictor(
        workflow.trainer,
        params["model_output_path"],
        exporter=_get_actor_exporter(
            trainer=workflow.trainer,
            state_normalization=state_normalization,
            action_normalization=action_normalization,
        ),
    )  # noqa

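# Illustrative only: a minimal, hypothetical params dict that the DDPG main()
# above could consume. Top-level keys mirror the ones the function reads; all
# paths are placeholders, and the nested dicts are shown empty (apart from
# "minibatch_size") because their fields are defined by RLParameters,
# DDPGTrainingParameters, and DDPGNetworkParameters respectively.
example_ddpg_params = {
    "use_gpu": True,
    "use_all_avail_gpus": False,
    "epochs": 10,
    "rl": {},  # fields of RLParameters
    "shared_training": {"minibatch_size": 1024},  # fields of DDPGTrainingParameters
    "actor_training": {},  # fields of DDPGNetworkParameters
    "critic_training": {},  # fields of DDPGNetworkParameters
    "state_norm_data_path": "/path/to/state_norm.json",
    "action_norm_data_path": "/path/to/action_norm.json",
    "training_data_path": "/path/to/train.json",
    "eval_data_path": "/path/to/eval.json",
    "model_output_path": "/path/to/output_dir",
}
# main(example_ddpg_params)
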
def main(params):
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = ContinuousActionModelParameters(
        rl=rl_parameters, training=training_parameters, rainbow=rainbow_parameters
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ParametricDqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )

    workflow = ParametricDqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )
    return export_trainer_and_predictor(
        workflow.trainer, params["model_output_path"], exporter=exporter
    )  # noqa

def single_process_main(gpu_index, *args):
    params = args[0]
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    action_names = params["actions"]

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = DiscreteActionModelParameters(
        actions=action_names,
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    # Initialize the distributed process group when training across all available GPUs
    if params["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = DqnWorkflow(
        model_params,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    # Convert sparse rows to dense tensors, ordered by the normalization metadata
    sorted_features, _ = sort_features_by_normalization(state_normalization)
    preprocess_handler = DiscreteDqnPreprocessHandler(
        action_names, PandasSparseToDenseProcessor(sorted_features)
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = DQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(state_normalization_parameters=state_normalization),
        DiscreteActionOutputTransformer(model_params.actions),
    )

    # Only the first process on the first node exports the trained model
    if int(params["node_index"]) == 0 and gpu_index == 0:
        export_trainer_and_predictor(
            workflow.trainer, params["model_output_path"], exporter=exporter
        )  # noqa

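# Illustrative only: a hypothetical params dict for the discrete-action
# single_process_main() above, with each training process invoked as
# single_process_main(gpu_index, params). Nested dicts ("rl", "training",
# "rainbow") take whatever fields RLParameters, TrainingParameters, and
# RainbowDQNParameters accept; paths, action names, and the init_method
# address are placeholders.
example_dqn_params = {
    "use_gpu": True,
    "use_all_avail_gpus": True,
    "epochs": 5,
    "actions": ["action_A", "action_B"],
    "rl": {},  # fields of RLParameters
    "training": {"minibatch_size": 1024},  # fields of TrainingParameters
    "rainbow": {},  # fields of RainbowDQNParameters
    "state_norm_data_path": "/path/to/state_norm.json",
    "training_data_path": "/path/to/train.json",
    "eval_data_path": "/path/to/eval.json",
    "model_output_path": "/path/to/output_dir",
    "num_processes_per_node": 2,
    "num_nodes": 1,
    "node_index": 0,
    "init_method": "tcp://127.0.0.1:23456",  # placeholder distributed init address
}
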
def single_process_main(gpu_index, *args):
    params = args[0]
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    rl_parameters = from_json(params["rl"], RLParameters)
    training_parameters = from_json(params["training"], TrainingParameters)
    rainbow_parameters = from_json(params["rainbow"], RainbowDQNParameters)

    model_params = ContinuousActionModelParameters(
        rl=rl_parameters, training=training_parameters, rainbow=rainbow_parameters
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    # Initialize the distributed process group when training across all available GPUs
    if params["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = ParametricDqnWorkflow(
        model_params,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    # Densify state and action features in normalization order
    state_sorted_features, _ = sort_features_by_normalization(state_normalization)
    action_sorted_features, _ = sort_features_by_normalization(action_normalization)
    preprocess_handler = ParametricDqnPreprocessHandler(
        PandasSparseToDenseProcessor(state_sorted_features),
        PandasSparseToDenseProcessor(action_sorted_features),
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"], batch_size=16, preprocess_handler=preprocess_handler
    )

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    # Only the first process on the first node saves the trained models
    if int(params["node_index"]) == 0 and gpu_index == 0:
        workflow.save_models(params["model_output_path"])