def get_critic_exporter(self, trainer, environment):
    """Build a ParametricDQNExporter wrapping the trainer's q1 network.

    The exporter pairs the critic network with a feature extractor driven by
    the environment's state/action normalization and the standard parametric
    action output transformer.
    """
    extractor = PredictorFeatureExtractor(
        state_normalization_parameters=environment.normalization,
        action_normalization_parameters=environment.normalization_action,
    )
    transformer = ParametricActionOutputTransformer()
    return ParametricDQNExporter(trainer.q1_network, extractor, transformer)
def main(params):
    """Run the single-process parametric-DQN training workflow end to end.

    Reads normalization metadata and datasets from the paths in ``params``,
    trains the network for ``params["epochs"]`` epochs while logging to
    TensorBoard, then exports the trained predictor.
    """
    # Scale the minibatch size by the number of devices used for training.
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )
    # training_parameters is kept in a local because the dataset reader
    # needs its minibatch_size below.
    training_parameters = TrainingParameters(**params["training"])
    model_params = ContinuousActionModelParameters(
        rl=RLParameters(**params["rl"]),
        training=training_parameters,
        rainbow=RainbowDQNParameters(**params["rainbow"]),
    )
    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ParametricDqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )
    workflow = ParametricDqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )
    return export_trainer_and_predictor(
        workflow.trainer, params["model_output_path"], exporter=exporter
    )  # noqa
def test_create_net(self):
    """Check ParametricActionOutputTransformer.create_net output blobs.

    Feeds an (N, 1) block of random q-values through the generated nets and
    verifies the five exported string_weighted_multi_categorical_features
    blobs (lengths/keys and values.{lengths,keys,values}).
    """
    q_value_blob = core.BlobReference("q_values")
    N = 10
    # NOTE: We add `b` prefix here to match the return type of FetchBlob
    actions = [b"Q"]
    q_values = np.random.randn(N, len(actions)).astype(np.float32)
    workspace.FeedBlob(q_value_blob, q_values)
    ot = ParametricActionOutputTransformer()
    output_record = schema.Struct(("q_value", schema.Scalar(blob=q_value_blob)))
    nets = ot.create_net(output_record)
    workspace.RunNetOnce(nets.init_net)
    workspace.RunNetOnce(nets.net)

    external_outputs = {str(b) for b in nets.net.external_outputs}

    def fetch_blob(b):
        # Every blob we inspect must also be declared as a net output.
        self.assertIn(b, external_outputs)
        return workspace.FetchBlob(b)

    feature_lengths = fetch_blob(
        "output/string_weighted_multi_categorical_features.lengths"
    )
    feature_keys = fetch_blob(
        "output/string_weighted_multi_categorical_features.keys"
    )
    values_lengths = fetch_blob(
        "output/string_weighted_multi_categorical_features.values.lengths"
    )
    values_keys = fetch_blob(
        "output/string_weighted_multi_categorical_features.values.keys"
    )
    values_values = fetch_blob(
        "output/string_weighted_multi_categorical_features.values.values"
    )

    npt.assert_array_equal(np.ones(N, dtype=np.int32), feature_lengths)
    npt.assert_array_equal(np.zeros(N, dtype=np.int64), feature_keys)
    npt.assert_array_equal([len(actions)] * N, values_lengths)
    # FIX: `np.object` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `object` is the documented replacement.
    npt.assert_array_equal(np.array(actions * N, dtype=object), values_keys)
    npt.assert_array_equal(q_values.reshape(-1), values_values)
def get_modular_sarsa_trainer_exporter(
    self, environment, parameters=None, use_gpu=False, use_all_avail_gpus=False
):
    """Construct a (trainer, exporter) pair for modular SARSA on ``environment``.

    Builds identical fully-connected q- and reward-networks from the training
    layer/activation config, optionally moves them to GPU(s), and packages
    the q-network into a ParametricDQNExporter with state/action
    preprocessors.
    """
    parameters = parameters or self.get_sarsa_parameters()

    def _make_network():
        # The q-network and the reward network share the same architecture.
        return FullyConnectedParametricDQN(
            state_dim=get_num_output_features(environment.normalization),
            action_dim=get_num_output_features(environment.normalization_action),
            sizes=parameters.training.layers[1:-1],
            activations=parameters.training.activations[:-1],
        )

    q_network = _make_network()
    reward_network = _make_network()

    if use_gpu:
        q_network = q_network.cuda()
        reward_network = reward_network.cuda()
        # NOTE(review): data-parallel wrapping is assumed to apply only on
        # the GPU path, since the original flattened text is ambiguous here.
        if use_all_avail_gpus:
            q_network = q_network.get_data_parallel_model()
            reward_network = reward_network.get_data_parallel_model()

    q_network_target = q_network.get_target_network()
    trainer = _ParametricDQNTrainer(
        q_network, q_network_target, reward_network, parameters
    )

    state_preprocessor = Preprocessor(environment.normalization, False, True)
    action_preprocessor = Preprocessor(environment.normalization_action, False, True)
    exporter = ParametricDQNExporter(
        q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        ),
        ParametricActionOutputTransformer(),
        state_preprocessor,
        action_preprocessor,
    )
    return (trainer, exporter)
def from_state_action_normalization(
    cls,
    dnn,
    state_normalization,
    action_normalization,
    state_preprocessor=None,
    action_preprocessor=None,
    **kwargs,
):
    """Alternate constructor: build an exporter from normalization params.

    Derives the feature extractor and output transformer from the given
    state/action normalization parameters; extra keyword arguments are
    forwarded to the class constructor unchanged.
    """
    extractor = PredictorFeatureExtractor(
        state_normalization_parameters=state_normalization,
        action_normalization_parameters=action_normalization,
    )
    return cls(
        dnn=dnn,
        feature_extractor=extractor,
        output_transformer=ParametricActionOutputTransformer(),
        state_preprocessor=state_preprocessor,
        action_preprocessor=action_preprocessor,
        **kwargs,
    )
def single_process_main(gpu_index, *args):
    """Per-process entry point for (optionally distributed) parametric-DQN training.

    ``gpu_index`` identifies this process's device; ``args[0]`` carries the
    full parameter dict. Only the process with node_index 0 and gpu_index 0
    exports the trained model.
    """
    params = args[0]
    # Scale the minibatch size by the number of devices used for training.
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )
    # training_parameters is kept in a local because the dataset reader
    # needs its minibatch_size below.
    training_parameters = TrainingParameters(**params["training"])
    model_params = ContinuousActionModelParameters(
        rl=RLParameters(**params["rl"]),
        training=training_parameters,
        rainbow=RainbowDQNParameters(**params["rainbow"]),
    )
    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(params["action_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if params["use_all_avail_gpus"]:
        # Join the multi-process training group before building the workflow.
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = ParametricDqnWorkflow(
        model_params,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    state_sorted_features, _ = sort_features_by_normalization(state_normalization)
    action_sorted_features, _ = sort_features_by_normalization(action_normalization)
    preprocess_handler = ParametricDqnPreprocessHandler(
        PandasSparseToDenseProcessor(state_sorted_features),
        PandasSparseToDenseProcessor(action_sorted_features),
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"], batch_size=16, preprocess_handler=preprocess_handler
    )

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization,
            action_normalization_parameters=action_normalization,
        ),
        ParametricActionOutputTransformer(),
    )
    # Only the rank-0 process on node 0 writes the exported model.
    if int(params["node_index"]) == 0 and gpu_index == 0:
        export_trainer_and_predictor(
            workflow.trainer, params["model_output_path"], exporter=exporter
        )  # noqa