def test_create_net(self):
    q_value_blob = core.BlobReference("q_values")
    N = 10
    # NOTE: We add `b` prefix here to match the return type of FetchBlob
    actions = [b"yes", b"no"]
    q_values = np.random.randn(N, len(actions)).astype(np.float32)
    workspace.FeedBlob(q_value_blob, q_values)
    ot = DiscreteActionOutputTransformer(actions)
    output_record = schema.Struct(("q_values", schema.Scalar(blob=q_value_blob)))
    nets = ot.create_net(output_record)
    workspace.RunNetOnce(nets.init_net)
    workspace.RunNetOnce(nets.net)

    external_outputs = {str(b) for b in nets.net.external_outputs}

    def fetch_blob(b):
        self.assertIn(b, external_outputs)
        return workspace.FetchBlob(b)

    feature_lengths = fetch_blob(
        "output/string_weighted_multi_categorical_features.lengths"
    )
    feature_keys = fetch_blob(
        "output/string_weighted_multi_categorical_features.keys"
    )
    values_lengths = fetch_blob(
        "output/string_weighted_multi_categorical_features.values.lengths"
    )
    values_keys = fetch_blob(
        "output/string_weighted_multi_categorical_features.values.keys"
    )
    values_values = fetch_blob(
        "output/string_weighted_multi_categorical_features.values.values"
    )
    action_lengths = fetch_blob("output/string_single_categorical_features.lengths")
    action_keys = fetch_blob("output/string_single_categorical_features.keys")
    action_values = fetch_blob("output/string_single_categorical_features.values")

    npt.assert_array_equal(np.ones(N, dtype=np.int32), feature_lengths)
    npt.assert_array_equal(np.zeros(N, dtype=np.int64), feature_keys)
    npt.assert_array_equal([len(actions)] * N, values_lengths)
    npt.assert_array_equal(np.array(actions * N, dtype=object), values_keys)
    npt.assert_array_equal(q_values.reshape(-1), values_values)
    npt.assert_array_equal([len(actions)] * N, action_lengths)
    npt.assert_array_equal(list(range(len(actions))) * N, action_keys)

    # We can only assert max-Q policy
    max_q_actions = action_values.reshape(-1, len(actions))[:, 0]
    npt.assert_array_equal(
        [actions[i] for i in np.argmax(q_values, axis=1)], max_q_actions
    )
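
# A minimal standalone sketch (pure numpy, no caffe2 workspace needed) of the
# max-Q assertion above: the transformer is expected to emit the per-row best
# action first, so column 0 of the reshaped action output must agree with
# argmax over the raw q_values. The values below are illustrative only.
import numpy as np

q = np.array([[0.1, 0.9], [0.7, 0.2]], dtype=np.float32)
acts = [b"yes", b"no"]
best = [acts[i] for i in np.argmax(q, axis=1)]
assert best == [b"no", b"yes"]  # row 0 prefers "no", row 1 prefers "yes"
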
def get_modular_sarsa_trainer_exporter(
    self,
    environment,
    reward_shape,
    dueling,
    use_gpu=False,
    use_all_avail_gpus=False,
    clip_grad_norm=None,
):
    parameters = self.get_sarsa_parameters(
        environment, reward_shape, dueling, clip_grad_norm
    )
    trainer = self.get_modular_sarsa_trainer_reward_boost(
        environment,
        reward_shape,
        dueling,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
        clip_grad_norm=clip_grad_norm,
    )
    feature_extractor = PredictorFeatureExtractor(
        state_normalization_parameters=environment.normalization
    )
    output_transformer = DiscreteActionOutputTransformer(parameters.actions)
    exporter = DQNExporter(trainer.q_network, feature_extractor, output_transformer)
    return (trainer, exporter)
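
# Hypothetical call site for the helper above, as it might appear in one of
# the gridworld unit tests. `Gridworld`, the reward_shape/dueling values, and
# the `export()` call are illustrative assumptions, not code from this file.
def test_modular_sarsa_trainer_export(self):
    environment = Gridworld()
    trainer, exporter = self.get_modular_sarsa_trainer_exporter(
        environment, reward_shape={}, dueling=False
    )
    predictor = exporter.export()  # assumed exporter API; adjust to the real one
    self.assertIsNotNone(predictor)
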
def main(params):
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = DiscreteActionModelParameters(
        actions=params["actions"],
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )
    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = DqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        np.array(model_params.actions),
        PandasSparseToDenseProcessor(),
    )

    workflow = DqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = DQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_normalization
        ),
        DiscreteActionOutputTransformer(model_params.actions),
    )
    return export_trainer_and_predictor(
        workflow.trainer, params["model_output_path"], exporter=exporter
    )  # noqa
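
# A sketch of the params dict `main` consumes, reconstructed from the keys it
# reads above. The nested values and paths are placeholders, not validated
# defaults; the authoritative config schema lives with the workflow entry
# point.
example_params = {
    "use_gpu": False,
    "use_all_avail_gpus": False,
    "actions": ["action_A", "action_B"],  # hypothetical action names
    "rl": {},  # kwargs forwarded to RLParameters
    "training": {"minibatch_size": 1024},  # kwargs forwarded to TrainingParameters
    "rainbow": {},  # kwargs forwarded to RainbowDQNParameters
    "state_norm_data_path": "/path/to/state_norm.json",
    "training_data_path": "/path/to/training_data.json",
    "eval_data_path": "/path/to/eval_data.json",
    "model_output_path": "/path/to/model_output",
    "epochs": 10,
}
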
def single_process_main(gpu_index, *args):
    params = args[0]
    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )

    action_names = params["actions"]

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])

    model_params = DiscreteActionModelParameters(
        actions=action_names,
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )
    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    if params["use_all_avail_gpus"]:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = DqnWorkflow(
        model_params,
        state_normalization,
        params["use_gpu"],
        params["use_all_avail_gpus"],
    )

    sorted_features, _ = sort_features_by_normalization(state_normalization)
    preprocess_handler = DiscreteDqnPreprocessHandler(
        action_names, PandasSparseToDenseProcessor(sorted_features)
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    exporter = DQNExporter(
        workflow.trainer.q_network,
        PredictorFeatureExtractor(state_normalization_parameters=state_normalization),
        DiscreteActionOutputTransformer(model_params.actions),
    )
    if int(params["node_index"]) == 0 and gpu_index == 0:
        export_trainer_and_predictor(
            workflow.trainer, params["model_output_path"], exporter=exporter
        )  # noqa
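
# A hedged sketch of one way to launch `single_process_main` once per local
# GPU. `torch.multiprocessing.spawn` calls the target as fn(i, *args), so the
# process index arrives as gpu_index. The `multi_gpu_main` wrapper is an
# assumption for illustration; the workflow's actual launcher may differ.
import torch.multiprocessing as mp


def multi_gpu_main(params):
    if params["use_all_avail_gpus"]:
        mp.spawn(
            single_process_main,
            args=(params,),
            nprocs=int(params["num_processes_per_node"]),
        )
    else:
        single_process_main(0, params)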