def test_parametric_wrapper(self):
    """The parametric predictor wrapper must return the same Q-value as
    running the preprocessors and the raw network by hand."""
    state_norm = {i: _cont_norm() for i in range(1, 5)}
    action_norm = {i: _cont_norm() for i in range(5, 9)}
    state_preprocessor = Preprocessor(state_norm, False)
    action_preprocessor = Preprocessor(action_norm, False)

    dqn = FullyConnectedParametricDQN(
        state_dim=len(state_norm),
        action_dim=len(action_norm),
        sizes=[16],
        activations=["relu"],
    )
    dqn_with_preprocessor = ParametricDqnWithPreprocessor(
        dqn,
        state_preprocessor=state_preprocessor,
        action_preprocessor=action_preprocessor,
    )
    wrapper = ParametricDqnPredictorWrapper(dqn_with_preprocessor)

    # Run the wrapper on the prototype input it advertises.
    input_prototype = dqn_with_preprocessor.input_prototype()
    output_action_names, q_value = wrapper(*input_prototype)
    self.assertEqual(output_action_names, ["Q"])
    self.assertEqual(q_value.shape, (1, 1))

    # Recompute the Q-value without the wrapper and compare exactly.
    preprocessed_state = state_preprocessor(*input_prototype[0])
    preprocessed_action = action_preprocessor(*input_prototype[1])
    network_input = rlt.PreprocessedStateAction.from_tensors(
        state=preprocessed_state, action=preprocessed_action
    )
    expected_output = dqn(network_input).q_value
    self.assertTrue((expected_output == q_value).all())
def build_serving_module(
    self,
    actor: ModelBase,
    state_normalization_data: NormalizationData,
    action_normalization_data: NormalizationData,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module

    Both normalization data objects must carry dense normalization
    parameters; this is enforced with assertions.
    """
    state_norm_params = state_normalization_data.dense_normalization_parameters
    action_norm_params = action_normalization_data.dense_normalization_parameters
    assert state_norm_params is not None
    assert action_norm_params is not None

    # The serving module always runs on CPU, in eval mode.
    actor_with_preprocessor = ActorWithPreprocessor(
        actor.cpu_model().eval(),
        Preprocessor(state_norm_params, use_gpu=False),
        Postprocessor(action_norm_params, use_gpu=False),
    )
    # A throwaway action Preprocessor is built only to read its feature order.
    action_preprocessor = Preprocessor(action_norm_params, use_gpu=False)
    return ActorPredictorWrapper(
        actor_with_preprocessor, action_preprocessor.sorted_features
    )
def test_preprocessing_network(self):
    """Compare per-feature Preprocessor output against self.preprocess()."""
    feature_value_map = read_data()

    normalization_parameters = {}
    name_preprocessed_blob_map = {}
    for feature_name, feature_values in feature_value_map.items():
        feature_norm = normalization.identify_parameter(
            feature_values, feature_type=self._feature_type_override(feature_name)
        )
        normalization_parameters[feature_name] = feature_norm
        # Overwrite one entry with MISSING_VALUE (done after the parameters
        # were identified, so it does not influence them).
        feature_values[0] = MISSING_VALUE

        preprocessor = Preprocessor({feature_name: feature_norm}, False)
        preprocessor.clamp = False
        column = np.expand_dims(feature_values, -1)
        name_preprocessed_blob_map[feature_name] = preprocessor.forward(
            column
        ).numpy()

    test_features = self.preprocess(feature_value_map, normalization_parameters)

    for feature_name in feature_value_map:
        normalized_features = name_preprocessed_blob_map[feature_name]
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)

        # At the limit, boxcox has some numerical instability
        tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01

        actual = normalized_features.flatten()
        expected = test_features[feature_name].flatten()
        matches = np.isclose(actual, expected, rtol=tolerance, atol=tolerance)
        non_matching = np.where(np.logical_not(matches))
        self.assertTrue(
            np.all(matches),
            "{} does not match: {} \n!=\n {}".format(
                feature_name,
                normalized_features.flatten()[non_matching],
                test_features[feature_name].flatten()[non_matching],
            ),
        )
def main(params):
    """Train a DDPG model as described by ``params`` and export it.

    ``params`` is mutated: the shared-training minibatch size is scaled by
    the device multiplier. Returns whatever export_trainer_and_predictor
    returns.
    """
    use_gpu = params["use_gpu"]
    use_all_avail_gpus = params["use_all_avail_gpus"]

    # Set minibatch size based on # of devices being used to train
    params["shared_training"]["minibatch_size"] *= minibatch_size_multiplier(
        use_gpu, use_all_avail_gpus
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = DDPGTrainingParameters(**params["shared_training"])
    model_params = DDPGModelParameters(
        rl=rl_parameters,
        shared_training=training_parameters,
        actor_training=DDPGNetworkParameters(**params["actor_training"]),
        critic_training=DDPGNetworkParameters(**params["critic_training"]),
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(
        params["action_norm_data_path"]
    )

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ContinuousPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )
    workflow = ContinuousWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        use_gpu,
        use_all_avail_gpus,
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    actor_exporter = _get_actor_exporter(
        trainer=workflow.trainer,
        state_normalization=state_normalization,
        action_normalization=action_normalization,
    )
    return export_trainer_and_predictor(
        workflow.trainer, params["model_output_path"], exporter=actor_exporter
    )  # noqa
def main(params):
    """Train a parametric DQN model as described by ``params`` and export it.

    ``params`` is mutated: the training minibatch size is scaled by the
    device multiplier. Returns whatever export_trainer_and_predictor returns.
    """
    use_gpu = params["use_gpu"]
    use_all_avail_gpus = params["use_all_avail_gpus"]

    # Set minibatch size based on # of devices being used to train
    params["training"]["minibatch_size"] *= minibatch_size_multiplier(
        use_gpu, use_all_avail_gpus
    )

    rl_parameters = RLParameters(**params["rl"])
    training_parameters = TrainingParameters(**params["training"])
    rainbow_parameters = RainbowDQNParameters(**params["rainbow"])
    model_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(
        params["action_norm_data_path"]
    )

    writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    preprocess_handler = ParametricDqnPreprocessHandler(
        Preprocessor(state_normalization, False),
        Preprocessor(action_normalization, False),
        PandasSparseToDenseProcessor(),
    )
    workflow = ParametricDqnWorkflow(
        model_params,
        preprocess_handler,
        state_normalization,
        action_normalization,
        use_gpu,
        use_all_avail_gpus,
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"], batch_size=training_parameters.minibatch_size
    )
    eval_dataset = JSONDatasetReader(params["eval_data_path"], batch_size=16)

    with summary_writer_context(writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    feature_extractor = PredictorFeatureExtractor(
        state_normalization_parameters=state_normalization,
        action_normalization_parameters=action_normalization,
    )
    exporter = ParametricDQNExporter(
        workflow.trainer.q_network,
        feature_extractor,
        ParametricActionOutputTransformer(),
    )
    return export_trainer_and_predictor(
        workflow.trainer, params["model_output_path"], exporter=exporter
    )  # noqa
def test_preprocessing_network_onnx(self):
    """For every feature, the Caffe2 net converted from the Preprocessor
    must reproduce the PyTorch Preprocessor output within 1e-4."""
    tolerance = 0.0001
    feature_value_map = read_data()

    for feature_name, feature_values in feature_value_map.items():
        feature_norm = normalization.identify_parameter(
            feature_name,
            feature_values,
            feature_type=self._feature_type_override(feature_name),
        )
        # Overwrite one entry with MISSING_VALUE (after the parameters
        # were identified).
        feature_values[0] = MISSING_VALUE

        feature_values_matrix = np.expand_dims(feature_values, -1)
        preprocessor = Preprocessor({feature_name: feature_norm}, False)
        normalized_feature_values = preprocessor.forward(feature_values_matrix)

        # Convert the module and run the resulting init/predict nets.
        (
            input_blob,
            output_blob,
            netdef,
        ) = PytorchCaffe2Converter.pytorch_net_to_caffe2_netdef(
            preprocessor, 1, False, float_input=True
        )
        preproc_workspace = netdef.workspace
        preproc_workspace.FeedBlob(input_blob, feature_values_matrix)
        preproc_workspace.RunNetOnce(core.Net(netdef.init_net))
        preproc_workspace.RunNetOnce(core.Net(netdef.predict_net))
        normalized_feature_values_onnx = netdef.workspace.FetchBlob(output_blob)

        matches = np.isclose(
            normalized_feature_values,
            normalized_feature_values_onnx,
            rtol=tolerance,
            atol=tolerance,
        )
        non_matching = np.where(np.logical_not(matches))
        self.assertTrue(
            np.all(matches),
            "{} does not match: {} \n!=\n {}".format(
                feature_name,
                normalized_feature_values[non_matching].tolist()[0:10],
                normalized_feature_values_onnx[non_matching].tolist()[0:10],
            ),
        )
def test_preprocessing_network(self):
    """Compare per-feature Preprocessor output against self.preprocess()."""
    # The first element returned by read_data() is not needed here.
    _, feature_value_map = preprocessing_util.read_data()

    normalization_parameters = {}
    name_preprocessed_blob_map = {}
    for feature_name, feature_values in feature_value_map.items():
        feature_norm = normalization.identify_parameter(feature_values)
        normalization_parameters[feature_name] = feature_norm

        preprocessor = Preprocessor({feature_name: feature_norm}, False)
        preprocessor.clamp = False
        column = np.expand_dims(feature_values, -1)
        name_preprocessed_blob_map[feature_name] = preprocessor.forward(
            column
        ).numpy()

    test_features = self.preprocess(feature_value_map, normalization_parameters)

    for feature_name in feature_value_map:
        normalized_features = name_preprocessed_blob_map[feature_name]
        if feature_name != identify_types.ENUM:
            normalized_features = np.squeeze(normalized_features, -1)

        # At the limit, boxcox has some numerical instability
        tolerance = 0.5 if feature_name == BOXCOX else 0.01

        expected = test_features[feature_name]
        matches = np.isclose(
            normalized_features, expected, rtol=tolerance, atol=tolerance
        )
        non_matching = np.where(np.logical_not(matches))
        self.assertTrue(
            np.all(matches),
            "{} does not match: {} {}".format(
                feature_name,
                normalized_features[non_matching].tolist()[0:10],
                test_features[feature_name][non_matching].tolist()[0:10],
            ),
        )
def get_predictor(self, trainer, environment):
    """Build a parametric DQN torch predictor from
    ``self.current_predictor_network`` and the environment's state/action
    normalization. ``trainer`` is accepted but not used here."""
    cpu_network = self.current_predictor_network.cpu_model().eval()
    dqn_with_preprocessor = ParametricDqnWithPreprocessor(
        cpu_network,
        Preprocessor(environment.normalization, False),
        Preprocessor(environment.normalization_action, False),
    )
    serving_module = ParametricDqnPredictorWrapper(
        dqn_with_preprocessor=dqn_with_preprocessor
    )
    return ParametricDqnTorchPredictor(serving_module)
def get_critic_exporter(self, trainer, environment):
    """Return a ParametricDQNExporter for the trainer's ``q1_network``,
    configured with the environment's state and action normalization."""
    state_norm = environment.normalization
    action_norm = environment.normalization_action
    return ParametricDQNExporter(
        trainer.q1_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=state_norm,
            action_normalization_parameters=action_norm,
        ),
        ParametricActionOutputTransformer(),
        Preprocessor(state_norm, False, True),
        Preprocessor(action_norm, False, True),
    )
def test_discrete_wrapper(self):
    """The discrete predictor wrapper must return the configured action
    names and the same Q-values as the raw network on preprocessed input."""
    action_names = ["L", "R"]
    action_dim = 2
    state_norm = {i: _cont_norm() for i in range(1, 5)}
    state_preprocessor = Preprocessor(state_norm, False)

    dqn = FullyConnectedDQN(
        state_dim=len(state_norm),
        action_dim=action_dim,
        sizes=[16],
        activations=["relu"],
    )
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(dqn, state_preprocessor)
    wrapper = DiscreteDqnPredictorWrapper(dqn_with_preprocessor, action_names)

    input_prototype = dqn_with_preprocessor.input_prototype()
    output_action_names, q_values = wrapper(*input_prototype)
    self.assertEqual(action_names, output_action_names)
    self.assertEqual(q_values.shape, (1, 2))

    # Recompute the Q-values without the wrapper and compare exactly.
    preprocessed_state = state_preprocessor(*input_prototype[0])
    network_input = rlt.PreprocessedState.from_tensor(preprocessed_state)
    expected_output = dqn(network_input).q_values
    self.assertTrue((expected_output == q_values).all())
def __init__(
    self,
    model_params: DiscreteActionModelParameters,
    state_normalization: Dict[int, NormalizationParameters],
    use_gpu: bool,
    use_all_avail_gpus: bool,
):
    """Set up the DQN workflow: trainer, evaluator and batch preprocessor.

    Args:
        model_params: Discrete-action model configuration; its ``actions``,
            ``rl.gamma`` and ``training.minibatch_size`` are read here.
        state_normalization: Normalization parameters keyed by feature id.
        use_gpu: Forwarded to trainer creation and to the state Preprocessor.
        use_all_avail_gpus: Forwarded to trainer creation.

    Raises:
        AssertionError: If warm-starting yields a trainer whose type is not
            exactly ``DQNTrainer``.
    """
    logger.info("Running DQN workflow with params:")
    logger.info(model_params)

    trainer = create_dqn_trainer_from_params(
        model_params,
        state_normalization,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    trainer = update_model_for_warm_start(trainer)
    # Exact-type check on purpose: subclasses are rejected, as before.
    assert type(trainer) is DQNTrainer, "Warm started wrong model type: " + str(
        type(trainer)
    )

    evaluator = Evaluator(
        model_params.actions,
        model_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )

    super().__init__(
        DiscreteDqnBatchPreprocessor(Preprocessor(state_normalization, use_gpu)),
        trainer,
        evaluator,
        model_params.training.minibatch_size,
    )
def test_actor_wrapper(self):
    """The actor predictor wrapper must return the same action as running
    preprocessor, actor and postprocessor by hand."""
    state_norm = {i: _cont_norm() for i in range(1, 5)}
    action_norm = {i: _cont_action_norm() for i in range(101, 105)}
    state_preprocessor = Preprocessor(state_norm, False)
    postprocessor = Postprocessor(action_norm, False)

    # Test with FullyConnectedActor to make behavior deterministic
    actor = FullyConnectedActor(
        state_dim=len(state_norm),
        action_dim=len(action_norm),
        sizes=[16],
        activations=["relu"],
    )
    actor_with_preprocessor = ActorWithPreprocessor(
        actor, state_preprocessor, postprocessor
    )
    wrapper = ActorPredictorWrapper(actor_with_preprocessor)

    input_prototype = actor_with_preprocessor.input_prototype()
    action = wrapper(*input_prototype)
    self.assertEqual(action.shape, (1, len(action_norm)))

    # Recompute the action without the wrapper and compare exactly.
    preprocessed_state = state_preprocessor(*input_prototype[0])
    actor_output = actor(rlt.PreprocessedState.from_tensor(preprocessed_state))
    expected_output = postprocessor(actor_output.action)
    self.assertTrue((expected_output == action).all())
def get_modular_sarsa_trainer_exporter(
    self, environment, parameters=None, use_gpu=False, use_all_avail_gpus=False
):
    """Build a parametric DQN trainer and its exporter for ``environment``.

    A falsy ``parameters`` is replaced with ``self.get_sarsa_parameters()``.
    Returns a ``(trainer, exporter)`` tuple.
    """
    parameters = parameters or self.get_sarsa_parameters()

    def make_network():
        # The Q-network and the reward network share the same architecture.
        return FullyConnectedParametricDQN(
            state_dim=get_num_output_features(environment.normalization),
            action_dim=get_num_output_features(environment.normalization_action),
            sizes=parameters.training.layers[1:-1],
            activations=parameters.training.activations[:-1],
        )

    q_network = make_network()
    reward_network = make_network()

    if use_gpu:
        q_network = q_network.cuda()
        reward_network = reward_network.cuda()
        if use_all_avail_gpus:
            q_network = q_network.get_data_parallel_model()
            reward_network = reward_network.get_data_parallel_model()

    q_network_target = q_network.get_target_network()
    trainer = _ParametricDQNTrainer(
        q_network, q_network_target, reward_network, parameters
    )

    state_preprocessor = Preprocessor(environment.normalization, False, True)
    action_preprocessor = Preprocessor(environment.normalization_action, False, True)
    exporter = ParametricDQNExporter(
        q_network,
        PredictorFeatureExtractor(
            state_normalization_parameters=environment.normalization,
            action_normalization_parameters=environment.normalization_action,
        ),
        ParametricActionOutputTransformer(),
        state_preprocessor,
        action_preprocessor,
    )
    return (trainer, exporter)
def __init__(
    self,
    state_preprocessor: Preprocessor,
    value_network: Optional[nn.Module],
    action_names: List[str],
) -> None:
    """Trace the state preprocessor and the value network, and store the
    action names as a TorchScript attribute."""
    super().__init__()
    self.state_sorted_features_t = state_preprocessor.sorted_features

    # Trace the preprocessor on its own prototype input.
    self.state_preprocessor = torch.jit.trace(
        state_preprocessor, (state_preprocessor.input_prototype())
    )

    # The traced preprocessor's output is the example input for tracing
    # the value network.
    preprocessed_prototype = self.state_preprocessor(
        *state_preprocessor.input_prototype()
    )
    self.value_network = torch.jit.trace(value_network, preprocessed_prototype)

    self.action_names = torch.jit.Attribute(action_names, List[str])
def build_serving_module(
    self,
    q_network: ModelBase,
    state_normalization_parameters: Dict[int, NormalizationParameters],
    action_normalization_parameters: Dict[int, NormalizationParameters],
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module

    The Q-network is moved to CPU and put in eval mode before wrapping.
    """
    dqn_with_preprocessor = ParametricDqnWithPreprocessor(
        q_network.cpu_model().eval(),
        Preprocessor(state_normalization_parameters, False),
        Preprocessor(action_normalization_parameters, False),
    )
    return ParametricDqnPredictorWrapper(dqn_with_preprocessor=dqn_with_preprocessor)
def save_models(self, path: str):
    """Save the trainer and a TorchScript serving module under ``path``.

    Two files are written, both named with the same export timestamp:
    ``trainer_<ts>.pt`` and ``model_<ts>.torchscript``.

    Args:
        path: Output directory. A leading ``~`` is expanded, and the
            expanded directory is used for BOTH files (previously the
            TorchScript file was joined onto the unexpanded ``path``).
    """
    export_time = round(time.time())
    output_path = os.path.expanduser(path)
    pytorch_output_path = os.path.join(
        output_path, "trainer_{}.pt".format(export_time)
    )
    torchscript_output_path = os.path.join(
        output_path, "model_{}.torchscript".format(export_time)
    )

    state_preprocessor = Preprocessor(self.state_normalization, False)
    action_preprocessor = Preprocessor(self.action_normalization, False)
    q_network = self.trainer.q_network
    # The serving module wraps a CPU copy of the Q-network in eval mode.
    dqn_with_preprocessor = ParametricDqnWithPreprocessor(
        q_network.cpu_model().eval(), state_preprocessor, action_preprocessor
    )
    serving_module = ParametricDqnPredictorWrapper(
        dqn_with_preprocessor=dqn_with_preprocessor
    )

    logger.info("Saving PyTorch trainer to {}".format(pytorch_output_path))
    save_model_to_file(self.trainer, pytorch_output_path)
    self.save_torchscript_model(serving_module, torchscript_output_path)
def test_normalize_dense_matrix_enum(self):
    """ENUM features are one-hot encoded (all zeros when missing) and the
    CONTINUOUS feature passes through; clamping is disabled."""
    enum_1 = NormalizationParameters(
        identify_types.ENUM, None, None, None, None, [12, 4, 2], None, None, None
    )
    continuous_2 = NormalizationParameters(
        identify_types.CONTINUOUS, None, 0, 0, 1, None, None, None, None
    )
    enum_3 = NormalizationParameters(
        identify_types.ENUM, None, None, None, None, [15, 3], None, None, None
    )
    normalization_parameters = {1: enum_1, 2: continuous_2, 3: enum_3}

    preprocessor = Preprocessor(normalization_parameters, False)
    preprocessor.clamp = False

    feature_ids = [2, 1, 3]  # Sorted according to feature type
    raw_columns = {
        1: [12, 4, 2, 2],
        2: [1.0, 2.0, 3.0, 3.0],
        3: [15, 3, 15, normalization.MISSING_VALUE],
    }
    inputs = np.zeros([4, 3], dtype=np.float32)
    for feature_id, values in raw_columns.items():
        inputs[:, feature_ids.index(feature_id)] = values

    normalized_feature_matrix = preprocessor.forward(inputs)

    expected = np.array(
        [
            [1.0, 1, 0, 0, 1, 0],
            [2.0, 0, 1, 0, 0, 1],
            [3.0, 0, 0, 1, 1, 0],
            [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
        ]
    )
    np.testing.assert_allclose(expected, normalized_feature_matrix)
def test_quantile_boundary_logic(self):
    """Test quantile logic when feature value == quantile boundary."""
    boundary_values = torch.tensor([[0.0], [80.0], [100.0]])
    norm_params = NormalizationParameters(
        feature_type="QUANTILE",
        boxcox_lambda=None,
        boxcox_shift=None,
        mean=0,
        stddev=1,
        possible_values=None,
        quantiles=[0.0, 80.0, 100.0],
        min_value=0.0,
        max_value=100.0,
    )
    preprocessor = Preprocessor({1: norm_params}, False)

    # Call the quantile routine directly with values that sit exactly on
    # the three quantile boundaries.
    output = preprocessor._preprocess_QUANTILE(
        0, boundary_values.float(), [norm_params]
    )

    expected_output = torch.tensor([[0.0], [0.5], [1.0]])
    is_close = np.isclose(output, expected_output)
    self.assertTrue(np.all(is_close))
def get_predictor(self, trainer, environment):
    """Build a discrete DQN torch predictor from the trainer's Q-network,
    the environment's state normalization and its ACTIONS list."""
    cpu_network = trainer.q_network.cpu_model().eval()
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
        cpu_network, Preprocessor(environment.normalization, False)
    )
    serving_module = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor=dqn_with_preprocessor,
        action_names=environment.ACTIONS,
    )
    return DiscreteDqnTorchPredictor(serving_module)
def test_do_not_preprocess(self):
    """With DO_NOT_PREPROCESS features, preprocessing followed by
    postprocessing must return the input unchanged."""
    normalization_parameters = {}
    for feature_id in range(1, 5):
        normalization_parameters[feature_id] = NormalizationParameters(
            feature_type=DO_NOT_PREPROCESS
        )
    preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
    postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

    x = torch.randn(3, 4)
    presence = torch.ones_like(x, dtype=torch.uint8)  # every value present
    preprocessed = preprocessor(x, presence)
    y = postprocessor(preprocessed)
    npt.assert_allclose(x, y)
def test_normalize_dense_matrix_enum(self):
    """ENUM features are one-hot encoded (all zeros when missing) and the
    CONTINUOUS feature passes through."""
    enum_1 = NormalizationParameters(
        identify_types.ENUM, None, None, None, None, [12, 4, 2], None, None, None
    )
    continuous_2 = NormalizationParameters(
        identify_types.CONTINUOUS, None, 0, 0, 1, None, None, None, None
    )
    enum_3 = NormalizationParameters(
        identify_types.ENUM, None, None, None, None, [15, 3], None, None, None
    )
    normalization_parameters = {1: enum_1, 2: continuous_2, 3: enum_3}
    preprocessor = Preprocessor(normalization_parameters, False)

    feature_ids = [2, 1, 3]  # Sorted according to feature type
    raw_columns = {
        1: [12, 4, 2, 2],
        2: [1.0, 2.0, 3.0, 3.0],
        3: [15, 3, 15, normalization.MISSING_VALUE],
    }
    inputs = np.zeros([4, 3], dtype=np.float32)
    for feature_id, values in raw_columns.items():
        inputs[:, feature_ids.index(feature_id)] = values

    normalized_feature_matrix = preprocessor.forward(inputs)

    expected = np.array(
        [
            [1.0, 1, 0, 0, 1, 0],
            [2.0, 0, 1, 0, 0, 1],
            [3.0, 0, 0, 1, 1, 0],
            [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
        ]
    )
    np.testing.assert_allclose(expected, normalized_feature_matrix)
def test_predictor_torch_export(self):
    """Verify that q-values before model export equal q-values after
    model export. Meant to catch issues with export logic."""
    environment = Gridworld()
    samples = Samples(
        mdp_ids=["0"],
        sequence_numbers=[0],
        sequence_number_ordinals=[1],
        states=[{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 15: 1.0, 24: 1.0}],
        actions=["D"],
        action_probabilities=[0.5],
        rewards=[0],
        possible_actions=[["R", "D"]],
        next_states=[{5: 1.0}],
        next_actions=["U"],
        terminals=[False],
        possible_next_actions=[["R", "U", "D"]],
    )
    tdps = environment.preprocess_samples(samples, 1)
    assert len(tdps) == 1, "Invalid number of data pages"

    trainer, exporter = self.get_modular_sarsa_trainer_exporter(
        environment, {}, False
    )
    # Named `network_input` rather than `input` to avoid shadowing the builtin.
    network_input = rlt.PreprocessedState.from_tensor(tdps[0].states)
    pre_export_q_values = trainer.q_network(network_input).q_values.detach().numpy()

    preprocessor = Preprocessor(environment.normalization, False)
    cpu_q_network = trainer.q_network.cpu_model()
    cpu_q_network.eval()
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(cpu_q_network, preprocessor)
    serving_module = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names=environment.ACTIONS
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        buf = export_module_to_buffer(serving_module)
        tmp_path = os.path.join(tmpdirname, "model")
        # Leaving the `with` block flushes and closes the file, so no explicit
        # f.close() is needed before the model is loaded back.
        with open(tmp_path, "wb") as f:
            f.write(buf.getvalue())
        predictor = DiscreteDqnTorchPredictor(torch.jit.load(tmp_path))
        post_export_q_values = predictor.predict([samples.states[0]])

    for i, action in enumerate(environment.ACTIONS):
        self.assertAlmostEqual(
            float(pre_export_q_values[0][i]),
            float(post_export_q_values[0][action]),
            places=4,
        )
def __init__(
    self,
    state_preprocessor: Preprocessor,
    action_preprocessor: Preprocessor,
    value_network: Optional[nn.Module],
) -> None:
    """Trace both preprocessors and the value network for serving."""
    super().__init__()

    # State side: remember the feature order and trace on the prototype input.
    self.state_sorted_features_t = state_preprocessor.sorted_features
    self.state_preprocessor = torch.jit.trace(
        state_preprocessor, (state_preprocessor.input_prototype())
    )

    # Action side: same treatment.
    self.action_sorted_features_t = action_preprocessor.sorted_features
    self.action_preprocessor = torch.jit.trace(
        action_preprocessor, (action_preprocessor.input_prototype())
    )

    # The traced preprocessors' outputs form the example (state, action)
    # input for tracing the value network.
    preprocessed_state = self.state_preprocessor(
        *state_preprocessor.input_prototype()
    )
    preprocessed_action = self.action_preprocessor(
        *action_preprocessor.input_prototype()
    )
    self.value_network = torch.jit.trace(
        value_network, (preprocessed_state, preprocessed_action)
    )
def get_actor_predictor(self, trainer, environment):
    """Build an actor torch predictor from the trainer's actor network and
    the environment's state / continuous-action normalization."""
    action_norm = environment.normalization_continuous_action
    state_preprocessor = Preprocessor(environment.normalization, False)
    postprocessor = Postprocessor(action_norm, False)

    cpu_actor = trainer.actor_network.cpu_model().eval()
    serving_module = ActorPredictorWrapper(
        ActorWithPreprocessor(cpu_actor, state_preprocessor, postprocessor)
    )
    # First element of sort_features_by_normalization() is passed along as
    # the predictor's second argument.
    sorted_action_features = sort_features_by_normalization(action_norm)[0]
    return ActorTorchPredictor(serving_module, sorted_action_features)
def test_discrete_wrapper_with_id_list(self):
    """The id-list-aware discrete wrapper must return the configured action
    names and the same Q-values as the raw embedding network."""
    action_names = ["L", "R"]
    action_dim = 2
    state_norm = {i: _cont_norm() for i in range(1, 5)}
    state_preprocessor = Preprocessor(state_norm, False)

    float_feature_infos = [
        rlt.FloatFeatureInfo(name=str(i), feature_id=i) for i in range(1, 5)
    ]
    id_list_feature_configs = [
        rlt.IdListFeatureConfig(name="A", feature_id=10, id_mapping_name="A_mapping")
    ]
    state_feature_config = rlt.ModelFeatureConfig(
        float_feature_infos=float_feature_infos,
        id_list_feature_configs=id_list_feature_configs,
        id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
    )

    dqn = FullyConnectedDQNWithEmbedding(
        state_dim=len(state_norm),
        action_dim=action_dim,
        sizes=[16],
        activations=["relu"],
        model_feature_config=state_feature_config,
        embedding_dim=8,
    )
    dqn_with_preprocessor = DiscreteDqnWithPreprocessorWithIdList(
        dqn, state_preprocessor, state_feature_config
    )
    wrapper = DiscreteDqnPredictorWrapperWithIdList(
        dqn_with_preprocessor, action_names, state_feature_config
    )

    input_prototype = dqn_with_preprocessor.input_prototype()
    output_action_names, q_values = wrapper(*input_prototype)
    self.assertEqual(action_names, output_action_names)
    self.assertEqual(q_values.shape, (1, 2))

    # The prototype keys id-list features by feature id; re-key them by
    # the configured feature name before feeding the raw network.
    feature_id_to_name = {}
    for config in state_feature_config.id_list_feature_configs:
        feature_id_to_name[config.feature_id] = config.name
    state_id_list_features = {}
    for feature_id, value in input_prototype[1].items():
        state_id_list_features[feature_id_to_name[feature_id]] = value

    feature_vector = rlt.PreprocessedFeatureVector(
        float_features=state_preprocessor(*input_prototype[0]),
        id_list_features=state_id_list_features,
    )
    expected_output = dqn(rlt.PreprocessedState(state=feature_vector)).q_values
    self.assertTrue((expected_output == q_values).all())
def test_continuous_action(self):
    """With CONTINUOUS_ACTION features, preprocessing followed by
    postprocessing must reproduce the input (rtol 1e-5)."""
    normalization_parameters = {}
    for i in range(1, 5):
        normalization_parameters[i] = NormalizationParameters(
            feature_type=CONTINUOUS_ACTION,
            min_value=-5.0 * i,
            max_value=10.0 * i,
        )
    preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
    postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

    # Column i is drawn from [-5*i, 10*i), i.e. width 15*i shifted by -5*i.
    widths = torch.tensor([15, 30, 45, 60])
    offsets = torch.tensor([-5, -10, -15, -20])
    x = torch.rand(3, 4) * widths + offsets
    presence = torch.ones_like(x, dtype=torch.uint8)  # every value present

    preprocessed = preprocessor(x, presence)
    y = postprocessor(preprocessed)
    npt.assert_allclose(x, y, rtol=1e-5)
def build_serving_module(
    self,
    q_network: ModelBase,
    state_normalization_parameters: Dict[int, NormalizationParameters],
    action_names: List[str],
    state_feature_config: rlt.ModelFeatureConfig,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module

    The Q-network is moved to CPU and put in eval mode before wrapping.
    """
    cpu_network = q_network.cpu_model().eval()
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
        cpu_network, Preprocessor(state_normalization_parameters, False)
    )
    return DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names, state_feature_config
    )
def get_actor_predictor(self, trainer, environment):
    """Export the trainer's actor network through ActorExporter and return
    the result of ``export()``."""
    feature_extractor = PredictorFeatureExtractor(
        state_normalization_parameters=environment.normalization
    )
    sorted_action_features = sort_features_by_normalization(
        environment.normalization_action
    )[0]
    output_transformer = ActorOutputTransformer(
        sorted_action_features,
        environment.max_action_range.reshape(-1),
        environment.min_action_range.reshape(-1),
    )
    exporter = ActorExporter(
        trainer.actor_network,
        feature_extractor,
        output_transformer,
        Preprocessor(environment.normalization, False, True),
    )
    return exporter.export()
def preprocess_samples(
    self,
    samples: Samples,
    minibatch_size: int,
    use_gpu: bool = False,
    one_hot_action: bool = True,
    normalize_actions: bool = True,
) -> List[TrainingDataPage]:
    """Turn ``samples`` into a list of normalized TrainingDataPage batches.

    ``samples`` is shuffled in place first. Sparse features are densified
    through a caffe2 net, normalized with Preprocessors built from
    ``self.normalization`` / ``self.normalization_action``, and cut into
    batches of exactly ``minibatch_size`` rows; a trailing partial batch is
    dropped. ``one_hot_action`` is accepted but not read in this body.
    """
    logger.info("Shuffling...")
    samples.shuffle()

    logger.info("Sparse2Dense...")
    # The ops below are added to this net in order; keep the order intact.
    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)

    state_saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
    sorted_state_features, _ = sort_features_by_normalization(self.normalization)
    state_matrix, _ = sparse_to_dense(
        state_saa.lengths, state_saa.keys, state_saa.values, sorted_state_features
    )
    next_state_saa = StackedAssociativeArray.from_dict_list(
        samples.next_states, "next_states"
    )
    next_state_matrix, _ = sparse_to_dense(
        next_state_saa.lengths,
        next_state_saa.keys,
        next_state_saa.values,
        sorted_state_features,
    )

    sorted_action_features, _ = sort_features_by_normalization(
        self.normalization_action
    )
    action_saa = StackedAssociativeArray.from_dict_list(samples.actions, "action")
    action_matrix, _ = sparse_to_dense(
        action_saa.lengths, action_saa.keys, action_saa.values, sorted_action_features
    )
    next_action_saa = StackedAssociativeArray.from_dict_list(
        samples.next_actions, "next_action"
    )
    next_action_matrix, _ = sparse_to_dense(
        next_action_saa.lengths,
        next_action_saa.keys,
        next_action_saa.values,
        sorted_action_features,
    )

    action_probabilities = torch.tensor(
        samples.action_probabilities, dtype=torch.float32
    ).reshape(-1, 1)
    rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)

    # Flatten the per-sample possible-next-action lists, remembering how
    # many entries each sample contributed.
    pnas_lengths_list = []
    pnas_flat: List[List[str]] = []
    for sample_pnas in samples.possible_next_actions:
        pnas_lengths_list.append(len(sample_pnas))
        pnas_flat.extend(sample_pnas)
    pnas_saa = StackedAssociativeArray.from_dict_list(
        pnas_flat, "possible_next_actions"
    )

    pnas_lengths = torch.tensor(pnas_lengths_list, dtype=torch.int32)
    pna_lens_blob = "pna_lens_blob"
    workspace.FeedBlob(pna_lens_blob, pnas_lengths.numpy())

    possible_next_actions_matrix, _ = sparse_to_dense(
        pnas_saa.lengths, pnas_saa.keys, pnas_saa.values, sorted_action_features
    )
    # Repeat each next-state row once per possible next action.
    state_pnas_tile_blob = C2.LengthsTile(next_state_matrix, pna_lens_blob)

    workspace.RunNetOnce(net)

    logger.info("Preprocessing...")
    state_preprocessor = Preprocessor(self.normalization, False)
    action_preprocessor = Preprocessor(self.normalization_action, False)

    states = state_preprocessor.forward(workspace.FetchBlob(state_matrix))

    actions = torch.from_numpy(workspace.FetchBlob(action_matrix))
    if normalize_actions:
        actions = action_preprocessor.forward(actions)

    next_states = state_preprocessor.forward(workspace.FetchBlob(next_state_matrix))

    next_actions = torch.from_numpy(workspace.FetchBlob(next_action_matrix))
    if normalize_actions:
        next_actions = action_preprocessor.forward(next_actions)

    # Possible next actions are always normalized, regardless of
    # normalize_actions.
    logged_possible_next_actions = action_preprocessor.forward(
        workspace.FetchBlob(possible_next_actions_matrix)
    )
    state_pnas_tile = state_preprocessor.forward(
        workspace.FetchBlob(state_pnas_tile_blob)
    )
    logged_possible_next_state_actions = torch.cat(
        (state_pnas_tile, logged_possible_next_actions), dim=1
    )

    logger.info("Reward Timeline to Torch...")
    time_diffs = torch.ones([len(samples.states), 1])
    tensor_type = torch.cuda.FloatTensor if use_gpu else torch.FloatTensor

    logger.info("Batching...")
    tdps = []
    num_rows = states.shape[0]
    pnas_start = 0
    for start in range(0, num_rows, minibatch_size):
        end = start + minibatch_size
        if end > num_rows:
            # Not enough rows left for a full batch; drop the remainder.
            break
        batch_pnas_lengths = pnas_lengths[start:end]
        pnas_end = pnas_start + torch.sum(batch_pnas_lengths)
        # Sliced as in the original even though the page below does not use it.
        pnas = logged_possible_next_actions[pnas_start:pnas_end]
        pnas_concat = logged_possible_next_state_actions[pnas_start:pnas_end]
        pnas_start = pnas_end

        tdp = TrainingDataPage(
            states=states[start:end],
            actions=actions[start:end],
            propensities=action_probabilities[start:end],
            rewards=rewards[start:end],
            next_states=next_states[start:end],
            next_actions=next_actions[start:end],
            possible_next_actions=None,
            not_terminals=(pnas_lengths[start:end] > 0).reshape(-1, 1),
            time_diffs=time_diffs[start:end],
            possible_next_actions_lengths=pnas_lengths[start:end],
            possible_next_actions_state_concat=pnas_concat,
        )
        tdp.set_type(tensor_type)
        tdps.append(tdp)
    return tdps
def test_prepare_normalization_and_normalize(self):
    """Identify normalization parameters for the fixture data, push the
    data through the PyTorch ``Preprocessor`` and check the normalized
    output of every feature according to its feature type.
    """
    raw_features = read_data()

    norm_params = {
        feature_id: normalization.identify_parameter(
            feature_id,
            samples,
            10,
            feature_type=self._feature_type_override(feature_id),
        )
        for feature_id, samples in raw_features.items()
    }

    # The identified type must agree with what id_to_type reports for the id.
    for feature_id, param in norm_params.items():
        expected_type = id_to_type(feature_id)
        if expected_type == CONTINUOUS:
            self.assertEqual(param.feature_type, CONTINUOUS)
            self.assertIs(param.boxcox_lambda, None)
            self.assertIs(param.boxcox_shift, None)
        elif expected_type == BOXCOX:
            self.assertEqual(param.feature_type, BOXCOX)
            self.assertIsNot(param.boxcox_lambda, None)
            self.assertIsNot(param.boxcox_shift, None)
        else:
            assert param.feature_type == expected_type

    preprocessor = Preprocessor(norm_params, False)
    sorted_features, _ = sort_features_by_normalization(norm_params)
    dense_input = np.zeros([10000, len(sorted_features)], dtype=np.float32)
    for column, feature_id in enumerate(sorted_features):
        dense_input[:, column] = raw_features[feature_id]
    normalized_matrix = preprocessor.forward(dense_input)

    # Slice the output back into per-feature column blocks. ENUM features
    # take one column per possible value, every other type takes one column.
    per_feature_output = {}
    column_start = 0
    for feature_id in sorted_features:
        param = norm_params[feature_id]
        width = len(param.possible_values) if param.feature_type == ENUM else 1
        column_stop = column_start + width
        per_feature_output[feature_id] = normalized_matrix[
            :, column_start:column_stop
        ]
        column_start = column_stop

    finite_flags = [
        np.isfinite(param.stddev) and np.isfinite(param.mean)
        for param in norm_params.values()
    ]
    self.assertTrue(all(finite_flags))

    for feature_id, tensor in per_feature_output.items():
        values = tensor.numpy()
        self.assertTrue(np.all(np.isfinite(values)))
        param = norm_params[feature_id]
        feature_type = param.feature_type

        if feature_type == identify_types.PROBABILITY:
            squashed = special.expit(values)
            self.assertTrue(np.all((squashed > 0) & (squashed < 1)))
        elif feature_type == identify_types.ENUM:
            possible_values = param.possible_values
            self.assertEqual(values.shape[0], len(raw_features[feature_id]))
            self.assertEqual(values.shape[1], len(possible_values))
            index_of_value = {
                value: index for index, value in enumerate(possible_values)
            }
            for row_index, row in enumerate(values):
                raw_value = raw_features[feature_id][row_index]
                if abs(raw_value - MISSING_VALUE) < 0.01:
                    # A missing input must leave every column at zero.
                    self.assertEqual(0.0, np.sum(row))
                else:
                    self.assertEqual(
                        index_of_value[raw_value], np.where(row == 1)[0][0]
                    )
        elif feature_type == identify_types.QUANTILE:
            # NOTE(review): values[0] is the first row of this feature's
            # column block, so only that row's entries are compared.
            # Confirm whether every row was meant to be checked.
            for row_index, normalized in enumerate(values[0]):
                raw_value = raw_features[feature_id][row_index]
                expected = NumpyFeatureProcessor.value_to_quantile(
                    raw_value, param.quantiles
                )
                self.assertAlmostEqual(normalized, expected, 2)
        elif feature_type == identify_types.BINARY:
            pass
        elif feature_type in (identify_types.CONTINUOUS, identify_types.BOXCOX):
            # Expect zero mean and a sample stddev of either 1 or 0.
            sample_std = np.std(values, ddof=1)
            unit_std = np.isclose(sample_std, 1, atol=0.01)
            zero_std = np.isclose(sample_std, 0, atol=0.01)
            mean_is_zero = np.isclose(np.mean(values), 0, atol=0.01)
            self.assertTrue(
                np.all(mean_is_zero),
                "mean of feature {} is {}, not 0".format(
                    feature_id, np.mean(values)
                ),
            )
            self.assertTrue(np.all(np.logical_or(unit_std, zero_std)))
        elif feature_type == identify_types.CONTINUOUS_ACTION:
            # Normalized values must lie strictly inside (-1, 1).
            below_max = values < 1
            above_min = values > -1
            self.assertTrue(
                np.all(below_max),
                "values are not less than 1: {}".format(values[~below_max]),
            )
            self.assertTrue(
                np.all(above_min),
                "values are not more than -1: {}".format(values[~above_min]),
            )
        else:
            raise NotImplementedError()
def benchmark(num_forward_passes):
    """
    Benchmark preprocessor speeds:
        1 - PyTorch
        2 - PyTorch -> ONNX -> C2
        3 - C2

    Every variant processes the same 10000-row input matrix
    ``num_forward_passes`` times; the elapsed wall-clock time of each
    variant is written to the module logger.

    Args:
        num_forward_passes: number of forward passes to time per variant.
    """
    feature_value_map = gen_data(
        num_binary_features=10,
        num_boxcox_features=10,
        num_continuous_features=10,
        num_enum_features=10,
        num_prob_features=10,
        num_quantile_features=10,
    )

    normalization_parameters = {
        name: normalization.identify_parameter(name, values, 10)
        for name, values in feature_value_map.items()
    }

    sorted_features, _ = sort_features_by_normalization(normalization_parameters)

    # Dummy input
    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)

    # PyTorch Preprocessor
    pytorch_preprocessor = Preprocessor(normalization_parameters, False)
    for i, feature in enumerate(sorted_features):
        input_matrix[:, i] = feature_value_map[feature]

    #################### time pytorch ############################
    start = time.time()
    for _ in range(num_forward_passes):
        _ = pytorch_preprocessor.forward(input_matrix)
    end = time.time()
    logger.info(
        "PyTorch: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    ################ time pytorch -> ONNX -> caffe2 ####################
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        pytorch_preprocessor, len(sorted_features), False
    )
    (
        input_blob,
        output_blob,
        caffe2_netdef,
    ) = PytorchCaffe2Converter.buffer_to_caffe2_netdef(buffer)
    torch_workspace = caffe2_netdef.workspace

    # Copy every blob of the converted net into the global workspace.
    for blob_str in torch_workspace.Blobs():
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    workspace.FeedBlob(input_blob, input_matrix)
    workspace.RunNetOnce(torch_init_net)

    start = time.time()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(torch_predict_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.time()
    logger.info(
        "PyTorch -> ONNX -> Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    #################### time caffe2 ############################
    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()
    input_matrix_blob = "input_matrix_blob"
    # Feed an empty placeholder so the blob exists while the net is built;
    # the real input replaces it before timing starts.
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)

    start = time.time()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(norm_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.time()
    logger.info(
        "Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )
def preprocess_samples_discrete(
    self,
    samples: Samples,
    minibatch_size: int,
    one_hot_action: bool = True,
    use_gpu: bool = False,
) -> List[TrainingDataPage]:
    """Shuffle ``samples`` and pack them into discrete-action training pages.

    Args:
        samples: the transitions to convert.
        minibatch_size: number of rows per returned page.
        one_hot_action: when True the page's ``actions`` are one-hot rows,
            otherwise the argmax column index of each one-hot row.
        use_gpu: selects ``torch.cuda.FloatTensor`` over
            ``torch.FloatTensor`` in the ``set_type`` call on each page.

    Returns:
        One ``TrainingDataPage`` per full minibatch; a trailing partial
        batch is dropped.
    """

    def one_hot(labels):
        # Row i has a 1 in the column whose ACTIONS entry equals labels[i].
        matches = np.array(labels).reshape(-1, 1) == np.array(self.ACTIONS)
        return torch.tensor(matches.astype(np.int64))

    def availability_mask(action_lists, num_rows):
        # Pad the ragged per-sample lists with "" so they form a matrix,
        # then mark, per action, the rows in which that action appears.
        padded = np.array(
            list(itertools.zip_longest(*action_lists, fillvalue=""))
        ).T
        mask = torch.zeros([num_rows, len(self.ACTIONS)])
        for column, action in enumerate(self.ACTIONS):
            present = np.max(padded == action, axis=1)
            mask[:, column] = torch.tensor(present.astype(np.int64))
        return mask

    logger.info("Shuffling...")
    samples = shuffle_samples(samples)
    logger.info("Preprocessing...")

    if self.sparse_to_dense_net is not None:
        # The net already exists: only rebuild the stacked input arrays.
        # Presumably from_dict_list refreshes the blobs the cached net
        # reads -- TODO confirm.
        StackedAssociativeArray.from_dict_list(samples.states, "states")
        StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
    else:
        # First call: build the sparse-to-dense net and keep it on self.
        self.sparse_to_dense_net = core.Net("gridworld_sparse_to_dense")
        C2.set_net(self.sparse_to_dense_net)
        stacked = StackedAssociativeArray.from_dict_list(samples.states, "states")
        feature_order, _ = sort_features_by_normalization(self.normalization)
        self.state_matrix, _ = sparse_to_dense(
            stacked.lengths, stacked.keys, stacked.values, feature_order
        )
        stacked = StackedAssociativeArray.from_dict_list(
            samples.next_states, "next_states"
        )
        self.next_state_matrix, _ = sparse_to_dense(
            stacked.lengths, stacked.keys, stacked.values, feature_order
        )
        C2.set_net(None)
    workspace.RunNetOnce(self.sparse_to_dense_net)

    logger.info("Converting to Torch...")
    actions_one_hot = one_hot(samples.actions)
    action_indices = actions_one_hot.argmax(dim=1, keepdim=True)
    rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)
    propensities = torch.tensor(
        samples.action_probabilities, dtype=torch.float32
    ).reshape(-1, 1)
    next_actions_one_hot = one_hot(samples.next_actions)

    logger.info("Converting PA to Torch...")
    possible_actions_mask = availability_mask(
        samples.possible_actions, len(samples.actions)
    )

    logger.info("Converting PNA to Torch...")
    possible_next_actions_mask = availability_mask(
        samples.possible_next_actions, len(samples.next_actions)
    )
    terminals = torch.tensor(samples.terminals, dtype=torch.int32).reshape(-1, 1)
    not_terminal = 1 - terminals

    logger.info("Converting RT to Torch...")
    time_diffs = torch.ones([len(samples.states), 1])

    logger.info("Preprocessing...")
    preprocessor = Preprocessor(self.normalization, False)
    states = preprocessor.forward(workspace.FetchBlob(self.state_matrix))
    next_states = preprocessor.forward(
        workspace.FetchBlob(self.next_state_matrix)
    )

    logger.info("Batching...")
    tensor_type = torch.cuda.FloatTensor if use_gpu else torch.FloatTensor
    page_actions = actions_one_hot if one_hot_action else action_indices
    num_rows = states.shape[0]
    pages = []
    for start in range(0, num_rows, minibatch_size):
        end = start + minibatch_size
        if end > num_rows:
            # Not enough rows left for a full minibatch.
            break
        batch = slice(start, end)
        page = TrainingDataPage(
            states=states[batch],
            actions=page_actions[batch],
            propensities=propensities[batch],
            rewards=rewards[batch],
            next_states=next_states[batch],
            not_terminal=not_terminal[batch],
            next_actions=next_actions_one_hot[batch],
            possible_actions_mask=possible_actions_mask[batch],
            possible_next_actions_mask=possible_next_actions_mask[batch],
            time_diffs=time_diffs[batch],
        )
        page.set_type(tensor_type)
        pages.append(page)
    return pages
def test_prepare_normalization_and_normalize(self):
    """Identify normalization parameters for the fixture data, normalize the
    data with the PyTorch ``Preprocessor`` and validate the result for each
    feature type.
    """
    raw_features = read_data()

    norm_params = {
        feature_id: normalization.identify_parameter(
            samples, 10, feature_type=self._feature_type_override(feature_id)
        )
        for feature_id, samples in raw_features.items()
    }

    # The identified type must agree with what id_to_type reports for the id.
    for feature_id, param in norm_params.items():
        expected_type = id_to_type(feature_id)
        if expected_type == CONTINUOUS:
            self.assertEqual(param.feature_type, CONTINUOUS)
            self.assertIs(param.boxcox_lambda, None)
            self.assertIs(param.boxcox_shift, None)
        elif expected_type == BOXCOX:
            self.assertEqual(param.feature_type, BOXCOX)
            self.assertIsNot(param.boxcox_lambda, None)
            self.assertIsNot(param.boxcox_shift, None)
        else:
            assert param.feature_type == expected_type

    preprocessor = Preprocessor(norm_params, False)
    sorted_features, _ = sort_features_by_normalization(norm_params)
    dense_input = np.zeros([10000, len(sorted_features)], dtype=np.float32)
    for column, feature_id in enumerate(sorted_features):
        dense_input[:, column] = raw_features[feature_id]
    normalized_matrix = preprocessor.forward(dense_input)

    # Slice the output back into per-feature column blocks. ENUM features
    # take one column per possible value, every other type takes one column.
    per_feature_output = {}
    column_start = 0
    for feature_id in sorted_features:
        param = norm_params[feature_id]
        width = len(param.possible_values) if param.feature_type == ENUM else 1
        column_stop = column_start + width
        per_feature_output[feature_id] = normalized_matrix[
            :, column_start:column_stop
        ]
        column_start = column_stop

    finite_flags = [
        np.isfinite(param.stddev) and np.isfinite(param.mean)
        for param in norm_params.values()
    ]
    self.assertTrue(all(finite_flags))

    for feature_id, tensor in per_feature_output.items():
        values = tensor.numpy()
        self.assertTrue(np.all(np.isfinite(values)))
        param = norm_params[feature_id]
        feature_type = param.feature_type

        if feature_type == identify_types.PROBABILITY:
            squashed = special.expit(values)
            self.assertTrue(np.all((squashed > 0) & (squashed < 1)))
        elif feature_type == identify_types.ENUM:
            possible_values = param.possible_values
            self.assertEqual(values.shape[0], len(raw_features[feature_id]))
            self.assertEqual(values.shape[1], len(possible_values))
            index_of_value = {
                value: index for index, value in enumerate(possible_values)
            }
            for row_index, row in enumerate(values):
                raw_value = raw_features[feature_id][row_index]
                if abs(raw_value - MISSING_VALUE) < 0.01:
                    # A missing input must leave every column at zero.
                    self.assertEqual(0.0, np.sum(row))
                else:
                    self.assertEqual(
                        index_of_value[raw_value], np.where(row == 1)[0][0]
                    )
        elif feature_type == identify_types.QUANTILE:
            # NOTE(review): values[0] is the first row of this feature's
            # column block, so only that row's entries are compared.
            # Confirm whether every row was meant to be checked.
            for row_index, normalized in enumerate(values[0]):
                raw_value = raw_features[feature_id][row_index]
                expected = self._value_to_quantile(raw_value, param.quantiles)
                self.assertAlmostEqual(normalized, expected, 2)
        elif feature_type == identify_types.BINARY:
            pass
        elif feature_type in (identify_types.CONTINUOUS, identify_types.BOXCOX):
            # Expect zero mean and a sample stddev of either 1 or 0.
            sample_std = np.std(values, ddof=1)
            unit_std = np.isclose(sample_std, 1, atol=0.01)
            zero_std = np.isclose(sample_std, 0, atol=0.01)
            mean_is_zero = np.isclose(np.mean(values), 0, atol=0.01)
            self.assertTrue(
                np.all(mean_is_zero),
                "mean of feature {} is {}, not 0".format(
                    feature_id, np.mean(values)
                ),
            )
            self.assertTrue(np.all(np.logical_or(unit_std, zero_std)))
        elif feature_type == identify_types.CONTINUOUS_ACTION:
            # Normalized values must lie strictly inside (-1, 1).
            below_max = values < 1
            above_min = values > -1
            self.assertTrue(
                np.all(below_max),
                "values are not less than 1: {}".format(values[~below_max]),
            )
            self.assertTrue(
                np.all(above_min),
                "values are not more than -1: {}".format(values[~above_min]),
            )
        else:
            raise NotImplementedError()
def test_preprocessing_network(self):
    """Normalize each fixture feature on its own with the PyTorch
    ``Preprocessor`` and compare the result, within a tolerance, against
    ``NumpyFeatureProcessor.preprocess`` run on the same data.
    """
    raw_features = read_data()

    norm_params = {}
    torch_outputs = {}
    for feature_name, feature_values in raw_features.items():
        param = normalization.identify_parameter(
            feature_name,
            feature_values,
            feature_type=self._feature_type_override(feature_name),
        )
        norm_params[feature_name] = param

        # Overwrite the first entry with MISSING_VALUE after the parameters
        # were identified. The write is in place, so the reference
        # computation below receives the same modified values.
        feature_values[0] = MISSING_VALUE

        single_feature_preprocessor = Preprocessor({feature_name: param}, False)
        column = np.expand_dims(feature_values, -1)
        torch_outputs[feature_name] = single_feature_preprocessor.forward(
            column
        ).numpy()

    reference = NumpyFeatureProcessor.preprocess(raw_features, norm_params)

    for feature_name in raw_features:
        actual = torch_outputs[feature_name]
        if feature_name != ENUM_FEATURE_ID:
            actual = np.squeeze(actual, -1)
        actual = actual.flatten()
        expected = reference[feature_name].flatten()

        # At the limit, boxcox has some numerical instability
        tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01

        close = np.isclose(actual, expected, rtol=tolerance, atol=tolerance)
        mismatched = np.where(np.logical_not(close))
        self.assertTrue(
            np.all(close),
            "{} does not match: {} \n!=\n {}".format(
                feature_name, actual[mismatched], expected[mismatched]
            ),
        )
def preprocess_samples_discrete(
    self, samples: Samples, minibatch_size: int, one_hot_action: bool = True
) -> List[TrainingDataPage]:
    """Shuffle ``samples`` in place and pack them into training pages.

    Args:
        samples: the transitions to convert; ``samples.shuffle()`` is
            called on it, so the caller's object is reordered.
        minibatch_size: number of rows per returned page.
        one_hot_action: when True the page's ``actions`` are one-hot rows,
            otherwise the argmax column index of each one-hot row.

    Returns:
        One ``TrainingDataPage`` per full minibatch; a trailing partial
        batch is dropped.
    """
    logger.info("Shuffling...")
    samples.shuffle()

    logger.info("Preprocessing...")
    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)
    preprocessor_net = PreprocessorNet(True)
    saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
    state_matrix, _ = preprocessor_net.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "state_norm",
        False,
        False,
        False,
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
    next_state_matrix, _ = preprocessor_net.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "next_state_norm",
        False,
        False,
        False,
    )
    workspace.RunNetOnce(net)

    logger.info("Converting to Torch...")
    action_names = np.array(self.ACTIONS)
    # Row i has a 1 in the column whose ACTIONS entry equals the i-th action.
    actions_one_hot = torch.tensor(
        (np.array(samples.actions).reshape(-1, 1) == action_names).astype(np.int64)
    )
    actions = actions_one_hot.argmax(dim=1, keepdim=True)
    rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)
    action_probabilities = torch.tensor(
        samples.action_probabilities, dtype=torch.float32
    ).reshape(-1, 1)
    next_actions_one_hot = torch.tensor(
        (np.array(samples.next_actions).reshape(-1, 1) == action_names).astype(
            np.int64
        )
    )

    logger.info("Converting PNA to Torch...")
    # Pad the ragged per-sample action lists with "" so they form a matrix,
    # then mark, per action, the rows in which that action appears.
    possible_next_action_strings = np.array(
        list(itertools.zip_longest(*samples.possible_next_actions, fillvalue=""))
    ).T
    possible_next_actions_mask = torch.zeros(
        [len(samples.next_actions), len(self.ACTIONS)]
    )
    for i, action in enumerate(self.ACTIONS):
        possible_next_actions_mask[:, i] = torch.tensor(
            np.max(possible_next_action_strings == action, axis=1).astype(np.int64)
        )
    terminals = torch.tensor(samples.terminals, dtype=torch.int32).reshape(-1, 1)
    not_terminals = 1 - terminals

    logger.info("Converting RT to Torch...")
    # episode_values is always built here, so no None fallback is needed
    # when slicing it per batch below.
    episode_values = torch.tensor(
        samples.episode_values, dtype=torch.float32
    ).reshape(-1, 1)
    time_diffs = torch.ones([len(samples.states), 1])

    logger.info("Preprocessing...")
    preprocessor = Preprocessor(self.normalization, False)
    states_ndarray = preprocessor.forward(workspace.FetchBlob(state_matrix))
    next_states_ndarray = preprocessor.forward(
        workspace.FetchBlob(next_state_matrix)
    )

    logger.info("Batching...")
    tdps = []
    num_rows = states_ndarray.shape[0]
    for start in range(0, num_rows, minibatch_size):
        end = start + minibatch_size
        if end > num_rows:
            # Not enough rows left for a full minibatch.
            break
        tdp = TrainingDataPage(
            states=states_ndarray[start:end],
            actions=actions_one_hot[start:end]
            if one_hot_action
            else actions[start:end],
            propensities=action_probabilities[start:end],
            rewards=rewards[start:end],
            next_states=next_states_ndarray[start:end],
            not_terminals=not_terminals[start:end],
            next_actions=next_actions_one_hot[start:end],
            possible_next_actions=possible_next_actions_mask[start:end],
            episode_values=episode_values[start:end],
            time_diffs=time_diffs[start:end],
        )
        tdp.set_type(torch.FloatTensor)
        tdps.append(tdp)
    return tdps
def preprocess_samples(
    self,
    samples: Samples,
    minibatch_size: int,
    use_gpu: bool = False,
    one_hot_action: bool = True,
    normalize_actions: bool = True,
) -> List[TrainingDataPage]:
    """Shuffle ``samples`` and pack them into parametric-action training pages.

    Args:
        samples: the transitions to convert.
        minibatch_size: number of rows per returned page.
        use_gpu: selects ``torch.cuda.FloatTensor`` over
            ``torch.FloatTensor`` in the ``set_type`` call on each page.
        one_hot_action: accepted but not read by this method.
        normalize_actions: when True, logged actions and next actions go
            through the action preprocessor. The possible next actions are
            preprocessed regardless of this flag.

    Returns:
        One ``TrainingDataPage`` per full minibatch; a trailing partial
        batch is dropped.
    """

    def to_dense(dict_list, blob_name, feature_order):
        # Stack the sparse dicts and add a sparse-to-dense op for them.
        stacked = StackedAssociativeArray.from_dict_list(dict_list, blob_name)
        dense, _ = sparse_to_dense(
            stacked.lengths, stacked.keys, stacked.values, feature_order
        )
        return dense

    logger.info("Shuffling...")
    samples = shuffle_samples(samples)

    logger.info("Sparse2Dense...")
    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)

    sorted_state_features, _ = sort_features_by_normalization(self.normalization)
    state_matrix = to_dense(samples.states, "states", sorted_state_features)
    next_state_matrix = to_dense(
        samples.next_states, "next_states", sorted_state_features
    )

    sorted_action_features, _ = sort_features_by_normalization(
        self.normalization_action
    )
    action_matrix = to_dense(samples.actions, "action", sorted_action_features)
    next_action_matrix = to_dense(
        samples.next_actions, "next_action", sorted_action_features
    )

    action_probabilities = torch.tensor(
        samples.action_probabilities, dtype=torch.float32
    ).reshape(-1, 1)
    rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)

    max_action_size = 4

    # Pad every sample's possible-next-action list to max_action_size with
    # empty dicts, and record which slots hold real actions in the mask.
    pnas_mask_list: List[List[int]] = []
    pnas_flat: List[Dict[str, float]] = []
    for candidates in samples.possible_next_actions:
        padding = max_action_size - len(candidates)
        pnas_mask_list.append([1] * len(candidates) + [0] * padding)
        pnas_flat.extend(candidates)
        pnas_flat.extend({} for _ in range(padding))  # Filler
    stacked_pnas = StackedAssociativeArray.from_dict_list(
        pnas_flat, "possible_next_actions"
    )
    pnas_mask = torch.Tensor(pnas_mask_list)
    possible_next_actions_matrix, _ = sparse_to_dense(
        stacked_pnas.lengths,
        stacked_pnas.keys,
        stacked_pnas.values,
        sorted_action_features,
    )
    workspace.RunNetOnce(net)

    logger.info("Preprocessing...")
    state_preprocessor = Preprocessor(self.normalization, False)
    action_preprocessor = Preprocessor(self.normalization_action, False)

    states = state_preprocessor.forward(workspace.FetchBlob(state_matrix))

    actions = torch.from_numpy(workspace.FetchBlob(action_matrix))
    if normalize_actions:
        actions = action_preprocessor.forward(actions)

    next_states = state_preprocessor.forward(workspace.FetchBlob(next_state_matrix))
    # Repeat each next-state row max_action_size times so that it lines up
    # with the padded possible-next-action rows.
    state_width = next_states.shape[1]
    state_pnas_tile = next_states.repeat(1, max_action_size).reshape(
        -1, state_width
    )

    next_actions = torch.from_numpy(workspace.FetchBlob(next_action_matrix))
    if normalize_actions:
        next_actions = action_preprocessor.forward(next_actions)

    logged_possible_next_actions = action_preprocessor.forward(
        workspace.FetchBlob(possible_next_actions_matrix)
    )

    assert state_pnas_tile.shape[0] == logged_possible_next_actions.shape[0], (
        "Invalid shapes: "
        + str(state_pnas_tile.shape)
        + " != "
        + str(logged_possible_next_actions.shape)
    )
    state_action_concat = torch.cat(
        (state_pnas_tile, logged_possible_next_actions), dim=1
    )

    logger.info("Reward Timeline to Torch...")
    time_diffs = torch.ones([len(samples.states), 1])

    pages = []
    logger.info("Batching...")
    tensor_type = torch.cuda.FloatTensor if use_gpu else torch.FloatTensor
    num_rows = states.shape[0]
    for start in range(0, num_rows, minibatch_size):
        end = start + minibatch_size
        if end > num_rows:
            # Not enough rows left for a full minibatch.
            break
        # Each sample owns max_action_size consecutive rows of the concat.
        pnas_start = start * max_action_size
        pnas_end = pnas_start + (minibatch_size * max_action_size)
        batch_mask = pnas_mask[start:end, :]
        page = TrainingDataPage(
            states=states[start:end],
            actions=actions[start:end],
            propensities=action_probabilities[start:end],
            rewards=rewards[start:end],
            next_states=next_states[start:end],
            next_actions=next_actions[start:end],
            # A row with no possible next action is treated as terminal.
            not_terminal=(batch_mask.sum(dim=1, keepdim=True) > 0),
            time_diffs=time_diffs[start:end],
            possible_next_actions_mask=batch_mask,
            possible_next_actions_state_concat=state_action_concat[
                pnas_start:pnas_end, :
            ],
        )
        page.set_type(tensor_type)
        pages.append(page)
    return pages