def test_normalize_dense_matrix_enum(self):
    """Normalize a dense matrix that mixes ENUM and CONTINUOUS features.

    A 4 x 3 input whose columns follow ``feature_ids`` order is pushed through
    ``PreprocessorNet.normalize_dense_matrix``.  The fetched result is compared
    with a hand-written matrix in which the continuous column comes first and
    every ENUM feature occupies one indicator column per possible value; the
    row holding ``normalization.MISSING_VALUE`` expects all zeros for that
    feature.
    """
    enum_three_values = NormalizationParameters(
        identify_types.ENUM, None, None, None, None, [12, 4, 2], None, None, None
    )
    continuous = NormalizationParameters(
        identify_types.CONTINUOUS, None, 0, 0, 1, None, None, None, None
    )
    enum_two_values = NormalizationParameters(
        identify_types.ENUM, None, None, None, None, [15, 3], None, None, None
    )
    normalization_parameters = {
        1: enum_three_values,
        2: continuous,
        3: enum_two_values,
    }

    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()

    # Sorted according to feature type
    feature_ids = [2, 1, 3]
    raw_columns = {
        1: [12, 4, 2, 2],
        2: [1.0, 2.0, 3.0, 3.0],
        3: [15, 3, 15, normalization.MISSING_VALUE],
    }
    inputs = np.zeros([4, 3], dtype=np.float32)
    for column, feature_id in enumerate(feature_ids):
        inputs[:, column] = raw_columns[feature_id]

    input_blob = C2.NextBlob("input_blob")
    # Placeholder value so the blob exists while the net is being built; the
    # real matrix is fed right before the net runs.
    workspace.FeedBlob(input_blob, np.array([0], dtype=np.float32))
    normalized_output_blob, _ = preprocessor.normalize_dense_matrix(
        input_blob, feature_ids, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_blob, inputs)
    workspace.RunNetOnce(norm_net)
    actual = workspace.FetchBlob(normalized_output_blob)

    expected = np.array(
        [
            [1.0, 1, 0, 0, 1, 0],
            [2.0, 0, 1, 0, 0, 1],
            [3.0, 0, 0, 1, 1, 0],
            [3.0, 0, 0, 1, 0, 0],  # Missing values should go to all 0
        ]
    )
    np.testing.assert_allclose(expected, actual)
def export_critic(
    cls,
    trainer,
    state_normalization_parameters,
    action_normalization_parameters,
    int_features=False,
    model_on_gpu=False,
):
    """Export caffe2 preprocessor net and pytorch critic forward pass as one
    caffe2 net.

    Side effect: when ``trainer.critic`` is wrapped in ``DataParallel`` it is
    replaced on the trainer by the wrapped module before conversion.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_normalization_parameters action NormalizationParameters
        (only the keys are used: actions are densified, not normalized)
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean forwarded to
        ``PytorchCaffe2Converter.pytorch_net_to_buffer``
    :return DDPGPredictor built from the combined net, the torch init net,
        the collected parameter blob names and ``int_features``
    """
    input_dim = trainer.state_dim + trainer.action_dim
    if isinstance(trainer.critic, DataParallel):
        trainer.critic = trainer.critic.module

    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        trainer.critic, input_dim, model_on_gpu
    )
    (
        critic_input_blob,
        critic_output_blob,
        caffe2_netdef,
    ) = PytorchCaffe2Converter.buffer_to_caffe2_netdef(buffer)
    torch_workspace = caffe2_netdef.workspace

    # Copy every blob of the converted model into the global workspace and
    # remember its name as a predictor parameter.
    parameters = []
    for blob_str in torch_workspace.Blobs():
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
        parameters.append(blob_str)

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Placeholder inputs so the blobs exist while the net is constructed.
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob("input/int_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        # The Cast operator's target-type argument is ``to`` (as used for the
        # output keys below); the previous ``dtype=`` keyword is not an
        # argument of that operator.
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            to=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    preprocessor = PreprocessorNet(True)
    sorted_features, _ = sort_features_by_normalization(
        state_normalization_parameters
    )
    state_dense_matrix, new_parameters = sparse_to_dense(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        sorted_features,
    )
    parameters.extend(new_parameters)
    state_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
        state_dense_matrix,
        sorted_features,
        state_normalization_parameters,
        "state_norm",
        False,
    )
    parameters.extend(new_parameters)

    # Don't normalize actions, just go from sparse -> dense
    action_dense_matrix, new_parameters = sparse_to_dense(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        list(action_normalization_parameters.keys()),  # TODO: Clean up in D10161240
    )
    parameters.extend(new_parameters)

    state_action_normalized = "state_action_normalized"
    state_action_normalized_dim = "state_action_normalized_dim"
    net.Concat(
        [state_normalized_dense_matrix, action_dense_matrix],
        [state_action_normalized, state_action_normalized_dim],
        axis=1,
    )
    net.Copy([state_action_normalized], [critic_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)

    # NOTE(review): the init net was already run once above and is also
    # appended to the predict net here -- confirm this is intended.
    net.AppendNet(torch_init_net)
    net.AppendNet(torch_predict_net)

    # A stand-alone ``C2.FlattenToVec(C2.ArgMax(critic_output_blob))`` used to
    # sit here; its result was discarded, so it only added two unused
    # operators to the exported net.  The ConstantFill input below builds the
    # blob that is actually consumed.
    output_lengths = "output/float_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [C2.FlattenToVec(C2.ArgMax(critic_output_blob))],
        [output_lengths],
        value=trainer.critic.layers[-1].out_features,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    output_keys_int32 = "output_keys_int32"
    output_keys = "output/float_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    C2.net().LengthsRangeFill([output_lengths], [output_keys_int32])
    C2.net().Cast(
        [output_keys_int32], [output_keys], to=caffe2_pb2.TensorProto.INT64
    )

    output_values = "output/float_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
    C2.net().FlattenToVec([critic_output_blob], [output_values])

    workspace.CreateNet(net)
    return DDPGPredictor(net, torch_init_net, parameters, int_features)
def export_actor(
    cls,
    trainer,
    state_normalization_parameters,
    min_action_range_tensor_serving,
    max_action_range_tensor_serving,
    int_features=False,
    model_on_gpu=False,
):
    """Export caffe2 preprocessor net and pytorch actor forward pass as one
    caffe2 net.

    Side effect: when ``trainer.actor`` is wrapped in ``DataParallel`` it is
    replaced on the trainer by the wrapped module before conversion.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param min_action_range_tensor_serving pytorch tensor that specifies
        min action value for each dimension
    :param max_action_range_tensor_serving pytorch tensor that specifies
        max action value for each dimension
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    :return DDPGPredictor built from the combined net, the torch init net,
        the collected parameter blob names and ``int_features``
    """
    input_dim = trainer.state_dim
    if isinstance(trainer.actor, DataParallel):
        trainer.actor = trainer.actor.module

    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        trainer.actor, input_dim, model_on_gpu
    )
    (
        actor_input_blob,
        actor_output_blob,
        caffe2_netdef,
    ) = PytorchCaffe2Converter.buffer_to_caffe2_netdef(buffer)
    torch_workspace = caffe2_netdef.workspace

    # Copy every blob of the converted model into the global workspace.
    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)

    # While converting to metanetdef, the external_input of predict_net
    # will be recomputed. Add the real output of init_net to parameters
    # to make sure they will be counted.
    parameters.extend(
        set(caffe2_netdef.init_net.external_output)
        - set(caffe2_netdef.init_net.external_input)
    )

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Feed action scaling tensors for serving
    min_action_serving_blob = C2.NextBlob("min_action_range_tensor_serving")
    workspace.FeedBlob(
        min_action_serving_blob, min_action_range_tensor_serving.cpu().data.numpy()
    )
    parameters.append(str(min_action_serving_blob))

    max_action_serving_blob = C2.NextBlob("max_action_range_tensor_serving")
    workspace.FeedBlob(
        max_action_serving_blob, max_action_range_tensor_serving.cpu().data.numpy()
    )
    parameters.append(str(max_action_serving_blob))

    # Feed action scaling tensors for training [-1, 1] due to tanh actor
    min_vals_training = trainer.min_action_range_tensor_training.cpu().data.numpy()
    min_action_training_blob = C2.NextBlob("min_action_range_tensor_training")
    workspace.FeedBlob(min_action_training_blob, min_vals_training)
    parameters.append(str(min_action_training_blob))

    max_vals_training = trainer.max_action_range_tensor_training.cpu().data.numpy()
    max_action_training_blob = C2.NextBlob("max_action_range_tensor_training")
    workspace.FeedBlob(max_action_training_blob, max_vals_training)
    parameters.append(str(max_action_training_blob))

    # Placeholder inputs so the blobs exist while the net is constructed.
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob("input/int_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        # The Cast operator's target-type argument is ``to`` (as used for the
        # output keys below); the previous ``dtype=`` keyword is not an
        # argument of that operator.
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            to=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    preprocessor = PreprocessorNet(True)
    sorted_features, _ = sort_features_by_normalization(
        state_normalization_parameters
    )
    state_dense_matrix, new_parameters = sparse_to_dense(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        sorted_features,
    )
    parameters.extend(new_parameters)
    state_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
        state_dense_matrix,
        sorted_features,
        state_normalization_parameters,
        "state_norm",
        False,
    )
    parameters.extend(new_parameters)

    net.Copy([state_normalized_dense_matrix], [actor_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)

    net.AppendNet(torch_predict_net)

    # A stand-alone ``C2.FlattenToVec(C2.ArgMax(actor_output_blob))`` used to
    # sit here; its result was discarded, so it only added two unused
    # operators to the exported net.  The ConstantFill input below builds the
    # blob that is actually consumed.
    output_lengths = "output/float_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [C2.FlattenToVec(C2.ArgMax(actor_output_blob))],
        [output_lengths],
        value=trainer.actor.layers[-1].out_features,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    output_keys_int32 = "output_keys_int32"
    output_keys = "output/float_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    C2.net().LengthsRangeFill([output_lengths], [output_keys_int32])
    C2.net().Cast(
        [output_keys_int32], [output_keys], to=caffe2_pb2.TensorProto.INT64
    )

    output_values = "output/float_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))

    # Scale actors actions from [-1, 1] to serving range
    prev_range = C2.Sub(max_action_training_blob, min_action_training_blob)
    new_range = C2.Sub(max_action_serving_blob, min_action_serving_blob)
    subtract_prev_min = C2.Sub(actor_output_blob, min_action_training_blob)
    div_by_prev_range = C2.Div(subtract_prev_min, prev_range)
    scaled_for_serving_actions = C2.Add(
        C2.Mul(div_by_prev_range, new_range), min_action_serving_blob
    )
    C2.net().FlattenToVec([scaled_for_serving_actions], [output_values])

    workspace.CreateNet(net)
    return DDPGPredictor(net, torch_init_net, parameters, int_features)
def export(
    cls,
    trainer,
    state_normalization_parameters,
    action_normalization_parameters,
    int_features=False,
    model_on_gpu=False,
    normalize_actions=True,
):
    """Export caffe2 preprocessor net and pytorch DQN forward pass as one
    caffe2 net.

    The resulting net writes, per example, the action index chosen by the
    max-Q policy and the one sampled from the tempered softmax policy to the
    ``output/int_single_categorical_features.*`` blobs.

    Side effect: when ``trainer.q_network`` is wrapped in ``DataParallel`` it
    is replaced on the trainer by the wrapped module before conversion.

    :param trainer ParametricDQNTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_normalization_parameters action NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    :param normalize_actions boolean; when False the dense action matrix is
        concatenated to the state without being normalized
    :return ParametricDQNPredictor built from the combined net, the torch init
        net, the collected parameter blob names and ``int_features``
    """
    if isinstance(trainer.q_network, DataParallel):
        trainer.q_network = trainer.q_network.module

    serialized_model = PytorchCaffe2Converter.pytorch_net_to_buffer(
        trainer.q_network, trainer.num_features, model_on_gpu
    )
    (
        qnet_input_blob,
        qnet_output_blob,
        caffe2_netdef,
    ) = PytorchCaffe2Converter.buffer_to_caffe2_netdef(serialized_model)
    converted_workspace = caffe2_netdef.workspace

    parameters = converted_workspace.Blobs()
    for blob_name in parameters:
        workspace.FeedBlob(blob_name, converted_workspace.FetchBlob(blob_name))

    # Remove the input blob from parameters since it's not a real
    # input (will be calculated by preprocessor)
    parameters.remove(qnet_input_blob)

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)

    # While converting to metanetdef, the external_input of predict_net
    # will be recomputed. Add the real output of init_net to parameters
    # to make sure they will be counted.
    init_outputs = set(caffe2_netdef.init_net.external_output)
    init_inputs = set(caffe2_netdef.init_net.external_input)
    parameters.extend(init_outputs - init_inputs)

    # ensure state and action IDs have no intersection
    state_ids = set(state_normalization_parameters.keys())
    action_ids = set(action_normalization_parameters.keys())
    assert not (state_ids & action_ids)

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Placeholder inputs so the blobs exist while the net is constructed.
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if not int_features:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])
    else:
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        # NOTE(review): the other Cast calls in this function pass ``to=``;
        # confirm that ``dtype=`` is honoured by the Cast operator here.
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        merge_inputs = [
            "input/float_features.lengths",
            "input/float_features.keys",
            "input/float_features.values",
            "input/int_features.lengths",
            "input/int_features.keys",
            "input/int_features.values_float",
        ]
        merge_outputs = [
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
        ]
        C2.net().MergeMultiScalarFeatureTensors(merge_inputs, merge_outputs)

    preprocessor = PreprocessorNet()
    sparse_to_dense_processor = Caffe2SparseToDenseProcessor()

    # --- state: sparse -> dense -> normalized ---
    sorted_state_features, _ = sort_features_by_normalization(
        state_normalization_parameters
    )
    state_sparse = StackedAssociativeArray(
        input_feature_lengths, input_feature_keys, input_feature_values
    )
    state_dense_matrix, extra_parameters = sparse_to_dense_processor(
        sorted_state_features, state_sparse
    )
    parameters.extend(extra_parameters)
    state_normalized_dense_matrix, extra_parameters = preprocessor.normalize_dense_matrix(
        state_dense_matrix,
        sorted_state_features,
        state_normalization_parameters,
        "state_norm",
        False,
    )
    parameters.extend(extra_parameters)

    # --- action: sparse -> dense -> (optionally) normalized ---
    sorted_action_features, _ = sort_features_by_normalization(
        action_normalization_parameters
    )
    action_sparse = StackedAssociativeArray(
        input_feature_lengths, input_feature_keys, input_feature_values
    )
    action_dense_matrix, extra_parameters = sparse_to_dense_processor(
        sorted_action_features, action_sparse
    )
    parameters.extend(extra_parameters)

    if not normalize_actions:
        action_normalized_dense_matrix = action_dense_matrix
    else:
        action_normalized_dense_matrix, extra_parameters = preprocessor.normalize_dense_matrix(
            action_dense_matrix,
            sorted_action_features,
            action_normalization_parameters,
            "action_norm",
            False,
        )
        parameters.extend(extra_parameters)

    state_action_normalized = "state_action_normalized"
    state_action_normalized_dim = "state_action_normalized_dim"
    net.Concat(
        [state_normalized_dense_matrix, action_normalized_dense_matrix],
        [state_action_normalized, state_action_normalized_dim],
        axis=1,
    )
    net.Copy([state_action_normalized], [qnet_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)

    net.AppendNet(torch_predict_net)

    extra_parameters, _q_values = RLPredictor._forward_pass(
        model, trainer, state_action_normalized, ["Q"], qnet_output_blob
    )
    parameters.extend(extra_parameters)

    flat_q_values_key = (
        "output/string_weighted_multi_categorical_features.values.values"
    )
    q_value_count = C2.Size(flat_q_values_key)
    num_examples, _ = C2.Reshape(q_value_count, shape=[1])
    q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

    # Get 1 x n (number of examples) action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    best_action = C2.ArgMax(q_value_blob)
    C2.net().FlattenToVec([best_action], [max_q_act_idxs])
    max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

    # Get 1 x n (number of examples) action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    sampled_action = C2.FlattenToVec(softmax_act_idxs_nested)
    softmax_act_blob = C2.Tile(sampled_action, num_examples, axis=0)

    # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
    # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_blob, to=caffe2_pb2.TensorProto.INT64)
    softmax_act_blob = C2.Cast(softmax_act_blob, to=caffe2_pb2.TensorProto.INT64)
    max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
    softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob, shape=[1, -1])
    C2.net().Append(
        [max_q_act_blob_nested, softmax_act_blob_nested], [max_q_act_blob_nested]
    )
    transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)

    output_values = "output/int_single_categorical_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
    C2.net().Copy([flat_transposed_action_idxs], [output_values])

    output_lengths = "output/int_single_categorical_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [flat_q_values_key],
        [output_lengths],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    output_keys = "output/int_single_categorical_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    max_q_key = C2.ConstantFill(
        shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64
    )
    softmax_key = C2.ConstantFill(
        shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64
    )
    output_keys_tensor, _ = C2.Concat(max_q_key, softmax_key, axis=0)
    output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
    C2.net().FlattenToVec([output_key_tile], [output_keys])

    workspace.CreateNet(net)
    return ParametricDQNPredictor(net, torch_init_net, parameters, int_features)
def preprocess_samples(self, samples: Samples, minibatch_size: int) -> List[TrainingDataPage]:
    """Shuffle ``samples``, normalize them and cut them into minibatches.

    States and next states are normalized with ``self.normalization``;
    actions, next actions and possible next actions with
    ``self.normalization_action``.  All normalization runs in a single caffe2
    net named ``gridworld_preprocessing``.

    :param samples Samples container; it is shuffled in place first
    :param minibatch_size number of rows per returned TrainingDataPage
    :return list of TrainingDataPage; a trailing batch with fewer than
        ``minibatch_size`` rows is dropped
    """
    samples.shuffle()

    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)
    preprocessor = PreprocessorNet(True)

    def normalize(sparse, parameters, blob_name):
        # Adds the normalization ops for one sparse matrix to the net and
        # hands back only the output blob.
        matrix, _ = preprocessor.normalize_sparse_matrix(
            sparse.lengths,
            sparse.keys,
            sparse.values,
            parameters,
            blob_name,
            False,
            False,
        )
        return matrix

    state_matrix = normalize(
        StackedAssociativeArray.from_dict_list(samples.states, "states"),
        self.normalization,
        "state_norm",
    )
    next_state_matrix = normalize(
        StackedAssociativeArray.from_dict_list(samples.next_states, "next_states"),
        self.normalization,
        "next_state_norm",
    )
    action_matrix = normalize(
        StackedAssociativeArray.from_dict_list(samples.actions, "action"),
        self.normalization_action,
        "action_norm",
    )
    next_action_matrix = normalize(
        StackedAssociativeArray.from_dict_list(samples.next_actions, "next_action"),
        self.normalization_action,
        "next_action_norm",
    )

    propensities = np.array(samples.propensities, dtype=np.float32).reshape(-1, 1)
    rewards = np.array(samples.rewards, dtype=np.float32).reshape(-1, 1)

    # Flatten the per-sample lists of possible next actions, remembering how
    # many belong to each sample.
    pnas_lengths_list = []
    pnas_flat: List[List[str]] = []
    for sample_pnas in samples.possible_next_actions:
        pnas_lengths_list.append(len(sample_pnas))
        pnas_flat.extend(sample_pnas)
    pnas_sparse = StackedAssociativeArray.from_dict_list(
        pnas_flat, "possible_next_actions"
    )

    pnas_lengths = np.array(pnas_lengths_list, dtype=np.int32)
    pna_lens_blob = "pna_lens_blob"
    workspace.FeedBlob(pna_lens_blob, pnas_lengths)

    possible_next_actions_matrix = normalize(
        pnas_sparse, self.normalization_action, "possible_next_action_norm"
    )

    state_pnas_blob = preprocessor.concat_states_and_possible_actions(
        next_state_matrix, possible_next_actions_matrix, pna_lens_blob
    )

    workspace.RunNetOnce(net)

    states_ndarray = workspace.FetchBlob(state_matrix)
    actions_ndarray = workspace.FetchBlob(action_matrix)
    next_states_ndarray = workspace.FetchBlob(next_state_matrix)
    next_actions_ndarray = workspace.FetchBlob(next_action_matrix)
    possible_next_actions_ndarray = workspace.FetchBlob(possible_next_actions_matrix)
    next_state_pnas_concat = workspace.FetchBlob(state_pnas_blob)
    time_diffs = np.ones(len(states_ndarray))

    episode_values = None
    if samples.reward_timelines is not None:
        # Discounted sum of every reward timeline, one value per sample.
        episode_values = np.zeros(rewards.shape, dtype=np.float32)
        for row, reward_timeline in enumerate(samples.reward_timelines):
            for time_diff, reward in reward_timeline.items():
                episode_values[row, 0] += reward * (DISCOUNT**time_diff)

    total_rows = states_ndarray.shape[0]
    tdps = []
    pnas_start = 0
    for start in range(0, total_rows, minibatch_size):
        end = start + minibatch_size
        if end > total_rows:
            # Incomplete trailing batch: drop it.
            break
        rows = slice(start, end)
        batch_pnas_lengths = pnas_lengths[rows]

        pnas_end = pnas_start + np.sum(batch_pnas_lengths)
        batch_pnas = possible_next_actions_ndarray[pnas_start:pnas_end]
        pnas_start = pnas_end

        if episode_values is not None:
            batch_episode_values = episode_values[rows]
        else:
            batch_episode_values = None

        # NOTE(review): next_state_pnas_concat is handed to every page in
        # full rather than sliced to this batch -- confirm this is intended.
        page = TrainingDataPage(
            states=states_ndarray[rows],
            actions=actions_ndarray[rows],
            propensities=propensities[rows],
            rewards=rewards[rows],
            next_states=next_states_ndarray[rows],
            next_actions=next_actions_ndarray[rows],
            possible_next_actions=StackedArray(pnas_lengths[rows], batch_pnas),
            not_terminals=(pnas_lengths[rows] > 0).reshape(-1, 1),
            episode_values=batch_episode_values,
            time_diffs=time_diffs[rows],
            possible_next_actions_lengths=batch_pnas_lengths,
            next_state_pnas_concat=next_state_pnas_concat,
        )
        tdps.append(page)
    return tdps
def preprocess_samples(
    self,
    states: List[Dict[int, float]],
    actions: List[Dict[int, float]],
    rewards: List[float],
    next_states: List[Dict[int, float]],
    next_actions: List[Dict[int, float]],
    is_terminals: List[bool],
    possible_next_actions: List[List[Dict[int, float]]],
    reward_timelines: List[Dict[int, float]],
    minibatch_size: int,
) -> List[TrainingDataPage]:
    """Shuffle the transitions, normalize them and cut them into minibatches.

    The eight parallel input lists are shuffled together with
    ``random.shuffle``.  States and next states are normalized with
    ``self.normalization``; actions, next actions and possible next actions
    with ``self.normalization_action``.  Terminal status in the returned pages
    is derived from the number of possible next actions, not from
    ``is_terminals``.

    :param minibatch_size number of rows per returned TrainingDataPage
    :return list of TrainingDataPage; a trailing batch with fewer than
        ``minibatch_size`` rows is dropped
    """
    # Shuffle
    transitions = list(
        zip(
            states,
            actions,
            rewards,
            next_states,
            next_actions,
            is_terminals,
            possible_next_actions,
            reward_timelines,
        )
    )
    random.shuffle(transitions)
    (
        states,
        actions,
        rewards,
        next_states,
        next_actions,
        is_terminals,
        possible_next_actions,
        reward_timelines,
    ) = zip(*transitions)

    net = core.Net('gridworld_preprocessing')
    C2.set_net(net)
    preprocessor = PreprocessorNet(net, True)

    def normalize(sparse, parameters, blob_name):
        # Adds the normalization ops for one sparse matrix to the net and
        # hands back only the output blob.
        matrix, _ = preprocessor.normalize_sparse_matrix(
            sparse.lengths, sparse.keys, sparse.values, parameters, blob_name
        )
        return matrix

    state_matrix = normalize(
        StackedAssociativeArray.from_dict_list(states, 'states'),
        self.normalization,
        'state_norm',
    )
    next_state_matrix = normalize(
        StackedAssociativeArray.from_dict_list(next_states, 'next_states'),
        self.normalization,
        'next_state_norm',
    )
    action_matrix = normalize(
        StackedAssociativeArray.from_dict_list(actions, 'action'),
        self.normalization_action,
        'action_norm',
    )
    next_action_matrix = normalize(
        StackedAssociativeArray.from_dict_list(next_actions, 'next_action'),
        self.normalization_action,
        'next_action_norm',
    )

    rewards = np.array(rewards, dtype=np.float32).reshape(-1, 1)

    # Flatten the per-sample lists of possible next actions, remembering how
    # many belong to each sample.
    pnas_lengths_list = []
    pnas_flat = []
    for sample_pnas in possible_next_actions:
        pnas_lengths_list.append(len(sample_pnas))
        pnas_flat.extend(sample_pnas)
    pnas_sparse = StackedAssociativeArray.from_dict_list(
        pnas_flat, 'possible_next_actions'
    )
    pnas_lengths = np.array(pnas_lengths_list, dtype=np.int32)
    possible_next_actions_matrix = normalize(
        pnas_sparse, self.normalization_action, 'possible_next_action_norm'
    )

    workspace.RunNetOnce(net)

    states_ndarray = workspace.FetchBlob(state_matrix)
    actions_ndarray = workspace.FetchBlob(action_matrix)
    next_states_ndarray = workspace.FetchBlob(next_state_matrix)
    next_actions_ndarray = workspace.FetchBlob(next_action_matrix)
    possible_next_actions_ndarray = workspace.FetchBlob(possible_next_actions_matrix)

    total_rows = states_ndarray.shape[0]
    tdps = []
    pnas_start = 0
    for start in range(0, total_rows, minibatch_size):
        end = start + minibatch_size
        if end > total_rows:
            # Incomplete trailing batch: drop it.
            break
        rows = slice(start, end)

        pnas_end = pnas_start + np.sum(pnas_lengths[rows])
        batch_pnas = possible_next_actions_ndarray[pnas_start:pnas_end]
        pnas_start = pnas_end

        if reward_timelines:
            batch_reward_timelines = reward_timelines[rows]
        else:
            batch_reward_timelines = None

        page = TrainingDataPage(
            states=states_ndarray[rows],
            actions=actions_ndarray[rows],
            rewards=rewards[rows],
            next_states=next_states_ndarray[rows],
            next_actions=next_actions_ndarray[rows],
            possible_next_actions=StackedArray(pnas_lengths[rows], batch_pnas),
            not_terminals=(pnas_lengths[rows] > 0).reshape(-1, 1),
            reward_timelines=batch_reward_timelines,
        )
        tdps.append(page)
    return tdps
def test_prepare_normalization_and_normalize(self):
    """Identify normalization parameters from data, then normalize that data.

    First checks that the feature type identified for every feature matches
    ``id_to_type`` (with Box-Cox parameters present only for BOXCOX).  Then
    runs the dense matrix of all features through
    ``PreprocessorNet.normalize_dense_matrix`` and applies a per-type sanity
    check to each feature's normalized column(s).
    """
    feature_value_map = read_data()

    normalization_parameters = {
        name: normalization.identify_parameter(
            name, values, 10, feature_type=self._feature_type_override(name)
        )
        for name, values in feature_value_map.items()
    }

    for feature_id, parameter in normalization_parameters.items():
        expected_type = id_to_type(feature_id)
        if expected_type == CONTINUOUS:
            self.assertEqual(parameter.feature_type, CONTINUOUS)
            self.assertIs(parameter.boxcox_lambda, None)
            self.assertIs(parameter.boxcox_shift, None)
        elif expected_type == BOXCOX:
            self.assertEqual(parameter.feature_type, BOXCOX)
            self.assertIsNot(parameter.boxcox_lambda, None)
            self.assertIsNot(parameter.boxcox_shift, None)
        else:
            assert parameter.feature_type == expected_type

    sorted_features, _ = sort_features_by_normalization(normalization_parameters)

    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()

    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)
    for column, feature in enumerate(sorted_features):
        input_matrix[:, column] = feature_value_map[feature]

    input_matrix_blob = "input_matrix_blob"
    # Empty placeholder so the blob exists while the net is being built.
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    workspace.RunNetOnce(norm_net)
    normalized_feature_matrix = workspace.FetchBlob(output_blob)

    # Split the output back into per-feature column groups: an ENUM feature
    # spans one column per possible value, every other type a single column.
    normalized_features = {}
    on_column = 0
    for feature in sorted_features:
        norm = normalization_parameters[feature]
        column_size = len(norm.possible_values) if norm.feature_type == ENUM else 1
        next_column = on_column + column_size
        normalized_features[feature] = normalized_feature_matrix[
            :, on_column:next_column
        ]
        on_column = next_column

    finite_flags = [
        np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
        for parameter in normalization_parameters.values()
    ]
    self.assertTrue(all(finite_flags))

    for k, v in six.iteritems(normalized_features):
        self.assertTrue(np.all(np.isfinite(v)))
        parameter = normalization_parameters[k]
        feature_type = parameter.feature_type

        if feature_type == identify_types.PROBABILITY:
            sigmoidv = special.expit(v)
            strictly_inside = np.logical_and(
                np.greater(sigmoidv, 0), np.less(sigmoidv, 1)
            )
            self.assertTrue(np.all(strictly_inside))
        elif feature_type == identify_types.ENUM:
            possible_values = parameter.possible_values
            self.assertEqual(v.shape[0], len(feature_value_map[k]))
            self.assertEqual(v.shape[1], len(possible_values))

            possible_value_map = {
                value: index for index, value in enumerate(possible_values)
            }
            for i, row in enumerate(v):
                original_feature = feature_value_map[k][i]
                hot_column = np.where(row == 1)[0][0]
                self.assertEqual(possible_value_map[original_feature], hot_column)
        elif feature_type == identify_types.QUANTILE:
            # NOTE(review): v[0] is the first row of this feature's slice, so
            # this loop may only cover a single sample -- confirm intent.
            for i, feature in enumerate(v[0]):
                original_feature = feature_value_map[k][i]
                expected = NumpyFeatureProcessor.value_to_quantile(
                    original_feature, parameter.quantiles
                )
                self.assertAlmostEqual(feature, expected, 2)
        elif feature_type == identify_types.BINARY:
            pass
        elif feature_type in (identify_types.CONTINUOUS, identify_types.BOXCOX):
            sample_stddev = np.std(v, ddof=1)
            one_stddev = np.isclose(sample_stddev, 1, atol=0.01)
            zero_stddev = np.isclose(sample_stddev, 0, atol=0.01)
            zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
            self.assertTrue(
                np.all(zero_mean),
                "mean of feature {} is {}, not 0".format(k, np.mean(v)),
            )
            self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
        elif feature_type == identify_types.CONTINUOUS_ACTION:
            less_than_max = v < 1
            more_than_min = v > -1
            self.assertTrue(
                np.all(less_than_max),
                "values are not less than 1: {}".format(v[~less_than_max]),
            )
            self.assertTrue(
                np.all(more_than_min),
                "values are not more than -1: {}".format(v[~more_than_min]),
            )
        else:
            raise NotImplementedError()
def export_actor(
    cls,
    trainer,
    state_normalization_parameters,
    action_feature_ids,
    min_action_range_tensor_serving,
    max_action_range_tensor_serving,
    model_on_gpu=False,
):
    """Export the caffe2 preprocessor net and the pytorch actor forward pass
    as one caffe2 net.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_feature_ids feature ids fed (as int64) into the blob that
        is tiled to produce the output keys
    :param min_action_range_tensor_serving pytorch tensor that specifies
        min action value for each dimension
    :param max_action_range_tensor_serving pytorch tensor that specifies
        max action value for each dimension
    :param model_on_gpu boolean indicating if the model is a GPU model or
        CPU model
    """
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)
    parameters: List[str] = []

    # Placeholder inputs so the blobs exist while the net is being built.
    placeholder_inputs = (
        ("input/float_features.lengths", np.int32),
        ("input/float_features.keys", np.int64),
        ("input/float_features.values", np.float32),
    )
    for blob_name, blob_dtype in placeholder_inputs:
        workspace.FeedBlob(blob_name, np.zeros(1, dtype=blob_dtype))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"
    copies = (
        ("input/float_features.lengths", input_feature_lengths),
        ("input/float_features.keys", input_feature_keys),
        ("input/float_features.values", input_feature_values),
    )
    for source_blob, target_blob in copies:
        C2.net().Copy([source_blob], [target_blob])

    # Sparse input -> dense matrix -> normalized dense matrix.
    preprocessor = PreprocessorNet()
    sparse_to_dense_processor = Caffe2SparseToDenseProcessor()
    sorted_features, _ = sort_features_by_normalization(
        state_normalization_parameters
    )
    sparse_input = StackedAssociativeArray(
        input_feature_lengths, input_feature_keys, input_feature_values
    )
    state_dense_matrix, dense_params = sparse_to_dense_processor(
        sorted_features, sparse_input
    )
    parameters.extend(dense_params)
    state_normalized_dense_matrix, norm_params = preprocessor.normalize_dense_matrix(
        state_dense_matrix,
        sorted_features,
        state_normalization_parameters,
        "state_norm",
        False,
    )
    parameters.extend(norm_params)

    train_net_outputs = DDPGPredictor.generate_train_net(
        trainer,
        model,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        model_on_gpu,
    )
    (
        torch_init_net,
        torch_predict_net,
        train_params,
        actor_input_blob,
        actor_output_blob,
        min_action_training_blob,
        max_action_training_blob,
        min_action_serving_blob,
        max_action_serving_blob,
    ) = train_net_outputs
    parameters.extend(train_params)

    # Wire the normalized state into the actor and append its forward pass.
    net.Copy([state_normalized_dense_matrix], [actor_input_blob])
    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)
    net.AppendNet(torch_predict_net)

    # Scale actors actions from [-1, 1] to serving range
    training_range = C2.Sub(max_action_training_blob, min_action_training_blob)
    serving_range = C2.Sub(max_action_serving_blob, min_action_serving_blob)
    shifted_actions = C2.Sub(actor_output_blob, min_action_training_blob)
    unit_actions = C2.Div(shifted_actions, training_range)
    rescaled_actions = C2.Mul(unit_actions, serving_range)
    scaled_for_serving_actions = C2.Add(rescaled_actions, min_action_serving_blob)

    # Lengths: filled with the width of the actor's last layer, shaped like
    # the flattened ArgMax of the actor output.
    output_lengths = "output/float_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    lengths_shape_source = C2.FlattenToVec(C2.ArgMax(actor_output_blob))
    C2.net().ConstantFill(
        [lengths_shape_source],
        [output_lengths],
        value=trainer.actor.layers[-1].out_features,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    action_feature_ids_blob = C2.NextBlob("action_feature_ids")
    workspace.FeedBlob(
        action_feature_ids_blob, np.array(action_feature_ids, dtype=np.int64)
    )
    parameters.append(action_feature_ids_blob)

    # Keys: the action feature ids tiled by the size of the lengths input.
    output_keys = "output/float_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    num_examples, _ = C2.Reshape(C2.Size("input/float_features.lengths"), shape=[1])
    C2.net().Tile([action_feature_ids_blob, num_examples], [output_keys], axis=0)

    # Values: the serving-scaled actions, flattened.
    output_values = "output/float_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
    C2.net().FlattenToVec([scaled_for_serving_actions], [output_values])

    workspace.CreateNet(net)
    return DDPGPredictor(net, torch_init_net, parameters)
def export(cls, trainer, actions, state_normalization_parameters, int_features=False):
    """Build a DiscreteActionPredictor from a DiscreteActionTrainer.

    :param trainer DiscreteActionTrainer
    :param actions list of action names
    :param state_normalization_parameters state NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    """
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Placeholder inputs so the blobs exist while the net is being built.
    workspace.FeedBlob("input/image", np.zeros([1, 1, 1, 1], dtype=np.int32))
    for blob_name, blob_dtype in (
        ("input/float_features.lengths", np.int32),
        ("input/float_features.keys", np.int64),
        ("input/float_features.values", np.float32),
    ):
        workspace.FeedBlob(blob_name, np.zeros(1, dtype=blob_dtype))

    input_feature_lengths = C2.NextBlob("input_feature_lengths")
    input_feature_keys = C2.NextBlob("input_feature_keys")
    input_feature_values = C2.NextBlob("input_feature_values")
    merged_inputs = [input_feature_lengths, input_feature_keys, input_feature_values]

    if not int_features:
        float_inputs = (
            "input/float_features.lengths",
            "input/float_features.keys",
            "input/float_features.values",
        )
        for source_blob, target_blob in zip(float_inputs, merged_inputs):
            C2.net().Copy([source_blob], [target_blob])
    else:
        for blob_name, blob_dtype in (
            ("input/int_features.lengths", np.int32),
            ("input/int_features.keys", np.int64),
            ("input/int_features.values", np.int32),
        ):
            workspace.FeedBlob(blob_name, np.zeros(1, dtype=blob_dtype))
        # Int values are cast to float, then merged with the float features.
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            merged_inputs,
        )

    parameters = []
    if state_normalization_parameters is None:
        # Image input.  Note: Currently this does the wrong thing if
        # more than one image is passed at a time.
        normalized_dense_matrix = "input/image"
    else:
        preprocessor = PreprocessorNet(True)
        normalized_dense_matrix, norm_params = preprocessor.normalize_sparse_matrix(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            state_normalization_parameters,
            "state_norm",
            False,
            False,
        )
        parameters.extend(norm_params)

    forward_params, q_values = RLPredictor._forward_pass(
        model, trainer, normalized_dense_matrix, actions
    )
    parameters.extend(forward_params)

    # Get 1 x n action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
    shape_of_num_of_states = "num_states_shape"
    C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
    num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

    # Get 1 x n action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_idxs = "softmax_policy_actions"
    C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))

    # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
    # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
    int32 = caffe2_pb2.TensorProto.INT32
    int64 = caffe2_pb2.TensorProto.INT64
    max_q_act_blob = C2.Cast(max_q_act_idxs, to=int32)
    softmax_act_blob = C2.Cast(softmax_act_idxs, to=int32)
    C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
    transposed_action_idxs = C2.Transpose(max_q_act_blob)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)

    # Values: action names gathered by the interleaved action indices.
    output_values = "output/string_single_categorical_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
    C2.net().Gather([action_names, flat_transposed_action_idxs], [output_values])

    # Lengths: constant 2 per entry of the flattened max-q index vector.
    output_lengths = "output/string_single_categorical_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [shape_of_num_of_states], [output_lengths], value=2, dtype=int32
    )

    # Keys: the pair [0, 1] tiled num_states times and flattened.
    output_keys = "output/string_single_categorical_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    key_zero = C2.ConstantFill(shape=[1, 1], value=0, dtype=int64)
    key_one = C2.ConstantFill(shape=[1, 1], value=1, dtype=int64)
    output_keys_tensor, _ = C2.Concat(key_zero, key_one, axis=0)
    output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
    C2.net().FlattenToVec([output_key_tile], [output_keys])

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    return DiscreteActionPredictor(net, parameters, int_features)
def preprocess_samples_discrete(
    self, samples: Samples, minibatch_size: int
) -> List[TrainingDataPage]:
    """Shuffle ``samples``, normalize their states, and slice the result
    into full-size minibatches.

    :param samples Samples to convert; shuffled in place via
        ``samples.shuffle()``
    :param minibatch_size number of rows in every returned page
    :return list of TrainingDataPage, each holding exactly
        ``minibatch_size`` rows; a trailing partial batch is dropped
    """
    samples.shuffle()

    # Normalize states and next states through one preprocessing net.
    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)
    preprocessor = PreprocessorNet(True)
    saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
    state_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "state_norm",
        False,
        False,
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
    next_state_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "next_state_norm",
        False,
        False,
    )
    workspace.RunNetOnce(net)

    # One-hot encode the taken actions.
    actions_one_hot = np.zeros(
        [len(samples.actions), len(self.ACTIONS)], dtype=np.float32
    )
    for i, action in enumerate(samples.actions):
        actions_one_hot[i, self.action_to_index(action)] = 1

    rewards = np.array(samples.rewards, dtype=np.float32).reshape(-1, 1)
    propensities = np.array(samples.propensities, dtype=np.float32).reshape(-1, 1)

    # One-hot encode the next actions; an empty action name leaves the row
    # all zeros.
    next_actions_one_hot = np.zeros(
        [len(samples.next_actions), len(self.ACTIONS)], dtype=np.float32
    )
    for i, action in enumerate(samples.next_actions):
        if action == "":
            continue
        next_actions_one_hot[i, self.action_to_index(action)] = 1

    # Multi-hot mask of the actions available in each next state.
    possible_next_actions_mask = []
    for pna in samples.possible_next_actions:
        pna_mask = [0] * self.num_actions
        for action in pna:
            pna_mask[self.action_to_index(action)] = 1
        possible_next_actions_mask.append(pna_mask)
    possible_next_actions_mask = np.array(
        possible_next_actions_mask, dtype=np.float32
    )

    # The ``np.bool`` alias was removed in NumPy 1.24; builtin ``bool``
    # selects the same dtype on every NumPy version.
    is_terminals = np.array(samples.is_terminal, dtype=bool).reshape(-1, 1)
    not_terminals = np.logical_not(is_terminals)

    # Discounted sum of each sample's reward timeline, when timelines exist.
    episode_values = None
    if samples.reward_timelines is not None:
        episode_values = np.zeros(rewards.shape, dtype=np.float32)
        for i, reward_timeline in enumerate(samples.reward_timelines):
            for time_diff, reward in reward_timeline.items():
                episode_values[i, 0] += reward * (DISCOUNT ** time_diff)

    states_ndarray = workspace.FetchBlob(state_matrix)
    next_states_ndarray = workspace.FetchBlob(next_state_matrix)
    time_diffs = np.ones(len(states_ndarray))

    tdps = []
    for start in range(0, states_ndarray.shape[0], minibatch_size):
        end = start + minibatch_size
        if end > states_ndarray.shape[0]:
            # Only full minibatches are emitted.
            break
        tdps.append(
            TrainingDataPage(
                states=states_ndarray[start:end],
                actions=actions_one_hot[start:end],
                propensities=propensities[start:end],
                rewards=rewards[start:end],
                next_states=next_states_ndarray[start:end],
                not_terminals=not_terminals[start:end],
                next_actions=next_actions_one_hot[start:end],
                possible_next_actions=possible_next_actions_mask[start:end],
                episode_values=episode_values[start:end]
                if episode_values is not None
                else None,
                time_diffs=time_diffs[start:end],
            )
        )
    return tdps
def export_critic(cls, trainer, state_normalization_parameters,
                  action_normalization_parameters, int_features=False):
    """Export caffe2 preprocessor net and pytorch critic forward pass as one
    caffe2 net.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_normalization_parameters action NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    :return DDPGPredictor built from the combined net, the collected
        parameter blob names and ``int_features``
    """
    # NOTE(review): this counts one input column per feature; confirm that
    # matches the critic's input width when a feature normalizes to more
    # than one column.
    input_dim = \
        len(state_normalization_parameters) + len(action_normalization_parameters)
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        trainer.critic, input_dim)
    critic_input_blob, critic_output_blob, caffe2_netdef = \
        PytorchCaffe2Converter.buffer_to_caffe2_netdef(buffer)
    torch_workspace = caffe2_netdef.workspace

    # Copy every blob of the converted model into the current workspace and
    # track it as a predictor parameter.
    parameters = []
    for blob_str in torch_workspace.Blobs():
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
        parameters.append(blob_str)

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Placeholder inputs so the blobs exist while the net is being built.
    workspace.FeedBlob('input/float_features.lengths', np.zeros(1, dtype=np.int32))
    workspace.FeedBlob('input/float_features.keys', np.zeros(1, dtype=np.int64))
    workspace.FeedBlob('input/float_features.values', np.zeros(1, dtype=np.float32))

    input_feature_lengths = 'input_feature_lengths'
    input_feature_keys = 'input_feature_keys'
    input_feature_values = 'input_feature_values'

    if int_features:
        workspace.FeedBlob('input/int_features.lengths', np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/int_features.keys', np.zeros(1, dtype=np.int64))
        workspace.FeedBlob('input/int_features.values', np.zeros(1, dtype=np.int32))
        # Int values are cast to float, then merged with the float features.
        C2.net().Cast(['input/int_features.values'],
                      ['input/int_features.values_float'],
                      dtype=caffe2_pb2.TensorProto.FLOAT)
        C2.net().MergeMultiScalarFeatureTensors([
            'input/float_features.lengths',
            'input/float_features.keys',
            'input/float_features.values',
            'input/int_features.lengths',
            'input/int_features.keys',
            'input/int_features.values_float'
        ], [
            input_feature_lengths, input_feature_keys, input_feature_values
        ])
    else:
        C2.net().Copy(['input/float_features.lengths'], [input_feature_lengths])
        C2.net().Copy(['input/float_features.keys'], [input_feature_keys])
        C2.net().Copy(['input/float_features.values'], [input_feature_values])

    preprocessor = PreprocessorNet(net, True)
    parameters.extend(preprocessor.parameters)

    # State and action features are normalized separately from the same
    # sparse input, then concatenated column-wise as the critic input.
    state_normalized_dense_matrix, new_parameters = \
        preprocessor.normalize_sparse_matrix(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            state_normalization_parameters,
            'state_norm',
        )
    parameters.extend(new_parameters)
    action_normalized_dense_matrix, new_parameters = \
        preprocessor.normalize_sparse_matrix(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            action_normalization_parameters,
            'action_norm',
        )
    parameters.extend(new_parameters)

    state_action_normalized = 'state_action_normalized'
    state_action_normalized_dim = 'state_action_normalized_dim'
    net.Concat(
        [state_normalized_dense_matrix, action_normalized_dense_matrix],
        [state_action_normalized, state_action_normalized_dim],
        axis=1)
    net.Copy([state_action_normalized], [critic_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)

    net.AppendNet(torch_init_net)
    net.AppendNet(torch_predict_net)

    # The flattened ArgMax of the critic output is built once, here, as the
    # shape source for the lengths fill (a duplicate whose result was
    # discarded used to precede this statement).
    output_lengths = 'output/float_features.lengths'
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill([C2.FlattenToVec(C2.ArgMax(critic_output_blob))],
                          [output_lengths],
                          value=trainer.critic.layers[-1].out_features,
                          dtype=caffe2_pb2.TensorProto.INT32)

    output_keys = 'output/float_features.keys'
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int32))
    C2.net().LengthsRangeFill([output_lengths], [output_keys])

    output_values = 'output/float_features.values'
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
    C2.net().FlattenToVec([critic_output_blob], [output_values])

    workspace.CreateNet(net)
    return DDPGPredictor(net, parameters, int_features)
def from_trainers(cls, trainer, state_features, action_features,
                  state_normalization_parameters,
                  action_normalization_parameters):
    """Build a predictor whose net densifies, normalizes and concatenates
    the state and action features before the trainer's Q-value network.

    :param trainer trainer whose ``build_predictor`` adds the Q network
    :param state_features list of state feature names
    :param action_features list of action feature names
    :param state_normalization_parameters state NormalizationParameters
    :param action_normalization_parameters action NormalizationParameters
    """
    # ensure state and action IDs have no intersection
    assert not (set(state_features) & set(action_features))

    # Action entries are applied after state entries, as in a merged dict.
    normalization_parameters = dict(state_normalization_parameters)
    normalization_parameters.update(action_normalization_parameters)

    features = state_features + action_features
    feature_mask = [int(name) for name in features]

    inputs = [
        'input/float_features.lengths',
        'input/float_features.keys',
        'input/float_features.values',
    ]
    # Placeholder inputs so the blobs exist while the net is being built.
    for blob_name, blob_dtype in zip(inputs, (np.int32, np.int64, np.float32)):
        workspace.FeedBlob(blob_name, np.zeros(1, dtype=blob_dtype))

    model = model_helper.ModelHelper(name="predictor")
    net = model.net

    # Densify the sparse input; absent features receive MISSING_VALUE.
    dense_input = net.NextBlob('dense_input')
    default_input_value = net.NextBlob('default_input_value')
    net.GivenTensorFill(
        [], [default_input_value], shape=[], values=[MISSING_VALUE]
    )
    net.SparseToDenseMask(
        [
            'input/float_features.keys',
            'input/float_features.values',
            default_input_value,
            'input/float_features.lengths',
        ],
        [dense_input],
        mask=feature_mask,
    )

    # One single-column slice per feature, stored in a blob named after it.
    for column, name in enumerate(features):
        net.Slice(
            [dense_input], [name], starts=[0, column], ends=[-1, column + 1]
        )

    normalizer = PreprocessorNet(net, True)
    parameters = list(normalizer.parameters[:])
    zero = "ZERO_from_trainers"
    workspace.FeedBlob(zero, np.array(0))
    parameters.append(zero)

    normalized_input_blobs = []
    for name in features:
        normalized_blob, blob_parameters = normalizer.preprocess_blob(
            name, normalization_parameters[name]
        )
        parameters.extend(blob_parameters)
        normalized_input_blobs.append(normalized_blob)

    concatenated_input_blob = "PredictorInput"
    output_dim = "PredictorOutputDim"
    for position, blob in enumerate(normalized_input_blobs):
        logger.info("input# {}: {}".format(position, blob))
    net.Concat(
        normalized_input_blobs, [concatenated_input_blob, output_dim], axis=1
    )
    net.NanCheck(concatenated_input_blob, concatenated_input_blob)

    q_lengths = "output/string_weighted_multi_categorical_features.values.lengths"
    workspace.FeedBlob(q_lengths, np.array([1], dtype=np.int32))
    q_keys = "output/string_weighted_multi_categorical_features.values.keys"
    workspace.FeedBlob(q_keys, np.array(['a']))

    q_values_matrix = net.NextBlob('q_values_matrix')
    trainer.build_predictor(model, concatenated_input_blob, q_values_matrix)
    parameters.extend(model.GetAllParams())

    # Flattened Q values; every other output blob is a constant fill shaped
    # like them.
    q_values = 'output/string_weighted_multi_categorical_features.values.values'
    workspace.FeedBlob(q_values, np.array([1.0]))
    net.FlattenToVec([q_values_matrix], [q_values])
    net.ConstantFill(
        [q_values], [q_keys], value="Q", dtype=caffe2_pb2.TensorProto.STRING
    )
    net.ConstantFill(
        [q_values], [q_lengths], value=1, dtype=caffe2_pb2.TensorProto.INT32
    )

    q_feature_lengths = "output/string_weighted_multi_categorical_features.lengths"
    workspace.FeedBlob(q_feature_lengths, np.array([1], dtype=np.int32))
    net.ConstantFill(
        [q_values],
        [q_feature_lengths],
        value=1,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    q_feature_keys = "output/string_weighted_multi_categorical_features.keys"
    workspace.FeedBlob(q_feature_keys, np.array([0], dtype=np.int64))
    net.ConstantFill(
        [q_values],
        [q_feature_keys],
        value=0,
        dtype=caffe2_pb2.TensorProto.INT64,
    )

    output_blobs = [q_feature_lengths, q_feature_keys, q_lengths, q_keys, q_values]

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    return cls(net, inputs, output_blobs, parameters, workspace.CurrentWorkspace())
def export(
    cls,
    trainer,
    actions,
    state_normalization_parameters,
    int_features=False,
    model_on_gpu=False,
    set_missing_value_to_zero=False,
):
    """Export caffe2 preprocessor net and pytorch DQN forward pass as one
    caffe2 net.

    :param trainer DQNTrainer
    :param actions action names; fed into the "action_names" blob that the
        chosen action indices are gathered from
    :param state_normalization_parameters state NormalizationParameters, or
        None to pass the "input/image" blob to the network as-is
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or
        CPU model
    :param set_missing_value_to_zero forwarded to ``sparse_to_dense``
    :return DQNPredictor built from the combined net, the torch init net,
        the collected parameter blob names and ``int_features``
    """
    input_dim = trainer.num_features

    # Unwrap DataParallel so the underlying module is what gets converted.
    q_network = (
        trainer.q_network.module
        if isinstance(trainer.q_network, DataParallel)
        else trainer.q_network
    )

    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        q_network, input_dim, model_on_gpu
    )
    qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace

    # Copy every blob of the converted model into the current workspace.
    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    logger.info("Generated ONNX predict net:")
    logger.info(str(torch_predict_net.Proto()))
    # While converting to metanetdef, the external_input of predict_net
    # will be recomputed. Add the real output of init_net to parameters
    # to make sure they will be counted.
    # Sorted so the appended order does not depend on set iteration order
    # and is therefore identical from run to run.
    parameters.extend(
        sorted(
            set(caffe2_netdef.init_net.external_output)
            - set(caffe2_netdef.init_net.external_input)
        )
    )

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Placeholder inputs so the blobs exist while the net is being built.
    workspace.FeedBlob("input/image", np.zeros([1, 1, 1, 1], dtype=np.int32))
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        # Int values are cast to float, then merged with the float features.
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    if state_normalization_parameters is not None:
        # Sparse input -> dense matrix -> normalized dense matrix.
        sorted_feature_ids = sort_features_by_normalization(
            state_normalization_parameters
        )[0]
        dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_feature_ids,
            set_missing_value_to_zero=set_missing_value_to_zero,
        )
        parameters.extend(new_parameters)
        preprocessor_net = PreprocessorNet()
        state_normalized_dense_matrix, new_parameters = preprocessor_net.normalize_dense_matrix(
            dense_matrix,
            sorted_feature_ids,
            state_normalization_parameters,
            "state_norm_",
            True,
        )
        parameters.extend(new_parameters)
    else:
        # Image input.  Note: Currently this does the wrong thing if
        # more than one image is passed at a time.
        state_normalized_dense_matrix = "input/image"

    # Wire the preprocessed state into the Q network and append its
    # forward pass.
    net.Copy([state_normalized_dense_matrix], [qnet_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)

    net.AppendNet(torch_predict_net)

    new_parameters, q_values = RLPredictor._forward_pass(
        model, trainer, state_normalized_dense_matrix, actions, qnet_output_blob
    )
    parameters.extend(new_parameters)

    # Get 1 x n action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
    shape_of_num_of_states = "num_states_shape"
    C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
    num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

    # Get 1 x n action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_idxs = "softmax_policy_actions"
    C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))

    # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
    # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    softmax_act_blob = C2.Cast(softmax_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
    transposed_action_idxs = C2.Transpose(max_q_act_blob)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)

    # Values: action names gathered by the interleaved action indices.
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_VALS_NAME, np.zeros(1, dtype=np.int64))
    C2.net().Gather(
        [action_names, flat_transposed_action_idxs], [OUTPUT_SINGLE_CAT_VALS_NAME]
    )

    # Lengths: constant 2 per entry of the flattened max-q index vector.
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_LENGTHS_NAME, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [shape_of_num_of_states],
        [OUTPUT_SINGLE_CAT_LENGTHS_NAME],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    # Keys: the pair [0, 1] tiled num_states times and flattened.
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_KEYS_NAME, np.zeros(1, dtype=np.int64))
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
    C2.net().FlattenToVec([output_key_tile], [OUTPUT_SINGLE_CAT_KEYS_NAME])

    workspace.CreateNet(net)
    return DQNPredictor(net, torch_init_net, parameters, int_features)
def export(
    cls,
    trainer,
    actions,
    state_normalization_parameters,
    model_on_gpu=False,
    set_missing_value_to_zero=False,
):
    """Export caffe2 preprocessor net and pytorch DQN forward pass as one
    caffe2 net.

    :param trainer DQNTrainer
    :param actions action names; fed into the "action_names" blob that the
        chosen action indices are gathered from
    :param state_normalization_parameters state NormalizationParameters, or
        None to pass the "input/image" blob to the network as-is
    :param model_on_gpu boolean indicating if the model is a GPU model or
        CPU model
    :param set_missing_value_to_zero forwarded to the sparse-to-dense
        processor
    :return DQNPredictor built from the combined net, the torch init net
        and the collected parameter blob names
    """
    input_dim = trainer.num_features

    # Unwrap DataParallel so the underlying module is what gets converted.
    q_network = (
        trainer.q_network.module
        if isinstance(trainer.q_network, DataParallel)
        else trainer.q_network
    )

    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        q_network, input_dim, model_on_gpu
    )
    qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace

    # Copy every blob of the converted model into the current workspace.
    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

    # Remove the input blob from parameters since it's not a real
    # input (will be calculated by preprocessor)
    parameters.remove(qnet_input_blob)

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    logger.info("Generated ONNX predict net:")
    logger.info(str(torch_predict_net.Proto()))
    # While converting to metanetdef, the external_input of predict_net
    # will be recomputed. Add the real output of init_net to parameters
    # to make sure they will be counted.
    # Sorted so the appended order does not depend on set iteration order
    # and is therefore identical from run to run.
    parameters.extend(
        sorted(
            set(caffe2_netdef.init_net.external_output)
            - set(caffe2_netdef.init_net.external_input)
        )
    )

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Placeholder inputs so the blobs exist while the net is being built.
    workspace.FeedBlob("input/image", np.zeros([1, 1, 1, 1], dtype=np.int32))
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
    C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
    C2.net().Copy(["input/float_features.values"], [input_feature_values])

    if state_normalization_parameters is not None:
        # Sparse input -> dense matrix -> normalized dense matrix.
        sorted_feature_ids = sort_features_by_normalization(
            state_normalization_parameters
        )[0]
        sparse_to_dense_processor = Caffe2SparseToDenseProcessor()
        dense_matrix, new_parameters = sparse_to_dense_processor(
            sorted_feature_ids,
            StackedAssociativeArray(
                input_feature_lengths, input_feature_keys, input_feature_values
            ),
            set_missing_value_to_zero=set_missing_value_to_zero,
        )
        parameters.extend(new_parameters)
        preprocessor_net = PreprocessorNet()
        state_normalized_dense_matrix, new_parameters = preprocessor_net.normalize_dense_matrix(
            dense_matrix,
            sorted_feature_ids,
            state_normalization_parameters,
            "state_norm_",
            True,
        )
        parameters.extend(new_parameters)
    else:
        # Image input.  Note: Currently this does the wrong thing if
        # more than one image is passed at a time.
        state_normalized_dense_matrix = "input/image"

    # Wire the preprocessed state into the Q network and append its
    # forward pass.
    net.Copy([state_normalized_dense_matrix], [qnet_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)

    net.AppendNet(torch_predict_net)

    new_parameters, q_values = RLPredictor._forward_pass(
        model, trainer, state_normalized_dense_matrix, actions, qnet_output_blob
    )
    parameters.extend(new_parameters)

    # Get 1 x n action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
    shape_of_num_of_states = "num_states_shape"
    C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
    num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

    # Get 1 x n action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_idxs = "softmax_policy_actions"
    C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))

    # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
    # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    softmax_act_blob = C2.Cast(softmax_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
    transposed_action_idxs = C2.Transpose(max_q_act_blob)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)

    # Values: action names gathered by the interleaved action indices.
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_VALS_NAME, np.zeros(1, dtype=np.int64))
    C2.net().Gather(
        [action_names, flat_transposed_action_idxs], [OUTPUT_SINGLE_CAT_VALS_NAME]
    )

    # Lengths: constant 2 per entry of the flattened max-q index vector.
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_LENGTHS_NAME, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [shape_of_num_of_states],
        [OUTPUT_SINGLE_CAT_LENGTHS_NAME],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    # Keys: the pair [0, 1] tiled num_states times and flattened.
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_KEYS_NAME, np.zeros(1, dtype=np.int64))
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
    C2.net().FlattenToVec([output_key_tile], [OUTPUT_SINGLE_CAT_KEYS_NAME])

    workspace.CreateNet(net)
    return DQNPredictor(net, torch_init_net, parameters)
def export(
    cls,
    trainer,
    state_normalization_parameters,
    action_normalization_parameters,
    int_features=False,
):
    """
    Creates a ContinuousActionDQNPredictor from a ContinuousActionDQNTrainer.

    Builds a caffe2 "predictor" net that merges the float (and optionally
    int) sparse feature inputs, normalizes the state and the action features
    from that merged input, concatenates them, runs the trainer's forward
    pass and writes the max-Q and softmax-sampled action indices to the
    ``output/int_single_categorical_features.*`` blobs.

    :param trainer ContinuousActionDQNTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_normalization_parameters action NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    :return ContinuousActionDQNPredictor built from the net, the collected
        parameter blob names and ``int_features``
    """
    # ensure state and action IDs have no intersection
    assert (
        len(
            set(state_normalization_parameters.keys())
            & set(action_normalization_parameters.keys())
        )
        == 0
    )

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Placeholder inputs so the blobs exist while the net is being built.
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob("input/int_features.lengths", np.zeros(1, dtype=np.int32))
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        # Int values are cast to float so both feature sets can be merged.
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    preprocessor = PreprocessorNet(net, True)
    parameters = []
    parameters.extend(preprocessor.parameters)

    # State and action features are both read from the same merged input.
    state_normalized_dense_matrix, new_parameters = preprocessor.normalize_sparse_matrix(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        state_normalization_parameters,
        "state_norm",
    )
    parameters.extend(new_parameters)
    action_normalized_dense_matrix, new_parameters = preprocessor.normalize_sparse_matrix(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        action_normalization_parameters,
        "action_norm",
    )
    parameters.extend(new_parameters)

    state_action_normalized = "state_action_normalized"
    state_action_normalized_dim = "state_action_normalized_dim"
    net.Concat(
        [state_normalized_dense_matrix, action_normalized_dense_matrix],
        [state_action_normalized, state_action_normalized_dim],
        axis=1,
    )

    # The returned q-values handle is not used here; the q-values are read
    # back below through the named output blob instead.
    new_parameters, _ = RLPredictor._forward_pass(
        model, trainer, state_action_normalized, ["Q"]
    )
    parameters.extend(new_parameters)

    flat_q_values_key = (
        "output/string_weighted_multi_categorical_features.values.values"
    )
    num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
    q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

    # Get 1 x n (number of examples) action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
    max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

    # Get 1 x n (number of examples) action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    # Divide by the blob that was actually fed and registered above; the
    # name returned by NextBlob is not guaranteed to be the bare prefix.
    tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_blob = C2.Tile(
        C2.FlattenToVec(softmax_act_idxs_nested), num_examples, axis=0
    )

    # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
    # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_blob, to=caffe2_pb2.TensorProto.INT64)
    softmax_act_blob = C2.Cast(softmax_act_blob, to=caffe2_pb2.TensorProto.INT64)
    max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
    softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob, shape=[1, -1])
    C2.net().Append(
        [max_q_act_blob_nested, softmax_act_blob_nested], [max_q_act_blob_nested]
    )
    transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)

    output_values = "output/int_single_categorical_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
    C2.net().Copy([flat_transposed_action_idxs], [output_values])

    # Every entry of the lengths output is 2 (one max-Q and one softmax action).
    output_lengths = "output/int_single_categorical_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [flat_q_values_key],
        [output_lengths],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    # Keys are the pair [0, 1] tiled num_examples times and flattened.
    output_keys = "output/int_single_categorical_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
    C2.net().FlattenToVec([output_key_tile], [output_keys])

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    return ContinuousActionDQNPredictor(net, parameters, int_features)
def benchmark(num_forward_passes):
    """
    Benchmark preprocessor speeds:
        1 - PyTorch
        2 - PyTorch -> ONNX -> C2
        3 - C2

    Each variant runs ``num_forward_passes`` forward passes over the same
    10000-row input matrix and logs the elapsed wall-clock time.

    :param num_forward_passes number of forward passes timed per variant
    """
    feature_value_map = gen_data(
        num_binary_features=10,
        num_boxcox_features=10,
        num_continuous_features=10,
        num_enum_features=10,
        num_prob_features=10,
        num_quantile_features=10,
    )

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            name, values, 10
        )

    sorted_features, _ = sort_features_by_normalization(normalization_parameters)

    # Dummy input
    input_matrix = np.zeros([10000, len(sorted_features)], dtype=np.float32)

    # PyTorch Preprocessor
    pytorch_preprocessor = Preprocessor(normalization_parameters, False)
    for i, feature in enumerate(sorted_features):
        input_matrix[:, i] = feature_value_map[feature]

    #################### time pytorch ############################
    start = time.time()
    for _ in range(num_forward_passes):
        _ = pytorch_preprocessor.forward(input_matrix)
    end = time.time()
    logger.info(
        "PyTorch: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    ################ time pytorch -> ONNX -> caffe2 ####################
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        pytorch_preprocessor, len(sorted_features), False
    )
    input_blob, output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    # Copy the converted model's blobs into the global workspace.
    torch_workspace = caffe2_netdef.workspace
    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    workspace.FeedBlob(input_blob, input_matrix)
    workspace.RunNetOnce(torch_init_net)
    start = time.time()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(torch_predict_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.time()
    logger.info(
        "PyTorch -> ONNX -> Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )

    #################### time caffe2 ############################
    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet()
    input_matrix_blob = "input_matrix_blob"
    # Feed an empty placeholder while the net is built, then the real data.
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, sorted_features, normalization_parameters, "", False
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    start = time.time()
    for _ in range(num_forward_passes):
        workspace.RunNetOnce(norm_net)
        _ = workspace.FetchBlob(output_blob)
    end = time.time()
    logger.info(
        "Caffe2: {} forward passes done in {} seconds".format(
            num_forward_passes, end - start
        )
    )
def export(
    cls,
    trainer,
    actions,
    state_normalization_parameters,
    int_features=False,
    model_on_gpu=False,
):
    """Combine the caffe2 preprocessor and the pytorch DQN into one caffe2 net.

    The trainer's q_network is converted through PytorchCaffe2Converter, its
    blobs are copied into the global workspace, and the converted predict net
    is appended after the input-merging / normalization ops. The net then
    emits, per state, the max-Q action and a softmax-sampled action through
    the OUTPUT_SINGLE_CAT_* blobs.

    :param trainer DQNTrainer
    :param actions action names; stored in a parameter blob and gathered by
        the selected action indices
    :param state_normalization_parameters state NormalizationParameters, or
        None to read the network input from the "input/image" blob
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    :return DQNPredictor built from the net, the parameter blob names and
        ``int_features``
    """
    feature_count = trainer.num_features
    onnx_buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        trainer.q_network, feature_count, model_on_gpu
    )
    (
        qnet_in_blob,
        qnet_out_blob,
        converted,
    ) = PytorchCaffe2Converter.buffer_to_caffe2_netdef(onnx_buffer)

    # Mirror every blob of the converted model into the global workspace.
    converted_ws = converted.workspace
    parameters = converted_ws.Blobs()
    for name in parameters:
        workspace.FeedBlob(name, converted_ws.FetchBlob(name))

    torch_init_net = core.Net(converted.init_net)
    torch_predict_net = core.Net(converted.predict_net)

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Placeholder inputs so the blobs exist while the net is being built.
    for blob_name, stub in (
        ("input/image", np.zeros([1, 1, 1, 1], dtype=np.int32)),
        ("input/float_features.lengths", np.zeros(1, dtype=np.int32)),
        ("input/float_features.keys", np.zeros(1, dtype=np.int64)),
        ("input/float_features.values", np.zeros(1, dtype=np.float32)),
    ):
        workspace.FeedBlob(blob_name, stub)

    merged_lengths = "input_feature_lengths"
    merged_keys = "input_feature_keys"
    merged_values = "input_feature_values"

    if int_features:
        for blob_name, stub in (
            ("input/int_features.lengths", np.zeros(1, dtype=np.int32)),
            ("input/int_features.keys", np.zeros(1, dtype=np.int64)),
            ("input/int_features.values", np.zeros(1, dtype=np.int32)),
        ):
            workspace.FeedBlob(blob_name, stub)
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        merge_sources = [
            "input/float_features.lengths",
            "input/float_features.keys",
            "input/float_features.values",
            "input/int_features.lengths",
            "input/int_features.keys",
            "input/int_features.values_float",
        ]
        C2.net().MergeMultiScalarFeatureTensors(
            merge_sources, [merged_lengths, merged_keys, merged_values]
        )
    else:
        for source, target in (
            ("input/float_features.lengths", merged_lengths),
            ("input/float_features.keys", merged_keys),
            ("input/float_features.values", merged_values),
        ):
            C2.net().Copy([source], [target])

    if state_normalization_parameters is None:
        # Image input. Note: Currently this does the wrong thing if
        # more than one image is passed at a time.
        network_input = "input/image"
    else:
        preprocessor = PreprocessorNet(clip_anomalies=True)
        network_input, norm_parameters = preprocessor.normalize_sparse_matrix(
            merged_lengths,
            merged_keys,
            merged_values,
            state_normalization_parameters,
            blobname_prefix="state_norm",
            split_sparse_to_dense=False,
            split_expensive_feature_groups=False,
        )
        parameters.extend(norm_parameters)

    # Wire the (normalized) state into the converted network and append it.
    net.Copy([network_input], [qnet_in_blob])
    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)
    net.AppendNet(torch_predict_net)

    forward_parameters, q_values = RLPredictor._forward_pass(
        model, trainer, network_input, actions, qnet_out_blob
    )
    parameters.extend(forward_parameters)

    # Get 1 x n action index tensor under the max_q policy
    greedy_idxs = "max_q_policy_actions"
    best_action = C2.ArgMax(q_values)
    C2.net().Flatten([best_action], [greedy_idxs], axis=0)
    states_shape_blob = "num_states_shape"
    C2.net().FlattenToVec([greedy_idxs], [states_shape_blob])
    state_count = C2.Size(states_shape_blob)
    num_states, _ = C2.Reshape(state_count, shape=[1])

    # Get 1 x n action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    temperature_value = np.array([trainer.rl_temperature], dtype=np.float32)
    workspace.FeedBlob(temperature, temperature_value)
    scaled_q_values = C2.Div(q_values, temperature, broadcast=1)
    action_probabilities = C2.Softmax(scaled_q_values)
    sampled_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([action_probabilities], [sampled_idxs_nested])
    sampled_idxs = "softmax_policy_actions"
    C2.net().Flatten([sampled_idxs_nested], [sampled_idxs], axis=0)

    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))

    # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
    # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
    greedy_int32 = C2.Cast(greedy_idxs, to=caffe2_pb2.TensorProto.INT32)
    sampled_int32 = C2.Cast(sampled_idxs, to=caffe2_pb2.TensorProto.INT32)
    C2.net().Append([greedy_int32, sampled_int32], [greedy_int32])
    interleaved_idxs = C2.FlattenToVec(C2.Transpose(greedy_int32))

    workspace.FeedBlob(OUTPUT_SINGLE_CAT_VALS_NAME, np.zeros(1, dtype=np.int64))
    C2.net().Gather(
        [action_names, interleaved_idxs], [OUTPUT_SINGLE_CAT_VALS_NAME]
    )

    # Every entry of the lengths output is 2 (max-Q action + softmax action).
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_LENGTHS_NAME, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [states_shape_blob],
        [OUTPUT_SINGLE_CAT_LENGTHS_NAME],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    # Keys are the pair [0, 1] tiled num_states times and flattened.
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_KEYS_NAME, np.zeros(1, dtype=np.int64))
    key_zero = C2.ConstantFill(
        shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64
    )
    key_one = C2.ConstantFill(
        shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64
    )
    key_pair, _ = C2.Concat(key_zero, key_one, axis=0)
    tiled_keys = C2.Tile(key_pair, num_states, axis=0)
    C2.net().FlattenToVec([tiled_keys], [OUTPUT_SINGLE_CAT_KEYS_NAME])

    workspace.CreateNet(net)
    return DQNPredictor(net, parameters, int_features)
def test_preprocessing_network(self):
    """Check PreprocessorNet.preprocess_blob against NumpyFeatureProcessor.

    Each feature is preprocessed through its own blob in a caffe2 net and the
    fetched result must be close to the numpy reference for that feature.
    """
    feature_value_map = read_data()

    normalization_parameters = {
        name: normalization.identify_parameter(
            name, values, feature_type=self._feature_type_override(name)
        )
        for name, values in feature_value_map.items()
    }
    expected_by_feature = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters
    )

    net = core.Net("PreprocessingTestNet")
    C2.set_net(net)
    preprocessor = PreprocessorNet()

    # Build one preprocessing sub-graph per feature, fed by a placeholder blob.
    output_blob_by_feature = {}
    for feature_name in feature_value_map:
        blob_name = str(feature_name)
        workspace.FeedBlob(blob_name, np.array([0], dtype=np.int32))
        output_blob, _ = preprocessor.preprocess_blob(
            blob_name, [normalization_parameters[feature_name]]
        )
        output_blob_by_feature[feature_name] = output_blob

    workspace.CreateNet(net)

    # Replace the placeholders with the real values as column vectors.
    for feature_name, raw_values in feature_value_map.items():
        workspace.FeedBlob(str(feature_name), np.expand_dims(raw_values, -1))
    workspace.RunNetOnce(net)

    for feature_name in feature_value_map:
        actual = workspace.FetchBlob(output_blob_by_feature[feature_name])
        if feature_name != ENUM_FEATURE_ID:
            actual = np.squeeze(actual, -1)
        expected = expected_by_feature[feature_name]

        # At the limit, boxcox has some numerical instability
        tolerance = 0.5 if feature_name == BOXCOX_FEATURE_ID else 0.01

        close_mask = np.isclose(actual, expected, rtol=tolerance, atol=tolerance)
        mismatch_idx = np.where(np.logical_not(close_mask))
        self.assertTrue(
            np.all(close_mask),
            "{} does not match: {} {}".format(
                feature_name,
                actual[mismatch_idx].tolist(),
                expected[mismatch_idx].tolist(),
            ),
        )
def test_prepare_normalization_and_normalize(self):
    """Identify normalization parameters, normalize via caffe2, check results.

    First checks the identified feature type of every feature, then runs
    PreprocessorNet.normalize_dense_matrix over the full input matrix and
    verifies the normalized columns per feature type.
    """
    features, feature_value_map = preprocessing_util.read_data()

    normalization_parameters = {}
    for name, values in feature_value_map.items():
        normalization_parameters[name] = normalization.identify_parameter(
            values, 10
        )
    for k, v in normalization_parameters.items():
        if k == CONTINUOUS:
            self.assertEqual(v.feature_type, CONTINUOUS)
            self.assertIs(v.boxcox_lambda, None)
            self.assertIs(v.boxcox_shift, None)
        elif k == BOXCOX:
            self.assertEqual(v.feature_type, BOXCOX)
            self.assertIsNot(v.boxcox_lambda, None)
            self.assertIsNot(v.boxcox_shift, None)
        else:
            # The previous expression ended in `+ "_2" + k`, a non-empty
            # string that is always truthy, so the check could never fail.
            # Comparing the suffixed type against the name is the evident
            # intent.
            self.assertTrue(
                v.feature_type == k or v.feature_type + "_2" == k,
                "feature {} identified as {}".format(k, v.feature_type),
            )

    norm_net = core.Net("net")
    C2.set_net(norm_net)
    preprocessor = PreprocessorNet(norm_net, False)
    input_matrix = np.zeros([10000, len(features)], dtype=np.float32)
    for i, feature in enumerate(features):
        input_matrix[:, i] = feature_value_map[feature]
    input_matrix_blob = "input_matrix_blob"
    # Feed an empty placeholder while the net is built, then the real data.
    workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
    output_blob, _ = preprocessor.normalize_dense_matrix(
        input_matrix_blob, features, normalization_parameters, ""
    )
    workspace.FeedBlob(input_matrix_blob, input_matrix)
    workspace.RunNetOnce(norm_net)
    normalized_feature_matrix = workspace.FetchBlob(output_blob)

    # Slice the output back into per-feature blocks: an ENUM feature takes
    # one column per possible value, every other feature takes one column.
    normalized_features = {}
    on_column = 0
    for feature in features:
        norm = normalization_parameters[feature]
        if norm.feature_type == ENUM:
            column_size = len(norm.possible_values)
        else:
            column_size = 1
        normalized_features[feature] = normalized_feature_matrix[
            :, on_column:(on_column + column_size)
        ]
        on_column += column_size

    self.assertTrue(
        all(
            np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
            for parameter in normalization_parameters.values()
        )
    )
    for k, v in six.iteritems(normalized_features):
        self.assertTrue(np.all(np.isfinite(v)))
        feature_type = normalization_parameters[k].feature_type
        if feature_type == identify_types.PROBABILITY:
            sigmoidv = special.expit(v)
            self.assertTrue(
                np.all(
                    np.logical_and(np.greater(sigmoidv, 0), np.less(sigmoidv, 1))
                )
            )
        elif feature_type == identify_types.ENUM:
            possible_values = normalization_parameters[k].possible_values
            self.assertEqual(v.shape[0], len(feature_value_map[k]))
            self.assertEqual(v.shape[1], len(possible_values))

            possible_value_map = {}
            for i, possible_value in enumerate(possible_values):
                possible_value_map[possible_value] = i

            # The one-hot position of each row must match the original value.
            for i, row in enumerate(v):
                original_feature = feature_value_map[k][i]
                self.assertEqual(
                    possible_value_map[original_feature],
                    np.where(row == 1)[0][0],
                )
        elif feature_type == identify_types.QUANTILE:
            # NOTE(review): v[0] is the first row of a one-column slice, so
            # this loop appears to check only the first sample; confirm
            # whether v[:, 0] was intended.
            for i, feature in enumerate(v[0]):
                original_feature = feature_value_map[k][i]
                expected = self._value_to_quantile(
                    original_feature, normalization_parameters[k].quantiles
                )
                self.assertAlmostEqual(feature, expected, 2)
        elif feature_type == identify_types.BINARY:
            pass
        elif (
            feature_type == identify_types.CONTINUOUS
            or feature_type == identify_types.BOXCOX
        ):
            one_stddev = np.isclose(np.std(v, ddof=1), 1, atol=0.01)
            zero_stddev = np.isclose(np.std(v, ddof=1), 0, atol=0.01)
            zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
            self.assertTrue(
                np.all(zero_mean),
                "mean of feature {} is {}, not 0".format(k, np.mean(v)),
            )
            self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
        else:
            raise NotImplementedError()
def export_actor(
    cls,
    trainer,
    state_normalization_parameters,
    action_feature_ids,
    min_action_range_tensor_serving,
    max_action_range_tensor_serving,
    int_features=False,
    model_on_gpu=False,
):
    """Export caffe2 preprocessor net and pytorch actor forward pass as one
    caffe2 net.

    The net merges the sparse inputs, densifies and normalizes the state,
    runs the converted actor, rescales its output from the training action
    range to the serving action range and writes the result to the
    ``output/float_features.*`` blobs.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_feature_ids ids stored in a parameter blob and tiled into
        ``output/float_features.keys``
    :param min_action_range_tensor_serving pytorch tensor that specifies
        min action value for each dimension
    :param max_action_range_tensor_serving pytorch tensor that specifies
        max action value for each dimension
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    :return DDPGPredictor built from the net, the torch init net, the
        parameter blob names and ``int_features``
    """
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)
    parameters: List[str] = []

    # Placeholder inputs so the blobs exist while the net is being built.
    for blob_name, stub in (
        ("input/float_features.lengths", np.zeros(1, dtype=np.int32)),
        ("input/float_features.keys", np.zeros(1, dtype=np.int64)),
        ("input/float_features.values", np.zeros(1, dtype=np.float32)),
    ):
        workspace.FeedBlob(blob_name, stub)

    merged_lengths = "input_feature_lengths"
    merged_keys = "input_feature_keys"
    merged_values = "input_feature_values"

    if int_features:
        for blob_name, stub in (
            ("input/int_features.lengths", np.zeros(1, dtype=np.int32)),
            ("input/int_features.keys", np.zeros(1, dtype=np.int64)),
            ("input/int_features.values", np.zeros(1, dtype=np.int32)),
        ):
            workspace.FeedBlob(blob_name, stub)
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        merge_sources = [
            "input/float_features.lengths",
            "input/float_features.keys",
            "input/float_features.values",
            "input/int_features.lengths",
            "input/int_features.keys",
            "input/int_features.values_float",
        ]
        C2.net().MergeMultiScalarFeatureTensors(
            merge_sources, [merged_lengths, merged_keys, merged_values]
        )
    else:
        for source, target in (
            ("input/float_features.lengths", merged_lengths),
            ("input/float_features.keys", merged_keys),
            ("input/float_features.values", merged_values),
        ):
            C2.net().Copy([source], [target])

    preprocessor = PreprocessorNet()
    sorted_features, _ = sort_features_by_normalization(
        state_normalization_parameters
    )
    dense_state, dense_parameters = sparse_to_dense(
        merged_lengths, merged_keys, merged_values, sorted_features
    )
    parameters.extend(dense_parameters)
    normalized_state, norm_parameters = preprocessor.normalize_dense_matrix(
        dense_state,
        sorted_features,
        state_normalization_parameters,
        "state_norm",
        False,
    )
    parameters.extend(norm_parameters)

    (
        torch_init_net,
        torch_predict_net,
        train_net_parameters,
        actor_input_blob,
        actor_output_blob,
        min_action_training_blob,
        max_action_training_blob,
        min_action_serving_blob,
        max_action_serving_blob,
    ) = DDPGPredictor.generate_train_net(
        trainer,
        model,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        model_on_gpu,
    )
    parameters.extend(train_net_parameters)

    # Wire the normalized state into the converted actor and append it.
    net.Copy([normalized_state], [actor_input_blob])
    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)
    net.AppendNet(torch_predict_net)

    # Rescale the actor output from the training range to the serving range:
    # (output - train_min) / train_span * serving_span + serving_min
    training_span = C2.Sub(max_action_training_blob, min_action_training_blob)
    serving_span = C2.Sub(max_action_serving_blob, min_action_serving_blob)
    offset_from_min = C2.Sub(actor_output_blob, min_action_training_blob)
    unit_scaled = C2.Div(offset_from_min, training_span)
    serving_scaled = C2.Mul(unit_scaled, serving_span)
    scaled_for_serving_actions = C2.Add(serving_scaled, min_action_serving_blob)

    # Every entry of the lengths output is the actor's last layer width.
    output_lengths = "output/float_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    flat_argmax = C2.FlattenToVec(C2.ArgMax(actor_output_blob))
    C2.net().ConstantFill(
        [flat_argmax],
        [output_lengths],
        value=trainer.actor.layers[-1].out_features,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    action_feature_ids_blob = C2.NextBlob("action_feature_ids")
    workspace.FeedBlob(
        action_feature_ids_blob, np.array(action_feature_ids, dtype=np.int64)
    )
    parameters.append(action_feature_ids_blob)

    # Keys are the action feature ids tiled by the number of input examples.
    output_keys = "output/float_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    example_count = C2.Size("input/float_features.lengths")
    num_examples, _ = C2.Reshape(example_count, shape=[1])
    C2.net().Tile([action_feature_ids_blob, num_examples], [output_keys], axis=1)

    output_values = "output/float_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
    C2.net().FlattenToVec([scaled_for_serving_actions], [output_values])

    workspace.CreateNet(net)
    return DDPGPredictor(net, torch_init_net, parameters, int_features)
def preprocess_samples_discrete(
    self,
    states: List[Dict[int, float]],
    actions: List[str],
    rewards: List[float],
    next_states: List[Dict[int, float]],
    next_actions: List[str],
    is_terminals: List[bool],
    possible_next_actions: List[List[str]],
    reward_timelines: Optional[List[Dict[int, float]]],
    minibatch_size: int,
) -> List[TrainingDataPage]:
    """Shuffle the samples, normalize the states and split into minibatches.

    All per-sample sequences are shuffled with one shared permutation, states
    and next states are normalized through a caffe2 PreprocessorNet using
    ``self.normalization``, and actions are one-hot encoded by their position
    in ``self.ACTIONS``.

    :param states sparse state features per sample
    :param actions action name per sample
    :param rewards reward per sample
    :param next_states sparse next-state features per sample
    :param next_actions next action name per sample; '' leaves the one-hot
        row all zero
    :param is_terminals terminal flag per sample
    :param possible_next_actions action names allowed in the next state
    :param reward_timelines optional per-sample reward timelines
    :param minibatch_size number of samples per TrainingDataPage
    :return list of full-size TrainingDataPages; trailing samples that do
        not fill a whole minibatch are dropped
    """
    # Shuffle
    columns = [
        states,
        actions,
        rewards,
        next_states,
        next_actions,
        is_terminals,
        possible_next_actions,
    ]
    if reward_timelines is not None:
        columns.append(reward_timelines)
    merged = list(zip(*columns))
    random.shuffle(merged)
    shuffled = list(zip(*merged))
    (
        states,
        actions,
        rewards,
        next_states,
        next_actions,
        is_terminals,
        possible_next_actions,
    ) = shuffled[:7]
    if reward_timelines is not None:
        reward_timelines = shuffled[7]

    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)
    preprocessor = PreprocessorNet(net, True)
    saa = StackedAssociativeArray.from_dict_list(states, "states")
    state_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "state_norm",
    )
    saa = StackedAssociativeArray.from_dict_list(next_states, "next_states")
    next_state_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "next_state_norm",
    )
    workspace.RunNetOnce(net)

    actions_one_hot = np.zeros(
        [len(actions), len(self.ACTIONS)], dtype=np.float32
    )
    for i, action in enumerate(actions):
        actions_one_hot[i, self.ACTIONS.index(action)] = 1
    rewards = np.array(rewards, dtype=np.float32).reshape(-1, 1)

    next_actions_one_hot = np.zeros(
        [len(next_actions), len(self.ACTIONS)], dtype=np.float32
    )
    for i, action in enumerate(next_actions):
        if action == "":
            # No next action: keep the row all zero.
            continue
        next_actions_one_hot[i, self.ACTIONS.index(action)] = 1

    possible_next_actions_mask = []
    for pna in possible_next_actions:
        pna_mask = [0] * self.num_actions
        for action in pna:
            pna_mask[self.ACTIONS.index(action)] = 1
        possible_next_actions_mask.append(pna_mask)
    possible_next_actions_mask = np.array(
        possible_next_actions_mask, dtype=np.float32
    )

    # Builtin bool/object replace the np.bool/np.object aliases, which were
    # removed in NumPy 1.24; the resulting dtypes are the same.
    is_terminals = np.array(is_terminals, dtype=bool).reshape(-1, 1)
    not_terminals = np.logical_not(is_terminals)
    if reward_timelines is not None:
        reward_timelines = np.array(reward_timelines, dtype=object)

    states_ndarray = workspace.FetchBlob(state_matrix)
    next_states_ndarray = workspace.FetchBlob(next_state_matrix)

    tdps = []
    for start in range(0, states_ndarray.shape[0], minibatch_size):
        end = start + minibatch_size
        if end > states_ndarray.shape[0]:
            # Incomplete final minibatch is discarded.
            break
        tdps.append(
            TrainingDataPage(
                states=states_ndarray[start:end],
                actions=actions_one_hot[start:end],
                rewards=rewards[start:end],
                next_states=next_states_ndarray[start:end],
                not_terminals=not_terminals[start:end],
                next_actions=next_actions_one_hot[start:end],
                possible_next_actions=possible_next_actions_mask[start:end],
                reward_timelines=reward_timelines[start:end]
                if reward_timelines is not None
                else None,
            )
        )
    return tdps