def _sum_deterministic_policy(self, model_names, path):
    net = core.Net('DeterministicPolicy')
    C2.set_net(net)
    output = 'ActionProbabilities'
    workspace.FeedBlob(output, np.array([1.0]))
    model_outputs = []
    for model in model_names:
        model_output = '{}_Output'.format(model)
        workspace.FeedBlob(model_output, np.array([1.0], dtype=np.float32))
        model_outputs.append(model_output)
    max_action = C2.FlattenToVec(
        C2.ArgMax(C2.Transpose(C2.Sum(*model_outputs)))
    )
    one_blob = C2.NextBlob('one')
    workspace.FeedBlob(one_blob, np.array([1.0], dtype=np.float32))
    C2.net().SparseToDense(
        [max_action, one_blob, model_outputs[0]],
        [output],
    )
    meta = PredictorExportMeta(net, [one_blob], model_outputs, [output])
    save_to_db('minidb', path, meta)
def process(
    self,
    sorted_features: List[int],
    sparse_data: StackedAssociativeArray,
    set_missing_value_to_zero: bool = False,
) -> Tuple[str, List[str]]:
    lengths_blob = sparse_data.lengths
    keys_blob = sparse_data.keys
    values_blob = sparse_data.values
    MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
    missing_value = 0.0 if set_missing_value_to_zero else MISSING_VALUE
    workspace.FeedBlob(MISSING_SCALAR, np.array([missing_value], dtype=np.float32))
    C2.net().GivenTensorFill([], [MISSING_SCALAR], shape=[], values=[missing_value])
    parameters: List[str] = [MISSING_SCALAR]
    assert len(sorted_features) > 0, "Sorted features is empty"
    dense_input = C2.SparseToDenseMask(
        keys_blob, values_blob, MISSING_SCALAR, lengths_blob, mask=sorted_features
    )[0]
    return dense_input, parameters
def process(
    self, sparse_data: StackedAssociativeArray
) -> Tuple[str, str, List[str]]:
    lengths_blob = sparse_data.lengths
    keys_blob = sparse_data.keys
    values_blob = sparse_data.values
    MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
    missing_value = 0.0 if self.set_missing_value_to_zero else MISSING_VALUE
    workspace.FeedBlob(MISSING_SCALAR, np.array([missing_value], dtype=np.float32))
    C2.net().GivenTensorFill([], [MISSING_SCALAR], shape=[], values=[missing_value])
    parameters: List[str] = [MISSING_SCALAR]
    assert len(self.sorted_features) > 0, "Sorted features is empty"
    dense_input = C2.NextBlob("dense_input")
    dense_input_presence = C2.NextBlob("dense_input_presence")
    C2.net().SparseToDenseMask(
        [keys_blob, values_blob, MISSING_SCALAR, lengths_blob],
        [dense_input, dense_input_presence],
        mask=self.sorted_features,
        return_presence_mask=True,
    )
    if self.set_missing_value_to_zero:
        # When missing values are encoded as zero, recompute the mask by
        # testing whether each dense value lies within +/-1e-4 of zero.
        dense_input_presence = C2.And(
            C2.GT(dense_input, -1e-4, broadcast=1),
            C2.LT(dense_input, 1e-4, broadcast=1),
        )
    return dense_input, dense_input_presence, parameters
def normalize_dense_matrix(
    self,
    input_matrix: str,
    features: List[str],
    normalization_parameters: Dict[str, NormalizationParameters],
    blobname_prefix: str,
) -> Tuple[str, List[str]]:
    """
    Normalizes inputs according to parameters. Expects a dense matrix whose
    ith column corresponds to feature i.

    Note that the Caffe2 BatchBoxCox operator isn't implemented on CUDA GPU,
    so we need to use a CPU context.

    :param input_matrix: Input matrix to normalize.
    :param features: Array that maps feature ids to column indices.
    :param normalization_parameters: Mapping from feature names to
        NormalizationParameters.
    :param blobname_prefix: Prefix for input blobs to norm_net.
    """
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
        feature_starts = self._get_type_boundaries(
            features, normalization_parameters
        )
        normalized_input_blobs = []
        parameters: List[str] = []
        for i, feature_type in enumerate(FEATURE_TYPES):
            start_index = feature_starts[i]
            if (i + 1) == len(FEATURE_TYPES):
                end_index = len(normalization_parameters)
            else:
                end_index = feature_starts[i + 1]
            if start_index == end_index:
                continue  # No features of this type
            sliced_input_features = self._get_input_blob(
                blobname_prefix, feature_type
            )
            C2.net().Slice(
                [input_matrix],
                [sliced_input_features],
                starts=[0, start_index],
                ends=[-1, end_index],
            )
            normalized_input_blob, blob_parameters = self.preprocess_blob(
                sliced_input_features,
                [
                    normalization_parameters[x]
                    for x in features[start_index:end_index]
                ],
            )
            parameters.extend(blob_parameters)
            normalized_input_blobs.append(normalized_input_blob)
        for i, inp in enumerate(normalized_input_blobs):
            logger.info("input# {}: {}".format(i, inp))
        concatenated_input_blob, concatenated_input_blob_dim = C2.Concat(
            *normalized_input_blobs, axis=1
        )
        concatenated_input_blob = C2.NanCheck(concatenated_input_blob)
        return concatenated_input_blob, parameters
def _forward_pass(cls, model, trainer, normalized_dense_matrix, actions):
    C2.set_model(model)
    parameters = []
    q_values = "q_values"
    workspace.FeedBlob(q_values, np.zeros(1, dtype=np.float32))
    trainer.build_predictor(model, normalized_dense_matrix, q_values)
    parameters.extend(model.GetAllParams())

    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))
    action_range = C2.NextBlob("action_range")
    parameters.append(action_range)
    workspace.FeedBlob(action_range, np.array(list(range(len(actions)))))

    output_shape = C2.Shape(q_values)
    output_shape_row_count = C2.Slice(output_shape, starts=[0], ends=[1])
    output_row_shape = C2.Slice(q_values, starts=[0, 0], ends=[-1, 1])

    output_feature_keys = "output/string_weighted_multi_categorical_features.keys"
    workspace.FeedBlob(output_feature_keys, np.zeros(1, dtype=np.int64))
    output_feature_keys_matrix = C2.ConstantFill(
        output_row_shape, value=0, dtype=caffe2_pb2.TensorProto.INT64
    )
    # Note: sometimes we need to use an explicit output name, so we call
    # C2.net().Fn(...)
    C2.net().FlattenToVec([output_feature_keys_matrix], [output_feature_keys])

    output_feature_lengths = (
        "output/string_weighted_multi_categorical_features.lengths"
    )
    workspace.FeedBlob(output_feature_lengths, np.zeros(1, dtype=np.int32))
    output_feature_lengths_matrix = C2.ConstantFill(
        output_row_shape, value=1, dtype=caffe2_pb2.TensorProto.INT32
    )
    C2.net().FlattenToVec([output_feature_lengths_matrix], [output_feature_lengths])

    output_keys = "output/string_weighted_multi_categorical_features.values.keys"
    workspace.FeedBlob(output_keys, np.array(["a"]))
    C2.net().Tile([action_names, output_shape_row_count], [output_keys], axis=1)

    output_lengths_matrix = C2.ConstantFill(
        output_row_shape, value=len(actions), dtype=caffe2_pb2.TensorProto.INT32
    )
    output_lengths = (
        "output/string_weighted_multi_categorical_features.values.lengths"
    )
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().FlattenToVec([output_lengths_matrix], [output_lengths])

    output_values = (
        "output/string_weighted_multi_categorical_features.values.values"
    )
    workspace.FeedBlob(output_values, np.array([1.0]))
    C2.net().FlattenToVec([q_values], [output_values])
    return parameters, q_values
def _create_rl_train_net(self) -> None:
    self.rl_train_model = ModelHelper(name="rl_train_" + self.model_id)
    C2.set_model(self.rl_train_model)

    if self.reward_shape is not None:
        for action_index, boost in self.reward_shape.items():
            action_boost = C2.Mul(
                C2.Slice(
                    "actions", starts=[0, action_index], ends=[-1, action_index + 1]
                ),
                boost,
                broadcast=1,
            )
            C2.net().Sum(["rewards", action_boost], ["rewards"])

    if self.maxq_learning:
        next_q_values = self.get_max_q_values(
            "next_states", self.get_possible_next_actions(), True
        )
    else:
        next_q_values = self.get_q_values("next_states", "next_actions", True)

    discount_blob = C2.ConstantFill("time_diff", value=self.rl_discount_rate)
    if self.use_seq_num_diff_as_time_diff:
        time_diff_adjusted_discount_blob = C2.Pow(
            discount_blob, C2.Cast("time_diff", to=caffe2_pb2.TensorProto.FLOAT)
        )
    else:
        time_diff_adjusted_discount_blob = discount_blob

    q_vals_target = C2.Add(
        "rewards",
        C2.Mul(
            C2.Mul(
                C2.Cast(
                    "not_terminals", to=caffe2_pb2.TensorProto.FLOAT
                ),  # type: ignore
                time_diff_adjusted_discount_blob,
                broadcast=1,
            ),
            next_q_values,
        ),
    )

    self.update_model("states", "actions", q_vals_target)
    workspace.RunNetOnce(self.rl_train_model.param_init_net)
    self.rl_train_model.net.Proto().num_workers = (
        RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
    )
    self.rl_train_model.net.Proto().type = "async_scheduling"
    workspace.CreateNet(self.rl_train_model.net)
    C2.set_model(None)
def save_sum_deterministic_policy(model_names, path, db_type):
    net = core.Net("DeterministicPolicy")
    C2.set_net(net)
    output = "ActionProbabilities"
    workspace.FeedBlob(output, np.array([1.0]))
    model_outputs = []
    for model in model_names:
        model_output = "{}_Output".format(model)
        workspace.FeedBlob(model_output, np.array([[1.0]], dtype=np.float32))
        model_outputs.append(model_output)
    max_action = C2.FlattenToVec(C2.ArgMax(C2.Transpose(C2.Sum(*model_outputs))))
    one_blob = C2.NextBlob("one")
    workspace.FeedBlob(one_blob, np.array([1.0], dtype=np.float32))
    C2.net().SparseToDense([max_action, one_blob, model_outputs[0]], [output])
    meta = PredictorExportMeta(net, [one_blob], model_outputs, [output])
    save_to_db(db_type, path, meta)
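# A minimal usage sketch for save_sum_deterministic_policy (not from the
# original source). It assumes each "<name>_Output" blob is fed with a
# per-action score matrix by the serving stack, and that the caffe2
# workspace/C2 helpers used throughout this file are importable. The model
# names and path below are hypothetical.
def _example_save_sum_policy():
    save_sum_deterministic_policy(
        model_names=["model_a", "model_b"],  # hypothetical ensemble members
        path="/tmp/deterministic_policy.db",  # hypothetical output location
        db_type="minidb",
    )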
def _create_reward_train_net(self) -> None:
    self.reward_train_model = ModelHelper(name="reward_train_" + self.model_id)
    C2.set_model(self.reward_train_model)
    if self.reward_shape is not None:
        for action_index, boost in self.reward_shape.items():
            action_boost = C2.Mul(
                C2.Slice(
                    "actions", starts=[0, action_index], ends=[-1, action_index + 1]
                ),
                boost,
                broadcast=1,
            )
            C2.net().Sum(["rewards", action_boost], ["rewards"])
    self.update_model("states", "actions", "rewards")
    workspace.RunNetOnce(self.reward_train_model.param_init_net)
    workspace.CreateNet(self.reward_train_model.net)
    C2.set_model(None)
def sparse_to_dense(
    lengths_blob: str,
    keys_blob: str,
    values_blob: str,
    sorted_features: List[int],
) -> Tuple[str, List[str]]:
    MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
    workspace.FeedBlob(MISSING_SCALAR, np.array([MISSING_VALUE], dtype=np.float32))
    C2.net().GivenTensorFill([], [MISSING_SCALAR], shape=[], values=[MISSING_VALUE])
    parameters: List[str] = [MISSING_SCALAR]
    assert len(sorted_features) > 0, "Sorted features is empty"
    dense_input = C2.SparseToDenseMask(
        keys_blob, values_blob, MISSING_SCALAR, lengths_blob, mask=sorted_features
    )[0]
    return dense_input, parameters
def _create_rl_train_net(self) -> None:
    self.rl_train_model = ModelHelper(name="rl_train_" + self.model_id)
    C2.set_model(self.rl_train_model)

    if self.reward_shape is not None:
        for action_index, boost in self.reward_shape.items():
            action_boost = C2.Mul(
                C2.Slice(
                    'actions',
                    starts=[0, action_index],
                    ends=[-1, action_index + 1],
                ),
                boost,
                broadcast=1,
            )
            C2.net().Sum(['rewards', action_boost], ['rewards'])

    if self.maxq_learning:
        next_q_values = self.get_max_q_values(
            'next_states',
            self.get_possible_next_actions(),
            True,
        )
    else:
        next_q_values = self.get_q_values('next_states', 'next_actions', True)

    q_vals_target = C2.Add(
        'rewards',
        C2.Mul(
            C2.Mul(
                C2.Cast(
                    'not_terminals', to=caffe2_pb2.TensorProto.FLOAT
                ),  # type: ignore
                self.rl_discount_rate,
                broadcast=1,
            ),
            next_q_values,
        ),
    )

    self.update_model('states', 'actions', q_vals_target)
    workspace.RunNetOnce(self.rl_train_model.param_init_net)
    workspace.CreateNet(self.rl_train_model.net)
    C2.set_model(None)
def sparse_to_dense(
    lengths_blob: str,
    keys_blob: str,
    values_blob: str,
    sorted_features: List[int],
    set_missing_value_to_zero: bool = False,
) -> Tuple[str, List[str]]:
    MISSING_SCALAR = C2.NextBlob("MISSING_SCALAR")
    missing_value = 0.0 if set_missing_value_to_zero else MISSING_VALUE
    workspace.FeedBlob(MISSING_SCALAR, np.array([missing_value], dtype=np.float32))
    C2.net().GivenTensorFill([], [MISSING_SCALAR], shape=[], values=[missing_value])
    parameters: List[str] = [MISSING_SCALAR]
    assert len(sorted_features) > 0, "Sorted features is empty"
    dense_input = C2.SparseToDenseMask(
        keys_blob, values_blob, MISSING_SCALAR, lengths_blob, mask=sorted_features
    )[0]
    return dense_input, parameters
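# Minimal usage sketch (not from the original source): converts one example
# with two sparse features into a dense 1 x 2 matrix. Assumes a net is active
# via C2.set_net()/C2.set_model() so that C2.net() resolves; the blob names
# and feature ids (3 and 7) below are hypothetical.
def _example_sparse_to_dense():
    workspace.FeedBlob("ex_lengths", np.array([2], dtype=np.int32))
    workspace.FeedBlob("ex_keys", np.array([3, 7], dtype=np.int64))
    workspace.FeedBlob("ex_values", np.array([0.5, 1.5], dtype=np.float32))
    dense_blob, parameters = sparse_to_dense(
        "ex_lengths", "ex_keys", "ex_values", sorted_features=[3, 7]
    )
    return dense_blob, parameters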
def _create_reward_train_net(self) -> None:
    self.reward_train_model = ModelHelper(name="reward_train_" + self.model_id)
    C2.set_model(self.reward_train_model)
    if self.reward_shape is not None:
        for action_index, boost in self.reward_shape.items():
            action_boost = C2.Mul(
                C2.Slice(
                    "actions", starts=[0, action_index], ends=[-1, action_index + 1]
                ),
                boost,
                broadcast=1,
            )
            C2.net().Sum(["rewards", action_boost], ["rewards"])
    self.update_model("states", "actions", "rewards")
    workspace.RunNetOnce(self.reward_train_model.param_init_net)
    self.reward_train_model.net.Proto().num_workers = (
        RLTrainer.DEFAULT_TRAINING_NUM_WORKERS
    )
    self.reward_train_model.net.Proto().type = "async_scheduling"
    workspace.CreateNet(self.reward_train_model.net)
    C2.set_model(None)
def export(
    cls,
    trainer,
    state_normalization_parameters,
    action_normalization_parameters,
    int_features=False,
    model_on_gpu=False,
):
    """Export caffe2 preprocessor net and pytorch DQN forward pass as one
    caffe2 net.

    :param trainer ParametricDQNTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_normalization_parameters action NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    """
    input_dim = trainer.num_features
    if isinstance(trainer.q_network, DataParallel):
        trainer.q_network = trainer.q_network.module

    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        trainer.q_network, input_dim, model_on_gpu
    )
    qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace

    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    # While converting to metanetdef, the external_input of predict_net
    # will be recomputed. Add the real output of init_net to parameters
    # to make sure they will be counted.
    parameters.extend(
        set(caffe2_netdef.init_net.external_output)
        - set(caffe2_netdef.init_net.external_input)
    )

    # ensure state and action IDs have no intersection
    assert (
        len(
            set(state_normalization_parameters.keys())
            & set(action_normalization_parameters.keys())
        )
        == 0
    )

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    preprocessor = PreprocessorNet(True)
    sorted_state_features, _ = sort_features_by_normalization(
        state_normalization_parameters
    )
    state_dense_matrix, new_parameters = sparse_to_dense(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        sorted_state_features,
    )
    parameters.extend(new_parameters)
    state_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
        state_dense_matrix,
        sorted_state_features,
        state_normalization_parameters,
        "state_norm",
        False,
    )
    parameters.extend(new_parameters)

    sorted_action_features, _ = sort_features_by_normalization(
        action_normalization_parameters
    )
    action_dense_matrix, new_parameters = sparse_to_dense(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        sorted_action_features,
    )
    parameters.extend(new_parameters)
    action_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
        action_dense_matrix,
        sorted_action_features,
        action_normalization_parameters,
        "action_norm",
        False,
    )
    parameters.extend(new_parameters)

    state_action_normalized = "state_action_normalized"
    state_action_normalized_dim = "state_action_normalized_dim"
    net.Concat(
        [state_normalized_dense_matrix, action_normalized_dense_matrix],
        [state_action_normalized, state_action_normalized_dim],
        axis=1,
    )

    net.Copy([state_action_normalized], [qnet_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)

    net.AppendNet(torch_predict_net)

    new_parameters, q_values = RLPredictor._forward_pass(
        model, trainer, state_action_normalized, ["Q"], qnet_output_blob
    )
    parameters.extend(new_parameters)

    flat_q_values_key = (
        "output/string_weighted_multi_categorical_features.values.values"
    )
    num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
    q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

    # Get 1 x n (number of examples) action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
    max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

    # Get 1 x n (number of examples) action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_blob = C2.Tile(
        C2.FlattenToVec(softmax_act_idxs_nested), num_examples, axis=0
    )

    # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
    # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_blob, to=caffe2_pb2.TensorProto.INT64)
    softmax_act_blob = C2.Cast(softmax_act_blob, to=caffe2_pb2.TensorProto.INT64)
    max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
    softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob, shape=[1, -1])
    C2.net().Append(
        [max_q_act_blob_nested, softmax_act_blob_nested], [max_q_act_blob_nested]
    )
    transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
    output_values = "output/int_single_categorical_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
    C2.net().Copy([flat_transposed_action_idxs], [output_values])
    output_lengths = "output/int_single_categorical_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [flat_q_values_key],
        [output_lengths],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )
    output_keys = "output/int_single_categorical_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
    C2.net().FlattenToVec([output_key_tile], [output_keys])

    workspace.CreateNet(net)
    return ParametricDQNPredictor(net, torch_init_net, parameters, int_features)
def normalize_sparse_matrix(
    self,
    lengths_blob: str,
    keys_blob: str,
    values_blob: str,
    normalization_parameters: Dict[int, NormalizationParameters],
    blobname_prefix: str,
    split_sparse_to_dense: bool,
    split_expensive_feature_groups: bool,
    normalize: bool = True,
    sorted_features_override: List[int] = None,
) -> Tuple[str, List[str]]:
    if sorted_features_override:
        sorted_features = sorted_features_override
    else:
        sorted_features, _ = sort_features_by_normalization(
            normalization_parameters
        )
    int_features = [int(feature) for feature in sorted_features]

    preprocess_num_batches = 8 if split_sparse_to_dense else 1

    lengths_batch = []
    keys_batch = []
    values_batch = []
    for _ in range(preprocess_num_batches):
        lengths_batch.append(C2.NextBlob(blobname_prefix + "_length_batch"))
        keys_batch.append(C2.NextBlob(blobname_prefix + "_key_batch"))
        values_batch.append(C2.NextBlob(blobname_prefix + "_value_batch"))

    C2.net().Split([lengths_blob], lengths_batch, axis=0)
    total_lengths_batch = []
    for x in range(preprocess_num_batches):
        total_lengths_batch.append(
            C2.Reshape(
                C2.ReduceBackSum(lengths_batch[x], num_reduce_dims=1), shape=[1]
            )[0]
        )
    total_lengths_batch_concat, _ = C2.Concat(*total_lengths_batch, axis=0)
    C2.net().Split([keys_blob, total_lengths_batch_concat], keys_batch, axis=0)
    C2.net().Split([values_blob, total_lengths_batch_concat], values_batch, axis=0)

    dense_input_fragments = []
    parameters: List[str] = []

    MISSING_SCALAR = self._store_parameter(
        parameters, "MISSING_SCALAR", np.array([MISSING_VALUE], dtype=np.float32)
    )
    C2.net().GivenTensorFill([], [MISSING_SCALAR], shape=[], values=[MISSING_VALUE])

    for preprocess_batch in range(preprocess_num_batches):
        dense_input_fragment = C2.SparseToDenseMask(
            keys_batch[preprocess_batch],
            values_batch[preprocess_batch],
            MISSING_SCALAR,
            lengths_batch[preprocess_batch],
            mask=int_features,
        )[0]
        if normalize:
            normalized_fragment, p = self.normalize_dense_matrix(
                dense_input_fragment,
                sorted_features,
                normalization_parameters,
                blobname_prefix,
                split_expensive_feature_groups,
            )
            dense_input_fragments.append(normalized_fragment)
            parameters.extend(p)
        else:
            dense_input_fragments.append(dense_input_fragment)

    dense_input = C2.NextBlob(blobname_prefix + "_dense_input")
    dense_input_dims = C2.NextBlob(blobname_prefix + "_dense_input_dims")
    C2.net().Concat(dense_input_fragments, [dense_input, dense_input_dims], axis=0)

    return dense_input, parameters
def export_actor(cls, trainer, state_normalization_parameters, int_features=False):
    """Export caffe2 preprocessor net and pytorch actor forward pass as one
    caffe2 net.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    """
    input_dim = len(state_normalization_parameters)
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        trainer.actor, input_dim
    )
    actor_input_blob, actor_output_blob, caffe2_netdef = \
        PytorchCaffe2Converter.buffer_to_caffe2_netdef(buffer)
    torch_workspace = caffe2_netdef.workspace

    parameters = []
    for blob_str in torch_workspace.Blobs():
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
        parameters.append(blob_str)

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    workspace.FeedBlob('input/float_features.lengths', np.zeros(1, dtype=np.int32))
    workspace.FeedBlob('input/float_features.keys', np.zeros(1, dtype=np.int64))
    workspace.FeedBlob('input/float_features.values', np.zeros(1, dtype=np.float32))

    input_feature_lengths = 'input_feature_lengths'
    input_feature_keys = 'input_feature_keys'
    input_feature_values = 'input_feature_values'

    if int_features:
        workspace.FeedBlob('input/int_features.lengths', np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/int_features.keys', np.zeros(1, dtype=np.int64))
        workspace.FeedBlob('input/int_features.values', np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ['input/int_features.values'],
            ['input/int_features.values_float'],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                'input/float_features.lengths',
                'input/float_features.keys',
                'input/float_features.values',
                'input/int_features.lengths',
                'input/int_features.keys',
                'input/int_features.values_float',
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(['input/float_features.lengths'], [input_feature_lengths])
        C2.net().Copy(['input/float_features.keys'], [input_feature_keys])
        C2.net().Copy(['input/float_features.values'], [input_feature_values])

    preprocessor = PreprocessorNet(net, True)
    parameters.extend(preprocessor.parameters)
    state_normalized_dense_matrix, new_parameters = \
        preprocessor.normalize_sparse_matrix(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            state_normalization_parameters,
            'state_norm',
        )
    parameters.extend(new_parameters)
    net.Copy([state_normalized_dense_matrix], [actor_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)
    net.AppendNet(torch_init_net)
    net.AppendNet(torch_predict_net)

    C2.FlattenToVec(C2.ArgMax(actor_output_blob))
    output_lengths = 'output/float_features.lengths'
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [C2.FlattenToVec(C2.ArgMax(actor_output_blob))],
        [output_lengths],
        value=trainer.actor.layers[-1].out_features,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    output_keys = 'output/float_features.keys'
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int32))
    C2.net().LengthsRangeFill([output_lengths], [output_keys])

    output_values = 'output/float_features.values'
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
    C2.net().FlattenToVec([actor_output_blob], [output_values])

    workspace.CreateNet(net)
    return DDPGPredictor(net, parameters, int_features)
def export(
    cls,
    trainer,
    state_normalization_parameters,
    action_normalization_parameters,
):
    """
    Creates a ContinuousActionDQNPredictor from a trainer.

    :param trainer ContinuousActionDQNTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_normalization_parameters action NormalizationParameters
    """
    # ensure state and action IDs have no intersection
    assert (
        len(
            set(state_normalization_parameters.keys())
            & set(action_normalization_parameters.keys())
        )
        == 0
    )

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    workspace.FeedBlob('input/float_features.lengths', np.zeros(1, dtype=np.int32))
    workspace.FeedBlob('input/float_features.keys', np.zeros(1, dtype=np.int32))
    workspace.FeedBlob('input/float_features.values', np.zeros(1, dtype=np.float32))

    preprocessor = PreprocessorNet(net, True)
    parameters = []
    parameters.extend(preprocessor.parameters)
    state_normalized_dense_matrix, new_parameters = \
        preprocessor.normalize_sparse_matrix(
            'input/float_features.lengths',
            'input/float_features.keys',
            'input/float_features.values',
            state_normalization_parameters,
            'state_norm',
        )
    parameters.extend(new_parameters)
    action_normalized_dense_matrix, new_parameters = \
        preprocessor.normalize_sparse_matrix(
            'input/float_features.lengths',
            'input/float_features.keys',
            'input/float_features.values',
            action_normalization_parameters,
            'action_norm',
        )
    parameters.extend(new_parameters)

    state_action_normalized = 'state_action_normalized'
    state_action_normalized_dim = 'state_action_normalized_dim'
    net.Concat(
        [state_normalized_dense_matrix, action_normalized_dense_matrix],
        [state_action_normalized, state_action_normalized_dim],
        axis=1,
    )
    new_parameters, q_values = RLPredictor._forward_pass(
        model,
        trainer,
        state_action_normalized,
        ['Q'],
    )
    parameters.extend(new_parameters)

    flat_q_values_key = \
        'output/string_weighted_multi_categorical_features.values.values'
    num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
    q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

    # Get 1 x n (number of examples) action index tensor under the max_q policy
    max_q_act_idxs = 'max_q_policy_actions'
    C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
    max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

    # Get 1 x n (number of examples) action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = 'softmax_act_idxs_nested'
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_blob = C2.Tile(
        C2.FlattenToVec(softmax_act_idxs_nested), num_examples, axis=0
    )

    # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
    # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_blob, to=caffe2_pb2.TensorProto.INT64)
    softmax_act_blob = C2.Cast(softmax_act_blob, to=caffe2_pb2.TensorProto.INT64)
    max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
    softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob, shape=[1, -1])
    C2.net().Append(
        [max_q_act_blob_nested, softmax_act_blob_nested], [max_q_act_blob_nested]
    )
    transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
    output_values = 'output/int_single_categorical_features.values'
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
    C2.net().Copy([flat_transposed_action_idxs], [output_values])
    output_lengths = 'output/int_single_categorical_features.lengths'
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [flat_q_values_key],
        [output_lengths],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )
    output_keys = 'output/int_single_categorical_features.keys'
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
    C2.net().FlattenToVec([output_key_tile], [output_keys])

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    return ContinuousActionDQNPredictor(net, parameters)
def _forward_pass(
    cls, model, trainer, normalized_dense_matrix, actions, qnet_output_blob
):
    C2.set_model(model)
    parameters = []
    q_values = "q_values"
    C2.net().Copy([qnet_output_blob], [q_values])

    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))
    action_range = C2.NextBlob("action_range")
    parameters.append(action_range)
    workspace.FeedBlob(action_range, np.array(list(range(len(actions)))))

    output_shape = C2.Shape(q_values)
    output_shape_row_count = C2.Slice(output_shape, starts=[0], ends=[1])
    output_row_shape = C2.Slice(q_values, starts=[0, 0], ends=[-1, 1])

    output_feature_keys = "output/string_weighted_multi_categorical_features.keys"
    workspace.FeedBlob(output_feature_keys, np.zeros(1, dtype=np.int64))
    output_feature_keys_matrix = C2.ConstantFill(
        output_row_shape, value=0, dtype=caffe2_pb2.TensorProto.INT64
    )
    # Note: sometimes we need to use an explicit output name, so we call
    # C2.net().Fn(...)
    C2.net().FlattenToVec([output_feature_keys_matrix], [output_feature_keys])

    output_feature_lengths = (
        "output/string_weighted_multi_categorical_features.lengths"
    )
    workspace.FeedBlob(output_feature_lengths, np.zeros(1, dtype=np.int32))
    output_feature_lengths_matrix = C2.ConstantFill(
        output_row_shape, value=1, dtype=caffe2_pb2.TensorProto.INT32
    )
    C2.net().FlattenToVec([output_feature_lengths_matrix], [output_feature_lengths])

    output_keys = "output/string_weighted_multi_categorical_features.values.keys"
    workspace.FeedBlob(output_keys, np.array(["a"]))
    C2.net().Tile([action_names, output_shape_row_count], [output_keys], axis=1)

    output_lengths_matrix = C2.ConstantFill(
        output_row_shape, value=len(actions), dtype=caffe2_pb2.TensorProto.INT32
    )
    output_lengths = (
        "output/string_weighted_multi_categorical_features.values.lengths"
    )
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().FlattenToVec([output_lengths_matrix], [output_lengths])

    output_values = (
        "output/string_weighted_multi_categorical_features.values.values"
    )
    workspace.FeedBlob(output_values, np.array([1.0]))
    C2.net().FlattenToVec([q_values], [output_values])
    return parameters, q_values
def export(
    cls,
    trainer,
    actions,
    state_normalization_parameters,
    int_features=False,
    model_on_gpu=False,
    set_missing_value_to_zero=False,
):
    """Export caffe2 preprocessor net and pytorch DQN forward pass as one
    caffe2 net.

    :param trainer DQNTrainer
    :param actions list of action names
    :param state_normalization_parameters state NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    """
    input_dim = trainer.num_features
    q_network = (
        trainer.q_network.module
        if isinstance(trainer.q_network, DataParallel)
        else trainer.q_network
    )
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        q_network, input_dim, model_on_gpu
    )
    qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace

    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    logger.info("Generated ONNX predict net:")
    logger.info(str(torch_predict_net.Proto()))
    # While converting to metanetdef, the external_input of predict_net
    # will be recomputed. Add the real output of init_net to parameters
    # to make sure they will be counted.
    parameters.extend(
        set(caffe2_netdef.init_net.external_output)
        - set(caffe2_netdef.init_net.external_input)
    )

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    workspace.FeedBlob("input/image", np.zeros([1, 1, 1, 1], dtype=np.int32))
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    if state_normalization_parameters is not None:
        sorted_feature_ids = sort_features_by_normalization(
            state_normalization_parameters
        )[0]
        dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_feature_ids,
            set_missing_value_to_zero=set_missing_value_to_zero,
        )
        parameters.extend(new_parameters)
        preprocessor_net = PreprocessorNet()
        state_normalized_dense_matrix, new_parameters = preprocessor_net.normalize_dense_matrix(
            dense_matrix,
            sorted_feature_ids,
            state_normalization_parameters,
            "state_norm_",
            True,
        )
        parameters.extend(new_parameters)
    else:
        # Image input.  Note: Currently this does the wrong thing if
        # more than one image is passed at a time.
        state_normalized_dense_matrix = "input/image"

    net.Copy([state_normalized_dense_matrix], [qnet_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)

    net.AppendNet(torch_predict_net)

    new_parameters, q_values = RLPredictor._forward_pass(
        model, trainer, state_normalized_dense_matrix, actions, qnet_output_blob
    )
    parameters.extend(new_parameters)

    # Get 1 x n action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
    shape_of_num_of_states = "num_states_shape"
    C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
    num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

    # Get 1 x n action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_idxs = "softmax_policy_actions"
    C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))

    # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
    # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    softmax_act_blob = C2.Cast(softmax_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
    transposed_action_idxs = C2.Transpose(max_q_act_blob)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_VALS_NAME, np.zeros(1, dtype=np.int64))
    C2.net().Gather(
        [action_names, flat_transposed_action_idxs], [OUTPUT_SINGLE_CAT_VALS_NAME]
    )

    workspace.FeedBlob(OUTPUT_SINGLE_CAT_LENGTHS_NAME, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [shape_of_num_of_states],
        [OUTPUT_SINGLE_CAT_LENGTHS_NAME],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    workspace.FeedBlob(OUTPUT_SINGLE_CAT_KEYS_NAME, np.zeros(1, dtype=np.int64))
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
    C2.net().FlattenToVec([output_key_tile], [OUTPUT_SINGLE_CAT_KEYS_NAME])

    workspace.CreateNet(net)
    return DQNPredictor(net, torch_init_net, parameters, int_features)
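# Hedged usage sketch (not from the original source): exporting a trained DQN
# as a serving predictor. The trainer and normalization dict must be supplied
# by the caller; the action names here are hypothetical, and DQNPredictor is
# assumed to expose export() as a classmethod, as the return statement above
# suggests.
def _example_export_dqn(dqn_trainer, state_norm_params):
    return DQNPredictor.export(
        dqn_trainer,
        ["action_a", "action_b"],  # hypothetical discrete action names
        state_norm_params,
        int_features=False,
        model_on_gpu=False,
    )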
def export_actor(
    cls,
    trainer,
    state_normalization_parameters,
    action_feature_ids,
    min_action_range_tensor_serving,
    max_action_range_tensor_serving,
    int_features=False,
    model_on_gpu=False,
):
    """Export caffe2 preprocessor net and pytorch actor forward pass as one
    caffe2 net.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_feature_ids list of action feature ids
    :param min_action_range_tensor_serving pytorch tensor that specifies
        min action value for each dimension
    :param max_action_range_tensor_serving pytorch tensor that specifies
        max action value for each dimension
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    """
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)
    parameters: List[str] = []

    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    preprocessor = PreprocessorNet()
    sorted_features, _ = sort_features_by_normalization(
        state_normalization_parameters
    )
    state_dense_matrix, new_parameters = sparse_to_dense(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        sorted_features,
    )
    parameters.extend(new_parameters)
    state_normalized_dense_matrix, new_parameters = preprocessor.normalize_dense_matrix(
        state_dense_matrix,
        sorted_features,
        state_normalization_parameters,
        "state_norm",
        False,
    )
    parameters.extend(new_parameters)

    (
        torch_init_net,
        torch_predict_net,
        new_parameters,
        actor_input_blob,
        actor_output_blob,
        min_action_training_blob,
        max_action_training_blob,
        min_action_serving_blob,
        max_action_serving_blob,
    ) = DDPGPredictor.generate_train_net(
        trainer,
        model,
        min_action_range_tensor_serving,
        max_action_range_tensor_serving,
        model_on_gpu,
    )
    parameters.extend(new_parameters)
    net.Copy([state_normalized_dense_matrix], [actor_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)
    net.AppendNet(torch_predict_net)

    # Scale actor's actions from [-1, 1] to the serving range
    prev_range = C2.Sub(max_action_training_blob, min_action_training_blob)
    new_range = C2.Sub(max_action_serving_blob, min_action_serving_blob)
    subtract_prev_min = C2.Sub(actor_output_blob, min_action_training_blob)
    div_by_prev_range = C2.Div(subtract_prev_min, prev_range)
    scaled_for_serving_actions = C2.Add(
        C2.Mul(div_by_prev_range, new_range), min_action_serving_blob
    )

    output_lengths = "output/float_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [C2.FlattenToVec(C2.ArgMax(actor_output_blob))],
        [output_lengths],
        value=trainer.actor.layers[-1].out_features,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    action_feature_ids_blob = C2.NextBlob("action_feature_ids")
    workspace.FeedBlob(
        action_feature_ids_blob, np.array(action_feature_ids, dtype=np.int64)
    )
    parameters.append(action_feature_ids_blob)

    output_keys = "output/float_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    num_examples, _ = C2.Reshape(C2.Size("input/float_features.lengths"), shape=[1])
    C2.net().Tile([action_feature_ids_blob, num_examples], [output_keys], axis=1)

    output_values = "output/float_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
    C2.net().FlattenToVec([scaled_for_serving_actions], [output_values])

    workspace.CreateNet(net)
    return DDPGPredictor(net, torch_init_net, parameters, int_features)
def export(
    cls,
    trainer,
    state_normalization_parameters,
    action_normalization_parameters,
    int_features=False,
):
    """
    Creates a ContinuousActionDQNPredictor from a ContinuousActionDQNTrainer.

    :param trainer ContinuousActionDQNTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_normalization_parameters action NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    """
    # ensure state and action IDs have no intersection
    assert (
        len(
            set(state_normalization_parameters.keys())
            & set(action_normalization_parameters.keys())
        )
        == 0
    )

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    parameters = []
    state_normalized_dense_matrix, new_parameters = sparse_to_dense(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        state_normalization_parameters,
        None,
    )
    parameters.extend(new_parameters)
    action_normalized_dense_matrix, new_parameters = sparse_to_dense(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        action_normalization_parameters,
        None,
    )
    parameters.extend(new_parameters)

    state_action_normalized = "state_action_normalized"
    state_action_normalized_dim = "state_action_normalized_dim"
    net.Concat(
        [state_normalized_dense_matrix, action_normalized_dense_matrix],
        [state_action_normalized, state_action_normalized_dim],
        axis=1,
    )
    new_parameters, q_values = RLPredictor._forward_pass(
        model, trainer, state_action_normalized, ["Q"]
    )
    parameters.extend(new_parameters)

    flat_q_values_key = (
        "output/string_weighted_multi_categorical_features.values.values"
    )
    num_examples, _ = C2.Reshape(C2.Size(flat_q_values_key), shape=[1])
    q_value_blob, _ = C2.Reshape(flat_q_values_key, shape=[1, -1])

    # Get 1 x n (number of examples) action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    C2.net().FlattenToVec([C2.ArgMax(q_value_blob)], [max_q_act_idxs])
    max_q_act_blob = C2.Tile(max_q_act_idxs, num_examples, axis=0)

    # Get 1 x n (number of examples) action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_value_blob, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_blob = C2.Tile(
        C2.FlattenToVec(softmax_act_idxs_nested), num_examples, axis=0
    )

    # Concat action idx vecs to get 2 x n tensor [[a_maxq, ..], [a_softmax, ..]]
    # transpose & flatten to get [a_maxq, a_softmax, a_maxq, a_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_blob, to=caffe2_pb2.TensorProto.INT64)
    softmax_act_blob = C2.Cast(softmax_act_blob, to=caffe2_pb2.TensorProto.INT64)
    max_q_act_blob_nested, _ = C2.Reshape(max_q_act_blob, shape=[1, -1])
    softmax_act_blob_nested, _ = C2.Reshape(softmax_act_blob, shape=[1, -1])
    C2.net().Append(
        [max_q_act_blob_nested, softmax_act_blob_nested], [max_q_act_blob_nested]
    )
    transposed_action_idxs = C2.Transpose(max_q_act_blob_nested)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
    output_values = "output/int_single_categorical_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
    C2.net().Copy([flat_transposed_action_idxs], [output_values])
    output_lengths = "output/int_single_categorical_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [flat_q_values_key],
        [output_lengths],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )
    output_keys = "output/int_single_categorical_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_examples, axis=0)
    C2.net().FlattenToVec([output_key_tile], [output_keys])

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    return ContinuousActionDQNPredictor(net, parameters, int_features)
def export_critic(
    cls,
    trainer,
    state_normalization_parameters,
    action_normalization_parameters,
    int_features=False,
    model_on_gpu=False,
):
    """Export caffe2 preprocessor net and pytorch critic forward pass as one
    caffe2 net.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param action_normalization_parameters action NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    """
    input_dim = trainer.state_dim + trainer.action_dim
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        trainer.critic, input_dim, model_on_gpu
    )
    critic_input_blob, critic_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace

    parameters = []
    for blob_str in torch_workspace.Blobs():
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))
        parameters.append(blob_str)

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    preprocessor = PreprocessorNet(True)
    state_normalized_dense_matrix, new_parameters = preprocessor.normalize_sparse_matrix(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        state_normalization_parameters,
        "state_norm",
        False,
        False,
    )
    parameters.extend(new_parameters)

    # Don't normalize actions, just go from sparse -> dense
    action_dense_matrix, new_parameters = preprocessor.normalize_sparse_matrix(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        action_normalization_parameters,
        "action_norm",
        False,
        False,
        normalize=False,
    )
    parameters.extend(new_parameters)
    state_action_normalized = "state_action_normalized"
    state_action_normalized_dim = "state_action_normalized_dim"
    net.Concat(
        [state_normalized_dense_matrix, action_dense_matrix],
        [state_action_normalized, state_action_normalized_dim],
        axis=1,
    )
    net.Copy([state_action_normalized], [critic_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)
    net.AppendNet(torch_init_net)
    net.AppendNet(torch_predict_net)

    C2.FlattenToVec(C2.ArgMax(critic_output_blob))
    output_lengths = "output/float_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [C2.FlattenToVec(C2.ArgMax(critic_output_blob))],
        [output_lengths],
        value=trainer.critic.layers[-1].out_features,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    output_keys = "output/float_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int32))
    C2.net().LengthsRangeFill([output_lengths], [output_keys])

    output_values = "output/float_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))
    C2.net().FlattenToVec([critic_output_blob], [output_values])

    workspace.CreateNet(net)
    return DDPGPredictor(net, parameters, int_features)
def export_actor(
    cls,
    trainer,
    state_normalization_parameters,
    min_action_range_tensor_serving,
    max_action_range_tensor_serving,
    int_features=False,
    model_on_gpu=False,
):
    """Export caffe2 preprocessor net and pytorch actor forward pass as one
    caffe2 net.

    :param trainer DDPGTrainer
    :param state_normalization_parameters state NormalizationParameters
    :param min_action_range_tensor_serving pytorch tensor that specifies
        min action value for each dimension
    :param max_action_range_tensor_serving pytorch tensor that specifies
        max action value for each dimension
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    """
    input_dim = trainer.state_dim
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        trainer.actor, input_dim, model_on_gpu
    )
    actor_input_blob, actor_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace

    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    # Feed action scaling tensors for serving
    min_action_serving_blob = C2.NextBlob("min_action_range_tensor_serving")
    workspace.FeedBlob(
        min_action_serving_blob, min_action_range_tensor_serving.cpu().data.numpy()
    )
    parameters.append(str(min_action_serving_blob))

    max_action_serving_blob = C2.NextBlob("max_action_range_tensor_serving")
    workspace.FeedBlob(
        max_action_serving_blob, max_action_range_tensor_serving.cpu().data.numpy()
    )
    parameters.append(str(max_action_serving_blob))

    # Feed action scaling tensors for training [-1, 1] due to tanh actor
    min_vals_training = trainer.min_action_range_tensor_training.cpu().data.numpy()
    min_action_training_blob = C2.NextBlob("min_action_range_tensor_training")
    workspace.FeedBlob(min_action_training_blob, min_vals_training)
    parameters.append(str(min_action_training_blob))

    max_vals_training = trainer.max_action_range_tensor_training.cpu().data.numpy()
    max_action_training_blob = C2.NextBlob("max_action_range_tensor_training")
    workspace.FeedBlob(max_action_training_blob, max_vals_training)
    parameters.append(str(max_action_training_blob))

    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    preprocessor = PreprocessorNet(True)
    state_normalized_dense_matrix, new_parameters = preprocessor.normalize_sparse_matrix(
        input_feature_lengths,
        input_feature_keys,
        input_feature_values,
        state_normalization_parameters,
        "state_norm",
        False,
        False,
    )
    parameters.extend(new_parameters)
    net.Copy([state_normalized_dense_matrix], [actor_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)
    net.AppendNet(torch_predict_net)

    C2.FlattenToVec(C2.ArgMax(actor_output_blob))
    output_lengths = "output/float_features.lengths"
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [C2.FlattenToVec(C2.ArgMax(actor_output_blob))],
        [output_lengths],
        value=trainer.actor.layers[-1].out_features,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    output_keys = "output/float_features.keys"
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int32))
    C2.net().LengthsRangeFill([output_lengths], [output_keys])

    output_values = "output/float_features.values"
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.float32))

    # Scale actor's actions from [-1, 1] to the serving range
    prev_range = C2.Sub(max_action_training_blob, min_action_training_blob)
    new_range = C2.Sub(max_action_serving_blob, min_action_serving_blob)
    subtract_prev_min = C2.Sub(actor_output_blob, min_action_training_blob)
    div_by_prev_range = C2.Div(subtract_prev_min, prev_range)
    scaled_for_serving_actions = C2.Add(
        C2.Mul(div_by_prev_range, new_range), min_action_serving_blob
    )
    C2.net().FlattenToVec([scaled_for_serving_actions], [output_values])

    workspace.CreateNet(net)
    return DDPGPredictor(net, parameters, int_features)
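# The rescaling above is plain min-max interpolation from the training range
# to the serving range. A NumPy sketch of the same arithmetic (illustrative
# only; the action values and serving range below are hypothetical):
def _example_rescale_actions():
    actions = np.array([[-1.0, 0.0, 1.0]], dtype=np.float32)  # tanh actor outputs
    train_min, train_max = -1.0, 1.0
    serve_min, serve_max = 0.0, 10.0  # hypothetical serving range
    scaled = (actions - train_min) / (train_max - train_min) * (
        serve_max - serve_min
    ) + serve_min
    return scaled  # [[0.0, 5.0, 10.0]]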
def normalize_dense_matrix(
    self,
    input_matrix: str,
    features: List[int],
    normalization_parameters: Dict[int, NormalizationParameters],
    blobname_prefix: str,
    split_expensive_feature_groups: bool,
) -> Tuple[str, List[str]]:
    """
    Normalizes inputs according to parameters. Expects a dense matrix whose
    ith column corresponds to feature i.

    Note that the Caffe2 BatchBoxCox operator isn't implemented on CUDA GPU
    so we need to use a CPU context.

    :param input_matrix: Input matrix to normalize.
    :param features: Array that maps feature ids to column indices.
    :param normalization_parameters: Mapping from feature ids to
        NormalizationParameters.
    :param blobname_prefix: Prefix for input blobs to norm_net.
    :param split_expensive_feature_groups: If True, split large feature
        groups into smaller slices so expensive operators run on fewer
        columns at a time.
    """
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
        feature_starts = self._get_type_boundaries(
            features, normalization_parameters
        )
        normalized_input_blobs = []
        parameters: List[str] = []
        for i, feature_type in enumerate(FEATURE_TYPES):
            start_index = feature_starts[i]
            if (i + 1) == len(FEATURE_TYPES):
                end_index = len(normalization_parameters)
            else:
                end_index = feature_starts[i + 1]
            if start_index == end_index:
                continue  # No features of this type

            slices = []
            split_feature_group, split_intervals = self._should_split_feature_group(
                split_expensive_feature_groups, start_index, end_index, feature_type
            )

            if split_feature_group:
                for j in range(len(split_intervals) - 1):
                    slice_blob = self._get_input_blob_indexed(
                        blobname_prefix, feature_type, j
                    )
                    C2.net().Slice(
                        [input_matrix],
                        [slice_blob],
                        starts=[0, split_intervals[j]],
                        ends=[-1, split_intervals[j + 1]],
                    )
                    slices.append(
                        (slice_blob, split_intervals[j], split_intervals[j + 1])
                    )
            else:
                sliced_input_features = self._get_input_blob(
                    blobname_prefix, feature_type
                )
                C2.net().Slice(
                    [input_matrix],
                    [sliced_input_features],
                    starts=[0, start_index],
                    ends=[-1, end_index],
                )
                slices.append((sliced_input_features, start_index, end_index))

            for (slice_blob, start, end) in slices:
                normalized_input_blob, blob_parameters = self.preprocess_blob(
                    slice_blob,
                    [normalization_parameters[x] for x in features[start:end]],
                )
                logger.info(
                    "Processed split ({}, {}) for feature type {}".format(
                        start, end, feature_type
                    )
                )
                parameters.extend(blob_parameters)
                normalized_input_blobs.append(normalized_input_blob)

        for i, inp in enumerate(normalized_input_blobs):
            logger.info("input# {}: {}".format(i, inp))
        concatenated_input_blob, concatenated_input_blob_dim = C2.Concat(
            *normalized_input_blobs, axis=1
        )
        return concatenated_input_blob, parameters
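# --- Example (illustrative, not part of the library) ---
# _should_split_feature_group is not shown here, but the slicing loop above
# only relies on its contract: a list of column boundaries carving
# [start_index, end_index) into contiguous intervals. A minimal sketch of
# one plausible splitting rule (fixed maximum slice width; the real
# heuristic may differ):
def split_into_intervals(start_index, end_index, max_width):
    # boundaries such that every slice spans at most max_width columns
    return list(range(start_index, end_index, max_width)) + [end_index]

# Columns 3..17 with at most 5 columns per slice -> (3,8), (8,13), (13,17)
assert split_into_intervals(3, 17, 5) == [3, 8, 13, 17]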
def export(
    cls,
    trainer,
    actions,
    state_normalization_parameters,
    int_features=False,
    model_on_gpu=False,
    set_missing_value_to_zero=False,
):
    """Export caffe2 preprocessor net and pytorch DQN forward pass as one
    caffe2 net.

    :param trainer DQNTrainer
    :param actions list of action names
    :param state_normalization_parameters state NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    :param model_on_gpu boolean indicating if the model is a GPU model or CPU model
    :param set_missing_value_to_zero boolean indicating whether missing
        features are imputed as zero instead of the missing-value sentinel
    """
    input_dim = trainer.num_features
    q_network = (
        trainer.q_network.module
        if isinstance(trainer.q_network, DataParallel)
        else trainer.q_network
    )
    buffer = PytorchCaffe2Converter.pytorch_net_to_buffer(
        q_network, input_dim, model_on_gpu
    )
    qnet_input_blob, qnet_output_blob, caffe2_netdef = PytorchCaffe2Converter.buffer_to_caffe2_netdef(
        buffer
    )
    torch_workspace = caffe2_netdef.workspace

    parameters = torch_workspace.Blobs()
    for blob_str in parameters:
        workspace.FeedBlob(blob_str, torch_workspace.FetchBlob(blob_str))

    torch_init_net = core.Net(caffe2_netdef.init_net)
    torch_predict_net = core.Net(caffe2_netdef.predict_net)
    logger.info("Generated ONNX predict net:")
    logger.info(str(torch_predict_net.Proto()))

    # While converting to metanetdef, the external_input of predict_net
    # will be recomputed. Add the real output of init_net to parameters
    # to make sure they will be counted.
    parameters.extend(
        set(caffe2_netdef.init_net.external_output)
        - set(caffe2_netdef.init_net.external_input)
    )

    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    workspace.FeedBlob("input/image", np.zeros([1, 1, 1, 1], dtype=np.int32))
    workspace.FeedBlob("input/float_features.lengths", np.zeros(1, dtype=np.int32))
    workspace.FeedBlob("input/float_features.keys", np.zeros(1, dtype=np.int64))
    workspace.FeedBlob("input/float_features.values", np.zeros(1, dtype=np.float32))

    input_feature_lengths = "input_feature_lengths"
    input_feature_keys = "input_feature_keys"
    input_feature_values = "input_feature_values"

    if int_features:
        workspace.FeedBlob(
            "input/int_features.lengths", np.zeros(1, dtype=np.int32)
        )
        workspace.FeedBlob("input/int_features.keys", np.zeros(1, dtype=np.int64))
        workspace.FeedBlob("input/int_features.values", np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ["input/int_features.values"],
            ["input/int_features.values_float"],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                "input/float_features.lengths",
                "input/float_features.keys",
                "input/float_features.values",
                "input/int_features.lengths",
                "input/int_features.keys",
                "input/int_features.values_float",
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(["input/float_features.lengths"], [input_feature_lengths])
        C2.net().Copy(["input/float_features.keys"], [input_feature_keys])
        C2.net().Copy(["input/float_features.values"], [input_feature_values])

    if state_normalization_parameters is not None:
        sorted_feature_ids = sort_features_by_normalization(
            state_normalization_parameters
        )[0]
        dense_matrix, new_parameters = sparse_to_dense(
            input_feature_lengths,
            input_feature_keys,
            input_feature_values,
            sorted_feature_ids,
            set_missing_value_to_zero=set_missing_value_to_zero,
        )
        parameters.extend(new_parameters)
        preprocessor_net = PreprocessorNet()
        state_normalized_dense_matrix, new_parameters = preprocessor_net.normalize_dense_matrix(
            dense_matrix,
            sorted_feature_ids,
            state_normalization_parameters,
            "state_norm_",
            True,
        )
        parameters.extend(new_parameters)
    else:
        # Image input. Note: Currently this does the wrong thing if
        # more than one image is passed at a time.
        state_normalized_dense_matrix = "input/image"

    net.Copy([state_normalized_dense_matrix], [qnet_input_blob])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(torch_init_net)

    net.AppendNet(torch_predict_net)

    new_parameters, q_values = RLPredictor._forward_pass(
        model, trainer, state_normalized_dense_matrix, actions, qnet_output_blob
    )
    parameters.extend(new_parameters)

    # Get 1 x n action index tensor under the max_q policy
    max_q_act_idxs = "max_q_policy_actions"
    C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
    shape_of_num_of_states = "num_states_shape"
    C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
    num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

    # Get 1 x n action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32)
    )
    tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = "softmax_act_idxs_nested"
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_idxs = "softmax_policy_actions"
    C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

    action_names = C2.NextBlob("action_names")
    parameters.append(action_names)
    workspace.FeedBlob(action_names, np.array(actions))

    # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
    # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    softmax_act_blob = C2.Cast(softmax_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
    transposed_action_idxs = C2.Transpose(max_q_act_blob)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
    workspace.FeedBlob(OUTPUT_SINGLE_CAT_VALS_NAME, np.zeros(1, dtype=np.int64))
    C2.net().Gather(
        [action_names, flat_transposed_action_idxs], [OUTPUT_SINGLE_CAT_VALS_NAME]
    )

    workspace.FeedBlob(OUTPUT_SINGLE_CAT_LENGTHS_NAME, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [shape_of_num_of_states],
        [OUTPUT_SINGLE_CAT_LENGTHS_NAME],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )

    workspace.FeedBlob(OUTPUT_SINGLE_CAT_KEYS_NAME, np.zeros(1, dtype=np.int64))
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0, dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1, dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
    C2.net().FlattenToVec([output_key_tile], [OUTPUT_SINGLE_CAT_KEYS_NAME])

    workspace.CreateNet(net)
    return DQNPredictor(net, torch_init_net, parameters, int_features)
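# --- Example (illustrative, not part of the library) ---
# The Append/Transpose/FlattenToVec sequence above interleaves the two
# policies' action indices per state. The same computation in NumPy:
import numpy as np

max_q_idxs = np.array([[0, 2, 1]], dtype=np.int32)    # argmax action per state
softmax_idxs = np.array([[1, 2, 0]], dtype=np.int32)  # sampled action per state
stacked = np.append(max_q_idxs, softmax_idxs, axis=0)  # 2 x n, like C2 Append
interleaved = stacked.T.flatten()
# [s1_maxq, s1_softmax, s2_maxq, s2_softmax, s3_maxq, s3_softmax]
assert interleaved.tolist() == [0, 1, 2, 2, 1, 0]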
def preprocess_blob(self, blob, normalization_parameters):
    """
    Takes in a blob and its normalization parameters. Outputs a tuple
    whose first element is a blob containing the normalized input blob
    and whose second element contains all the parameter blobs used to
    create it.

    Call this from a CPU context and ensure the input blob exists in it.
    """
    parameters: List[str] = []
    ZERO = self._store_parameter(
        parameters, "ZERO", np.array([0], dtype=np.float32)
    )
    MISSING_U = self._store_parameter(
        parameters, "MISSING_U", np.array([MISSING_VALUE + 1e-4], dtype=np.float32)
    )
    MISSING_L = self._store_parameter(
        parameters, "MISSING_L", np.array([MISSING_VALUE - 1e-4], dtype=np.float32)
    )
    is_empty_l = C2.GT(blob, MISSING_L, broadcast=1)
    is_empty_u = C2.LT(blob, MISSING_U, broadcast=1)
    is_empty = C2.And(is_empty_l, is_empty_u)
    for i in range(len(normalization_parameters) - 1):
        if (
            normalization_parameters[i].feature_type
            != normalization_parameters[i + 1].feature_type
        ):
            raise Exception(
                "Only one feature type is allowed per call to preprocess_blob!"
            )
    feature_type = normalization_parameters[0].feature_type
    if feature_type == identify_types.BINARY:
        TOLERANCE = self._store_parameter(
            parameters, "TOLERANCE", np.array(1e-3, dtype=np.float32)
        )
        is_gt_zero = C2.GT(blob, C2.Add(ZERO, TOLERANCE, broadcast=1), broadcast=1)
        is_lt_zero = C2.LT(blob, C2.Sub(ZERO, TOLERANCE, broadcast=1), broadcast=1)
        bool_blob = C2.Or(is_gt_zero, is_lt_zero)
        blob = C2.Cast(bool_blob, to=caffe2_pb2.TensorProto.FLOAT)
    elif feature_type == identify_types.PROBABILITY:
        blob = C2.Logit(C2.Clip(blob, min=0.01, max=0.99))
    elif feature_type == identify_types.ENUM:
        for parameter in normalization_parameters:
            possible_values = parameter.possible_values
            for x in possible_values:
                if x < 0:
                    logger.fatal(
                        "Invalid enum possible value for feature: "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )
                    raise Exception(
                        "Invalid enum possible value for feature "
                        + blob
                        + ": "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )
        int_blob = C2.Cast(blob, to=core.DataType.INT32)

        # Batch one hot transform with MISSING_VALUE as a possible value
        feature_lengths = [
            len(p.possible_values) + 1 for p in normalization_parameters
        ]
        feature_lengths_blob = self._store_parameter(
            parameters,
            "feature_lengths_blob",
            np.array(feature_lengths, dtype=np.int32),
        )
        feature_values = [
            x
            for p in normalization_parameters
            for x in p.possible_values + [int(MISSING_VALUE)]
        ]
        feature_values_blob = self._store_parameter(
            parameters,
            "feature_values_blob",
            np.array(feature_values, dtype=np.int32),
        )
        one_hot_output = C2.BatchOneHot(
            int_blob, feature_lengths_blob, feature_values_blob
        )
        flattened_one_hot = C2.FlattenToVec(one_hot_output)

        # Remove missing values with a mask
        cols_to_include = [
            [1] * len(p.possible_values) + [0] for p in normalization_parameters
        ]
        cols_to_include = [x for col in cols_to_include for x in col]
        mask = self._store_parameter(
            parameters, "mask", np.array(cols_to_include, dtype=np.int32)
        )
        zero_vec = C2.ConstantFill(
            one_hot_output, value=0, dtype=caffe2_pb2.TensorProto.INT32
        )
        repeated_mask_bool = C2.Cast(
            C2.Add(zero_vec, mask, broadcast=1), to=core.DataType.BOOL
        )
        flattened_repeated_mask = C2.FlattenToVec(repeated_mask_bool)
        flattened_one_hot_proc = C2.NextBlob("flattened_one_hot_proc")
        flattened_one_hot_proc_indices = C2.NextBlob(
            "flattened_one_hot_proc_indices"
        )
        C2.net().BooleanMask(
            [flattened_one_hot, flattened_repeated_mask],
            [flattened_one_hot_proc, flattened_one_hot_proc_indices],
        )
        one_hot_shape = C2.Shape(one_hot_output)
        shape_delta = self._store_parameter(
            parameters,
            "shape_delta",
            np.array([0, len(normalization_parameters)], dtype=np.int64),
        )
        target_shape = C2.Sub(one_hot_shape, shape_delta, broadcast=1)
        output_int_blob = C2.NextBlob("output_int_blob")
        output_int_blob_old_shape = C2.NextBlob("output_int_blob_old_shape")
        C2.net().Reshape(
            [flattened_one_hot_proc, target_shape],
            [output_int_blob, output_int_blob_old_shape],
        )
        output_blob = C2.Cast(output_int_blob, to=core.DataType.FLOAT)
        return output_blob, parameters
    elif feature_type == identify_types.QUANTILE:
        # This transformation replaces a set of values with their quantile.
        # The quantile boundaries are provided in the normalization params.
        quantile_sizes = [len(norm.quantiles) for norm in normalization_parameters]
        num_boundaries_blob = self._store_parameter(
            parameters,
            "num_boundaries_blob",
            np.array(quantile_sizes, dtype=np.int32),
        )
        quantile_values = np.array([], dtype=np.float32)
        quantile_labels = np.array([], dtype=np.float32)
        for norm in normalization_parameters:
            quantile_values = np.append(
                quantile_values, np.array(norm.quantiles, dtype=np.float32)
            )
            # TODO: Fix this: the np.unique is making this part not true.
            quantile_labels = np.append(
                quantile_labels,
                np.arange(len(norm.quantiles), dtype=np.float32)
                / float(len(norm.quantiles)),
            )
        quantiles = np.vstack([quantile_values, quantile_labels]).T
        quantiles_blob = self._store_parameter(
            parameters, "quantiles_blob", quantiles
        )
        quantile_blob = C2.Percentile(blob, quantiles_blob, num_boundaries_blob)
        blob = quantile_blob
    elif (
        feature_type == identify_types.CONTINUOUS
        or feature_type == identify_types.BOXCOX
    ):
        boxcox_shifts = []
        boxcox_lambdas = []
        means = []
        stddevs = []
        for norm in normalization_parameters:
            if feature_type == identify_types.BOXCOX:
                assert (
                    norm.boxcox_shift is not None
                    and norm.boxcox_lambda is not None
                )
                boxcox_shifts.append(norm.boxcox_shift)
                boxcox_lambdas.append(norm.boxcox_lambda)
            means.append(norm.mean)
            stddevs.append(norm.stddev)
        if feature_type == identify_types.BOXCOX:
            boxcox_shift_blob = self._store_parameter(
                parameters,
                "boxcox_shift",
                np.array(boxcox_shifts, dtype=np.float32),
            )
            # Store under "boxcox_lambda" (the original reused the
            # "boxcox_shift" name for both parameter blobs)
            boxcox_lambda_blob = self._store_parameter(
                parameters,
                "boxcox_lambda",
                np.array(boxcox_lambdas, dtype=np.float32),
            )
            blob = C2.BatchBoxCox(blob, boxcox_lambda_blob, boxcox_shift_blob)
        means_blob = self._store_parameter(
            parameters, "means_blob", np.array([means], dtype=np.float32)
        )
        stddevs_blob = self._store_parameter(
            parameters, "stddevs_blob", np.array([stddevs], dtype=np.float32)
        )
        blob = C2.Sub(blob, means_blob, broadcast=1, axis=0)
        blob = C2.Div(blob, stddevs_blob, broadcast=1, axis=0)
        if self.clip_anomalies:
            blob = C2.Clip(blob, min=-3.0, max=3.0)
    else:
        raise NotImplementedError(
            "Invalid feature type: {}".format(feature_type)
        )

    zeros = C2.ConstantFill(blob, value=0.0)
    output_blob = C2.Where(is_empty, zeros, blob)
    return output_blob, parameters
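# --- Example (illustrative, not part of the library) ---
# The CONTINUOUS branch standardizes each column and the final Where gate
# zeroes entries that carried the missing-value sentinel. A NumPy mirror,
# assuming a sentinel of -1337.0 (use the module's actual MISSING_VALUE)
# and clip_anomalies enabled:
import numpy as np

MISSING_VALUE = -1337.0  # assumed sentinel for this sketch

def normalize_continuous(col, mean, stddev, clip=3.0):
    # same +/- 1e-4 tolerance as the MISSING_U / MISSING_L blobs
    is_empty = np.abs(col - MISSING_VALUE) < 1e-4
    standardized = np.clip((col - mean) / stddev, -clip, clip)
    return np.where(is_empty, 0.0, standardized)

col = np.array([10.0, MISSING_VALUE, 40.0], dtype=np.float32)
print(normalize_continuous(col, mean=20.0, stddev=10.0))  # [-1.  0.  2.]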
def export(cls, trainer, actions, state_normalization_parameters,
           int_features=False):
    """
    Creates a DiscreteActionPredictor from a DiscreteActionTrainer.

    :param trainer DiscreteActionTrainer
    :param actions list of action names
    :param state_normalization_parameters state NormalizationParameters
    :param int_features boolean indicating if int features blob will be present
    """
    model = model_helper.ModelHelper(name="predictor")
    net = model.net
    C2.set_model(model)

    workspace.FeedBlob('input/image', np.zeros([1, 1, 1, 1], dtype=np.int32))
    workspace.FeedBlob('input/float_features.lengths',
                       np.zeros(1, dtype=np.int32))
    workspace.FeedBlob('input/float_features.keys',
                       np.zeros(1, dtype=np.int64))
    workspace.FeedBlob('input/float_features.values',
                       np.zeros(1, dtype=np.float32))

    input_feature_lengths = 'input_feature_lengths'
    input_feature_keys = 'input_feature_keys'
    input_feature_values = 'input_feature_values'

    if int_features:
        workspace.FeedBlob('input/int_features.lengths',
                           np.zeros(1, dtype=np.int32))
        workspace.FeedBlob('input/int_features.keys',
                           np.zeros(1, dtype=np.int64))
        workspace.FeedBlob('input/int_features.values',
                           np.zeros(1, dtype=np.int32))
        C2.net().Cast(
            ['input/int_features.values'],
            ['input/int_features.values_float'],
            dtype=caffe2_pb2.TensorProto.FLOAT,
        )
        C2.net().MergeMultiScalarFeatureTensors(
            [
                'input/float_features.lengths',
                'input/float_features.keys',
                'input/float_features.values',
                'input/int_features.lengths',
                'input/int_features.keys',
                'input/int_features.values_float',
            ],
            [input_feature_lengths, input_feature_keys, input_feature_values],
        )
    else:
        C2.net().Copy(['input/float_features.lengths'], [input_feature_lengths])
        C2.net().Copy(['input/float_features.keys'], [input_feature_keys])
        C2.net().Copy(['input/float_features.values'], [input_feature_values])

    parameters = []
    if state_normalization_parameters is not None:
        preprocessor = PreprocessorNet(net, True)
        parameters.extend(preprocessor.parameters)
        normalized_dense_matrix, new_parameters = \
            preprocessor.normalize_sparse_matrix(
                input_feature_lengths,
                input_feature_keys,
                input_feature_values,
                state_normalization_parameters,
                'state_norm',
            )
        parameters.extend(new_parameters)
    else:
        # Image input. Note: Currently this does the wrong thing if
        # more than one image is passed at a time.
        normalized_dense_matrix = 'input/image'

    new_parameters, q_values = RLPredictor._forward_pass(
        model,
        trainer,
        normalized_dense_matrix,
        actions,
    )
    parameters.extend(new_parameters)

    # Get 1 x n action index tensor under the max_q policy
    max_q_act_idxs = 'max_q_policy_actions'
    C2.net().Flatten([C2.ArgMax(q_values)], [max_q_act_idxs], axis=0)
    shape_of_num_of_states = 'num_states_shape'
    C2.net().FlattenToVec([max_q_act_idxs], [shape_of_num_of_states])
    num_states, _ = C2.Reshape(C2.Size(shape_of_num_of_states), shape=[1])

    # Get 1 x n action index tensor under the softmax policy
    temperature = C2.NextBlob("temperature")
    parameters.append(temperature)
    workspace.FeedBlob(
        temperature, np.array([trainer.rl_temperature], dtype=np.float32))
    # Use the blob handle returned by NextBlob rather than the literal string
    # "temperature", which may differ if the name was deduplicated
    tempered_q_values = C2.Div(q_values, temperature, broadcast=1)
    softmax_values = C2.Softmax(tempered_q_values)
    softmax_act_idxs_nested = 'softmax_act_idxs_nested'
    C2.net().WeightedSample([softmax_values], [softmax_act_idxs_nested])
    softmax_act_idxs = 'softmax_policy_actions'
    C2.net().Flatten([softmax_act_idxs_nested], [softmax_act_idxs], axis=0)

    # Concat action index tensors to get 2 x n tensor - [[max_q], [softmax]]
    # transpose & flatten to get [a1_maxq, a1_softmax, a2_maxq, a2_softmax, ...]
    max_q_act_blob = C2.Cast(max_q_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    softmax_act_blob = C2.Cast(softmax_act_idxs, to=caffe2_pb2.TensorProto.INT32)
    C2.net().Append([max_q_act_blob, softmax_act_blob], [max_q_act_blob])
    transposed_action_idxs = C2.Transpose(max_q_act_blob)
    flat_transposed_action_idxs = C2.FlattenToVec(transposed_action_idxs)
    output_values = 'output/string_single_categorical_features.values'
    workspace.FeedBlob(output_values, np.zeros(1, dtype=np.int64))
    # Feed the action-name table that Gather reads from; the original text
    # omitted this, but the 'action_names' blob must exist at net creation
    workspace.FeedBlob('action_names', np.array(actions))
    parameters.append('action_names')
    C2.net().Gather(['action_names', flat_transposed_action_idxs],
                    [output_values])
    output_lengths = 'output/string_single_categorical_features.lengths'
    workspace.FeedBlob(output_lengths, np.zeros(1, dtype=np.int32))
    C2.net().ConstantFill(
        [shape_of_num_of_states],
        [output_lengths],
        value=2,
        dtype=caffe2_pb2.TensorProto.INT32,
    )
    output_keys = 'output/string_single_categorical_features.keys'
    workspace.FeedBlob(output_keys, np.zeros(1, dtype=np.int64))
    output_keys_tensor, _ = C2.Concat(
        C2.ConstantFill(shape=[1, 1], value=0,
                        dtype=caffe2_pb2.TensorProto.INT64),
        C2.ConstantFill(shape=[1, 1], value=1,
                        dtype=caffe2_pb2.TensorProto.INT64),
        axis=0,
    )
    output_key_tile = C2.Tile(output_keys_tensor, num_states, axis=0)
    C2.net().FlattenToVec([output_key_tile], [output_keys])

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(net)
    return DiscreteActionPredictor(net, parameters, int_features)
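# --- Example (illustrative, not part of the library) ---
# The Div + Softmax pair implements a temperature-scaled softmax policy:
# low temperatures approach the greedy max-Q policy, high temperatures
# approach uniform random. A NumPy sketch:
import numpy as np

def softmax_with_temperature(q_values, temperature):
    tempered = q_values / temperature
    # subtract the row max for numerical stability
    exp = np.exp(tempered - tempered.max(axis=1, keepdims=True))
    return exp / exp.sum(axis=1, keepdims=True)

q = np.array([[1.0, 2.0, 3.0]], dtype=np.float32)
print(softmax_with_temperature(q, 0.1))   # ~[0, 0, 1]: near-greedy
print(softmax_with_temperature(q, 10.0))  # ~[0.30, 0.33, 0.37]: near-uniform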
def preprocess_blob(self, blob, normalization_parameters):
    """
    Takes in a blob and its normalization parameters. Outputs a tuple
    whose first element is a blob containing the normalized input blob
    and whose second element contains all the parameter blobs used to
    create it.

    Call this from a CPU context and ensure the input blob exists in it.
    """
    parameters: List[str] = []
    MISSING_U = self._store_parameter(
        parameters, "MISSING_U", np.array([MISSING_VALUE + 1e-4], dtype=np.float32)
    )
    MISSING_L = self._store_parameter(
        parameters, "MISSING_L", np.array([MISSING_VALUE - 1e-4], dtype=np.float32)
    )
    is_empty_l = C2.GT(blob, MISSING_L, broadcast=1)
    is_empty_u = C2.LT(blob, MISSING_U, broadcast=1)
    is_empty = C2.And(is_empty_l, is_empty_u)
    for i in range(len(normalization_parameters) - 1):
        if (
            normalization_parameters[i].feature_type
            != normalization_parameters[i + 1].feature_type
        ):
            raise Exception(
                "Only one feature type is allowed per call to preprocess_blob!"
            )
    feature_type = normalization_parameters[0].feature_type
    if feature_type == identify_types.BINARY:
        TOLERANCE = self._store_parameter(
            parameters, "TOLERANCE", np.array(1e-3, dtype=np.float32)
        )
        ZERO = self._store_parameter(
            parameters, "ZERO", np.array([0], dtype=np.float32)
        )
        is_gt_zero = C2.GT(blob, C2.Add(ZERO, TOLERANCE, broadcast=1), broadcast=1)
        is_lt_zero = C2.LT(blob, C2.Sub(ZERO, TOLERANCE, broadcast=1), broadcast=1)
        bool_blob = C2.Or(is_gt_zero, is_lt_zero)
        blob = C2.Cast(bool_blob, to=caffe2_pb2.TensorProto.FLOAT)
    elif feature_type == identify_types.PROBABILITY:
        ONE = self._store_parameter(
            parameters, "ONE", np.array([1], dtype=np.float32)
        )
        NEGATIVE_ONE = self._store_parameter(
            parameters, "NEGATIVE_ONE", np.array([-1], dtype=np.float32)
        )
        clipped = C2.Clip(blob, min=0.01, max=0.99)
        # Logit: -log(1/p - 1) == log(p / (1 - p))
        blob = C2.Mul(
            C2.Log(C2.Sub(C2.Pow(clipped, exponent=-1.0), ONE, broadcast=1)),
            NEGATIVE_ONE,
            broadcast=1,
        )
    elif feature_type == identify_types.ENUM:
        for parameter in normalization_parameters:
            possible_values = parameter.possible_values
            for x in possible_values:
                if x < 0:
                    logger.fatal(
                        "Invalid enum possible value for feature: "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )
                    raise Exception(
                        "Invalid enum possible value for feature "
                        + blob
                        + ": "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )
        int_blob = C2.Cast(blob, to=core.DataType.INT32)

        # Batch one hot transform with MISSING_VALUE as a possible value
        feature_lengths = [
            len(p.possible_values) + 1 for p in normalization_parameters
        ]
        feature_lengths_blob = self._store_parameter(
            parameters,
            "feature_lengths_blob",
            np.array(feature_lengths, dtype=np.int32),
        )
        feature_values = [
            x
            for p in normalization_parameters
            for x in p.possible_values + [int(MISSING_VALUE)]
        ]
        feature_values_blob = self._store_parameter(
            parameters,
            "feature_values_blob",
            np.array(feature_values, dtype=np.int32),
        )
        one_hot_output = C2.BatchOneHot(
            int_blob, feature_lengths_blob, feature_values_blob
        )
        flattened_one_hot = C2.FlattenToVec(one_hot_output)

        # Remove missing values with a mask
        cols_to_include = [
            [1] * len(p.possible_values) + [0] for p in normalization_parameters
        ]
        cols_to_include = [x for col in cols_to_include for x in col]
        mask = self._store_parameter(
            parameters, "mask", np.array(cols_to_include, dtype=np.int32)
        )
        zero_vec = C2.ConstantFill(
            one_hot_output, value=0, dtype=caffe2_pb2.TensorProto.INT32
        )
        repeated_mask_bool = C2.Cast(
            C2.Add(zero_vec, mask, broadcast=1), to=core.DataType.BOOL
        )
        flattened_repeated_mask = C2.FlattenToVec(repeated_mask_bool)
        flattened_one_hot_proc = C2.NextBlob("flattened_one_hot_proc")
        flattened_one_hot_proc_indices = C2.NextBlob(
            "flattened_one_hot_proc_indices"
        )
        C2.net().BooleanMask(
            [flattened_one_hot, flattened_repeated_mask],
            [flattened_one_hot_proc, flattened_one_hot_proc_indices],
        )
        one_hot_shape = C2.Shape(one_hot_output)
        shape_delta = self._store_parameter(
            parameters,
            "shape_delta",
            np.array([0, len(normalization_parameters)], dtype=np.int64),
        )
        target_shape = C2.Sub(one_hot_shape, shape_delta, broadcast=1)
        output_int_blob = C2.NextBlob("output_int_blob")
        output_int_blob_old_shape = C2.NextBlob("output_int_blob_old_shape")
        C2.net().Reshape(
            [flattened_one_hot_proc, target_shape],
            [output_int_blob, output_int_blob_old_shape],
        )
        output_blob = C2.Cast(output_int_blob, to=core.DataType.FLOAT)
        return output_blob, parameters
    elif feature_type == identify_types.QUANTILE:
        # This transformation replaces a set of values with their quantile.
        # The quantile boundaries are provided in the normalization params.
        quantile_sizes = [len(norm.quantiles) for norm in normalization_parameters]
        num_boundaries_blob = self._store_parameter(
            parameters,
            "num_boundaries_blob",
            np.array(quantile_sizes, dtype=np.int32),
        )
        quantile_values = np.array([], dtype=np.float32)
        quantile_labels = np.array([], dtype=np.float32)
        for norm in normalization_parameters:
            quantile_values = np.append(
                quantile_values, np.array(norm.quantiles, dtype=np.float32)
            )
            quantile_labels = np.append(
                quantile_labels,
                np.arange(len(norm.quantiles), dtype=np.float32)
                / float(len(norm.quantiles) - 1.0),
            )
        quantiles = np.vstack([quantile_values, quantile_labels]).T
        quantiles_blob = self._store_parameter(
            parameters, "quantiles_blob", quantiles
        )
        quantile_blob = C2.Percentile(blob, quantiles_blob, num_boundaries_blob)
        blob = quantile_blob
    elif (
        feature_type == identify_types.CONTINUOUS
        or feature_type == identify_types.BOXCOX
    ):
        boxcox_shifts = []
        boxcox_lambdas = []
        means = []
        stddevs = []
        for norm in normalization_parameters:
            if feature_type == identify_types.BOXCOX:
                assert (
                    norm.boxcox_shift is not None
                    and norm.boxcox_lambda is not None
                )
                boxcox_shifts.append(norm.boxcox_shift)
                boxcox_lambdas.append(norm.boxcox_lambda)
            means.append(norm.mean)
            stddevs.append(norm.stddev)
        if feature_type == identify_types.BOXCOX:
            boxcox_shift_blob = self._store_parameter(
                parameters,
                "boxcox_shift",
                np.array(boxcox_shifts, dtype=np.float32),
            )
            # Store under "boxcox_lambda" (the original reused the
            # "boxcox_shift" name for both parameter blobs)
            boxcox_lambda_blob = self._store_parameter(
                parameters,
                "boxcox_lambda",
                np.array(boxcox_lambdas, dtype=np.float32),
            )
            blob = C2.BatchBoxCox(blob, boxcox_lambda_blob, boxcox_shift_blob)
        means_blob = self._store_parameter(
            parameters, "means_blob", np.array([means], dtype=np.float32)
        )
        stddevs_blob = self._store_parameter(
            parameters, "stddevs_blob", np.array([stddevs], dtype=np.float32)
        )
        blob = C2.Sub(blob, means_blob, broadcast=1, axis=0)
        blob = C2.Div(blob, stddevs_blob, broadcast=1, axis=0)
        blob = C2.Clip(blob, min=MIN_FEATURE_VALUE, max=MAX_FEATURE_VALUE)
    elif feature_type == identify_types.CONTINUOUS_ACTION:
        serving_min_value = np.array(
            [norm.min_value for norm in normalization_parameters], dtype=np.float32
        )
        serving_max_value = np.array(
            [norm.max_value for norm in normalization_parameters], dtype=np.float32
        )
        training_min_value = (
            np.ones(len(normalization_parameters), dtype=np.float32) * -1 + EPS
        )
        scaling_factor = (
            (np.ones(len(normalization_parameters), dtype=np.float32) - EPS)
            * 2
            / (serving_max_value - serving_min_value)
        )
        serving_min_blob = self._store_parameter(
            parameters, "serving_min_blob", serving_min_value
        )
        training_min_blob = self._store_parameter(
            parameters, "training_min_blob", training_min_value
        )
        scaling_factor_blob = self._store_parameter(
            parameters, "scaling_factor_blob", scaling_factor
        )
        blob = C2.Sub(blob, serving_min_blob, broadcast=1, axis=1)
        blob = C2.Mul(blob, scaling_factor_blob, broadcast=1, axis=1)
        blob = C2.Add(blob, training_min_blob, broadcast=1, axis=1)
        blob = C2.Clip(blob, min=-1 + EPS, max=1 - EPS)
    else:
        raise NotImplementedError(
            "Invalid feature type: {}".format(feature_type)
        )

    zeros = C2.ConstantFill(blob, value=0.0)
    output_blob = C2.Where(is_empty, zeros, blob)
    output_blob = C2.NanCheck(output_blob)
    return output_blob, parameters
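# --- Example (illustrative, not part of the library) ---
# The CONTINUOUS_ACTION branch maps each action dimension from its serving
# range onto the open interval (-1, 1) that a tanh actor trains in. A NumPy
# mirror, assuming EPS = 1e-6 (use the module's actual EPS):
import numpy as np

EPS = 1e-6  # assumed for this sketch

def to_training_range(col, serving_min, serving_max):
    scaling_factor = (1.0 - EPS) * 2.0 / (serving_max - serving_min)
    training_min = -1.0 + EPS
    return np.clip(
        (col - serving_min) * scaling_factor + training_min,
        -1.0 + EPS,
        1.0 - EPS,
    )

col = np.array([0.0, 5.0, 10.0], dtype=np.float32)
print(to_training_range(col, serving_min=0.0, serving_max=10.0))
# endpoints land on -1 + EPS and 1 - EPS; the midpoint lands on 0.0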