def _forward_pass(
    cls, model, trainer, normalized_dense_matrix, actions, qnet_output_blob
):
    """
    Add the ops that expose the Q-network output under fixed blob names.

    Copies ``qnet_output_blob`` into a blob named "q_values", feeds the
    action names and their indices into the workspace as parameter blobs,
    and fills the five
    "output/string_weighted_multi_categorical_features.*" blobs from it.

    ``trainer`` and ``normalized_dense_matrix`` are accepted but not read
    anywhere in this body.

    :param model: model handed to ``C2.set_model`` before any op is added.
    :param actions: sequence of action names; its length is also used as
        the fill value for the ".values.lengths" output.
    :param qnet_output_blob: blob holding the Q-network output.
    :return: tuple ``(parameters, q_values)`` where ``parameters`` lists
        the two blobs created via ``C2.NextBlob`` and ``q_values`` is the
        string "q_values".
    """
    C2.set_model(model)

    created_blobs = []
    q_blob = "q_values"
    C2.net().Copy([qnet_output_blob], [q_blob])

    # Parameter blobs: the action names and a matching 0..len(actions)-1 range.
    names_blob = C2.NextBlob("action_names")
    created_blobs.append(names_blob)
    workspace.FeedBlob(names_blob, np.array(actions))
    range_blob = C2.NextBlob("action_range")
    created_blobs.append(range_blob)
    num_actions = len(actions)
    workspace.FeedBlob(range_blob, np.array(list(range(num_actions))))

    # First entry of the q_values shape, used below as the tile count.
    q_shape = C2.Shape(q_blob)
    row_count = C2.Slice(q_shape, starts=[0], ends=[1])

    # Template tensor for the ConstantFill calls below.
    # NOTE(review): presumably the first column of every row (Caffe2 Slice
    # treating an end of -1 as "through the last element") -- confirm.
    row_template = C2.Slice(q_blob, starts=[0, 0], ends=[-1, 1])

    tensor_types = caffe2_pb2.TensorProto

    # --- .keys: INT64 zeros shaped like the template, flattened ---
    feature_keys_name = "output/string_weighted_multi_categorical_features.keys"
    workspace.FeedBlob(feature_keys_name, np.zeros(1, dtype=np.int64))
    feature_keys_fill = C2.ConstantFill(
        row_template, value=0, dtype=tensor_types.INT64
    )
    # An explicit output blob name is required here, so the op is added
    # through C2.net().Fn(...) rather than C2.Fn(...).
    C2.net().FlattenToVec([feature_keys_fill], [feature_keys_name])

    # --- .lengths: INT32 ones shaped like the template, flattened ---
    feature_lengths_name = (
        "output/string_weighted_multi_categorical_features.lengths"
    )
    workspace.FeedBlob(feature_lengths_name, np.zeros(1, dtype=np.int32))
    feature_lengths_fill = C2.ConstantFill(
        row_template, value=1, dtype=tensor_types.INT32
    )
    C2.net().FlattenToVec([feature_lengths_fill], [feature_lengths_name])

    # --- .values.keys: the action names tiled along axis 0 ---
    value_keys_name = (
        "output/string_weighted_multi_categorical_features.values.keys"
    )
    workspace.FeedBlob(value_keys_name, np.array(["a"]))
    C2.net().Tile([names_blob, row_count], [value_keys_name], axis=0)

    # --- .values.lengths: INT32 fill of len(actions), flattened ---
    value_lengths_fill = C2.ConstantFill(
        row_template, value=num_actions, dtype=tensor_types.INT32
    )
    value_lengths_name = (
        "output/string_weighted_multi_categorical_features.values.lengths"
    )
    workspace.FeedBlob(value_lengths_name, np.zeros(1, dtype=np.int32))
    C2.net().FlattenToVec([value_lengths_fill], [value_lengths_name])

    # --- .values.values: the Q-values themselves, flattened ---
    value_values_name = (
        "output/string_weighted_multi_categorical_features.values.values"
    )
    workspace.FeedBlob(value_values_name, np.array([1.0]))
    C2.net().FlattenToVec([q_blob], [value_values_name])

    return created_blobs, q_blob
def preprocess_blob(self, blob, normalization_parameters):
    """
    Takes in a blob and its normalization parameters.  Outputs a tuple
    whose first element is a blob containing the normalized input blob
    and whose second element contains all the parameter blobs used to
    create it.

    Call this from a CPU context and ensure the input blob exists in it.

    All entries of ``normalization_parameters`` must share one
    ``feature_type``; the transformation applied depends on it:

    * BINARY: 1.0 where the value is more than 1e-3 away from zero,
      else 0.0.
    * PROBABILITY: clip to [0.01, 0.99], then logit.
    * ENUM: batch one-hot encoding over each feature's
      ``possible_values``.  This branch returns directly and does not go
      through the final missing-value masking step.
    * QUANTILE: ``C2.Percentile`` lookup against each feature's
      ``quantiles``.
    * CONTINUOUS / BOXCOX: optional Box-Cox transform, then subtract
      ``mean`` and divide by ``stddev``; clipped to [-3, 3] when
      ``self.clip_anomalies`` is truthy.

    For every branch except ENUM, entries within 1e-4 of MISSING_VALUE in
    the input are replaced by 0 in the output.

    :param blob: input blob to normalize.
    :param normalization_parameters: non-empty sequence of per-feature
        normalization parameter objects.
    :return: tuple ``(output_blob, parameters)``.
    :raises Exception: if the feature types are mixed, or an ENUM feature
        lists a negative possible value.
    :raises NotImplementedError: if the feature type is not handled here.
    """
    parameters: List[str] = []

    ZERO = self._store_parameter(
        parameters, "ZERO", np.array([0], dtype=np.float32)
    )
    MISSING_U = self._store_parameter(
        parameters,
        "MISSING_U",
        np.array([MISSING_VALUE + 1e-4], dtype=np.float32),
    )
    MISSING_L = self._store_parameter(
        parameters,
        "MISSING_L",
        np.array([MISSING_VALUE - 1e-4], dtype=np.float32),
    )

    # A value is treated as missing when it lies strictly between
    # MISSING_VALUE - 1e-4 and MISSING_VALUE + 1e-4.  The mask is computed
    # on the raw input, before any transformation below rebinds `blob`.
    is_empty_l = C2.GT(blob, MISSING_L, broadcast=1)
    is_empty_u = C2.LT(blob, MISSING_U, broadcast=1)
    is_empty = C2.And(is_empty_l, is_empty_u)

    # Every neighbouring pair must agree on the feature type.
    for current, following in zip(
        normalization_parameters, normalization_parameters[1:]
    ):
        if current.feature_type != following.feature_type:
            raise Exception(
                "Only one feature type is allowed per call to preprocess_blob!"
            )
    feature_type = normalization_parameters[0].feature_type

    if feature_type == identify_types.BINARY:
        TOLERANCE = self._store_parameter(
            parameters, "TOLERANCE", np.array(1e-3, dtype=np.float32)
        )
        # True wherever the value is outside [-TOLERANCE, +TOLERANCE].
        is_gt_zero = C2.GT(
            blob, C2.Add(ZERO, TOLERANCE, broadcast=1), broadcast=1
        )
        is_lt_zero = C2.LT(
            blob, C2.Sub(ZERO, TOLERANCE, broadcast=1), broadcast=1
        )
        bool_blob = C2.Or(is_gt_zero, is_lt_zero)
        blob = C2.Cast(bool_blob, to=caffe2_pb2.TensorProto.FLOAT)
    elif feature_type == identify_types.PROBABILITY:
        # Clip away from 0 and 1 before taking the logit.
        blob = C2.Logit(C2.Clip(blob, min=0.01, max=0.99))
    elif feature_type == identify_types.ENUM:
        for parameter in normalization_parameters:
            for x in parameter.possible_values:
                if x < 0:
                    logger.fatal(
                        "Invalid enum possible value for feature: "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )
                    # str(blob) keeps the message a plain string whatever
                    # type the blob handle has.
                    raise Exception(
                        "Invalid enum possible value for feature "
                        + str(blob)
                        + ": "
                        + str(x)
                        + " "
                        + str(parameter.possible_values)
                    )

        int_blob = C2.Cast(blob, to=core.DataType.INT32)

        # Batch one hot transform with MISSING_VALUE as a possible value
        feature_lengths = [
            len(p.possible_values) + 1 for p in normalization_parameters
        ]
        feature_lengths_blob = self._store_parameter(
            parameters,
            "feature_lengths_blob",
            np.array(feature_lengths, dtype=np.int32),
        )

        feature_values = [
            x
            for p in normalization_parameters
            for x in p.possible_values + [int(MISSING_VALUE)]
        ]
        feature_values_blob = self._store_parameter(
            parameters,
            "feature_values_blob",
            np.array(feature_values, dtype=np.int32),
        )

        one_hot_output = C2.BatchOneHot(
            int_blob, feature_lengths_blob, feature_values_blob
        )
        flattened_one_hot = C2.FlattenToVec(one_hot_output)

        # Remove missing values with a mask: per feature, keep one column
        # for each possible value and drop the trailing MISSING_VALUE one.
        cols_to_include = [
            flag
            for p in normalization_parameters
            for flag in [1] * len(p.possible_values) + [0]
        ]
        mask = self._store_parameter(
            parameters, "mask", np.array(cols_to_include, dtype=np.int32)
        )

        # Adding the mask to a zero tensor shaped like the one-hot output
        # yields a full-size mask after the broadcast.
        zero_vec = C2.ConstantFill(
            one_hot_output, value=0, dtype=caffe2_pb2.TensorProto.INT32
        )
        repeated_mask_bool = C2.Cast(
            C2.Add(zero_vec, mask, broadcast=1), to=core.DataType.BOOL
        )
        flattened_repeated_mask = C2.FlattenToVec(repeated_mask_bool)

        flattened_one_hot_proc = C2.NextBlob("flattened_one_hot_proc")
        flattened_one_hot_proc_indices = C2.NextBlob(
            "flattened_one_hot_proc_indices"
        )
        C2.net().BooleanMask(
            [flattened_one_hot, flattened_repeated_mask],
            [flattened_one_hot_proc, flattened_one_hot_proc_indices],
        )

        # One column was dropped per feature, so the second dimension
        # shrinks by len(normalization_parameters).
        one_hot_shape = C2.Shape(one_hot_output)
        shape_delta = self._store_parameter(
            parameters,
            "shape_delta",
            np.array([0, len(normalization_parameters)], dtype=np.int64),
        )
        target_shape = C2.Sub(one_hot_shape, shape_delta, broadcast=1)

        output_int_blob = C2.NextBlob("output_int_blob")
        output_int_blob_old_shape = C2.NextBlob("output_int_blob_old_shape")
        C2.net().Reshape(
            [flattened_one_hot_proc, target_shape],
            [output_int_blob, output_int_blob_old_shape],
        )

        output_blob = C2.Cast(output_int_blob, to=core.DataType.FLOAT)

        # ENUM output skips the missing-value masking applied at the end
        # of this function.
        return output_blob, parameters
    elif feature_type == identify_types.QUANTILE:
        # This transformation replaces a set of values with their quantile.
        # The quantile boundaries are provided in the normalization params.
        quantile_sizes = [
            len(norm.quantiles) for norm in normalization_parameters
        ]
        num_boundaries_blob = self._store_parameter(
            parameters,
            "num_boundaries_blob",
            np.array(quantile_sizes, dtype=np.int32),
        )

        quantile_values = np.array([], dtype=np.float32)
        quantile_labels = np.array([], dtype=np.float32)
        for norm in normalization_parameters:
            quantile_values = np.append(
                quantile_values, np.array(norm.quantiles, dtype=np.float32)
            )
            # TODO: Fix this: the np.unique is making this part not true.
            quantile_labels = np.append(
                quantile_labels,
                np.arange(len(norm.quantiles), dtype=np.float32)
                / float(len(norm.quantiles)),
            )
        # Two columns: boundary value, and its label i / len(quantiles).
        quantiles = np.vstack([quantile_values, quantile_labels]).T
        quantiles_blob = self._store_parameter(
            parameters, "quantiles_blob", quantiles
        )

        quantile_blob = C2.Percentile(
            blob, quantiles_blob, num_boundaries_blob
        )
        blob = quantile_blob
    elif (
        feature_type == identify_types.CONTINUOUS
        or feature_type == identify_types.BOXCOX
    ):
        boxcox_shifts = []
        boxcox_lambdas = []
        means = []
        stddevs = []

        for norm in normalization_parameters:
            if feature_type == identify_types.BOXCOX:
                assert (
                    norm.boxcox_shift is not None
                    and norm.boxcox_lambda is not None
                )
                boxcox_shifts.append(norm.boxcox_shift)
                boxcox_lambdas.append(norm.boxcox_lambda)
            means.append(norm.mean)
            stddevs.append(norm.stddev)

        if feature_type == identify_types.BOXCOX:
            boxcox_shift_blob = self._store_parameter(
                parameters,
                "boxcox_shift",
                np.array(boxcox_shifts, dtype=np.float32),
            )
            # Stored under its own label; it was previously registered as
            # "boxcox_shift" too, duplicating the shift parameter's name.
            boxcox_lambda_blob = self._store_parameter(
                parameters,
                "boxcox_lambda",
                np.array(boxcox_lambdas, dtype=np.float32),
            )

            blob = C2.BatchBoxCox(blob, boxcox_lambda_blob, boxcox_shift_blob)

        means_blob = self._store_parameter(
            parameters, "means_blob", np.array([means], dtype=np.float32)
        )
        stddevs_blob = self._store_parameter(
            parameters, "stddevs_blob", np.array([stddevs], dtype=np.float32)
        )

        # Standardize: (x - mean) / stddev.
        blob = C2.Sub(blob, means_blob, broadcast=1, axis=0)
        blob = C2.Div(blob, stddevs_blob, broadcast=1, axis=0)
        if self.clip_anomalies:
            blob = C2.Clip(blob, min=-3.0, max=3.0)
    else:
        raise NotImplementedError(
            "Invalid feature type: {}".format(feature_type)
        )

    # Replace entries flagged as missing in the input with zero.
    zeros = C2.ConstantFill(blob, value=0.)
    output_blob = C2.Where(is_empty, zeros, blob)

    return output_blob, parameters