コード例 #1
0
def test_given_decrypt_operator_class_then_we_get_the_correct_class():
    """Check the factory returns a matching operator for each deanonymize name.

    For every name, the created operator must be truthy and must report the
    requested name and the Deanonymize operator type.
    """
    for requested_name in ("decrypt",):
        created = OperatorsFactory().create_operator_class(
            requested_name,
            OperatorType.Deanonymize,
        )
        assert created
        assert created.operator_name() == requested_name
        assert created.operator_type() == OperatorType.Deanonymize
コード例 #2
0
def test_given_anonymize_operators_class_then_we_get_the_correct_class():
    """Check the factory returns a matching operator for each anonymize name.

    The created operator must be truthy, report the requested name, and
    declare its type as either Anonymize or All.
    """
    anonymizer_names = ("hash", "mask", "redact", "replace", "encrypt")
    for requested_name in anonymizer_names:
        created = OperatorsFactory().create_operator_class(
            requested_name,
            OperatorType.Anonymize,
        )
        assert created
        assert created.operator_name() == requested_name
        # Operators usable in both directions report OperatorType.All.
        assert created.operator_type() in (
            OperatorType.Anonymize,
            OperatorType.All,
        )
コード例 #3
0
def test_given_anonymizers_list_then_all_classes_are_there():
    """Check get_anonymizers() exposes exactly the five expected operators."""
    expected_names = ("hash", "mask", "redact", "replace", "encrypt")
    registered = OperatorsFactory.get_anonymizers()
    assert len(registered) == len(expected_names)
    for expected_name in expected_names:
        assert registered.get(expected_name)
コード例 #4
0
def test_given_wrong_operator_then_we_fail():
    """Check that passing 3 as the operator type raises InvalidParamException."""
    expected_message = "Invalid operator type '3'."
    with pytest.raises(InvalidParamException, match=expected_message):
        factory = OperatorsFactory()
        factory.create_operator_class("bla", 3)
コード例 #5
0
def test_given_wrong_name_for_anonymizer_class_then_we_fail():
    """Check that requesting "decrypt" as an anonymizer raises InvalidParamException."""
    expected_message = "Invalid operator class 'decrypt'."
    with pytest.raises(InvalidParamException, match=expected_message):
        factory = OperatorsFactory()
        factory.create_operator_class("decrypt", OperatorType.Anonymize)
コード例 #6
0
def test_given_decryptors_list_then_all_classes_are_there():
    """Check get_deanonymizers() exposes exactly the one expected operator."""
    expected_names = ("decrypt",)
    registered = OperatorsFactory.get_deanonymizers()
    assert len(registered) == len(expected_names)
    for expected_name in expected_names:
        assert registered.get(expected_name)
コード例 #7
0
 def __init__(self):
     """Attach the "presidio-anonymizer" logger and a new OperatorsFactory."""
     self.operators_factory = OperatorsFactory()
     self.logger = logging.getLogger("presidio-anonymizer")
コード例 #8
0
class EngineBase(ABC):
    """Handle the logic of operations over the text using the operators."""

    def __init__(self):
        """Attach the "presidio-anonymizer" logger and a new OperatorsFactory."""
        self.logger = logging.getLogger("presidio-anonymizer")
        self.operators_factory = OperatorsFactory()

    def _operate(self,
                 text: str,
                 pii_entities: List[PIIEntity],
                 operators_metadata: Dict[str, OperatorConfig],
                 operator_type: OperatorType) -> EngineResult:
        """
        Operate will do the operations required by the user over the text.

        :param text: the text we need to operate on.
        :param pii_entities: data about the text entities we want to operate over.
        :param operators_metadata: dictionary where the key is the entity_type
        and the value is the operator configuration we want to perform over
        this entity_type. The "DEFAULT" key is used for entity types that have
        no entry of their own.
        :param operator_type: either anonymize or deanonymize.
        :return: the engine result holding the output text and one result item
        per operated entity.
        """
        text_replace_builder = TextReplaceBuilder(original_text=text)
        engine_result = EngineResult()
        # Entities are handled in reverse sorted order; the ordering itself is
        # defined by PIIEntity (presumably by position, so that replacing from
        # the end keeps earlier offsets valid -- confirm against PIIEntity).
        for entity in sorted(pii_entities, reverse=True):
            text_to_operate_on = text_replace_builder.get_text_in_position(
                entity.start, entity.end
            )

            self.logger.debug(
                f"performing operation {entity}"
            )
            operator_metadata = self.__get_entity_operator_metadata(
                entity.entity_type, operators_metadata)
            changed_text = self.__operate_on_text(
                entity, text_to_operate_on, operator_metadata, operator_type
            )
            index_from_end = text_replace_builder.replace_text_get_insertion_index(
                changed_text, entity.start, entity.end
            )

            # The following creates an intermediate list of result entities,
            # ordered from end to start, and the indexes will be normalized
            # from start to end once the loop ends and the text length is
            # deterministic.
            result_item = OperatorResult(changed_text,
                                         operator_metadata.operator_name,
                                         0, index_from_end,
                                         entity.entity_type)
            engine_result.add_item(result_item)

        engine_result.set_text(text_replace_builder.output_text)
        engine_result.normalize_item_indexes()
        return engine_result

    def __operate_on_text(
            self,
            text_metadata: PIIEntity,
            text_to_operate_on: str,
            operator_metadata: OperatorConfig, operator_type: OperatorType
    ) -> str:
        """
        Create the configured operator, validate its params and run it.

        :param text_metadata: the entity being operated on; only its
        entity_type is read here.
        :param text_to_operate_on: the slice of text covered by the entity.
        :param operator_metadata: the operator name and params to use.
        :param operator_type: passed to the factory to select the operator.
        :return: whatever the operator's operate() returns for this text.
        """
        entity_type = text_metadata.entity_type
        self.logger.debug(f"getting operator for {entity_type}")
        operator = self.operators_factory.create_operator_class(
            operator_metadata.operator_name, operator_type)
        self.logger.debug(f"validating operator {operator} for {entity_type}")
        operator.validate(params=operator_metadata.params)
        # Work on a shallow copy: writing "entity_type" straight into the
        # caller's params dict would leak into their OperatorConfig, and a
        # config shared between entities (e.g. "DEFAULT") would be rewritten
        # on every call.
        params = dict(operator_metadata.params)
        params["entity_type"] = entity_type
        self.logger.debug(f"operating on {entity_type} with {operator}")
        return operator.operate(params=params, text=text_to_operate_on)

    @staticmethod
    def __get_entity_operator_metadata(
            entity_type: str, operators_metadata: Dict[str, OperatorConfig] = {}
    ) -> OperatorConfig:
        """
        Pick the operator configuration for the given entity type.

        :param entity_type: the entity type to look up.
        :param operators_metadata: mapping of entity_type to configuration;
        it is only read here, never modified.
        :return: the truthy entry for entity_type if there is one, otherwise
        the "DEFAULT" entry (None when that key is missing as well).
        """
        # We try to get the operator from the list by entity_type.
        # If it does not exist, we get the default from the list.
        specific = operators_metadata.get(entity_type)
        if specific:
            return specific
        return operators_metadata.get("DEFAULT")