Example #1
0
    def test_get_value_unsupported_args(self, faker):
        """Expect UnsupportedFakeTypeError when kwargs do not fit the method.

        ``faker`` is an injected mock (its patch decorator is outside this
        view); its return value is replaced by a stub whose ``ean`` method
        takes nothing but the instance, so ``not_valid_length`` cannot match.
        """

        class EanOnlyStub:
            def ean(self):
                """Accept no arguments beyond the instance itself."""

        faker.return_value = EanOnlyStub()
        generator = FakeColumnGenerator()

        unmatched_kwargs = {"not_valid_length": 13}
        with pytest.raises(UnsupportedFakeTypeError):
            generator.get_value("ean", unmatched_kwargs)
Example #2
0
    def test_get_value_unsupported(self, faker):
        """Expect UnsupportedFakeTypeError for a method name the stub lacks.

        ``faker`` is an injected mock (its patch decorator is outside this
        view); its return value is replaced by a stub that only defines
        ``ean``, so the requested name cannot be resolved on it.
        """

        class EanOnlyStub:
            def ean(self):
                """Accept no arguments beyond the instance itself."""

        faker.return_value = EanOnlyStub()
        generator = FakeColumnGenerator()

        missing_method = "NOT A VALID METHOD"
        with pytest.raises(UnsupportedFakeTypeError):
            generator.get_value(missing_method)
Example #3
0
class FakeColumnGeneratorTests(unittest.TestCase):
    """Checks for FakeColumnGenerator.supports, get_data_type and get_value."""

    class _EanOnlyFaker:
        """Stub faker that defines a single ``ean`` method and nothing else."""

        def ean(self):
            """Accept no arguments beyond the instance itself."""

    def setUp(self):
        # Unpatched generator shared by the tests that need no mocking.
        self.generator = FakeColumnGenerator()

    def test_supports_supported(self):
        """supports() returns True for "first_name"."""
        is_supported = self.generator.supports("first_name")
        assert is_supported is True

    def test_supports_unsupported_method(self):
        """supports() returns False for a name that is not a fake method."""
        is_supported = self.generator.supports("NOT A VALID METHOD")
        assert is_supported is False

    def test_supports_unsupported_args(self):
        """supports() returns False when "first_name" is given arg1/arg2."""
        unexpected_kwargs = {"arg1": 2, "arg2": 3}
        is_supported = self.generator.supports("first_name", unexpected_kwargs)
        assert is_supported is False

    def test_get_data_type_known_column(self):
        """get_data_type("date") yields FakeDataType.DATE."""
        data_type = self.generator.get_data_type("date")
        assert data_type == FakeDataType.DATE

    def test_get_data_type_unknown_column(self):
        """get_data_type("user_agent") yields FakeDataType.STRING."""
        data_type = self.generator.get_data_type("user_agent")
        assert data_type == FakeDataType.STRING

    @patch("pynonymizer.fake.Faker")
    def test_get_value_unsupported(self, faker):
        """get_value() raises for a method name the stub faker lacks."""
        faker.return_value = self._EanOnlyFaker()
        patched_generator = FakeColumnGenerator()

        with pytest.raises(UnsupportedFakeTypeError):
            patched_generator.get_value("NOT A VALID METHOD")

    @patch("pynonymizer.fake.Faker")
    def test_get_value_unsupported_args(self, faker):
        """get_value() raises when the kwargs do not fit the stub's ean()."""
        faker.return_value = self._EanOnlyFaker()
        patched_generator = FakeColumnGenerator()

        unmatched_kwargs = {"not_valid_length": 13}
        with pytest.raises(UnsupportedFakeTypeError):
            patched_generator.get_value("ean", unmatched_kwargs)
Example #4
0
    def parse_config(self, raw_config, locale_override=None):
        """
        Build a DatabaseStrategy from a raw configuration dict.

        A deep copy of ``raw_config`` is normalised and then read. As a side
        effect, ``self.fake_seeder`` is replaced with a FakeColumnGenerator
        created from the resolved locale and the configured providers.

        :param raw_config: configuration dict; "tables" is required, while
            "locale", "providers" and "scripts" are optional.
        :param locale_override: when truthy, used instead of the configured
            locale (which itself falls back to "en_GB").
        :return: DatabaseStrategy holding the parsed table strategies and the
            optional "before"/"after" scripts (None when absent).
        """
        config = StrategyParser.__normalize_table_list(deepcopy(raw_config))

        # An explicit override wins over whatever the config declares.
        locale = locale_override or config.get("locale", "en_GB")

        # Created before the tables are parsed; presumably __parse_table
        # relies on self.fake_seeder, so keep this ordering.
        self.fake_seeder = FakeColumnGenerator(
            locale=locale,
            providers=config.get("providers", []),
        )

        table_strategies = [
            self.__parse_table(table) for table in config["tables"]
        ]

        # A missing "scripts" section simply means there are no scripts.
        scripts = config.get("scripts", {})
        before_scripts = scripts["before"] if "before" in scripts else None
        after_scripts = scripts["after"] if "after" in scripts else None

        return DatabaseStrategy(
            table_strategies=table_strategies,
            before_scripts=before_scripts,
            after_scripts=after_scripts,
        )
Example #5
0
def pynonymize(
    input_path=None,
    strategyfile_path=None,
    output_path=None,
    db_user=None,
    db_password=None,
    db_type=None,
    db_host=None,
    db_name=None,
    db_port=None,
    fake_locale=None,
    start_at_step=None,
    stop_at_step=None,
    skip_steps=None,
    seed_rows=None,
    dry_run=False,
    verbose=False,
    **kwargs
):
    """Run the create / restore / anonymize / dump / drop database process.

    Arguments left as None receive defaults: the start and stop steps become
    ProcessSteps.START and ProcessSteps.END, ``db_type`` becomes "mysql",
    ``fake_locale`` becomes "en_GB" and ``seed_rows`` becomes 150. When the
    anonymize step is active and ``db_name`` is None, the name is derived
    from ``strategyfile_path`` via get_temp_db_name.

    Keyword arguments whose name starts with ``"<db_type>_"`` are forwarded
    to get_provider with that prefix removed; other extra keyword arguments
    are ignored. ``verbose`` is accepted but not used in this function.

    :raises ArgumentValidationError: with the list of every missing
        mandatory argument, before any database action is taken.
    """
    # Defaults first, then convert raw step values into ProcessSteps members.
    start_at_step = (
        ProcessSteps.START
        if start_at_step is None
        else ProcessSteps.from_value(start_at_step)
    )
    stop_at_step = (
        ProcessSteps.END
        if stop_at_step is None
        else ProcessSteps.from_value(stop_at_step)
    )
    if skip_steps and len(skip_steps) > 0:
        skip_steps = [ProcessSteps.from_value(step) for step in skip_steps]

    db_type = "mysql" if db_type is None else db_type
    fake_locale = "en_GB" if fake_locale is None else fake_locale
    seed_rows = 150 if seed_rows is None else seed_rows

    actions = StepActionMap(
        start_at_step, stop_at_step, skip_steps, dry_run=dry_run
    )

    # Which arguments are mandatory depends on the steps that will run.
    problems = []

    if not actions.skipped(ProcessSteps.RESTORE_DB) and input_path is None:
        problems.append("Missing INPUT")

    if not actions.skipped(ProcessSteps.ANONYMIZE_DB):
        if strategyfile_path is None:
            problems.append("Missing STRATEGYFILE")
        elif db_name is None:
            # Auto-resolve the name only when a strategyfile is available
            # AND the anonymize step is going to run.
            db_name = get_temp_db_name(strategyfile_path)

    if not actions.skipped(ProcessSteps.DUMP_DB) and output_path is None:
        problems.append("Missing OUTPUT")

    always_required = (
        (db_user, "Missing DB_USER"),
        (db_password, "Missing DB_PASSWORD"),
        (db_name, "Missing DB_NAME: Auto-resolve failed."),
    )
    problems.extend(
        message for value, message in always_required if value is None
    )

    if problems:
        raise ArgumentValidationError(problems)

    # Loading the strategy involves I/O, so fail here rather than after a
    # restore has already happened.
    if not actions.skipped(ProcessSteps.ANONYMIZE_DB):
        strategy_parser = StrategyParser(FakeColumnGenerator(fake_locale))

        logger.debug("loading strategyfile %s...", strategyfile_path)
        raw_strategy = read_config(strategyfile_path)
        strategy = strategy_parser.parse_config(raw_strategy)

    # Provider-specific options: e.g. mssql_backup_option -> backup_option.
    prefix = f"{db_type}_"
    db_kwargs = {
        key[len(prefix):]: value
        for key, value in kwargs.items()
        if key.startswith(prefix)
    }

    logger.debug("Database: (%s:%s)%s@%s name: %s", db_host, db_port, db_type, db_user, db_name)
    db_provider = get_provider(
        type=db_type,
        db_host=db_host,
        db_user=db_user,
        db_pass=db_password,
        db_name=db_name,
        db_port=db_port,
        seed_rows=seed_rows,
        **db_kwargs
    )

    # main process - no destructive/non-retryable actions should happen
    # before this line. Each action is wrapped in a lambda so that nothing
    # about a step (attribute lookup, `strategy`) is touched unless it runs.
    pipeline = (
        (ProcessSteps.CREATE_DB, lambda: db_provider.create_database()),
        (ProcessSteps.RESTORE_DB, lambda: db_provider.restore_database(input_path)),
        (ProcessSteps.ANONYMIZE_DB, lambda: db_provider.anonymize_database(strategy)),
        (ProcessSteps.DUMP_DB, lambda: db_provider.dump_database(output_path)),
        (ProcessSteps.DROP_DB, lambda: db_provider.drop_database()),
    )
    for step, run_step in pipeline:
        logger.info(actions.summary(step))
        if not actions.skipped(step):
            run_step()

    logger.info("Process complete!")
Example #6
0
 def setUp(self):
     """Store a default-constructed FakeColumnGenerator on the instance."""
     self.generator = FakeColumnGenerator()
Example #7
0
def fake_column_generator():
    """Return a FakeColumnGenerator built with its default arguments.

    The import is performed at call time rather than at module import time.
    """
    from pynonymizer import fake as fake_module

    return fake_module.FakeColumnGenerator()
Example #8
0
 def setUp(self):
     """Store a FakeColumnGenerator for the "en_US" locale on the instance."""
     self.generator = FakeColumnGenerator(locale="en_US")
Example #9
0
 def test_when_using_custom_provider_should_add_provider(
         self, fake, import_module):
     """A dotted provider path is imported and registered on the faker.

     ``fake`` and ``import_module`` are injected mocks (their patch
     decorators are outside this view). Constructing the generator with a
     provider path must import the module part of the path and pass the
     named attribute of that module to ``add_provider``.
     """
     provider_path = "some.module.somewhere.MagicProvider"

     # Only the constructor's side effects matter; the instance is unused.
     FakeColumnGenerator(locale="en_GB", providers=[provider_path])

     import_module.assert_any_call("some.module.somewhere")
     fake().add_provider.assert_any_call(import_module().MagicProvider)