def test_get_update_table_unsupported_column_type(update_table_strategy_unknown):
    """
    Building an update statement for a table strategy that carries an unsupported
    column type must fail with UnsupportedColumnStrategyError.
    """
    seed_table_name = "seed_table"

    with pytest.raises(UnsupportedColumnStrategyError):
        query_factory.get_update_table(seed_table_name, update_table_strategy_unknown)
def anonymize_database(self, database_strategy):
    """
    Run a database strategy against a restored database.

    The work happens in a fixed order:

    1. If the strategy has any fake-update qualifiers, create the seed table and
       fill it with seed data.
    2. Run the strategy's "before" scripts.
    3. Apply every table strategy (truncate, delete, or update columns), reporting
       progress through a tqdm progress bar.
    4. Run the strategy's "after" scripts.
    5. Drop the seed table and pause briefly.

    :param database_strategy: a strategy.DatabaseStrategy configuration
    :raises UnsupportedTableStrategyError: when a table strategy's type is not one of
        TRUNCATE, DELETE or UPDATE_COLUMNS
    :return: nothing
    """
    qualifier_map = database_strategy.fake_update_qualifier_map
    seed_column_count = len(qualifier_map)

    # The seed table is only built when at least one fake-update qualifier is in use.
    if seed_column_count > 0:
        self.logger.info("creating seed table with %d columns", seed_column_count)
        seed_table_ddl = query_factory.get_create_seed_table(SEED_TABLE_NAME, qualifier_map)
        self.__runner.db_execute(seed_table_ddl)

        self.logger.info("Inserting seed data")
        self.__seed(qualifier_map)

    self.__run_scripts(database_strategy.before_scripts, "before")

    table_strategies = database_strategy.table_strategies
    table_count = len(table_strategies)
    self.logger.info("Anonymizing %d tables", table_count)

    with tqdm(desc="Anonymizing database", total=table_count) as progressbar:
        for current in table_strategies:
            table_name = current.table_name

            # A schema on the table strategy is reported and otherwise ignored here.
            if current.schema is not None:
                self.logger.warning(
                    "%s: MySQL provider does not support table schema. This option will be ignored.",
                    table_name
                )

            strategy_type = current.strategy_type

            # Each branch updates the progress label first and then builds its SQL;
            # the SQL is executed once, after the branch has been selected.
            if strategy_type == TableStrategyTypes.TRUNCATE:
                progressbar.set_description("Truncating {}".format(table_name))
                sql = query_factory.get_truncate_table(table_name)
            elif strategy_type == TableStrategyTypes.DELETE:
                progressbar.set_description("Deleting {}".format(table_name))
                sql = query_factory.get_delete_table(table_name)
            elif strategy_type == TableStrategyTypes.UPDATE_COLUMNS:
                progressbar.set_description("Anonymizing {}".format(table_name))
                sql = query_factory.get_update_table(SEED_TABLE_NAME, current)
            else:
                raise UnsupportedTableStrategyError(current)

            self.__runner.db_execute(sql)
            progressbar.update()

    self.__run_scripts(database_strategy.after_scripts, "after")

    # The drop is issued whether or not the seed table was created above.
    self.logger.info("dropping seed table")
    drop_seed_table_sql = query_factory.get_drop_seed_table(SEED_TABLE_NAME)
    self.__runner.db_execute(drop_seed_table_sql)

    # Pause for an arbitrary, short period so this step does not interact with
    # transactional dump operations.
    self.logger.debug("Waiting for trailing operations to complete...")
    sleep(0.2)
def test_get_update_table_literal(literal_strategy):
    """
    A literal column strategy should be written into the UPDATE statement as-is.

    NOTE: the ``literal_strategy`` fixture is requested but not referenced in the
    body; the strategy under test is built inline.
    """
    literal_column = LiteralUpdateColumnStrategy("literal_column", "RAND()")
    table_strategy = UpdateColumnsTableStrategy("anon_table", [literal_column])

    result_queries = query_factory.get_update_table("seed_table", table_strategy)

    expected_queries = [
        "UPDATE `anon_table` SET `literal_column` = RAND();"
    ]
    assert result_queries == expected_queries
def test_get_update_table_fake_column(column_strategy_list):
    """
    An update-columns table strategy built from ``column_strategy_list`` should
    produce a single UPDATE statement assigning all six test columns.
    """
    table_strategy = UpdateColumnsTableStrategy("anon_table", column_strategy_list)

    # One statement; the adjacent literals below concatenate into a single string.
    expected_statement = (
        "UPDATE `anon_table` SET "
        "`test_column1` = ( SELECT `first_name` FROM `seed_table` ORDER BY RAND() LIMIT 1),"
        "`test_column2` = ( SELECT `last_name` FROM `seed_table` ORDER BY RAND() LIMIT 1),"
        "`test_column3` = (''),"
        "`test_column4` = ( SELECT MD5(FLOOR((NOW() + RAND()) * (RAND() * RAND() / RAND()) + RAND())) ),"
        "`test_column5` = ( SELECT CONCAT(MD5(FLOOR((NOW() + RAND()) * (RAND() * RAND() / RAND()) + RAND())), '@', MD5(FLOOR((NOW() + RAND()) * (RAND() * RAND() / RAND()) + RAND())), '.com') ),"
        "`test_column6` = RAND();"
    )

    update_table_all = query_factory.get_update_table("seed_table", table_strategy)

    assert update_table_all == [expected_statement]