def test_merge_update_migrations_no_datetime_filter(self) -> None: migrations_file = self._migration_file_path_for_tag("tagC") migrations: List[RawTableMigration] = [ UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=None, filters=[("COL1", "123")], updates=[("COL2", "1234")], ), UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=None, filters=[("COL1", "456")], updates=[("COL2", "4567")], ), ] with local_project_id_override("recidiviz-456"): queries_map = RawTableMigrationGenerator.migration_queries( migrations) expected_queries_map = [ """UPDATE `recidiviz-456.us_xx_raw_data.tagC` original SET COL2 = updates.new__COL2 FROM (SELECT * FROM UNNEST([ STRUCT('123' AS COL1, '1234' AS new__COL2), STRUCT('456' AS COL1, '4567' AS new__COL2) ])) updates WHERE original.COL1 = updates.COL1;""" ] self.assertEqual(expected_queries_map, queries_map)
def test_update_migration_update_datetime_agnostic(self) -> None: migration = UpdateRawTableMigration( migrations_file=self._migration_file_path_for_tag("tagC"), update_datetime_filters=None, filters=[("COL1", "31415")], updates=[("COL1", "91011")], ) with local_project_id_override("recidiviz-456"): project_1_query_map = migration.migration_queries_by_update_datetime( ) with local_project_id_override("recidiviz-789"): project_2_query_map = migration.migration_queries_by_update_datetime( ) expected_query_map = { UPDATE_DATETIME_AGNOSTIC_DATETIME: "UPDATE `recidiviz-456.us_xx_raw_data.tagC` SET COL1 = '91011' WHERE COL1 = '31415';" } self.assertEqual(expected_query_map, project_1_query_map) expected_query_map = { UPDATE_DATETIME_AGNOSTIC_DATETIME: "UPDATE `recidiviz-789.us_xx_raw_data.tagC` SET COL1 = '91011' WHERE COL1 = '31415';" } self.assertEqual(expected_query_map, project_2_query_map)
def test_print_migrations(self) -> None: migrations_file = self._migration_file_path_for_tag("tagC") migrations = [ DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_1, _DATE_2], filters=[("COL1", "31415")], ), DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=None, filters=[("COL2", "1234")], ), UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[ _DATE_1, ], filters=[("COL1", "31415")], updates=[("COL1", "91011")], ), UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=None, filters=[("COL1", "31415")], updates=[("COL2", "91011")], ), ] # Shouldn't crash with local_project_id_override("recidiviz-456"): RawTableMigrationGenerator.print_list(migrations)
def test_update_migration(self) -> None: migration = UpdateRawTableMigration( migrations_file=self._migration_file_path_for_tag("tagC"), update_datetime_filters=[ _DATE_2, ], filters=[("COL1", "31415")], updates=[("COL1", "91011")], ) with local_project_id_override("recidiviz-456"): project_1_query_map = migration.migration_queries_by_update_datetime( ) with local_project_id_override("recidiviz-789"): project_2_query_map = migration.migration_queries_by_update_datetime( ) expected_query_map = { _DATE_2: """UPDATE `recidiviz-456.us_xx_raw_data.tagC` SET COL1 = '91011' WHERE COL1 = '31415' AND update_datetime = '2020-08-16T01:02:03';""" } self.assertEqual(expected_query_map, project_1_query_map) expected_query_map = { _DATE_2: """UPDATE `recidiviz-789.us_xx_raw_data.tagC` SET COL1 = '91011' WHERE COL1 = '31415' AND update_datetime = '2020-08-16T01:02:03';""" } self.assertEqual(expected_query_map, project_2_query_map)
def test_dont_merge_delete_and_update_migrations(self) -> None: migrations_file = self._migration_file_path_for_tag("tagC") migrations: List[RawTableMigration] = [ DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_1], filters=[("COL1", "1234")], ), UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_1], filters=[("COL1", "456")], updates=[("COL2", "4567")], ), ] with local_project_id_override("recidiviz-456"): queries_map = RawTableMigrationGenerator.migration_queries( migrations) expected_queries_map = [ f"""DELETE FROM `recidiviz-456.us_xx_raw_data.tagC` WHERE STRUCT(COL1, update_datetime) IN ( STRUCT('1234', '{_DATE_1.isoformat()}') );""", f"""UPDATE `recidiviz-456.us_xx_raw_data.tagC` original SET COL2 = updates.new__COL2 FROM (SELECT * FROM UNNEST([ STRUCT('456' AS COL1, CAST('{_DATE_1.isoformat()}' AS DATETIME) AS update_datetime, '4567' AS new__COL2) ])) updates WHERE original.COL1 = updates.COL1 AND original.update_datetime = updates.update_datetime;""", ] self.assertEqual(expected_queries_map, queries_map)
def test_update_migration_update_datetime_agnostic(self) -> None: migration = UpdateRawTableMigration( migrations_file=self._migration_file_path_for_tag("tagC"), update_datetime_filters=None, filters=[("COL1", "31415")], updates=[("COL1", "91011")], ) with local_project_id_override("recidiviz-456"): project_1_queries = RawTableMigrationGenerator.migration_queries( [migration]) with local_project_id_override("recidiviz-789"): project_2_queries = RawTableMigrationGenerator.migration_queries( [migration]) expected_project_1_queries = [ """UPDATE `recidiviz-456.us_xx_raw_data.tagC` original SET COL1 = updates.new__COL1 FROM (SELECT * FROM UNNEST([ STRUCT('31415' AS COL1, '91011' AS new__COL1) ])) updates WHERE original.COL1 = updates.COL1;""" ] self.assertEqual(expected_project_1_queries, project_1_queries) expected_project_2_queries = [ """UPDATE `recidiviz-789.us_xx_raw_data.tagC` original SET COL1 = updates.new__COL1 FROM (SELECT * FROM UNNEST([ STRUCT('31415' AS COL1, '91011' AS new__COL1) ])) updates WHERE original.COL1 = updates.COL1;""" ] self.assertEqual(expected_project_2_queries, project_2_queries)
def test_update_migration_date_filters(self) -> None: migration = UpdateRawTableMigration( migrations_file=self._migration_file_path_for_tag("mytag"), update_datetime_filters=[_DATE_1, _DATE_2], filters=[ ("COL2", "2171"), ("COL1", "31415"), ], updates=[ ("COL3", "91011"), ("COL1", "654"), ], ) with local_project_id_override("recidiviz-789"): self.assertEqual("recidiviz-789.us_xx_raw_data.mytag", migration.raw_table) self.assertEqual("mytag", migration.file_tag) self.assertEqual( ("COL1", "COL2", "update_datetime"), migration.ordered_filter_keys ) self.assertEqual("31415", migration.filters["COL1"]) self.assertEqual("2171", migration.filters["COL2"]) expected_filters_values = [ ("31415", "2171", "2020-04-14T00:31:00"), ("31415", "2171", "2020-08-16T01:02:03"), ] self.assertEqual(expected_filters_values, migration.ordered_filter_values) self.assertEqual(("COL1", "COL3"), migration.ordered_update_keys) self.assertEqual("91011", migration.updates["COL3"])
def test_update_migration_multiples(self) -> None: migration = UpdateRawTableMigration( migrations_file=self._migration_file_path_for_tag( "file_tag_first"), update_datetime_filters=[_DATE_1, _DATE_2], filters=[("col_name_1a", "12345"), ("col_name_1b", "4567")], updates=[("col_name_1a", "4567"), ("col_name_1b", "12345")], ) with local_project_id_override("recidiviz-456"): query_map = migration.migration_queries_by_update_datetime() expected_query_map = { _DATE_1: """UPDATE `recidiviz-456.us_xx_raw_data.file_tag_first` SET col_name_1a = '4567', col_name_1b = '12345' WHERE col_name_1a = '12345' AND col_name_1b = '4567' AND update_datetime = '2020-04-14T00:31:00';""", _DATE_2: """UPDATE `recidiviz-456.us_xx_raw_data.file_tag_first` SET col_name_1a = '4567', col_name_1b = '12345' WHERE col_name_1a = '12345' AND col_name_1b = '4567' AND update_datetime = '2020-08-16T01:02:03';""", } self.assertEqual(expected_query_map, query_map)
def test_update_migration_no_date_filter(self) -> None: migration = UpdateRawTableMigration( migrations_file=self._migration_file_path_for_tag("mytag"), update_datetime_filters=None, filters=[ ("COL2", "2171"), ("COL1", "31415"), ], updates=[ ("COL3", "91011"), ("COL1", "654"), ], ) with local_project_id_override("recidiviz-789"): self.assertEqual("recidiviz-789.us_xx_raw_data.mytag", migration.raw_table) self.assertEqual("mytag", migration.file_tag) self.assertEqual(("COL1", "COL2"), migration.ordered_filter_keys) self.assertEqual("31415", migration.filters["COL1"]) self.assertEqual("2171", migration.filters["COL2"]) self.assertEqual([("31415", "2171")], migration.ordered_filter_values) self.assertEqual(("COL1", "COL3"), migration.ordered_update_keys) self.assertEqual("91011", migration.updates["COL3"])
def test_update_migration_multiples(self) -> None: migration = UpdateRawTableMigration( migrations_file=self._migration_file_path_for_tag( "file_tag_first"), update_datetime_filters=[_DATE_1, _DATE_2], filters=[("col_name_1a", "12345"), ("col_name_1b", "4567")], updates=[("col_name_1a", "4567"), ("col_name_1b", "12345")], ) with local_project_id_override("recidiviz-456"): queries = RawTableMigrationGenerator.migration_queries([migration]) expected_queries = [ """UPDATE `recidiviz-456.us_xx_raw_data.file_tag_first` original SET col_name_1a = updates.new__col_name_1a, col_name_1b = updates.new__col_name_1b FROM (SELECT * FROM UNNEST([ STRUCT('12345' AS col_name_1a, '4567' AS col_name_1b, CAST('2020-04-14T00:31:00' AS DATETIME) AS update_datetime, '4567' AS new__col_name_1a, '12345' AS new__col_name_1b), STRUCT('12345' AS col_name_1a, '4567' AS col_name_1b, CAST('2020-08-16T01:02:03' AS DATETIME) AS update_datetime, '4567' AS new__col_name_1a, '12345' AS new__col_name_1b) ])) updates WHERE original.col_name_1a = updates.col_name_1a AND original.col_name_1b = updates.col_name_1b AND original.update_datetime = updates.update_datetime;""", ] for query in queries: print(query) self.assertEqual(expected_queries, queries)
import datetime from typing import List from recidiviz.ingest.direct.controllers.direct_ingest_raw_table_migration import ( RawTableMigration, UpdateRawTableMigration, DeleteFromRawTableMigration, ) COL1 = "COL1" DATE_1 = datetime.datetime.fromisoformat("2020-06-10T00:00:00") DATE_2 = datetime.datetime.fromisoformat("2020-09-21T00:00:00") MIGRATIONS: List[RawTableMigration] = [ UpdateRawTableMigration( migrations_file=__file__, update_datetime_filters=[ DATE_1, DATE_2, ], filters=[(COL1, "123")], updates=[(COL1, "456")], ), DeleteFromRawTableMigration( migrations_file=__file__, update_datetime_filters=None, filters=[(COL1, "789")], ), ]