def test_create_database(self):
    """Create the data-types database in Glue, query it, then add a table.

    The database is built from the ``data/data_types/`` folder next to this
    file and queried through Athena. It is then re-read from the Glue
    catalogue, extended with a second table that has a nested struct
    column, pushed back to Glue and queried again.

    ``self.skip_test_if_no_creds()`` is called first; presumably it skips
    the test when AWS credentials are unavailable (defined elsewhere).
    """
    self.skip_test_if_no_creds()

    # Build the database from the fixture folder and (re)create it in Glue.
    meta_dir = os.path.join(os.path.dirname(__file__), "data/data_types/")
    local_db = read_database_folder(meta_dir)
    local_db.create_glue_database(delete_if_exists=True)

    first_query = """ select * from test_data_types.test_table """
    run_athena_sql(first_query)

    # Start again from what is actually stored in the Glue catalogue.
    glue_db = get_existing_database_from_glue_catalogue("test_data_types")

    nested_type = (
        "struct<arr_key:array<character>,"
        "dict_key:struct<nest_arr:array<long>,"
        "nest_dict:struct<a_key:character,b_key:character>>>"
    )
    new_table = TableMeta(
        name="test_table_2",
        location="database/test/test_table/",
        data_format="json",
    )
    new_table.add_column(
        "robin_entity_id",
        nested_type,
        description="an ID for each entity",
    )

    glue_db.add_table(new_table)
    glue_db.update_glue_database()

    second_query = """ select * from test_data_types.test_table_2 """
    run_athena_sql(second_query)
def test_glue_database_creation(self):
    """Create the example database in Glue, verify its tables, then delete it.

    The test needs working AWS credentials. When none can be resolved it is
    reported as skipped rather than silently passing.
    """
    credentials = boto3.Session().get_credentials()

    # Reading ``access_key`` fails when no credentials were resolved
    # (``credentials`` may be None). Catch Exception rather than using a bare
    # ``except`` so KeyboardInterrupt / SystemExit still propagate.
    has_access_key = True
    try:
        _ = credentials.access_key
    except Exception:
        has_access_key = False

    if not has_access_key:
        self.skipTest(
            "Cannot run this unit test as do not have access to AWS.")

    db = read_database_folder('example/meta_data/db1/')
    # Suffix the name so the test does not clash with a real database.
    db.name = db.name + '_unit_test_'
    db.create_glue_database()

    resp = _glue_client.get_tables(DatabaseName=db.name)
    test_created = all(
        r['Name'] in db.table_names for r in resp['TableList'])
    self.assertTrue(
        test_created,
        msg=("Note this requires user to have correct credentials to "
             "create a glue database"),
    )

    # First delete succeeds; the second reports the database is gone.
    self.assertEqual(db.delete_glue_database(), 'database deleted')
    self.assertEqual(
        db.delete_glue_database(),
        'Cannot delete as database not found in glue catalogue')
def test_db_value_properties(self):
    """Check that the simple database properties can be set and read back."""
    db = read_database_folder('example/meta_data/db1/')

    updates = (
        ('name', 'new_name'),
        ('description', 'new description'),
        ('bucket', 'new-bucket'),
        ('base_folder', 'new/folder/location'),
    )
    # Assign each property, then immediately read it back, in this order.
    for attr, new_value in updates:
        setattr(db, attr, new_value)
        self.assertEqual(getattr(db, attr), new_value)
def test_db_value_properties(self):
    """Each writable database property returns the value just assigned."""
    database = read_database_folder("example/meta_data/db1/")

    def assign_and_check(attribute, value):
        # Write through the property, then read back through it.
        setattr(database, attribute, value)
        self.assertEqual(getattr(database, attribute), value)

    assign_and_check("name", "new_name")
    assign_and_check("description", "new description")
    assign_and_check("bucket", "new-bucket")
    assign_and_check("base_folder", "new/folder/location")
def test_add_remove_table(self):
    """Exercise remove_table / add_table, including their error paths."""
    db = read_database_folder('example/meta_data/db1/')

    # Removing an unknown table is an error.
    self.assertRaises(ValueError, db.remove_table, 'not_a_table')

    db.remove_table('employees')
    self.assertEqual(set(db.table_names), {'teams', 'pay'})

    emp_table = read_table_json('example/meta_data/db1/employees.json')
    db.add_table(emp_table)
    # Compare the full set: an ``all(name in expected ...)`` check would also
    # pass if the table had not been added back.
    self.assertEqual(set(db.table_names), {'teams', 'employees', 'pay'})

    # Only table objects can be added, and not the same table twice.
    self.assertRaises(ValueError, db.add_table, 'not a table obj')
    self.assertRaises(ValueError, db.add_table, emp_table)
def test_add_remove_table(self):
    """Tables can be removed and re-added; invalid requests raise ValueError."""
    db = read_database_folder("example/meta_data/db1/")

    # Unknown table name -> ValueError.
    with self.assertRaises(ValueError):
        db.remove_table("not_a_table")

    db.remove_table("employees")
    self.assertEqual(set(db.table_names), {"teams", "pay"})

    emp_table = read_table_json("example/meta_data/db1/employees.json")
    db.add_table(emp_table)
    # Assert the exact set of names; a subset-style check would be satisfied
    # even if "employees" were still missing.
    self.assertEqual(set(db.table_names), {"teams", "employees", "pay"})

    # A non-table argument is rejected.
    with self.assertRaises(ValueError):
        db.add_table("not a table obj")
    # Adding the same table a second time is rejected.
    with self.assertRaises(ValueError):
        db.add_table(emp_table)
def test_db_test_column_types_align(self):
    """Check test_column_types_align before and after changing a column type."""
    db = read_database_folder("example/meta_data/db1/")

    # Metadata as loaded from disk: expected not to raise.
    db.test_column_types_align()

    # Change the declared type of pay.employee_id.
    pay_table = db.table("pay")
    pay_table.update_column(column_name="employee_id", type="character")

    # With "pay" excluded the check is still expected to pass.
    db.test_column_types_align(exclude_tables=["pay"])

    # With "pay" included the mismatch must be reported.
    self.assertRaises(MetaColumnTypeMismatch, db.test_column_types_align)
def test_glue_database_creation(self):
    """Create the example database in Glue, check its tables, then delete it.

    ``self.skip_test_if_no_creds()`` runs first; presumably it skips the
    test when AWS credentials are unavailable (defined elsewhere).
    """
    self.skip_test_if_no_creds()

    db = read_database_folder("example/meta_data/db1/")
    # Suffix the name so the database created here is distinct.
    db.name += "_unit_test_"
    db.create_glue_database()

    glue_tables = _glue_client.get_tables(DatabaseName=db.name)["TableList"]
    every_table_known = all(
        entry["Name"] in db.table_names for entry in glue_tables
    )
    self.assertTrue(
        every_table_known,
        msg="Note this requires user to have correct credentials to create a "
            "glue database",
    )

    # First delete reports success; a second finds nothing to delete.
    first_delete = db.delete_glue_database()
    self.assertEqual(first_delete, "database deleted")
    second_delete = db.delete_glue_database()
    self.assertEqual(second_delete, "database not found in glue catalogue")
def test_table_to_dict(self):
    """to_dict() of a table matches the JSON file it was loaded from.

    Checked for "teams" and for "pay" (the file with glue specific
    content).
    """
    db = read_database_folder("example/meta_data/db1/")

    cases = (
        ("teams", "example/meta_data/db1/teams.json"),
        ("pay", "example/meta_data/db1/pay.json"),
    )
    for table_name, json_path in cases:
        from_file = read_json(json_path)
        from_meta = db.table(table_name).to_dict()

        # Blank out "$schema" on both sides: it may change on a branch but
        # the rest of the dict still needs to be unit tested.
        from_file["$schema"] = ""
        from_meta["$schema"] = ""

        self.assertDictEqual(from_meta, from_file)
def test_db_table(self):
    """db.table() returns a TableMeta for a known name, ValueError otherwise."""
    db = read_database_folder('example/meta_data/db1/')

    employees = db.table('employees')
    self.assertIsInstance(employees, TableMeta)

    with self.assertRaises(ValueError):
        db.table('not_a_table_object')
def test_db_s3_database_path(self):
    """s3_database_path of the example database is the expected S3 URI."""
    expected_path = 's3://my-bucket/database/database1'
    db = read_database_folder('example/meta_data/db1/')
    actual_path = db.s3_database_path
    self.assertEqual(actual_path, expected_path)
def test_db_glue_name(self):
    """The example database is named 'workforce' on each of two loads."""
    folder = 'example/meta_data/db1/'
    expected_name = 'workforce'

    first_load = read_database_folder(folder)
    self.assertEqual(first_load.name, expected_name)

    # Load the same folder a second time and check again.
    second_load = read_database_folder(folder)
    self.assertEqual(second_load.name, expected_name)
def test_db_name_validation(self):
    """Assigning 'bad-name' to db.name raises ValueError."""
    db = read_database_folder('example/meta_data/db1/')
    # setattr performs the same assignment as ``db.name = 'bad-name'``.
    self.assertRaises(ValueError, setattr, db, 'name', 'bad-name')
def test_db_table_names(self):
    """Every name in db.table_names is one of the expected table names.

    This only checks that no unexpected name appears; it does not check
    that all three expected names are present.
    """
    db = read_database_folder('example/meta_data/db1/')
    expected_names = ['teams', 'employees', 'pay']
    for table_name in db.table_names:
        self.assertIn(table_name, expected_names)
def test_read_json(self):
    """The example database folder loads with the expected top-level values."""
    db = read_database_folder('example/meta_data/db1/')

    expected_values = (
        ('name', 'workforce'),
        ('description', 'Example database'),
        ('bucket', 'my-bucket'),
        ('base_folder', 'database/database1'),
    )
    for attr, expected in expected_values:
        self.assertEqual(getattr(db, attr), expected)
def main():
    """Rebuild the Glue database described under 'meta_data/curated/'.

    Deletes the Glue database, creates it again from the metadata folder
    and then refreshes the partitions of all its tables.
    """
    curated_db = read_database_folder('meta_data/curated/')

    # Delete first, then create from the current metadata.
    curated_db.delete_glue_database()
    curated_db.create_glue_database()

    curated_db.refresh_all_table_partitions()
def test_can_create_glue_table(self, mock_client_create_table):
    """Creating the Glue database calls the (mocked) create-table client.

    ``mock_client_create_table`` is presumably injected by a patch
    decorator that is not visible here; only its ``called`` flag is read.
    ``self.skip_test_if_no_creds()`` runs first.
    """
    self.skip_test_if_no_creds()

    test_dir = os.path.dirname(__file__)
    meta_dir = os.path.join(test_dir, "data/data_types/")
    db = read_database_folder(meta_dir)
    db.create_glue_database(delete_if_exists=True)

    was_called = mock_client_create_table.called
    self.assertTrue(was_called)
def test_location(self):
    """The glue table definition for 'teams' has the expected S3 location."""
    db = read_database_folder('example/meta_data/db1/')
    glue_definition = db.table('teams').glue_table_definition()
    storage = glue_definition["StorageDescriptor"]
    self.assertEqual(
        storage["Location"], 's3://my-bucket/database/database1/teams/')
def test_table_to_dict(self):
    """to_dict() of the 'teams' table equals the contents of teams.json."""
    db = read_database_folder('example/meta_data/db1/')
    from_file = read_json('example/meta_data/db1/teams.json')
    from_meta = db.table('teams').to_dict()
    self.assertDictEqual(from_file, from_meta)
def test_db_table_names(self):
    """No table name outside the expected three appears in db.table_names.

    The check is one-directional: it does not require all three expected
    names to be present.
    """
    db = read_database_folder("example/meta_data/db1/")
    allowed = ["teams", "employees", "pay"]
    unexpected = [name for name in db.table_names if name not in allowed]
    self.assertEqual(unexpected, [])
if __name__ == "__main__":
    # Script entry point: load the occupeye metadata folder and create the
    # corresponding database in Glue.
    from etl_manager import meta

    db_meta = meta.read_database_folder("glue/meta_data/occupeye_db/")
    db_meta.create_glue_database()
def test_read_json(self):
    """Loading the example folder yields the expected database attributes."""
    db = read_database_folder("example/meta_data/db1/")

    loaded = (db.name, db.description, db.bucket, db.base_folder)
    expected = (
        "workforce",
        "Example database",
        "my-bucket",
        "database/database1",
    )
    # One tuple comparison covers all four attributes.
    self.assertEqual(loaded, expected)