def test_db_to_dict(self):
    """Compare DatabaseMeta.to_dict() with the example database.json file."""
    meta_kwargs = {
        'name': 'workforce',
        'bucket': 'my-bucket',
        'base_folder': 'database/database1',
        'description': 'Example database',
    }
    database = DatabaseMeta(**meta_kwargs)

    # The JSON file on disk is the expected value; the object is the actual one.
    expected = read_json('example/meta_data/db1/database.json')
    self.assertDictEqual(expected, database.to_dict())
def test_db_write_to_json(self):
    """Exercise DatabaseMeta.write_to_json() with and without table files.

    First the database (with one table attached) is written to a temporary
    directory and both ``database.json`` and ``table1.json`` are read back
    and compared with the in-memory ``to_dict()`` output. Then the write is
    repeated with ``write_tables=False`` and the test expects reading
    ``table1.json`` to raise ``FileNotFoundError``.
    """
    database = DatabaseMeta(
        name='workforce',
        bucket='my-bucket',
        base_folder='database/database1',
        description='Example database',
    )
    table = TableMeta(name='table1', location='somewhere')
    database.add_table(table)

    # Pass 1: default arguments, both files are read back and compared.
    with tempfile.TemporaryDirectory() as out_dir:
        database.write_to_json(out_dir)
        db_path = os.path.join(out_dir, 'database.json')
        table_path = os.path.join(out_dir, 'table1.json')
        written_db = read_json(db_path)
        written_table = read_json(table_path)
        self.assertDictEqual(written_db, database.to_dict())
        self.assertDictEqual(written_table, table.to_dict())

    # Pass 2: write_tables=False, only the database file is expected.
    with tempfile.TemporaryDirectory() as out_dir:
        database.write_to_json(out_dir, write_tables=False)
        written_db = read_json(os.path.join(out_dir, 'database.json'))
        self.assertDictEqual(written_db, database.to_dict())

        # Check that only db has been written
        with self.assertRaises(FileNotFoundError):
            read_json(os.path.join(out_dir, 'table1.json'))
def test_create_tables_using_etl_manager_api(self, mock_client_create_table):
    """Build a table definition from the data_types fixture via the meta API.

    The test is skipped through ``self.skip_test_if_no_creds()`` when that
    helper decides so. Columns are loaded from
    ``data/data_types/test_table.json`` (relative to this file) and added
    one by one; adding a column of type ``"array()"`` is expected to raise
    ``ValueError``.

    ``mock_client_create_table`` is not used in this body.
    NOTE(review): presumably injected by a patch decorator - confirm.
    """
    self.skip_test_if_no_creds()

    # Database-level metadata
    database = DatabaseMeta(
        name="test_data_types",
        bucket="alpha-test-meta-data",
        base_folder="database/test",
    )

    # Table-level metadata; columns come from the JSON fixture below
    table = TableMeta(name="test_table", location="test_table/", data_format="json")

    fixture_dir = os.path.dirname(__file__)
    fixture_path = os.path.join(fixture_dir, "data/data_types/test_table.json")
    with open(fixture_path) as fixture_file:
        fixture = json.load(fixture_file)

    for column in fixture["columns"]:
        table.add_column(
            column["name"],
            column["type"],
            description=column["description"],
        )

    # An array type with no element type must be rejected.
    with self.assertRaises(ValueError):
        table.add_column("bad_col", "array()", "")

    database.add_table(table)
def test_db_to_dict(self):
    """Compare DatabaseMeta.to_dict() with the example database.json file."""
    meta_kwargs = {
        "name": "workforce",
        "bucket": "my-bucket",
        "base_folder": "database/database1",
        "description": "Example database",
    }
    database = DatabaseMeta(**meta_kwargs)

    # The JSON file on disk is the expected value; the object is the actual one.
    expected = read_json("example/meta_data/db1/database.json")
    self.assertDictEqual(expected, database.to_dict())
def test_init(self):
    """Check that DatabaseMeta exposes its constructor arguments as attributes."""
    name = 'workforce'
    bucket = 'my-bucket'
    base_folder = 'database/database1'
    description = 'Example database'

    database = DatabaseMeta(
        name=name,
        bucket=bucket,
        base_folder=base_folder,
        description=description,
    )

    # Same assertion order as before: name, description, bucket, base_folder.
    self.assertEqual(database.name, name)
    self.assertEqual(database.description, description)
    self.assertEqual(database.bucket, bucket)
    self.assertEqual(database.base_folder, base_folder)
def test_init(self):
    """Check that DatabaseMeta exposes its constructor arguments as attributes."""
    database = DatabaseMeta(
        name="workforce",
        bucket="my-bucket",
        base_folder="database/database1",
        description="Example database",
    )

    # (attribute, expected value) pairs, checked in this order.
    expectations = (
        ("name", "workforce"),
        ("description", "Example database"),
        ("bucket", "my-bucket"),
        ("base_folder", "database/database1"),
    )
    for attribute, expected in expectations:
        self.assertEqual(getattr(database, attribute), expected)
def create_glue_database(self):
    """Create the Glue database described by ``self.db_schema``.

    For every entry in ``self.meta_and_files`` the table metadata found at
    its ``"meta_path"`` is loaded, switched to the ``"parquet"`` data
    format, given an extra ``release`` column and partitioned on that
    column alone. The tables are attached to the database, which is then
    created (replacing any existing one) and has its table partitions
    refreshed.

    Raises:
        AttributeError: if a table's metadata already declares partitions.
    """
    # Database object built from the stored schema
    database = DatabaseMeta(**self.db_schema)

    for _table_name, paths in self.meta_and_files.items():
        table_meta = read_table_json(paths["meta_path"], database=database)
        table_meta.data_format = "parquet"

        # Pre-existing partitions would conflict with the release partition.
        if table_meta.partitions:
            raise AttributeError(
                "Automated lookup tables can only be "
                "partitioned by their GitHub release"
            )

        # Every table gets a release column, used as its only partition.
        table_meta.add_column(
            name="release",
            type="character",
            description="github release tag of this lookup",
        )
        table_meta.partitions = ["release"]

        database.add_table(table_meta)

    database.create_glue_database(delete_if_exists=True)
    database.refresh_all_table_partitions()
def database_path(self):
    """Return ``s3_database_path`` of a DatabaseMeta built from ``self.db_schema``."""
    return DatabaseMeta(**self.db_schema).s3_database_path
from etl_manager.meta import DatabaseMeta, TableMeta

# Database metadata object
db = DatabaseMeta(name="matrix_db", bucket="alpha-dag-matrix")

# Table metadata object for the bookings table
bookings = TableMeta(name="bookings", location="bookings", data_format="parquet")

# Column definitions for the table, added in the order listed here
for _column in (
    {"name": "id", "type": "character", "description": "Booking id"},
    {
        "name": "time_from",
        "type": "datetime",
        "description": "Start time of booking",
    },
    {"name": "time_to", "type": "datetime", "description": "End time of booking"},
    {
        "name": "created",
        "type": "datetime",
        "description": "Time the booking was created",
    },
    {
        "name": "cancelled_time",
        "type": "datetime",
        "description": "Time of cancellation",
    },
    {
        "name": "location_id",
        "type": "character",
        "description": "id to match to location",
    },
    {
        "name": "owner_id",
        "type": "character",
        "description": "id of user who owns the booking",
    },
    {
        "name": "booked_by_id",
        "type": "character",
        "description": "id of user who created the booking",
    },
):
    bookings.add_column(**_column)