def test_db_to_dict(self):
    """DatabaseMeta.to_dict() must match the reference JSON fixture."""
    expected = read_json('example/meta_data/db1/database.json')
    database = DatabaseMeta(
        name='workforce',
        bucket='my-bucket',
        base_folder='database/database1',
        description='Example database',
    )
    self.assertDictEqual(expected, database.to_dict())
    def test_db_write_to_json(self):
        """write_to_json round-trips the database (and optionally tables) to disk."""
        database = DatabaseMeta(
            name='workforce',
            bucket='my-bucket',
            base_folder='database/database1',
            description='Example database',
        )
        table = TableMeta(name='table1', location='somewhere')
        database.add_table(table)

        # Default call writes database.json plus one JSON file per table.
        with tempfile.TemporaryDirectory() as out_dir:
            database.write_to_json(out_dir)
            written_db = read_json(os.path.join(out_dir, 'database.json'))
            written_table = read_json(os.path.join(out_dir, 'table1.json'))

        self.assertDictEqual(written_db, database.to_dict())
        self.assertDictEqual(written_table, table.to_dict())

        # With write_tables=False only database.json is produced.
        with tempfile.TemporaryDirectory() as out_dir:
            database.write_to_json(out_dir, write_tables=False)

            written_db = read_json(os.path.join(out_dir, 'database.json'))
            self.assertDictEqual(written_db, database.to_dict())

            # The table file must be absent.
            with self.assertRaises(FileNotFoundError):
                read_json(os.path.join(out_dir, 'table1.json'))
    def test_create_tables_using_etl_manager_api(self,
                                                 mock_client_create_table):
        """Build a database and table from a JSON column spec via the meta API."""
        self.skip_test_if_no_creds()

        database = DatabaseMeta(
            name="test_data_types",
            bucket="alpha-test-meta-data",
            base_folder="database/test",
        )

        table = TableMeta(name="test_table",
                          location="test_table/",
                          data_format="json")

        # Load the reference column definitions shipped with the tests.
        spec_path = os.path.join(os.path.dirname(__file__),
                                 "data/data_types/test_table.json")
        with open(spec_path) as spec_file:
            spec = json.load(spec_file)

        for column in spec["columns"]:
            table.add_column(column["name"], column["type"],
                             description=column["description"])

        # Unsupported column types must be rejected.
        self.assertRaises(ValueError, table.add_column, "bad_col", "array()", "")

        database.add_table(table)
# Example #4
 def test_db_to_dict(self):
     """The serialized form of a DatabaseMeta equals the on-disk fixture."""
     database = DatabaseMeta(name="workforce",
                             bucket="my-bucket",
                             base_folder="database/database1",
                             description="Example database")
     fixture = read_json("example/meta_data/db1/database.json")
     self.assertDictEqual(fixture, database.to_dict())
 def test_init(self):
     """Constructor should store every field verbatim on the instance."""
     database = DatabaseMeta(name='workforce',
                             bucket='my-bucket',
                             base_folder='database/database1',
                             description='Example database')
     # Each attribute must echo the constructor argument unchanged.
     for attr, expected in [('name', 'workforce'),
                            ('description', 'Example database'),
                            ('bucket', 'my-bucket'),
                            ('base_folder', 'database/database1')]:
         self.assertEqual(getattr(database, attr), expected)
# Example #6
 def test_init(self):
     """All constructor arguments are exposed as same-named attributes."""
     database = DatabaseMeta(
         name="workforce",
         bucket="my-bucket",
         base_folder="database/database1",
         description="Example database",
     )
     self.assertEqual(database.base_folder, "database/database1")
     self.assertEqual(database.bucket, "my-bucket")
     self.assertEqual(database.description, "Example database")
     self.assertEqual(database.name, "workforce")
    def create_glue_database(self):
        """Creates glue database

        Builds a DatabaseMeta from ``self.db_schema``, attaches one parquet
        table per entry in ``self.meta_and_files``, then (re)creates the
        database in Glue and refreshes all table partitions.
        """
        database = DatabaseMeta(**self.db_schema)

        for table_name, paths in self.meta_and_files.items():
            table_meta = read_table_json(paths["meta_path"], database=database)
            table_meta.data_format = "parquet"

            # Automated lookup tables reserve partitioning for the release tag,
            # so a pre-existing partition spec is a configuration error.
            if table_meta.partitions:
                raise AttributeError("Automated lookup tables can only be "
                                     "partitioned by their GitHub release")

            # Add a release column as the first file partition to every table
            table_meta.add_column(
                name="release",
                type="character",
                description="github release tag of this lookup",
            )
            table_meta.partitions = ["release"]
            database.add_table(table_meta)

        database.create_glue_database(delete_if_exists=True)
        database.refresh_all_table_partitions()
 def database_path(self):
     """Return the S3 path of the database described by ``self.db_schema``."""
     return DatabaseMeta(**self.db_schema).s3_database_path
from etl_manager.meta import DatabaseMeta, TableMeta


# Database meta object for the matrix bookings database.
db = DatabaseMeta(name="matrix_db", bucket="alpha-dag-matrix")

# Create table meta object
bookings = TableMeta(name="bookings", location="bookings", data_format="parquet")

# Column definitions as (name, type, description) triples — added in order.
_booking_columns = [
    ("id", "character", "Booking id"),
    ("time_from", "datetime", "Start time of booking"),
    ("time_to", "datetime", "End time of booking"),
    ("created", "datetime", "Time the booking was created"),
    ("cancelled_time", "datetime", "Time of cancellation"),
    ("location_id", "character", "id to match to location"),
    ("owner_id", "character", "id of user who owns the booking"),
    ("booked_by_id", "character", "id of user who created the booking"),
]
for _name, _type, _desc in _booking_columns:
    bookings.add_column(name=_name, type=_type, description=_desc)