def setUp(self): logging.basicConfig(level=logging.DEBUG) self.bq_client = BigQueryExecutor() self.p_ex = pl.PipelineExecutor("tests/yaml/test_performance.yaml") self.p_ex.dataset_prefix = "1008_" add_dataset_prefix(self.p_ex.yaml, self.p_ex.dataset_prefix) self.bq_client.create_dataset(dataset_id=str(self.p_ex.dataset_prefix) + "test")
class TestPipelineDryRunWithArgs(unittest.TestCase):
    def setUp(self):
        self.bq_client = BigQueryExecutor()
        self.bq_client.create_dataset(
            dataset_id='4001_my_dataset_dry_run_with_args'
        )
        self.bq_client.create_dataset(
            dataset_id='4001_test'
        )
        self.p_ex = pl.PipelineExecutor(
            yaml_file="tests/yaml/test_dry_run_with_args.yaml",
            my_string_arg='one',
            my_dataset_arg='my_dataset_dry_run_with_args'
        )
        self.p_ex.dataset_prefix = "4001_"
        add_dataset_prefix(
            obj=self.p_ex.yaml,
            dataset_prefix=self.p_ex.dataset_prefix,
            kwargs={
                'my_string_arg': 'one',
                'my_dataset_arg': 'my_dataset_dry_run_with_args'
            }
        )

    def test_dry_run(self):
        self.p_ex.run()
        self.assertTrue(
            self.bq_client.table_exists(
                dataset_id='4001_my_dataset_dry_run_with_args',
                table_id='table1'
            )
        )

    def tearDown(self):
        self.p_ex.dry_run_clean()
def setUp(self): self.bq_client = BigQueryExecutor() self.p_ex = pl.PipelineExecutor("tests/yaml/unit_tests_fail.yaml") self.p_ex.dataset_prefix = "1001_" add_dataset_prefix(self.p_ex.yaml, self.p_ex.dataset_prefix) if self.bq_client.table_exists(dataset_id='reporting', table_id='out_product'): self.bq_client.delete_table(dataset_id='reporting', table_id='out_product') if self.bq_client.table_exists(dataset_id='reporting', table_id='out_saleorder'): self.bq_client.delete_table(dataset_id='reporting', table_id='out_saleorder')
class TestRunReleases(unittest.TestCase):
    def setUp(self):
        logging.basicConfig(level=logging.DEBUG)
        self.bq_client = BigQueryExecutor()
        self.p_ex = pl.PipelineExecutor("tests/yaml/test_run.yaml")

    def test_run_releases(self):
        self.p_ex.run_releases(release_date="2020-01-01")
        self.assertTrue(
            self.bq_client.table_exists(
                table_id="table1",
                dataset_id="test"
            ),
            "Table was not created"
        )

    def test_run_releases_with_dataset_prefix(self):
        self.p_ex.dataset_prefix = "2001_"
        self.p_ex.run_releases(release_date="2020-01-01")
        self.assertTrue(
            self.bq_client.table_exists(
                table_id="table1",
                dataset_id="2001_test"
            ),
            "Table was not created"
        )

    def tearDown(self):
        if self.bq_client.table_exists(table_id='table1', dataset_id='test'):
            self.bq_client.delete_table(table_id='table1', dataset_id='test')
        if self.bq_client.dataset_exists("2001_test"):
            self.bq_client.delete_dataset("2001_test", delete_contents=True)
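# For context, run_releases() (defined on PipelineExecutor below) compares release_date with
# each entry's 'date' in the YAML's 'releases' list and runs that entry's 'python_files'.
# A hypothetical parsed equivalent of such a YAML block (the actual content of
# tests/yaml/test_run.yaml is not shown in this section) might look like:
example_releases_yaml = {
    "releases": [
        {
            "date": "2020-01-01",
            "description": "create table1 in the test dataset",
            "python_files": ["tests/python/release_create_table1.py"],  # hypothetical path
        }
    ]
}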
class TestPipelinePerformance(unittest.TestCase):
    def setUp(self):
        logging.basicConfig(level=logging.DEBUG)
        self.bq_client = BigQueryExecutor()
        self.p_ex = pl.PipelineExecutor("tests/yaml/test_performance.yaml")
        self.p_ex.dataset_prefix = "1008_"
        add_dataset_prefix(self.p_ex.yaml, self.p_ex.dataset_prefix)
        self.bq_client.create_dataset(dataset_id=str(self.p_ex.dataset_prefix) + "test")

    def test_dry_pipeline_performance(self):
        start_time = time.time()
        self.p_ex.run()
        end_time = time.time()
        self.assertLess(end_time - start_time, 10, "pipeline performance is bad")

    def tearDown(self):
        self.p_ex.dry_run_clean()
class TestPipelineDryRun(unittest.TestCase):
    def setUp(self):
        self.bq_client = BigQueryExecutor()
        self.p_ex = pl.PipelineExecutor("tests/yaml/test_dry_run.yaml")
        self.p_ex.dataset_prefix = "1001_"
        add_dataset_prefix(self.p_ex.yaml, self.p_ex.dataset_prefix)
        self.bq_client.create_dataset(dataset_id=str(self.p_ex.dataset_prefix) + "test")

    def test_dry_run(self):
        self.p_ex.run()
        self.assertTrue(
            self.bq_client.table_exists(
                dataset_id='1001_test',
                table_id='table1'
            )
        )

    def tearDown(self):
        self.p_ex.dry_run_clean()
class TestPipelineCopyTableStructure(unittest.TestCase):
    def setUp(self):
        self.bq_client = BigQueryExecutor()
        self.p_ex = pl.PipelineExecutor("tests/yaml/unit_tests_fail.yaml")
        self.p_ex.dataset_prefix = "1001_"
        add_dataset_prefix(self.p_ex.yaml, self.p_ex.dataset_prefix)
        if self.bq_client.table_exists(dataset_id='reporting', table_id='out_product'):
            self.bq_client.delete_table(dataset_id='reporting', table_id='out_product')
        if self.bq_client.table_exists(dataset_id='reporting', table_id='out_saleorder'):
            self.bq_client.delete_table(dataset_id='reporting', table_id='out_saleorder')

    def test_copy_prod_structure(self):
        self.p_ex.copy_prod_structure(
            ['copper-actor-127213.reporting.out_product', 'reporting.out_saleorder']
        )
        self.assertTrue(
            self.bq_client.table_exists(dataset_id='1001_reporting', table_id='out_product')
            and self.bq_client.table_exists(dataset_id='1001_reporting', table_id='out_saleorder'),
            "not all table structures were copied"
        )
class TestPipelineUnitTestsErrorRaised(unittest.TestCase):
    def setUp(self):
        self.db = BigQueryExecutor()
        self.p_ex = pl.PipelineExecutor("tests/yaml/unit_tests_fail.yaml")

    def test_run_unit_tests_error(self):
        with self.assertRaises(AssertionError):
            self.p_ex.run_unit_tests()

    def tearDown(self):
        self.db.delete_table(
            table_id='table2',
            dataset_id='test'
        )
        self.db.delete_table(
            table_id='table3',
            dataset_id='test'
        )
        self.db.delete_table(
            table_id='table4',
            dataset_id='test'
        )
def setUp(self): self.bq_client = BigQueryExecutor() self.p_ex = pl.PipelineExecutor("tests/yaml/test_dry_run.yaml") self.p_ex.dataset_prefix = "1001_" add_dataset_prefix(self.p_ex.yaml, self.p_ex.dataset_prefix) self.bq_client.create_dataset(dataset_id=str(self.p_ex.dataset_prefix) + "test")
class PipelineExecutor:
    def __init__(self, yaml_file, dry_run=False, *args, **kwargs):
        self.kwargs = kwargs
        self.yaml = read_yaml_file(yaml_file)
        self.dataset_prefix = None
        if dry_run:
            self.dataset_prefix = f'{randint(1, 99999999):08}_'
            add_dataset_prefix(obj=self.yaml, dataset_prefix=self.dataset_prefix, kwargs=self.kwargs)
        self.bq = BigQueryExecutor()
        self.prod_project_id = bq_default_prod_project()

    def remove_dataset(self, dataset_id):
        if self.bq.dataset_exists(dataset_id):
            self.bq.delete_dataset(dataset_id, delete_contents=True)

    def dry_run_clean(self, table_list=''):
        if self.dataset_prefix is not None:
            if bq_default_project() != self.prod_project_id:
                args_dataset = []
                if table_list == '':
                    table_list = self.yaml.get('table_list', '')
                for table in table_list:
                    if table.count('.') == 1:
                        dataset_id = table.split(".")[0]
                    else:
                        dataset_id = table.split(".")[1]
                    dict_ = {
                        "dataset_id": dataset_id
                    }
                    apply_kwargs(dict_, self.kwargs)
                    args_dataset.append(dict_)
                for dataset in args_dataset:
                    value = dataset.get('dataset_id', '')
                    dataset['dataset_id'] = self.dataset_prefix + value
                args_dataset = [dict(t) for t in {tuple(d.items()) for d in args_dataset}]
                if args_dataset != []:
                    execute_parallel(
                        self.remove_dataset,
                        args_dataset,
                        message='delete dataset: ',
                        log='dataset_id'
                    )

    def create_tables(self, batch):
        args = []
        batch_content = batch.get('tables', '')
        args = extract_args(content=batch_content, to_extract='create_table', kwargs=self.kwargs)
        for a in args:
            apply_kwargs(a, self.kwargs)
            a.update({"dataset_prefix": self.dataset_prefix})
        if args != []:
            execute_parallel(
                self.bq.create_table,
                args,
                message='Creating table:',
                log='table_id'
            )

    def create_gs_tables(self, batch):
        args = []
        batch_content = batch.get('sheets', '')
        args = extract_args(content=batch_content, to_extract='create_gs_table', kwargs=self.kwargs)
        if args == []:
            raise Exception("create_gs_table in YAML file is not well defined")
        execute_parallel(
            self.bq.create_gs_table,
            args,
            message='Creating live Google Sheet connection table in BigQuery:',
            log='table_id'
        )

    def create_partition_tables(self, batch):
        args = []
        batch_content = batch.get('tables', '')
        args = extract_args(
            content=batch_content,
            to_extract='create_partition_table',
            kwargs=self.kwargs
        )
        for a in args:
            apply_kwargs(a, self.kwargs)
            a.update({"dataset_prefix": self.dataset_prefix})
        if args != []:
            execute_parallel(
                self.bq.create_partition_table,
                args,
                message='Creating partition table:',
                log='table_id'
            )

    def load_google_sheets(self, batch):
        args = []
        batch_content = batch.get('sheets', '')
        args = extract_args(batch_content, 'load_google_sheet')
        if args == []:
            raise Exception("load_google_sheet in yaml is not well defined")
        execute_parallel(
            self.bq.load_google_sheet,
            args,
            message='Loading table:',
            log='table_id'
        )

    def run_checks(self, batch):
        args, args_pk = [], []
        batch_content = batch.get('tables', '')
        args = extract_args(batch_content, 'create_table')
        args_pk = [x.get('pk', []) for x in batch_content]
        for a, b in zip(args, args_pk):
            a.update({
                "dataset_prefix": self.dataset_prefix,
                "primary_key": b
            })
        execute_parallel(
            self.bq.assert_unique,
            args,
            message='Run pk_check on:',
            log='table_id'
        )

    def run_batch(self, batch):
        """ Executes a batch. """
        if 'tables' in batch:
            if extract_args(batch['tables'], 'create_table'):
                self.create_tables(batch)
                self.run_checks(batch)
            if extract_args(batch['tables'], 'create_partition_table'):
                self.create_partition_tables(batch)
        if 'sheets' in batch:
            if extract_args(batch['sheets'], 'load_google_sheet'):
                self.load_google_sheets(batch)
            if extract_args(batch['sheets'], 'create_gs_table'):
                self.create_gs_tables(batch)

    def run_batches(self):
        batch_list = self.yaml.get('batches', '')
        for batch in batch_list:
            apply_kwargs(batch, self.kwargs)
            self.run_batch(batch)

    def run_python_file(self, python_file):
        # _dataset_prefix is unused in run_python_file() itself, but it makes
        # PipelineExecutor's dataset_prefix available to the release script, using:
        # from pygyver.etl.lib import get_dataset_prefix
        _dataset_prefix = self.dataset_prefix
        logging.info(f"Running {python_file}")
        module_name = PurePath(python_file).stem
        module_full_path = PurePath(os.getenv("PROJECT_ROOT")) / python_file
        spec = spec_from_file_location(module_name, module_full_path)
        module = module_from_spec(spec)
        spec.loader.exec_module(module)

    def run_releases(self, release_date=date.today().strftime("%Y-%m-%d")):
        release_list = self.yaml.get('releases', [])
        for release in release_list:
            if str(release.get('date', '')) == release_date:
                logging.info(f"Release {release_date}: {release.get('description', '')}")
                for python_file in release.get('python_files', []):
                    self.run_python_file(python_file)

    def run(self):
        self.run_releases()
        self.run_batches()

    def run_unit_tests(self, batch_list=None):
        batch_list = batch_list or self.yaml.get('batches', '')
        list_unit_test = extract_unit_tests(batch_list, self.kwargs)
        args = extract_unit_test_value(list_unit_test)
        if args != []:
            execute_parallel(
                self.bq.assert_acceptance,
                args,
                message='Asserting sql',
                log='file'
            )

    def copy_prod_structure(self, table_list=''):
        args, args_dataset, datasets = [], [], []
        if table_list == '':
            table_list = self.yaml.get('table_list', '')
        for table in table_list:
            if table.count('.') == 1:
                _dict = {
                    "source_project_id": self.prod_project_id,
                    "source_dataset_id": table.split(".")[0],
                    "source_table_id": table.split(".")[1],
                    "dest_dataset_id": self.dataset_prefix + table.split(".")[0],
                    "dest_table_id": table.split(".")[1]
                }
            else:
                _dict = {
                    "source_project_id": table.split(".")[0],
                    "source_dataset_id": table.split(".")[1],
                    "source_table_id": table.split(".")[2],
                    "dest_dataset_id": self.dataset_prefix + table.split(".")[1],
                    "dest_table_id": table.split(".")[2]
                }
            apply_kwargs(_dict, self.kwargs)
            args.append(_dict)
        # extract datasets from table_list
        for table in table_list:
            if table.count('.') == 1:
                datasets.append(self.dataset_prefix + table.split(".")[0])
            else:
                datasets.append(self.dataset_prefix + table.split(".")[1])
        for dataset in np.unique(datasets):
            _dict = {"dataset_id": dataset}
            apply_kwargs(_dict, self.kwargs)
            args_dataset.append(_dict)
        if args_dataset != []:
            execute_parallel(
                self.bq.create_dataset,
                args_dataset,
                message='create dataset for: ',
                log='dataset_id'
            )
        if args != []:
            execute_parallel(
                self.bq.copy_table_structure,
                args,
                message='copy table structure for: ',
                log='source_table_id'
            )

    def run_test(self):
        self.run_unit_tests()
def setUp(self): logging.basicConfig(level=logging.DEBUG) self.bq_client = BigQueryExecutor() self.p_ex = pl.PipelineExecutor("tests/yaml/test_run.yaml")
def setUp(self): self.bq_client = BigQueryExecutor() self.p_ex = pl.PipelineExecutor("tests/yaml/test_run.yaml") self.bq_client.create_dataset(dataset_id='test')
class TestPipelineExecutorRun(unittest.TestCase):
    def setUp(self):
        self.bq_client = BigQueryExecutor()
        self.p_ex = pl.PipelineExecutor("tests/yaml/test_run.yaml")
        self.bq_client.create_dataset(dataset_id='test')

    def test_run_completed_no_error(self):
        self.p_ex.run()
        self.assertTrue(
            self.bq_client.table_exists(
                table_id='ref_sheet1',
                dataset_id="test"
            ),
            "test.ref_sheet1 exists"
        )
        self.assertTrue(
            self.bq_client.table_exists(
                table_id='ref_sheet2',
                dataset_id="test"
            ),
            "test.ref_sheet2 exists"
        )
        self.assertTrue(
            self.bq_client.table_exists(
                table_id='gs_test_table1',
                dataset_id="test"
            ),
            "test.gs_test_table1 exists"
        )
        self.assertTrue(
            self.bq_client.table_exists(
                table_id='table1',
                dataset_id="test"
            ),
            "test.table1 exists"
        )
        self.assertTrue(
            self.bq_client.table_exists(
                table_id='table2',
                dataset_id="test"
            ),
            "test.table2 exists"
        )

    def tearDown(self):
        if self.bq_client.table_exists(table_id='table1', dataset_id='test'):
            self.bq_client.delete_table(table_id='table1', dataset_id='test')
        if self.bq_client.table_exists(table_id='table2', dataset_id='test'):
            self.bq_client.delete_table(table_id='table2', dataset_id='test')
        if self.bq_client.table_exists(table_id='ref_sheet1', dataset_id='test'):
            self.bq_client.delete_table(table_id='ref_sheet1', dataset_id='test')
        if self.bq_client.table_exists(table_id='ref_sheet2', dataset_id='test'):
            self.bq_client.delete_table(table_id='ref_sheet2', dataset_id='test')
        if self.bq_client.table_exists(table_id='gs_test_table1', dataset_id='test'):
            self.bq_client.delete_table(table_id='gs_test_table1', dataset_id='test')
class TestPipelineExecutorRunBatch(unittest.TestCase):
    def setUp(self):
        self.bq_exec = BigQueryExecutor()
        self.p_ex = pl.PipelineExecutor("tests/yaml/test_dummy.yaml")
        self.bq_client = bigquery.Client()

    def test_run_batch_create_tables(self):
        batch = {
            "desc": "create table1 & table2 in staging",
            "tables": [
                {
                    "table_desc": "table1",
                    "create_table": {
                        "table_id": "table1",
                        "dataset_id": "test",
                        "description": "some descriptive text here",
                        "file": "tests/sql/table1.sql"
                    },
                    "pk": ["col1", "col2"],
                    "mock_data": "sql/table1_mocked.sql"
                },
                {
                    "table_desc": "table2",
                    "create_table": {
                        "table_id": "table2",
                        "dataset_id": "test",
                        "file": "tests/sql/table2.sql"
                    },
                    "pk": ["col1", "col2"],
                    "mock_data": "sql/table1_mocked.sql"
                }
            ]
        }
        self.p_ex.run_batch(batch)
        self.assertTrue(
            self.bq_exec.table_exists(table_id='table1', dataset_id="test"),
            "Tables are created")
        table_ref = self.bq_exec.get_table_ref(dataset_id='test', table_id='table1', project_id=bq_default_project())
        table = self.bq_client.get_table(table_ref)  # API request
        self.assertTrue(
            table.description == "some descriptive text here",
            "The 'description' is not the same"
        )
        self.assertTrue(
            self.bq_exec.table_exists(table_id='table2', dataset_id="test"),
            "Tables are created")

    def test_run_batch_create_gs_tables(self):
        batch = {
            "desc": "load test spreadsheet into bigquery",
            "sheets": [
                {
                    "table_desc": "ref gs_test_table1",
                    "create_gs_table": {
                        "table_id": "gs_test_table1",
                        "dataset_id": "test",
                        "sheet_name": "input",
                        "googlesheet_uri": "https://docs.google.com/spreadsheets/d/19Jmapr9G1nrMcW2QTpY7sOvKYaFXnw5krK6dD0GwEqU/edit#gid=0"
                    }
                }
            ]
        }
        self.p_ex.create_gs_tables(batch)
        self.assertTrue(
            self.bq_exec.table_exists(table_id='gs_test_table1', dataset_id="test"),
            "gs_test_table1 does NOT exist")

    def test_run_batch_load_google_sheets(self):
        batch = {
            "desc": "load test spreadsheet into bigquery",
            "sheets": [
                {
                    "table_desc": "ref sheet1",
                    "load_google_sheet": {
                        "table_id": "ref_sheet1",
                        "dataset_id": "test",
                        "sheet_name": "input",
                        "googlesheet_uri": "https://docs.google.com/spreadsheets/d/19Jmapr9G1nrMcW2QTpY7sOvKYaFXnw5krK6dD0GwEqU/edit#gid=0"
                    }
                },
                {
                    "table_desc": "ref sheet2",
                    "load_google_sheet": {
                        "table_id": "ref_sheet2",
                        "dataset_id": "test",
                        "sheet_name": "input",
                        "description": "foo bar",
                        "googlesheet_uri": "https://docs.google.com/spreadsheets/d/19Jmapr9G1nrMcW2QTpY7sOvKYaFXnw5krK6dD0GwEqU/edit#gid=0"
                    }
                }
            ]
        }
        self.p_ex.load_google_sheets(batch)
        self.assertTrue(
            self.bq_exec.table_exists(table_id='ref_sheet1', dataset_id="test"),
            "ref_sheet1 does NOT exist")
        self.assertTrue(
            self.bq_exec.table_exists(table_id='ref_sheet2', dataset_id="test"),
            "ref_sheet2 does NOT exist")
        table_ref = self.bq_exec.get_table_ref(dataset_id='test', table_id='ref_sheet2', project_id=bq_default_project())
        table = self.bq_client.get_table(table_ref)  # API request
        self.assertTrue(
            table.description == "foo bar",
            "The 'description' is not the same"
        )

    def test_run_batch_create_partition_tables(self):
        # A table must be created first; only then can it be partitioned
        batch = {
            "desc": "create partition_table1",
            "tables": [
                {
                    "table_desc": "creating table",
                    "create_table": {
                        "table_id": "partition_table1",
                        "dataset_id": "test",
                        "file": "tests/sql/table1.sql"
                    },
                    "pk": ["col1", "col2"],
                    "mock_data": "sql/table1_mocked.sql"
                },
                {
                    "table_desc": "creating partition table",
                    "create_partition_table": {
                        "table_id": "partition_table1",
                        "dataset_id": "test",
                        "description": "some descriptive text here",
                        "file": "tests/sql/table1.sql",
                        "partition_dates": []
                    },
                    "pk": ["col1", "col2"],
                    "mock_data": "sql/table1_mocked.sql"
                }
            ]
        }
        self.p_ex.run_batch(batch)
        self.assertTrue(
            self.bq_exec.table_exists(table_id='partition_table1', dataset_id="test"),
            "Partition table is created")
        table_ref = self.bq_exec.get_table_ref(dataset_id='test', table_id='partition_table1', project_id=bq_default_project())
        table = self.bq_client.get_table(table_ref)  # API request
        self.assertTrue(
            table.description == "some descriptive text here",
            "The 'description' is not the same"
        )

    def tearDown(self):
        if self.bq_exec.table_exists(table_id='table1', dataset_id='test'):
            self.bq_exec.delete_table(table_id='table1', dataset_id='test')
        if self.bq_exec.table_exists(table_id='table2', dataset_id='test'):
            self.bq_exec.delete_table(table_id='table2', dataset_id='test')
        if self.bq_exec.table_exists(table_id='test_run_batch_table_1', dataset_id='test'):
            self.bq_exec.delete_table(table_id='test_run_batch_table_1', dataset_id='test')
        if self.bq_exec.table_exists(table_id='test_run_batch_table_2', dataset_id='test'):
            self.bq_exec.delete_table(table_id='test_run_batch_table_2', dataset_id='test')
        if self.bq_exec.table_exists(table_id='gs_test_table1', dataset_id='test'):
            self.bq_exec.delete_table(table_id='gs_test_table1', dataset_id='test')
        if self.bq_exec.table_exists(table_id='ref_sheet1', dataset_id='test'):
            self.bq_exec.delete_table(table_id='ref_sheet1', dataset_id='test')
        if self.bq_exec.table_exists(table_id='ref_sheet2', dataset_id='test'):
            self.bq_exec.delete_table(table_id='ref_sheet2', dataset_id='test')
        if self.bq_exec.table_exists(table_id='partition_table1', dataset_id='test'):
            self.bq_exec.delete_table(table_id='partition_table1', dataset_id='test')
""" Facebook Tests """ import unittest import pandas as pd import mock from pandas.testing import assert_series_equal from pandas.testing import assert_frame_equal from pygyver.etl.facebook import transform_campaign_budget from pygyver.etl.facebook import build_predicted_revenue_events, calculate_batches, \ split_events_to_batches, FacebookExecutor from pygyver.etl.dw import BigQueryExecutor from pygyver.etl.dw import read_sql from facebook_business.adobjects.serverside.event_request import EventResponse from facebook_business.exceptions import FacebookRequestError db = BigQueryExecutor() error_json = { "error": { "fbtrace_id": "test_fb_trace_id", "message": "Some generic message", "error_user_msg": "A more detailed message" } } context = mock.Mock().files = {'test': 'test'} def get_predicted_revenue_mock(): sql = read_sql(file='tests/sql/unit_predicted_revenue_mocked.sql')
from pygyver.etl.lib import get_dataset_prefix
from pygyver.etl.dw import BigQueryExecutor

dataset_prefix = get_dataset_prefix()
target_dataset_id = (dataset_prefix if dataset_prefix else "") + "test"

bq = BigQueryExecutor()
bq.create_dataset(target_dataset_id)
bq.create_table(
    dataset_id=target_dataset_id,
    table_id="table1",
    file="tests/sql/table1.sql"
)
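# Note: a script like the one above is what a release's 'python_files' entry points to.
# PipelineExecutor.run_python_file() imports and executes it via importlib, and
# get_dataset_prefix() picks up the executor's dataset_prefix when one is set, so during a
# prefixed run (e.g. dataset_prefix = "2001_") the table lands in "2001_test" rather than "test".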