Esempio n. 1
0
    def dry_run_clean(self, table_list=''):
        """Hand the prefixed datasets referenced by *table_list* to ``self.remove_dataset``.

        The method does nothing when ``self.dataset_prefix`` is ``None`` or
        when ``bq_default_project()`` equals ``self.prod_project_id``.

        Args:
            table_list: iterable of table names written either as
                ``dataset.table`` or ``project.dataset.table``. When left at
                the default ``''`` the value of ``self.yaml['table_list']``
                is used instead (falling back to ``''``).
        """
        if self.dataset_prefix is None:
            return
        if bq_default_project() == self.prod_project_id:
            return

        if table_list == '':
            table_list = self.yaml.get('table_list', '')

        # One argument dict per table; the dataset is the first component of
        # a two-part name and the second component otherwise.
        removal_args = []
        for name in table_list:
            parts = name.split(".")
            entry = {"dataset_id": parts[0] if len(parts) == 2 else parts[1]}
            apply_kwargs(entry, self.kwargs)
            removal_args.append(entry)

        # The prefix is added only after apply_kwargs has processed the dict.
        for entry in removal_args:
            entry['dataset_id'] = self.dataset_prefix + entry.get('dataset_id', '')

        # Collapse identical dicts by round-tripping through a set of item
        # tuples (the resulting order is whatever the set yields).
        distinct = {tuple(entry.items()) for entry in removal_args}
        removal_args = [dict(pairs) for pairs in distinct]

        if removal_args:
            execute_parallel(
                self.remove_dataset,
                removal_args,
                message='delete dataset: ',
                log='dataset_id'
            )
Esempio n. 2
0
    def copy_prod_structure(self, table_list=''):
        """Create the prefixed datasets and copy each listed table's structure.

        For every name in *table_list* an argument dict is built describing
        the source table and its destination (``self.dataset_prefix`` +
        source dataset, same table id). The distinct destination datasets are
        passed, via ``execute_parallel``, to ``self.bq.create_dataset`` first,
        then the per-table dicts are passed to
        ``self.bq.copy_table_structure``.

        Args:
            table_list: iterable of table names written either as
                ``dataset.table`` (the source project is then
                ``self.prod_project_id``) or ``project.dataset.table``. When
                left at the default ``''`` the value of
                ``self.yaml['table_list']`` is used instead.
        """
        if table_list == '':
            table_list = self.yaml.get('table_list', '')

        copy_args, dest_datasets = [], []
        for name in table_list:
            parts = name.split(".")
            if len(parts) == 2:
                project_id = self.prod_project_id
                dataset_id, table_id = parts
            else:
                project_id, dataset_id, table_id = parts[0], parts[1], parts[2]

            dest_dataset = self.dataset_prefix + dataset_id
            dest_datasets.append(dest_dataset)

            entry = {
                "source_project_id": project_id,
                "source_dataset_id": dataset_id,
                "source_table_id": table_id,
                "dest_dataset_id": dest_dataset,
                "dest_table_id": table_id,
            }
            apply_kwargs(entry, self.kwargs)
            copy_args.append(entry)

        # np.unique yields each destination dataset once, in sorted order.
        dataset_args = []
        for dataset in np.unique(dest_datasets):
            entry = {"dataset_id": dataset}
            apply_kwargs(entry, self.kwargs)
            dataset_args.append(entry)

        # Datasets are submitted before the table copies that target them.
        if dataset_args:
            execute_parallel(
                self.bq.create_dataset,
                dataset_args,
                message='create dataset for: ',
                log='dataset_id'
            )

        if copy_args:
            execute_parallel(
                self.bq.copy_table_structure,
                copy_args,
                message='copy table structure for: ',
                log='source_table_id'
            )
Esempio n. 3
0
 def create_tables(self, batch):
     """Submit the ``create_table`` entries of *batch* to ``self.bq.create_table``.

     The argument dicts come from ``extract_args`` applied to
     ``batch['tables']`` (``''`` when the key is missing). Each one is passed
     through ``apply_kwargs`` and tagged with ``self.dataset_prefix`` before
     the whole list is handed to ``execute_parallel``.
     """
     table_args = extract_args(
         content=batch.get('tables', ''),
         to_extract='create_table',
         kwargs=self.kwargs
     )

     for entry in table_args:
         apply_kwargs(entry, self.kwargs)
         entry["dataset_prefix"] = self.dataset_prefix

     # Nothing to submit when no table definitions were extracted.
     if table_args == []:
         return

     execute_parallel(
         self.bq.create_table,
         table_args,
         message='Creating table:',
         log='table_id'
     )
Esempio n. 4
0
def extract_unit_tests(batch_list=None, kwargs=None):
    """Return the unit-test definitions declared in *batch_list*.

    A table contributes a unit test when it declares ``mock_data`` together
    with ``create_table`` and/or ``create_partition_table``. Each such
    definition dict is updated in place with the keys of that same table's
    ``mock_data`` dict and returned.

    Args:
        batch_list: iterable of batch dicts, each optionally holding a
            ``'tables'`` list of table dicts. ``None`` is treated as empty.
        kwargs: values forwarded to ``apply_kwargs`` for every batch.
            Defaults to a fresh empty dict on each call.

    Returns:
        list: the ``create_table`` / ``create_partition_table`` dicts, in
        declaration order, each merged with its table's ``mock_data``.
    """
    if batch_list is None:
        batch_list = []
    if kwargs is None:
        # A fresh dict per call: a shared default could leak state between
        # calls if apply_kwargs ever mutates it.
        kwargs = {}

    # Pair every definition with the mock of the table it came from. Zipping
    # two independently grown lists would misalign the mocks as soon as one
    # table declares both create_table and create_partition_table.
    paired = []
    for batch in batch_list:
        apply_kwargs(batch, kwargs)
        for table in batch.get('tables', ''):
            mock = table.get('mock_data', '')
            if mock == '':
                continue
            for key in ('create_table', 'create_partition_table'):
                definition = table.get(key, '')
                if definition != '':
                    paired.append((definition, mock))

    # Merge only after every batch has been processed by apply_kwargs.
    for definition, mock in paired:
        definition.update(mock)
    return [definition for definition, _ in paired]
Esempio n. 5
0
 def run_batches(self):
     """Run every batch listed under ``self.yaml['batches']``, in order.

     Each batch is first passed through ``apply_kwargs`` with
     ``self.kwargs`` and then handed to ``self.run_batch``. A missing
     ``'batches'`` key results in no work.
     """
     for current in self.yaml.get('batches', ''):
         apply_kwargs(current, self.kwargs)
         self.run_batch(current)