def test_fetch_args_kwargs(self):
    expected_kwargs = {
        'mapping_dataset_id': self.mapping_dataset_id,
        'combined_dataset_id': self.combined_dataset_id
    }
    expected_kwargs_list = []
    for k, v in expected_kwargs.items():
        expected_kwargs_list.extend([f'--{k}', v])

    test_args = [
        '-p', self.project_id, '-d', self.dataset_id, '-b',
        self.sandbox_dataset_id, '--data_stage', 'ehr'
    ]
    expected_args = {
        'project_id': self.project_id,
        'dataset_id': self.dataset_id,
        'sandbox_dataset_id': self.sandbox_dataset_id,
        'data_stage': DataStage.EHR,
        'console_log': False,
        'list_queries': False
    }
    parser = cc.get_parser()

    # known args are parsed into the namespace, extra '--key value' pairs into kwargs
    actual_args, actual_kwargs = cc.fetch_args_kwargs(
        parser, test_args + expected_kwargs_list)
    self.assertDictEqual(actual_args.__dict__, expected_args)
    self.assertDictEqual(expected_kwargs, actual_kwargs)

    actual_args, actual_kwargs = cc.fetch_args_kwargs(
        parser, test_args + ['--v', '-1'])
    self.assertDictEqual(actual_args.__dict__, expected_args)
    self.assertDictEqual({'v': '-1'}, actual_kwargs)

    # single-dash flags and dangling '--' options are rejected
    test_args_incorrect = test_args + ['-v', 'value']
    self.assertRaises(RuntimeError, cc.fetch_args_kwargs, parser,
                      test_args_incorrect)

    test_args_incorrect = test_args + ['--v', 'v', '--odd']
    self.assertRaises(RuntimeError, cc.fetch_args_kwargs, parser,
                      test_args_incorrect)
def main(raw_args=None):
    """
    Truncate and store fitbit data.

    Assumes you are passing arguments either via command line or a list.
    """
    parser = get_fitbit_parser()
    args, kwargs = clean_cdr.fetch_args_kwargs(parser, raw_args)

    pipeline_logging.configure(level=logging.INFO,
                               add_console_handler=args.console_log)

    # Identify the cleaning classes being run for the specified data_stage
    # and validate that all required custom arguments are supplied
    cleaning_classes = clean_cdr.DATA_STAGE_RULES_MAPPING[consts.FITBIT]
    clean_cdr.validate_custom_params(cleaning_classes, **kwargs)

    # get credentials and create client
    impersonation_creds = auth.get_impersonation_credentials(
        args.run_as_email, SCOPES)
    client = bq.get_client(args.project_id, credentials=impersonation_creds)

    # create staging, sandbox, backup and clean datasets with descriptions and labels
    fitbit_datasets = create_fitbit_datasets(client, args.release_tag)

    copy_fitbit_tables_from_views(client,
                                  args.fitbit_dataset,
                                  fitbit_datasets[consts.BACKUP],
                                  table_prefix='v_')
    bq.copy_datasets(client, fitbit_datasets[consts.BACKUP],
                     fitbit_datasets[consts.STAGING])

    common_cleaning_args = [
        '-p', args.project_id, '-d', fitbit_datasets[consts.STAGING], '-b',
        fitbit_datasets[consts.SANDBOX], '-s', '-a', consts.FITBIT
    ]
    fitbit_cleaning_args = args_parser.add_kwargs_to_args(
        common_cleaning_args, kwargs)
    clean_cdr.main(args=fitbit_cleaning_args)

    # Snapshot the staging dataset to the final clean dataset
    bq.build_and_copy_contents(client, fitbit_datasets[consts.STAGING],
                               fitbit_datasets[consts.CLEAN])
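
# The call to clean_cdr.main above relies on args_parser.add_kwargs_to_args to
# merge the extra '--key value' pairs back into the argument list. A minimal
# sketch of that pattern is shown below; this is a hypothetical illustration,
# not the library's implementation.
def _add_kwargs_to_args_sketch(args_list, kwargs):
    """Return a copy of args_list with each kwarg appended as '--key value'."""
    extended = list(args_list)
    for key, value in (kwargs or {}).items():
        extended.extend([f'--{key}', value])
    return extended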