Example #1
    def test_fetch_args_kwargs(self):
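        """
        Known args should parse into a namespace, extra '--key value' pairs
        into a dict, and malformed extras should raise RuntimeError.
        """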
        expected_kwargs = {
            'mapping_dataset_id': self.mapping_dataset_id,
            'combined_dataset_id': self.combined_dataset_id
        }
        expected_kwargs_list = []
        for k, v in expected_kwargs.items():
            expected_kwargs_list.extend([f'--{k}', v])

        test_args = [
            '-p', self.project_id, '-d', self.dataset_id, '-b',
            self.sandbox_dataset_id, '--data_stage', 'ehr'
        ]
        expected_args = {
            'project_id': self.project_id,
            'dataset_id': self.dataset_id,
            'sandbox_dataset_id': self.sandbox_dataset_id,
            'data_stage': DataStage.EHR,
            'console_log': False,
            'list_queries': False
        }
        parser = cc.get_parser()
        actual_args, actual_kwargs = cc.fetch_args_kwargs(
            parser, test_args + expected_kwargs_list)
        self.assertDictEqual(actual_args.__dict__, expected_args)
        self.assertDictEqual(expected_kwargs, actual_kwargs)

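        # a kwarg value that itself starts with a dash should still be captured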
        actual_args, actual_kwargs = cc.fetch_args_kwargs(
            parser, test_args + ['--v', '-1'])
        self.assertDictEqual(actual_args.__dict__, expected_args)
        self.assertDictEqual({'v': '-1'}, actual_kwargs)

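        # custom args supplied with a single dash are rejected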
        test_args_incorrect = test_args + ['-v', 'value']
        self.assertRaises(RuntimeError, cc.fetch_args_kwargs, parser,
                          test_args_incorrect)

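        # a dangling flag with no value is also rejected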
        test_args_incorrect = test_args + ['--v', 'v', '--odd']
        self.assertRaises(RuntimeError, cc.fetch_args_kwargs, parser,
                          test_args_incorrect)
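
The test above pins down the contract of cc.fetch_args_kwargs: known arguments land in a namespace, leftover '--key value' pairs land in a dict, and anything else raises RuntimeError. A minimal sketch of how such a helper could be built on argparse.parse_known_args follows; the function name and error messages are assumptions, not the repository's implementation.

import argparse


def fetch_args_kwargs_sketch(parser: argparse.ArgumentParser, raw_args=None):
    # Known arguments go into the namespace; everything the parser does not
    # recognize comes back as a flat list of leftover tokens.
    args, extras = parser.parse_known_args(raw_args)

    # Leftover tokens must form '--key value' pairs; a dangling flag such as
    # '--odd' leaves an odd-length list and is rejected.
    if len(extras) % 2 != 0:
        raise RuntimeError(f'Unexpected trailing argument in {extras}')

    kwargs = {}
    for key, value in zip(extras[::2], extras[1::2]):
        # Single-dash extras such as '-v' are not accepted as custom kwargs.
        if not key.startswith('--'):
            raise RuntimeError(f'Custom arguments must start with "--": {key}')
        kwargs[key[2:]] = value
    return args, kwargs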
Example #2
def main(raw_args=None):
    """
    Truncate and store fitbit data.

    Assumes you are passing arguments either via command line or a
    list.
    """
    parser = get_fitbit_parser()
    args, kwargs = clean_cdr.fetch_args_kwargs(parser, raw_args)

    pipeline_logging.configure(level=logging.INFO,
                               add_console_handler=args.console_log)

    # Identify the cleaning classes to run for the specified data stage
    # and validate that all required arguments have been supplied
    cleaning_classes = clean_cdr.DATA_STAGE_RULES_MAPPING[consts.FITBIT]
    clean_cdr.validate_custom_params(cleaning_classes, **kwargs)

    # get credentials and create client
    impersonation_creds = auth.get_impersonation_credentials(
        args.run_as_email, SCOPES)

    client = bq.get_client(args.project_id, credentials=impersonation_creds)

    # create staging, sandbox, backup and clean datasets with descriptions and labels
    fitbit_datasets = create_fitbit_datasets(client, args.release_tag)

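    # copy the view-backed fitbit tables (prefixed 'v_') into the backup
    # dataset, then mirror the backup into the staging dataset for cleaning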
    copy_fitbit_tables_from_views(client,
                                  args.fitbit_dataset,
                                  fitbit_datasets[consts.BACKUP],
                                  table_prefix='v_')
    bq.copy_datasets(client, fitbit_datasets[consts.BACKUP],
                     fitbit_datasets[consts.STAGING])

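    # rebuild an argv-style argument list for the staging dataset and append
    # any extra '--key value' kwargs before handing off to the cleaning pipeline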
    common_cleaning_args = [
        '-p', args.project_id, '-d', fitbit_datasets[consts.STAGING], '-b',
        fitbit_datasets[consts.SANDBOX], '-s', '-a', consts.FITBIT
    ]
    fitbit_cleaning_args = args_parser.add_kwargs_to_args(
        common_cleaning_args, kwargs)

    clean_cdr.main(args=fitbit_cleaning_args)

    # Snapshot the staging dataset into the final clean dataset
    bq.build_and_copy_contents(client, fitbit_datasets[consts.STAGING],
                               fitbit_datasets[consts.CLEAN])
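
Example #2 hands the extra kwargs back to the cleaning pipeline through args_parser.add_kwargs_to_args. A minimal sketch of that flattening step, mirroring the '--key value' convention exercised in Example #1; the body below is an assumption, not the repository's implementation.

def add_kwargs_to_args_sketch(base_args, kwargs):
    # Copy the base argument list so the caller's list is left untouched,
    # then append each extra keyword as a '--key value' pair so the
    # downstream clean_cdr parser can recover it via fetch_args_kwargs.
    extended = list(base_args)
    for key, value in (kwargs or {}).items():
        extended.extend([f'--{key}', value])
    return extended

With this shape, common_cleaning_args plus any extra kwargs become a single argv-style list that clean_cdr.main can parse.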