예제 #1
0
def main(raw_args=None):
    """
    Run the clean rules reporter from argument parsing to CSV output.

    The arguments are parsed, console logging is configured from them, and
    a CSV report is written.  If the unspecified data stage was requested,
    it is first replaced by the list of every other data stage value.

    :param raw_args: The list of arguments to parse.  When omitted, the
        command line arguments are parsed instead, which lets other python
        modules call this function directly.
    """
    args = parse_args(raw_args)
    engine.add_console_logging(args.console_log)

    unspecified = cdr_consts.DataStage.UNSPECIFIED
    if unspecified.value in args.data_stage:
        # Swap the placeholder for every concrete stage value.
        all_stages = []
        for member in cdr_consts.DataStage:
            if member is not unspecified:
                all_stages.append(member.value)
        args.data_stage = all_stages

        LOGGER.info(
            f"Data stage was {cdr_consts.DataStage.UNSPECIFIED.value}, so all stages "
            f"will be reported on:  {args.data_stage}")

    write_csv_report(args.output_filepath, args.data_stage, args.fields)

    LOGGER.info("Finished the reporting module")
예제 #2
0
def main(args=None):
    """
    Apply, or only list, the cleaning rules mapped to the chosen data stage.

    The rules are looked up in DATA_STAGE_RULES_MAPPING by the parsed data
    stage value and the custom keyword parameters are validated against
    them.  When ``list_queries`` is set, the queries are logged instead of
    the dataset being cleaned.

    :param args: list of all the arguments to apply the cleaning rules
    :return: None
    """
    parsed, custom_kwargs = fetch_args_kwargs(args)

    stage_rules = DATA_STAGE_RULES_MAPPING[parsed.data_stage.value]
    validate_custom_params(stage_rules, **custom_kwargs)

    if not parsed.list_queries:
        # Normal mode: run the rules against the dataset.
        clean_engine.add_console_logging(parsed.console_log)
        clean_engine.clean_dataset(
            project_id=parsed.project_id,
            dataset_id=parsed.dataset_id,
            sandbox_dataset_id=parsed.sandbox_dataset_id,
            rules=stage_rules,
            **custom_kwargs)
        return

    # Listing mode: log each query rather than cleaning the dataset.
    clean_engine.add_console_logging()
    queries = clean_engine.get_query_list(
        project_id=parsed.project_id,
        dataset_id=parsed.dataset_id,
        sandbox_dataset_id=parsed.sandbox_dataset_id,
        rules=stage_rules,
        **custom_kwargs)
    for query in queries:
        LOGGER.info(query)
        parser.REQUIRED: True
    }, {
        parser.SHORT_ARGUMENT: '-v',
        parser.LONG_ARGUMENT: '--validation_dataset_id',
        parser.ACTION: 'store',
        parser.DEST: 'validation_dataset_id',
        parser.HELP: 'validation_dataset_id',
        parser.REQUIRED: True
    }]
    args = parser.default_parse_args(additional_arguments)
    return args


if __name__ == '__main__':
    # The engine is imported only when this file runs as a script.
    import cdr_cleaner.clean_cdr_engine as clean_engine

    ARGS = parse_args()

    if not ARGS.list_queries:
        # Normal mode: run the single rule against the dataset.
        clean_engine.add_console_logging(ARGS.console_log)
        clean_engine.clean_dataset(
            ARGS.project_id,
            ARGS.dataset_id,
            ARGS.sandbox_dataset_id,
            [(delete_records_for_non_matching_participants, )],
        )
    else:
        # Listing mode: log the queries the rule produces.
        clean_engine.add_console_logging()
        for query in clean_engine.get_query_list(
                ARGS.project_id,
                ARGS.dataset_id,
                ARGS.sandbox_dataset_id,
                [(delete_records_for_non_matching_participants, )],
        ):
            LOGGER.info(query)
예제 #4
0
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d',
                        '--data_stage',
                        required=True,
                        dest='data_stage',
                        action='store',
                        type=stage,
                        choices=list(
                            [s for s in stage if s is not stage.UNSPECIFIED]),
                        help='Specify the dataset')
    parser.add_argument('-s', action='store_true', help='Send logs to console')
    args = parser.parse_args()
    clean_engine.add_console_logging(args.s)
    if args.data_stage == stage.EHR:
        clean_ehr_dataset()
    elif args.data_stage == stage.UNIONED:
        clean_unioned_ehr_dataset()
    elif args.data_stage == stage.RDR:
        clean_rdr_dataset()
    elif args.data_stage == stage.COMBINED:
        clean_combined_dataset()
    elif args.data_stage == stage.DEID_BASE:
        clean_combined_de_identified_dataset()
    elif args.data_stage == stage.DEID_CLEAN:
        clean_combined_de_identified_clean_dataset()
    else:
        raise EnvironmentError(
            f'Dataset selection should be from [{stage.EHR}, {stage.UNIONED}, {stage.RDR}, {stage.COMBINED},'