Example #1
0
def parse_args():
    """
    Parse arguments, extending the cdr_cleaner.args_parser defaults.

    Two extra arguments are handed to default_parse_args, each flagged
    as required: -n/--sandbox_dataset_id and -f/--file_path.

    :return: an expanded argument list object
    """
    import cdr_cleaner.args_parser as parser

    # Every argument spec uses the same keys in the same order, so the
    # specs are assembled by pairing this key tuple with per-argument values.
    spec_keys = (parser.SHORT_ARGUMENT, parser.LONG_ARGUMENT, parser.ACTION,
                 parser.DEST, parser.HELP, parser.REQUIRED)

    sandbox_values = ('-n', '--sandbox_dataset_id', 'store',
                      'sandbox_dataset_id',
                      'Please specify the sandbox_dataset_id', True)
    pid_file_values = (
        '-f', '--file_path', 'store', 'file_path',
        'path to csv file (with header row) containing pids whose observation records are to be removed',
        True)

    extra_arguments = [
        dict(zip(spec_keys, values))
        for values in (sandbox_values, pid_file_values)
    ]
    return parser.default_parse_args(extra_arguments)
def parse_args():
    """
    Parse arguments with EHR and validation dataset ids added to the defaults.

    Supplements the argument list defined in cdr_cleaner.args_parser with
    -e/--ehr_dataset_id and -v/--validation_dataset_id, both flagged as
    required.

    :return: an expanded argument list object
    """
    import cdr_cleaner.args_parser as parser

    ehr_dataset_arg = {
        parser.SHORT_ARGUMENT: '-e',
        parser.LONG_ARGUMENT: '--ehr_dataset_id',
        parser.ACTION: 'store',
        parser.DEST: 'ehr_dataset_id',
        parser.HELP: 'ehr_dataset_id',
        parser.REQUIRED: True,
    }
    validation_dataset_arg = {
        parser.SHORT_ARGUMENT: '-v',
        parser.LONG_ARGUMENT: '--validation_dataset_id',
        parser.ACTION: 'store',
        parser.DEST: 'validation_dataset_id',
        parser.HELP: 'validation_dataset_id',
        parser.REQUIRED: True,
    }

    return parser.default_parse_args(
        [ehr_dataset_arg, validation_dataset_arg])
Example #3
0
def parse_args():
    """
    Parse arguments with a snapshot dataset id added to the defaults.

    Supplements the argument list defined in cdr_cleaner.args_parser with a
    single -n/--snapshot_dataset_id argument flagged as required.

    :return: an expanded argument list object
    """
    import cdr_cleaner.args_parser as parser

    extra_arguments = [{
        parser.SHORT_ARGUMENT: '-n',
        parser.LONG_ARGUMENT: '--snapshot_dataset_id',
        parser.ACTION: 'store',
        parser.DEST: 'snapshot_dataset_id',
        parser.HELP: 'Create a snapshot of the dataset',
        parser.REQUIRED: True,
    }]
    return parser.default_parse_args(extra_arguments)
def parse_args():
    """
    Parse arguments with a sandbox dataset id added to the defaults.

    Supplements the cdr_cleaner.args_parser argument list with a single
    -n/--sandbox_dataset_id argument flagged as required.

    :return: an expanded argument list object
    """
    import cdr_cleaner.args_parser as parser

    sandbox_dataset_arg = dict([
        (parser.SHORT_ARGUMENT, '-n'),
        (parser.LONG_ARGUMENT, '--sandbox_dataset_id'),
        (parser.ACTION, 'store'),
        (parser.DEST, 'sandbox_dataset_id'),
        (parser.HELP, 'Please specify the sandbox_dataset_id'),
        (parser.REQUIRED, True),
    ])
    return parser.default_parse_args([sandbox_dataset_arg])

if __name__ == '__main__':
    import cdr_cleaner.args_parser as parser
    import cdr_cleaner.clean_cdr_engine as clean_engine

    combined_dataset_arg = {
        parser.SHORT_ARGUMENT: '-c',
        parser.LONG_ARGUMENT: '--combined_dataset_id',
        parser.ACTION: 'store',
        parser.DEST: 'combined_dataset_id',
        parser.HELP: 'Identifies the combined dataset',
        parser.REQUIRED: True
    }

    ARGS = parser.default_parse_args([combined_dataset_arg])

    if ARGS.list_queries:
        clean_engine.add_console_logging()
        query_list = clean_engine.get_query_list(
            ARGS.project_id,
            ARGS.dataset_id,
            ARGS.sandbox_dataset_id, [(RemoveFitbitDataIfMaxAgeExceeded, )],
            combined_dataset_id=ARGS.combined_dataset_id)
        for query in query_list:
            LOGGER.info(query)
    else:
        clean_engine.add_console_logging(ARGS.console_log)
        clean_engine.clean_dataset(
            ARGS.project_id,
            ARGS.dataset_id,
                    project=self.project_id,
                    dataset=self.dataset_id,
                    domain_table=table,
                    string_fields=string_fields)
                result = client.query(validation_query).result()
                if result.total_rows > 0:
                    raise RuntimeError(
                        f'{table} has {result.total_rows} records that have non-null string values'
                    )


if __name__ == '__main__':
    # Script entry point: either log the queries the StringFieldsSuppression
    # rule would produce, or hand the rule to the cleaning engine.
    import cdr_cleaner.args_parser as parser
    import cdr_cleaner.clean_cdr_engine as clean_engine

    ARGS = parser.default_parse_args()

    if not ARGS.list_queries:
        # Console logging here follows the parsed console_log argument.
        clean_engine.add_console_logging(ARGS.console_log)
        clean_engine.clean_dataset(
            ARGS.project_id,
            ARGS.dataset_id,
            ARGS.sandbox_dataset_id,
            [(StringFieldsSuppression, )],
        )
    else:
        # Listing mode calls add_console_logging with no argument.
        clean_engine.add_console_logging()
        query_list = clean_engine.get_query_list(
            ARGS.project_id,
            ARGS.dataset_id,
            ARGS.sandbox_dataset_id,
            [(StringFieldsSuppression, )],
        )
        for query in query_list:
            LOGGER.info(query)
Example #7
0
        parser.ACTION: 'store',
        parser.DEST: 'mapping_dataset_id',
        parser.HELP: 'Identifies the dataset containing pid-rid map table',
        parser.REQUIRED: True
    }

    mapping_table_arg = {
        parser.SHORT_ARGUMENT: '-t',
        parser.LONG_ARGUMENT: '--mapping_table_id',
        parser.ACTION: 'store',
        parser.DEST: 'mapping_table_id',
        parser.HELP: 'Identifies the pid-rid map table, typically _deid_map',
        parser.REQUIRED: True
    }

    ARGS = parser.default_parse_args([mapping_dataset_arg, mapping_table_arg])

    if ARGS.list_queries:
        clean_engine.add_console_logging()
        query_list = clean_engine.get_query_list(
            ARGS.project_id,
            ARGS.dataset_id,
            ARGS.sandbox_dataset_id, [(PIDtoRID, )],
            mapping_dataset_id=ARGS.mapping_dataset_id,
            mapping_table_id=ARGS.mapping_table_id)
        for query in query_list:
            LOGGER.info(query)
    else:
        clean_engine.add_console_logging(ARGS.console_log)
        clean_engine.clean_dataset(ARGS.project_id,
                                   ARGS.dataset_id,