Esempio n. 1
0
def _gather_unioned_ehr_queries(project_id, dataset_id, sandbox_dataset_id):
    """
    Collect, in execution order, the cleaning queries for a unioned_ehr dataset.

    :param project_id: project name
    :param dataset_id: unioned_ehr dataset name
    :param sandbox_dataset_id: sandbox dataset name; accepted but not used by
        any of the rules gathered here
    :return: one list holding the output of every rule's query builder,
        concatenated in the order the rules appear below
    """
    # Every rule builder in this function takes the same two positional args.
    target = (project_id, dataset_id)

    queries = list(id_dedup.get_id_deduplicate_queries(*target))
    queries += clean_years.get_year_of_birth_queries(*target)
    queries += neg_ages.get_negative_ages_queries(*target)
    queries += bad_end_dates.get_bad_end_date_queries(*target)
    queries += valid_death_dates.get_valid_death_date_queries(*target)
    queries += drug_refills_supply.get_days_supply_refills_queries(*target)
    queries += populate_routes.get_route_mapping_queries(*target)
    queries += fix_datetimes.get_fix_incorrect_datetime_to_date_queries(
        *target)
    queries += remove_records_with_wrong_date.get_remove_records_with_wrong_date_queries(
        *target)
    queries += invalid_procedure_source.get_remove_invalid_procedure_source_queries(
        *target)
    return queries
Esempio n. 2
0
def _gather_ehr_rdr_queries(project_id, dataset_id):
    """
    Collect, in execution order, the cleaning queries for an ehr_rdr dataset.

    :param project_id: project name
    :param dataset_id: ehr_rdr dataset name
    :return: one list holding the output of every rule's query builder,
        concatenated in the order the rules appear below
    """
    # Every rule builder in this function takes the same two positional args.
    target = (project_id, dataset_id)

    queries = list(
        replace_standard_concept_ids.replace_standard_id_in_domain_tables(
            *target))
    queries += domain_alignment.domain_alignment(*target)
    queries += id_dedup.get_id_deduplicate_queries(*target)
    queries += null_foreign_key.null_invalid_foreign_keys(*target)
    queries += clean_years.get_year_of_birth_queries(*target)
    queries += neg_ages.get_negative_ages_queries(*target)
    queries += bad_end_dates.get_bad_end_date_queries(*target)
    queries += no_data_30days_after_death.no_data_30_days_after_death(*target)
    queries += valid_death_dates.get_valid_death_date_queries(*target)
    queries += drug_refills_supply.get_days_supply_refills_queries(*target)
    return queries
Esempio n. 3
0
def _gather_combined_de_identified_queries(project_id, dataset_id,
                                           sandbox_dataset_id):
    """
    Collect, in execution order, the cleaning queries for a de_identified
    dataset.

    :param project_id: project name
    :param dataset_id: de_identified dataset name
    :param sandbox_dataset_id: sandbox dataset name; accepted but not used by
        any of the rules gathered here
    :return: one list holding the output of every rule's query builder,
        concatenated in the order the rules appear below
    """
    # Every rule builder in this function takes the same two positional args.
    target = (project_id, dataset_id)

    queries = list(id_dedup.get_id_deduplicate_queries(*target))
    queries += neg_ages.get_negative_ages_queries(*target)
    queries += bad_end_dates.get_bad_end_date_queries(*target)
    queries += valid_death_dates.get_valid_death_date_queries(*target)
    queries += fill_source_value.get_fill_freetext_source_value_fields_queries(
        *target)
    queries += repopulate_person.get_repopulate_person_post_deid_queries(
        *target)
    return queries
Esempio n. 4
0
def _gather_ehr_rdr_de_identified_queries(project_id, dataset_id):
    """
    Collect, in execution order, the cleaning queries for a de_identified
    dataset.

    :param project_id: project name
    :param dataset_id: de_identified dataset name
    :return: one list holding the output of every rule's query builder,
        concatenated in the order the rules appear below
    """
    # Every rule builder in this function takes the same two positional args.
    target = (project_id, dataset_id)

    queries = list(id_dedup.get_id_deduplicate_queries(*target))
    queries += clean_years.get_year_of_birth_queries(*target)
    queries += neg_ages.get_negative_ages_queries(*target)
    queries += bad_end_dates.get_bad_end_date_queries(*target)
    queries += person_validator.get_person_id_validation_queries(*target)
    queries += valid_death_dates.get_valid_death_date_queries(*target)
    queries += drug_refills_supply.get_days_supply_refills_queries(*target)
    queries += fill_source_value.get_fill_freetext_source_value_fields_queries(
        *target)
    return queries
Esempio n. 5
0
def _gather_ehr_queries(project_id, dataset_id):
    """
    Collect the cleaning queries for an ehr dataset.

    Only the id de-duplication rule is gathered here.

    :param project_id: project name
    :param dataset_id: ehr dataset name
    :return: a new list containing the queries produced by the rule
    """
    # Copy into a fresh list so the caller always receives a list object.
    return list(id_dedup.get_id_deduplicate_queries(project_id, dataset_id))
Esempio n. 6
0
def _gather_rdr_queries(project_id, dataset_id):
    """
    Collect, in execution order, the cleaning queries for an rdr dataset.

    :param project_id: project name
    :param dataset_id: rdr dataset name
    :return: one list holding the output of every rule's query builder,
        concatenated in the order the rules appear below
    """
    # Every rule builder in this function takes the same two positional args.
    target = (project_id, dataset_id)

    queries = list(id_dedup.get_id_deduplicate_queries(*target))
    queries += clean_years.get_year_of_birth_queries(*target)
    queries += neg_ages.get_negative_ages_queries(*target)
    queries += bad_end_dates.get_bad_end_date_queries(*target)
    return queries
Esempio n. 7
0
def _gather_combined_queries(project_id, dataset_id, sandbox_dataset_id):
    """
    Collect, in execution order, the cleaning queries for a combined dataset.

    :param project_id: project name
    :param dataset_id: combined dataset name
    :param sandbox_dataset_id: sandbox dataset name; forwarded only to the
        drop_duplicate_states rule
    :return: one list holding the output of every rule's query builder,
        concatenated in the order the rules appear below
    """
    # All rules except drop_duplicate_states take just these two positional
    # args.
    target = (project_id, dataset_id)

    queries = list(
        replace_standard_concept_ids.replace_standard_id_in_domain_tables(
            *target))
    queries += domain_alignment.domain_alignment(*target)
    queries += drop_participants_without_ppi_or_ehr.get_queries(*target)
    queries += id_dedup.get_id_deduplicate_queries(*target)
    queries += clean_years.get_year_of_birth_queries(*target)
    queries += neg_ages.get_negative_ages_queries(*target)
    queries += bad_end_dates.get_bad_end_date_queries(*target)
    queries += no_data_30days_after_death.no_data_30_days_after_death(*target)
    queries += valid_death_dates.get_valid_death_date_queries(*target)
    queries += drug_refills_supply.get_days_supply_refills_queries(*target)
    queries += populate_routes.get_route_mapping_queries(*target)
    queries += fix_datetimes.get_fix_incorrect_datetime_to_date_queries(
        *target)
    queries += remove_records_with_wrong_date.get_remove_records_with_wrong_date_queries(
        *target)
    # The only rule here that also receives the sandbox dataset.
    queries += drop_duplicate_states.get_drop_duplicate_states_queries(
        project_id, dataset_id, sandbox_dataset_id)
    # TODO : Make null_invalid_foreign_keys able to run on de_identified dataset
    queries += null_foreign_key.null_invalid_foreign_keys(*target)
    return queries
Esempio n. 8
0
def _gather_unioned_ehr_queries(project_id, dataset_id):
    """
    Collect, in execution order, the cleaning queries for a unioned_ehr dataset.

    :param project_id: project name
    :param dataset_id: unioned_ehr dataset name
    :return: one list holding the output of every rule's query builder,
        concatenated in the order the rules appear below
    """
    # Every rule builder in this function takes the same two positional args.
    target = (project_id, dataset_id)

    queries = list(id_dedup.get_id_deduplicate_queries(*target))
    queries += clean_years.get_year_of_birth_queries(*target)
    queries += neg_ages.get_negative_ages_queries(*target)
    queries += bad_end_dates.get_bad_end_date_queries(*target)
    queries += valid_death_dates.get_valid_death_date_queries(*target)
    queries += drug_refills_supply.get_days_supply_refills_queries(*target)
    return queries