def _gather_unioned_ehr_queries(project_id, dataset_id, sandbox_dataset_id):
    """
    Collect, in execution order, the queries that clean a unioned_ehr dataset.

    Each cleaning rule contributes its own queries; they are concatenated in
    the order the rules are listed below.

    NOTE(review): another ``_gather_unioned_ehr_queries`` taking only
    ``(project_id, dataset_id)`` is defined later in this file. If both live
    in the same module, the later definition replaces this one at import
    time -- confirm which is intended.

    :param project_id: project name
    :param dataset_id: unioned_ehr dataset name
    :param sandbox_dataset_id: sandbox dataset name; accepted but not used
        by any rule gathered here
    :return: list of queries
    """
    # Order matters: queries are emitted in exactly this sequence.
    rule_builders = (
        id_dedup.get_id_deduplicate_queries,
        clean_years.get_year_of_birth_queries,
        neg_ages.get_negative_ages_queries,
        bad_end_dates.get_bad_end_date_queries,
        valid_death_dates.get_valid_death_date_queries,
        drug_refills_supply.get_days_supply_refills_queries,
        populate_routes.get_route_mapping_queries,
        fix_datetimes.get_fix_incorrect_datetime_to_date_queries,
        remove_records_with_wrong_date.
        get_remove_records_with_wrong_date_queries,
        invalid_procedure_source.get_remove_invalid_procedure_source_queries,
    )

    queries = []
    for build_queries in rule_builders:
        queries.extend(build_queries(project_id, dataset_id))
    return queries
def _gather_ehr_rdr_queries(project_id, dataset_id):
    """
    Collect, in execution order, the queries that clean an ehr_rdr dataset.

    :param project_id: project name
    :param dataset_id: ehr_rdr dataset name
    :return: list of queries
    """
    # Order matters: queries are emitted in exactly this sequence.
    rule_builders = (
        replace_standard_concept_ids.replace_standard_id_in_domain_tables,
        domain_alignment.domain_alignment,
        id_dedup.get_id_deduplicate_queries,
        null_foreign_key.null_invalid_foreign_keys,
        clean_years.get_year_of_birth_queries,
        neg_ages.get_negative_ages_queries,
        bad_end_dates.get_bad_end_date_queries,
        no_data_30days_after_death.no_data_30_days_after_death,
        valid_death_dates.get_valid_death_date_queries,
        drug_refills_supply.get_days_supply_refills_queries,
    )

    queries = []
    for build_queries in rule_builders:
        queries.extend(build_queries(project_id, dataset_id))
    return queries
def _gather_combined_de_identified_queries(project_id, dataset_id,
                                           sandbox_dataset_id):
    """
    Collect, in execution order, the queries that clean a de_identified
    dataset.

    :param project_id: project name
    :param dataset_id: de_identified dataset name
    :param sandbox_dataset_id: sandbox dataset name; accepted but not used
        by any rule gathered here
    :return: list of queries
    """
    queries = []

    # Record-level cleanup rules.
    queries += id_dedup.get_id_deduplicate_queries(project_id, dataset_id)
    queries += neg_ages.get_negative_ages_queries(project_id, dataset_id)
    queries += bad_end_dates.get_bad_end_date_queries(project_id, dataset_id)
    queries += valid_death_dates.get_valid_death_date_queries(
        project_id, dataset_id)

    # Rules that fill source values and repopulate person after de-id.
    queries += fill_source_value.get_fill_freetext_source_value_fields_queries(
        project_id, dataset_id)
    queries += repopulate_person.get_repopulate_person_post_deid_queries(
        project_id, dataset_id)

    return queries
def _gather_ehr_rdr_de_identified_queries(project_id, dataset_id):
    """
    Collect, in execution order, the queries that clean a de_identified
    dataset.

    :param project_id: project name
    :param dataset_id: de_identified dataset name
    :return: list of queries
    """
    # Order matters: queries are emitted in exactly this sequence.
    rule_builders = (
        id_dedup.get_id_deduplicate_queries,
        clean_years.get_year_of_birth_queries,
        neg_ages.get_negative_ages_queries,
        bad_end_dates.get_bad_end_date_queries,
        person_validator.get_person_id_validation_queries,
        valid_death_dates.get_valid_death_date_queries,
        drug_refills_supply.get_days_supply_refills_queries,
        fill_source_value.get_fill_freetext_source_value_fields_queries,
    )

    queries = []
    for build_queries in rule_builders:
        queries.extend(build_queries(project_id, dataset_id))
    return queries
def _gather_ehr_queries(project_id, dataset_id):
    """
    Collect the queries that clean an ehr dataset.

    Only the id de-duplication rule is applied to this dataset type.

    :param project_id: project name
    :param dataset_id: ehr dataset name
    :return: list of queries
    """
    # Copy into a fresh list so the caller always receives a new list object.
    return list(id_dedup.get_id_deduplicate_queries(project_id, dataset_id))
def _gather_rdr_queries(project_id, dataset_id):
    """
    Collect, in execution order, the queries that clean an rdr dataset.

    :param project_id: project name
    :param dataset_id: rdr dataset name
    :return: list of queries
    """
    queries = []
    queries += id_dedup.get_id_deduplicate_queries(project_id, dataset_id)
    queries += clean_years.get_year_of_birth_queries(project_id, dataset_id)
    queries += neg_ages.get_negative_ages_queries(project_id, dataset_id)
    queries += bad_end_dates.get_bad_end_date_queries(project_id, dataset_id)
    return queries
def _gather_combined_queries(project_id, dataset_id, sandbox_dataset_id):
    """
    Collect, in execution order, the queries that clean a combined dataset.

    :param project_id: project name
    :param dataset_id: combined dataset name
    :param sandbox_dataset_id: sandbox dataset name; passed only to the
        drop-duplicate-states rule
    :return: list of queries
    """
    # Rules that take just (project_id, dataset_id), in execution order.
    two_arg_builders = (
        replace_standard_concept_ids.replace_standard_id_in_domain_tables,
        domain_alignment.domain_alignment,
        drop_participants_without_ppi_or_ehr.get_queries,
        id_dedup.get_id_deduplicate_queries,
        clean_years.get_year_of_birth_queries,
        neg_ages.get_negative_ages_queries,
        bad_end_dates.get_bad_end_date_queries,
        no_data_30days_after_death.no_data_30_days_after_death,
        valid_death_dates.get_valid_death_date_queries,
        drug_refills_supply.get_days_supply_refills_queries,
        populate_routes.get_route_mapping_queries,
        fix_datetimes.get_fix_incorrect_datetime_to_date_queries,
        remove_records_with_wrong_date.
        get_remove_records_with_wrong_date_queries,
    )

    queries = []
    for build_queries in two_arg_builders:
        queries.extend(build_queries(project_id, dataset_id))

    # This rule is the only one here that also receives the sandbox dataset.
    queries.extend(
        drop_duplicate_states.get_drop_duplicate_states_queries(
            project_id, dataset_id, sandbox_dataset_id))

    # TODO : Make null_invalid_foreign_keys able to run on de_identified dataset
    queries.extend(
        null_foreign_key.null_invalid_foreign_keys(project_id, dataset_id))

    return queries
def _gather_unioned_ehr_queries(project_id, dataset_id):
    """
    Collect, in execution order, the queries that clean a unioned_ehr dataset.

    NOTE(review): another ``_gather_unioned_ehr_queries`` that also takes
    ``sandbox_dataset_id`` and gathers additional rules is defined earlier in
    this file. If both live in the same module, this later definition is the
    one that remains bound to the name -- confirm which is intended.

    :param project_id: project name
    :param dataset_id: unioned_ehr dataset name
    :return: list of queries
    """
    # Order matters: queries are emitted in exactly this sequence.
    rule_builders = (
        id_dedup.get_id_deduplicate_queries,
        clean_years.get_year_of_birth_queries,
        neg_ages.get_negative_ages_queries,
        bad_end_dates.get_bad_end_date_queries,
        valid_death_dates.get_valid_death_date_queries,
        drug_refills_supply.get_days_supply_refills_queries,
    )

    # Flatten every rule's queries into a single list, rule by rule.
    return [
        query
        for build_queries in rule_builders
        for query in build_queries(project_id, dataset_id)
    ]