Example #1
0
def check_vehicle_accident_suppression(check_df,
                                       project_id,
                                       post_deid_dataset,
                                       pre_deid_dataset=None,
                                       mapping_dataset=None):
    """Run motor vehicle accident suppression check

    The check is executed once with the ICD9 query template and once with
    the ICD10 query template; both results are stacked into one frame.

    Parameters
    ----------
    check_df: pd.DataFrame
        Dataframe containing the checks that need to be done
    project_id: str
        Google Bigquery project_id
    post_deid_dataset: str
        Bigquery dataset after de-id rules were run
    pre_deid_dataset: str, optional
        Bigquery dataset before de-id rules were run. Not used by this check.
    mapping_dataset: optional
        Not used by this check.

    Returns
    -------
    pd.DataFrame
        ICD9 results followed by ICD10 results, concatenated with
        ``sort=True``.
    """
    # Order matters for the row order of the output: ICD9 first, then ICD10.
    query_templates = (QUERY_VEHICLE_ACCIDENT_SUPPRESSION_ICD9,
                       QUERY_VEHICLE_ACCIDENT_SUPPRESSION_ICD10)
    per_code_system = [
        run_check_by_row(check_df, template, project_id, post_deid_dataset)
        for template in query_templates
    ]
    return pd.concat(per_code_system, sort=True)
Example #2
0
def check_mapping(check_df, project_id, post_dataset_id, pre_deid_dataset,
                  mapping_dataset):
    """Run mapping verification rules

    Four queries are run in a fixed order (ID type, old ID still present,
    new ID missing from mapping, ID mapped incorrectly) and their results
    are stacked into one frame.

    Parameters
    ----------
    check_df: pd.DataFrame
        Dataframe containing the checks that need to be done
    project_id: str
        Google Bigquery project_id
    post_dataset_id: str
        Bigquery dataset after de-id rules were run
    pre_deid_dataset: str
        Bigquery dataset before de-id rules were run
    mapping_dataset:
        Forwarded unchanged as the last positional argument of
        ``run_check_by_row`` (presumably the dataset holding the mapping
        tables -- confirm against that helper).

    Returns
    -------
    pd.DataFrame
        Results of the four checks, concatenated with ``sort=True``.
    """
    # (query template, text passed alongside it) pairs, run top to bottom.
    rules = (
        (QUERY_ID_NOT_OF_CORRECT_TYPE, "ID datatype has been changed"),
        (QUERY_ID_NOT_CHANGED_BY_DEID, "Old ID still in the dataset"),
        (QUERY_ID_NOT_IN_MAPPING, "New ID not found in mapping table"),
        (QUERY_ID_NOT_MAPPED_PROPERLY, "ID not properly mapped"),
    )

    # Arguments stay positional: the helper's parameter names are not
    # visible from this function.
    outcomes = [
        run_check_by_row(check_df, query, project_id, post_dataset_id,
                         pre_deid_dataset, description, mapping_dataset)
        for query, description in rules
    ]
    return pd.concat(outcomes, sort=True)
Example #3
0
def check_field_cancer_concept_suppression(check_df,
                                           project_id,
                                           post_deid_dataset,
                                           pre_deid_dataset=None,
                                           mapping_dataset=None):
    """Run suppression check for some cancer concepts

    Parameters
    ----------
    check_df: pd.DataFrame
        Dataframe containing the checks that need to be done
    project_id: str
        Google Bigquery project_id
    post_deid_dataset: str
        Bigquery dataset after de-id rules were run
    pre_deid_dataset: str, optional
        Bigquery dataset before de-id rules were run. Not used by this check.
    mapping_dataset: optional
        Not used by this check.

    Returns
    -------
    pd.DataFrame
        Whatever ``run_check_by_row`` produces for the cancer concept query.
    """
    return run_check_by_row(check_df, QUERY_CANCER_CONCEPT_SUPPRESSION,
                            project_id, post_deid_dataset)
Example #4
0
def check_field_geolocation_records_suppression(check_df,
                                                project_id,
                                                post_deid_dataset,
                                                pre_deid_dataset=None,
                                                mapping_dataset=None):
    """Run geolocation records suppression check

    Parameters
    ----------
    check_df: pd.DataFrame
        Dataframe containing the checks that need to be done
    project_id: str
        Google Bigquery project_id
    post_deid_dataset: str
        Bigquery dataset after de-id rules were run
    pre_deid_dataset: str, optional
        Bigquery dataset before de-id rules were run. Not used by this check.
    mapping_dataset: optional
        Not used by this check.

    Returns
    -------
    Result of ``run_check_by_row`` for the geolocation query (a
    pd.DataFrame in the sibling checks -- confirm against that helper).
    """
    geolocation_check = run_check_by_row(check_df,
                                         QUERY_GEOLOCATION_SUPPRESSION,
                                         project_id, post_deid_dataset)
    return geolocation_check
def check_table_suppression(check_df,
                            project_id,
                            post_dataset_id,
                            pre_deid_dataset=None,
                            mapping_dataset=None):
    """Run table suppression check

    Parameters
    ----------
    check_df: pd.DataFrame
        Dataframe containing the checks that need to be done
    project_id: str
        Google Bigquery project_id
    post_dataset_id: str
        Bigquery dataset after de-id rules were run
    pre_deid_dataset: str, optional
        Bigquery dataset before de-id rules were run. Not used by this check.
    mapping_dataset: optional
        Not used by this check.

    Returns
    -------
    pd.DataFrame
        Check results with the index reset and the old index discarded.
    """
    # drop=True discards the previous index instead of keeping it as a column.
    return run_check_by_row(check_df, QUERY_SUPPRESSED_TABLE, project_id,
                            post_dataset_id).reset_index(drop=True)
Example #6
0
def check_field_suppression(check_df,
                            project_id,
                            post_dataset_id,
                            pre_deid_dataset=None,
                            mapping_dataset=None):
    """Run field suppression check

    ``check_df`` is split on its ``is_nullable`` and ``data_type`` columns
    and each subset is run against its own query template:

    * ``is_nullable == 'YES'``: nullable-field-not-null query
    * ``is_nullable == 'NO'`` and ``data_type == 'INT64'``: numeric-not-zero
      query
    * ``is_nullable == 'NO'`` and any other ``data_type``:
      required-field-not-empty query

    Rows whose ``is_nullable`` is neither ``'YES'`` nor ``'NO'`` fall into
    none of the subsets and are not checked.

    Parameters
    ----------
    check_df: pd.DataFrame
        Dataframe containing the checks that need to be done
    project_id: str
        Google Bigquery project_id
    post_dataset_id: str
        Bigquery dataset after de-id rules were run
    pre_deid_dataset: str, optional
        Bigquery dataset before de-id rules were run. Not used by this check.
    mapping_dataset: optional
        Not used by this check.

    Returns
    -------
    pd.DataFrame
        Results for the three subsets, in the order listed above,
        concatenated with ``sort=True``.
    """
    nullability = check_df['is_nullable']
    data_type = check_df['data_type']

    is_required = nullability == 'NO'
    is_int64 = data_type == 'INT64'
    is_not_int64 = data_type != 'INT64'

    # Build every subset up front, then run the queries in a fixed order.
    subsets_and_queries = (
        (check_df[nullability == 'YES'],
         QUERY_SUPPRESSED_NULLABLE_FIELD_NOT_NULL),
        (check_df[is_required & is_int64],
         QUERY_SUPPRESSED_NUMERIC_NOT_ZERO),
        (check_df[is_required & is_not_int64],
         QUERY_SUPPRESSED_REQUIRED_FIELD_NOT_EMPTY),
    )

    outcomes = [
        run_check_by_row(subset, query, project_id, post_dataset_id)
        for subset, query in subsets_and_queries
    ]
    return pd.concat(outcomes, sort=True)
Example #7
0
def check_field_freetext_response_suppression(check_df,
                                              project_id,
                                              post_deid_dataset,
                                              pre_deid_dataset=None,
                                              mapping_dataset=None):
    """Run free text response suppression check

    Parameters
    ----------
    check_df: pd.DataFrame
        Dataframe containing the checks that need to be done
    project_id: str
        Google Bigquery project_id
    post_deid_dataset: str
        Bigquery dataset after de-id rules were run
    pre_deid_dataset: str, optional
        Bigquery dataset before de-id rules were run. Not used by this check.
    mapping_dataset: optional
        Not used by this check.

    Returns
    -------
    Result of ``run_check_by_row`` for the free text response query (a
    pd.DataFrame in the sibling checks -- confirm against that helper).
    """
    return run_check_by_row(check_df, QUERY_SUPPRESSED_FREE_TEXT_RESPONSE,
                            project_id, post_deid_dataset)
Example #8
0
def check_mapping_zipcode_generalization(check_df, project_id, post_dataset_id,
                                         pre_deid_dataset, mapping_dataset):
    """Run zipcode generalization and transformation check

    Parameters
    ----------
    check_df: pd.DataFrame
        Dataframe containing the checks that need to be done
    project_id: str
        Google Bigquery project_id
    post_dataset_id: str
        Bigquery dataset after de-id rules were run
    pre_deid_dataset: str
        Bigquery dataset before de-id rules were run
    mapping_dataset:
        Accepted but not forwarded to ``run_check_by_row`` by this check.

    Returns
    -------
    pd.DataFrame
        Whatever ``run_check_by_row`` produces for the zip code query.
    """
    # Text handed to run_check_by_row as its sixth positional argument.
    description = "Zip code value generalized and transformed"
    return run_check_by_row(check_df, QUERY_ZIP_CODE_GENERALIZATION,
                            project_id, post_dataset_id, pre_deid_dataset,
                            description)