Esempio n. 1
0
    def test_get_ehr_person_values_bytes(self, mock_query, mock_response,
                                         mock_fields):
        """
        A bytes value in a STRING-typed column is expected back as a str.

        The three mock arguments are injected from outside this method
        (presumably by patch decorators -- confirm against the class).
        """
        # pre conditions
        column_name = 'foo_field'
        column_value = b'hello'
        mock_query.return_value = {}
        mock_response.return_value = [
            {
                consts.PERSON_ID_FIELD: 1,
                column_name: column_value,
            },
        ]
        mock_fields.return_value = [{
            'name': column_name,
            'type': consts.STRING_TYPE
        }]

        # test
        actual = reader.get_ehr_person_values('project-foo', 'ehr-bar',
                                              'table-doh', column_name)

        # post-conditions
        expected = {1: 'hello'}
        self.assertEqual(actual, expected)

        # assert_called_once_with raises AssertionError itself, so it needs no
        # assertEqual wrapper; it also covers the "called exactly once" check.
        mock_query.assert_called_once_with(
            consts.EHR_PERSON_VALUES.format(project='project-foo',
                                            dataset='ehr-bar',
                                            table='table-doh',
                                            field=column_name))
        self.assertEqual(mock_response.call_count, 1)
Esempio n. 2
0
    def test_get_ehr_person_values(self, mock_query, mock_response):
        """
        String column values are expected back keyed by person id, with their
        original letter case untouched.

        The mock arguments are injected from outside this method
        (presumably by patch decorators -- confirm against the class).
        """
        # pre conditions
        column_name = 'gender_concept_id'
        mock_query.return_value = {}
        mock_response.return_value = [
            {
                consts.PERSON_ID_FIELD: 1,
                column_name: 'saLLy',
            },
            {
                consts.PERSON_ID_FIELD: 2,
                column_name: 'Rudy'
            },
            {
                consts.PERSON_ID_FIELD: 3,
                column_name: 'MaTiLdA'
            },
        ]

        # test
        actual = reader.get_ehr_person_values('project-foo', 'ehr-bar',
                                              'table-doh', column_name)

        # post-conditions
        expected = {1: 'saLLy', 2: 'Rudy', 3: 'MaTiLdA'}
        self.assertEqual(actual, expected)

        # assert_called_once_with raises AssertionError itself, so it needs no
        # assertEqual wrapper; it also covers the "called exactly once" check.
        mock_query.assert_called_once_with(
            consts.EHR_PERSON_VALUES.format(project='project-foo',
                                            dataset='ehr-bar',
                                            table='table-doh',
                                            field=column_name))
        self.assertEqual(mock_response.call_count, 1)
Esempio n. 3
0
def _compare_birth_dates(project, validation_dataset, pii_dataset, site,
                         concept_id_pii, pii_tables):
    """
    Compare birth dates for people.

    Converts birthdates and birth_datetimes to calendar objects.  Converts
    the calendar objects back to strings with the same format and compares
    these strings.

    :param project:  project to search for the datasets
    :param validation_dataset:  the auto generated match validation dataset
        created in this module.  queried to get the birth date value
    :param pii_dataset:  dataset created from submitted hpo sites.  the pii tables
    :param site: string identifier of hpo
    :param concept_id_pii:  integer value of concept id for concept in the rdr_dataset
    :param pii_tables:  container of table names; the site's person table
        name must be a member of it

    :return: dictionary mapping each person_id read from the site's person
        table to consts.MATCH, consts.MISMATCH or consts.MISSING
    :raises RuntimeError: if the site's person table is not in pii_tables
    """
    table_name = site + consts.EHR_PERSON_TABLE_SUFFIX

    # Fail fast before issuing any query when the table is not available.
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    pii_birthdates = readers.get_rdr_match_values(project,
                                                  validation_dataset,
                                                  consts.ID_MATCH_TABLE,
                                                  concept_id_pii)

    try:
        ehr_birthdates = readers.get_ehr_person_values(
            project, pii_dataset, table_name, consts.BIRTH_DATETIME_FIELD)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # Log with traceback, then let the caller decide how to proceed.
        LOGGER.exception("Unable to read PII for: %s\tdata field:\t%s",
                         site, consts.BIRTH_DATETIME_FIELD)
        raise

    match_values = {}

    # compare birth_datetime from ppi info to ehr info and record results.
    # Only person ids present in the EHR result are examined.
    for person_id, ehr_birthdate in ehr_birthdates.items():
        rdr_birthdate = pii_birthdates.get(person_id)

        if rdr_birthdate is None or ehr_birthdate is None:
            match_values[person_id] = consts.MISSING
        elif isinstance(rdr_birthdate, str) and isinstance(
                ehr_birthdate, str):
            # convert values to datetime objects
            # NOTE(review): an exception raised by parse() for a malformed
            # value is not caught here and propagates to the caller.
            rdr_date = parse(rdr_birthdate)
            ehr_date = parse(ehr_birthdate)
            # convert datetime objects to Year/month/day strings and compare
            rdr_string = rdr_date.strftime(consts.DATE_FORMAT)
            ehr_string = ehr_date.strftime(consts.DATE_FORMAT)

            match_str = consts.MATCH if rdr_string == ehr_string else consts.MISMATCH
            match_values[person_id] = match_str
        else:
            # At least one side is present but is not a string.
            match_values[person_id] = consts.MISMATCH

    return match_values
Esempio n. 4
0
def _compare_birth_dates(
        project,
        validation_dataset,
        pii_dataset,
        site,
        concept_id_pii
    ):
    """
    Compare birth dates for people.

    Converts birthdates and birth_datetimes to calendar objects.  Converts
    the calendar objects back to strings with the same format and compares
    these strings.

    :param project:  project to search for the datasets
    :param validation_dataset:  the auto generated match validation dataset
        created in this module.  queried to get the birth date value
    :param pii_dataset:  dataset created from submitted hpo sites.  the pii tables
    :param site: string identifier of hpo
    :param concept_id_pii:  integer value of concept id for concept in the rdr_dataset

    :return: dictionary mapping each person_id read from the site's person
        table to consts.MATCH, consts.MISMATCH or consts.MISSING
    """
    match_values = {}

    pii_birthdates = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id_pii
    )

    ehr_birthdates = readers.get_ehr_person_values(
        project,
        pii_dataset,
        site + consts.EHR_PERSON_TABLE_SUFFIX,
        consts.BIRTH_DATETIME_FIELD
    )

    # compare birth_datetime from ppi info to ehr info and record results.
    # items() is used instead of iteritems() so the loop runs on both
    # Python 2 and Python 3 dictionaries.
    for person_id, ehr_birthdate in ehr_birthdates.items():
        rdr_birthdate = pii_birthdates.get(person_id)

        if rdr_birthdate is None or ehr_birthdate is None:
            match_values[person_id] = consts.MISSING
        elif isinstance(rdr_birthdate, str) and isinstance(ehr_birthdate, str):
            # convert values to datetime objects
            # NOTE(review): an exception raised by parse() for a malformed
            # value is not caught here and propagates to the caller.
            rdr_date = parse(rdr_birthdate)
            ehr_date = parse(ehr_birthdate)
            # convert datetime objects to Year/month/day strings and compare
            rdr_string = rdr_date.strftime(consts.DATE)
            ehr_string = ehr_date.strftime(consts.DATE)

            match_str = consts.MATCH if rdr_string == ehr_string else consts.MISMATCH
            match_values[person_id] = match_str
        else:
            # At least one side is present but is not a string.
            match_values[person_id] = consts.MISMATCH

    return match_values
Esempio n. 5
0
def _compare_genders(project, validation_dataset, pii_dataset, hpo,
                     concept_id_pii, pii_tables):
    """
    Compare genders for people.

    Looks up each EHR gender value in consts.SEX_CONCEPT_IDS and compares
    the result, case-insensitively, with the value read from the match
    validation table for the same person.

    :param project:  project to search for the datasets
    :param validation_dataset:  the auto generated match validation dataset
        created in this module.  queried to get the gender value
    :param pii_dataset:  dataset created from submitted hpo sites.  the pii tables
    :param hpo: string identifier of hpo
    :param concept_id_pii:  integer value of concept id for concept in the rdr_dataset
    :param pii_tables:  container of table names; the hpo's person table
        name must be a member of it

    :return: dictionary mapping each person_id read from the hpo's person
        table to consts.MATCH, consts.MISMATCH or consts.MISSING
    :raises RuntimeError: if the hpo's person table is not in pii_tables
    """
    results = {}
    person_table = hpo + consts.EHR_PERSON_TABLE_SUFFIX

    # Bail out before querying anything when the table is unavailable.
    if person_table not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(person_table))

    rdr_values = readers.get_rdr_match_values(project, validation_dataset,
                                              consts.ID_MATCH_TABLE,
                                              concept_id_pii)

    try:
        ehr_values = readers.get_ehr_person_values(
            project, pii_dataset, person_table, consts.GENDER_FIELD)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # Log with traceback, then let the caller decide how to proceed.
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{consts.GENDER_FIELD}"
        )
        raise

    # compare gender from ppi info to ehr info and record results.
    for person_id, ehr_concept in ehr_values.items():
        # NOTE(review): both lookups default to '', so a person absent from
        # the rdr values whose EHR value is not in SEX_CONCEPT_IDS compares
        # '' == '' and is recorded as a match -- confirm this is intended.
        rdr_label = rdr_values.get(person_id, '')
        ehr_label = consts.SEX_CONCEPT_IDS.get(ehr_concept, '')

        if rdr_label is not None and ehr_label is not None:
            same = rdr_label.lower() == ehr_label.lower()
            outcome = consts.MATCH if same else consts.MISMATCH
        else:
            outcome = consts.MISSING

        results[person_id] = outcome

    return results
Esempio n. 6
0
def _compare_genders(
        project,
        validation_dataset,
        pii_dataset,
        hpo,
        concept_id_pii
    ):
    """
    Compare genders for people.

    Looks up each EHR gender value in consts.SEX_CONCEPT_IDS and compares
    the result, case-insensitively, with the value read from the match
    validation table for the same person.

    :param project:  project to search for the datasets
    :param validation_dataset:  the auto generated match validation dataset
        created in this module.  queried to get the gender value
    :param pii_dataset:  dataset created from submitted hpo sites.  the pii tables
    :param hpo: string identifier of hpo
    :param concept_id_pii:  integer value of concept id for concept in the rdr_dataset

    :return: dictionary mapping each person_id read from the hpo's person
        table to consts.MATCH, consts.MISMATCH or consts.MISSING
    """
    match_values = {}

    pii_genders = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id_pii
    )

    ehr_genders = readers.get_ehr_person_values(
        project,
        pii_dataset,
        hpo + consts.EHR_PERSON_TABLE_SUFFIX,
        consts.GENDER_FIELD
    )

    # compare gender from ppi info to ehr info and record results.
    # items() is used instead of iteritems() so the loop runs on both
    # Python 2 and Python 3 dictionaries.
    for person_id, ehr_gender in ehr_genders.items():
        rdr_gender = pii_genders.get(person_id, '')
        ehr_gender = consts.SEX_CONCEPT_IDS.get(ehr_gender, '')

        if rdr_gender is None or ehr_gender is None:
            match_str = consts.MISSING
        else:
            # Lower-case only after the None check: calling .lower() first
            # raised AttributeError on a None value and left the MISSING
            # branch unreachable.
            rdr_gender = rdr_gender.lower()
            ehr_gender = ehr_gender.lower()
            match_str = consts.MATCH if rdr_gender == ehr_gender else consts.MISMATCH

        match_values[person_id] = match_str

    return match_values
Esempio n. 7
0
    def test_get_ehr_person_values_with_duplicate_keys(self, mock_query,
                                                       mock_response,
                                                       mock_fields):
        """
        When a person id appears in several rows, the expected result holds
        the value from the first row seen (person 3 stays 'MaTiLdA', not
        'mattie').

        The three mock arguments are injected from outside this method
        (presumably by patch decorators -- confirm against the class).
        """
        # pre conditions
        column_name = 'birth_datetime'
        mock_query.return_value = {}
        mock_response.return_value = [
            {
                consts.PERSON_ID_FIELD: 1,
                column_name: 'saLLy',
            },
            {
                consts.PERSON_ID_FIELD: 2,
                column_name: 'Rudy'
            },
            {
                consts.PERSON_ID_FIELD: 3,
                column_name: 'MaTiLdA'
            },
            # repeated person ids start here
            {
                consts.PERSON_ID_FIELD: 2,
                column_name: 'Rudy'
            },
            {
                consts.PERSON_ID_FIELD: 3,
                column_name: 'mattie'
            },
        ]
        mock_fields.return_value = [{
            'name': column_name,
            'type': consts.DATE_TYPE
        }]

        # test
        actual = reader.get_ehr_person_values('project-foo', 'ehr-bar',
                                              'table-doh', column_name)

        # post-conditions
        expected = {1: 'saLLy', 2: 'Rudy', 3: 'MaTiLdA'}
        self.assertEqual(actual, expected)

        # assert_called_once_with raises AssertionError itself, so it needs no
        # assertEqual wrapper; it also covers the "called exactly once" check.
        mock_query.assert_called_once_with(
            consts.EHR_PERSON_VALUES.format(project='project-foo',
                                            dataset='ehr-bar',
                                            table='table-doh',
                                            field=column_name))
        self.assertEqual(mock_response.call_count, 1)
Esempio n. 8
0
    def test_get_ehr_person_values_birthdates(self, mock_query, mock_response,
                                              mock_fields):
        """
        Numeric values in a TIMESTAMP-typed column are expected back as
        'YYYY-MM-DD' date strings.

        The three mock arguments are injected from outside this method
        (presumably by patch decorators -- confirm against the class).
        """
        # pre conditions
        column_name = 'birth_datetime'
        mock_query.return_value = {}
        mock_response.return_value = [
            {
                consts.PERSON_ID_FIELD: 1,
                column_name: 16520400.0,
            },
            {
                consts.PERSON_ID_FIELD: 2,
                column_name: -662670000.0,
            },
            {
                consts.PERSON_ID_FIELD: 3,
                column_name: 12459600.0,
            },
        ]
        mock_fields.return_value = [{
            'name': column_name,
            'type': consts.TIMESTAMP_TYPE
        }]

        # test
        actual = reader.get_ehr_person_values('project-foo', 'ehr-bar',
                                              'table-doh', column_name)

        # post-conditions
        expected = {1: '1970-07-11', 2: '1949-01-01', 3: '1970-05-25'}
        self.assertEqual(actual, expected)

        # assert_called_once_with raises AssertionError itself, so it needs no
        # assertEqual wrapper; it also covers the "called exactly once" check.
        mock_query.assert_called_once_with(
            consts.EHR_PERSON_VALUES.format(project='project-foo',
                                            dataset='ehr-bar',
                                            table='table-doh',
                                            field=column_name))
        self.assertEqual(mock_response.call_count, 1)