class BQPDRQuestionnaireResponseGenerator(BigQueryGenerator):
    """
    Generate questionnaire module response BQRecords.
    """
    # Lazily created DAO; make_bqrecord() instantiates it on first use.
    ro_dao = None

    def make_bqrecord(self,
                      p_id,
                      module_id,
                      latest=False,
                      convert_to_enum=False):
        """
        Generate a list of questionnaire module BQRecords for the given participant id.

        Every answer field (everything except the bookkeeping fields such as
        'id', 'created', 'authored', ...) is coerced to a string, and the zip
        code fields are truncated to their first three characters.

        :param p_id: participant id
        :param module_id: A questionnaire module id, IE: 'TheBasics'.
        :param latest: if True, only the first response returned by the stored procedure is
                       processed (presumably the most recent one; verify against the procedure).
        :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
        :return: tuple of (BQTable class, list of BQRecord objects). (None, []) is returned when the
                 module id is not supported or the participant has no answers for it.
        """
        if not self.ro_dao:
            self.ro_dao = BigQuerySyncDao(backup=True)

        # Supported questionnaire modules and the table class that describes each of them.
        module_tables = {
            'TheBasics': BQPDRTheBasics,
            'ConsentPII': BQPDRConsentPII,
            'Lifestyle': BQPDRLifestyle,
            'OverallHealth': BQPDROverallHealth,
            'DVEHRSharing': BQPDRDVEHRSharing,
            'EHRConsentPII': BQPDREHRConsentPII,
            'FamilyHistory': BQPDRFamilyHistory,
            'HealthcareAccess': BQPDRHealthcareAccess,
            'PersonalMedicalHistory': BQPDRPersonalMedicalHistory,
        }
        table = module_tables.get(module_id)
        if table is None:
            logging.info(
                'Generator: ignoring questionnaire module id {0}.'.format(
                    module_id))
            return None, list()

        qnans = self.ro_dao.call_proc('sp_get_questionnaire_answers',
                                      args=[module_id, p_id])
        if not qnans:
            return None, list()

        # Bookkeeping fields that are not question answers and must keep their native type.
        skip_fields = ('id', 'created', 'modified', 'authored', 'language',
                       'participant_id', 'questionnaire_response_id')
        # Answer fields holding zip codes, which are reduced to their first 3 characters.
        zip_fields = ('StreetAddress_PIIZIP', 'EmploymentWorkAddress_ZipCode')

        bqrs = list()
        for qnan in qnans:
            bqr = BQRecord(schema=table().get_schema(),
                           data=qnan,
                           convert_to_enum=convert_to_enum)
            bqr.participant_id = p_id  # reset participant_id.

            for field in bqr.get_fields():
                fld_name = field['name']
                if fld_name in skip_fields:
                    continue

                fld_value = getattr(bqr, fld_name, None)
                if fld_value is None:  # Let empty strings pass.
                    continue

                # question responses values need to be coerced to a String type.
                if isinstance(fld_value, (datetime.date, datetime.datetime)):
                    fld_text = fld_value.isoformat()
                else:
                    try:
                        fld_text = str(fld_value)
                    except UnicodeEncodeError:
                        # Only reachable on Python 2, where str() of non-ascii text can fail.
                        fld_text = unicode(fld_value)

                # Truncate zip codes to 3 digits. This works on the coerced text so a
                # non-string value (e.g. an integer zip) cannot break len()/slicing.
                if fld_name in zip_fields and len(fld_text) > 2:
                    fld_text = fld_text[:3]

                setattr(bqr, fld_name, fld_text)

            bqrs.append(bqr)
            if latest:
                break

        return table, bqrs
# Example #2
# 0
class BQParticipantSummaryGenerator(BigQueryGenerator):
    """
  Generate a Participant Summary BQRecord object
  """
    ro_dao = None

    def make_bqrecord(self, p_id, convert_to_enum=False):
        """
        Assemble the Participant Summary BQRecord for one participant.

        The summary dict is seeded from the Participant record; every later section
        is folded into it, in order, with _merge_schema_dicts().

        :param p_id: participant id
        :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
        :return: BQRecord object
        :raises LookupError: when the participant record cannot be found (see _prep_participant).
        """
        if not self.ro_dao:
            self.ro_dao = BigQuerySyncDao(backup=True)

        # Sections read from the database; each is called with (p_id, session).
        db_sections = (
            self._prep_consentpii_answers,  # ConsentPII questionnaire information
            self._prep_modules,  # questionnaire modules, includes gathering extra consents
            self._prep_physical_measurements,  # physical measurements
            self._prep_the_basics,  # race and gender
            self._prep_biobank_info,  # biobank orders and samples
        )
        # Sections computed from the summary gathered so far; each is called with (summary).
        derived_sections = (
            self._calculate_enrollment_status,
            self._calculate_distinct_visits,
        )

        with self.ro_dao.session() as session:
            summary = self._prep_participant(p_id, session)

            for prep_section in db_sections:
                summary = self._merge_schema_dicts(
                    summary, prep_section(p_id, session))

            for calc_section in derived_sections:
                summary = self._merge_schema_dicts(summary,
                                                   calc_section(summary))

            return BQRecord(schema=BQParticipantSummarySchema,
                            data=summary,
                            convert_to_enum=convert_to_enum)

    def _prep_participant(self, p_id, session):
        """
        Collect the summary fields that come straight from the Participant record.

        :param p_id: participant id
        :param session: DAO session object
        :return: dict
        :raises LookupError: when no Participant row exists for p_id.
        """
        participant = session.query(Participant).filter(
            Participant.participantId == p_id).first()
        if not participant:
            raise LookupError(
                'participant lookup for P{0} failed.'.format(p_id))

        hpo_row = session.query(HPO.name).filter(
            HPO.hpoId == participant.hpoId).first()
        org_row = session.query(Organization.externalId).filter(
            Organization.organizationId == participant.organizationId).first()

        withdrawal_status = WithdrawalStatus(participant.withdrawalStatus)
        # An empty withdrawal reason is mapped to enum value 0.
        withdrawal_reason = WithdrawalReason(participant.withdrawalReason or 0)
        suspension_status = SuspensionStatus(participant.suspensionStatus)

        # Either lookup may have found nothing; report None in that case.
        hpo_name = hpo_row.name if hpo_row else None
        org_external_id = org_row.externalId if org_row else None

        return {
            'participant_id': p_id,
            'biobank_id': participant.biobankId,
            'last_modified': participant.lastModified,
            'sign_up_time': participant.signUpTime,
            'hpo': hpo_name,
            'hpo_id': participant.hpoId,
            'organization': org_external_id,
            'organization_id': participant.organizationId,
            'withdrawal_status': str(withdrawal_status),
            'withdrawal_status_id': int(withdrawal_status),
            'withdrawal_reason': str(withdrawal_reason),
            'withdrawal_reason_id': int(withdrawal_reason),
            'withdrawal_time': participant.withdrawalTime,
            'withdrawal_authored': participant.withdrawalAuthored,
            'withdrawal_reason_justification':
                participant.withdrawalReasonJustification,
            'suspension_status': str(suspension_status),
            'suspension_status_id': int(suspension_status),
            'suspension_time': participant.suspensionTime,
            'site': self._lookup_site_name(participant.siteId, session),
            'site_id': participant.siteId,
            # Only a literal True counts as a ghost id.
            'is_ghost_id': int(participant.isGhostId is True),
        }

    def _prep_consentpii_answers(self, p_id, ro_session):
        """
        Build the summary fields taken from the participant's ConsentPII questionnaire.

        Only the first row returned by 'sp_get_questionnaire_answers' is used.

        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict; a minimal {'email': None, 'is_ghost_id': 0} dict when there are no answers.
        """
        answers = self.ro_dao.call_proc('sp_get_questionnaire_answers',
                                        args=['ConsentPII', p_id])
        if not answers or len(answers) == 0:
            # return the minimum data required when we don't have the questionnaire data.
            return {'email': None, 'is_ghost_id': 0}

        # use only most recent response.
        latest = BQRecord(schema=None, data=answers[0])

        # TODO: We may need to use the first response to set consent dates,
        #  unless the consent value changed across response records.

        # State answers carry a 'PIIState_' prefix, which is stripped before upper-casing.
        if latest.StreetAddress_PIIState:
            state = latest.StreetAddress_PIIState.replace('PIIState_',
                                                          '').upper()
        else:
            state = None

        residence = {
            'addr_type': BQStreetAddressTypeEnum.RESIDENCE.name,
            'addr_type_id': BQStreetAddressTypeEnum.RESIDENCE.value,
            'addr_street_address_1': latest.PIIAddress_StreetAddress,
            'addr_street_address_2': latest.PIIAddress_StreetAddress2,
            'addr_city': latest.StreetAddress_PIICity,
            'addr_state': state,
            'addr_zip': latest.StreetAddress_PIIZIP,
            'addr_country': 'US',
        }

        # The ConsentPII consent entry is always recorded with a 'ConsentPermission_Yes' value.
        pii_consent = {
            'consent': 'ConsentPII',
            'consent_id': self._lookup_code_id('ConsentPII', ro_session),
            'consent_date':
                parser.parse(latest.authored).date() if latest.authored else None,
            'consent_value': 'ConsentPermission_Yes',
            'consent_value_id':
                self._lookup_code_id('ConsentPermission_Yes', ro_session),
        }

        return {
            'first_name': latest.PIIName_First,
            'middle_name': latest.PIIName_Middle,
            'last_name': latest.PIIName_Last,
            'date_of_birth': latest.PIIBirthInformation_BirthDate,
            'primary_language': latest.language,
            'email': latest.ConsentPII_EmailAddress,
            'phone_number': latest.PIIContactInformation_Phone,
            'login_phone_number': latest.ConsentPII_VerifiedPrimaryPhoneNumber,
            'addresses': [residence],
            'consents': [pii_consent],
        }

    def _prep_modules(self, p_id, ro_session):
        """
        List every questionnaire response the participant has submitted and gather
        the consent answers of the consent-bearing modules.

        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict with a 'modules' list (and a 'consents' list when any consent
                 answers were found); empty dict when there are no responses.
        """
        # Correlated scalar column: highest concept code id of the response's questionnaire.
        code_id_column = ro_session.query(
            func.max(QuestionnaireConcept.codeId)).filter(
                QuestionnaireResponse.questionnaireId ==
                QuestionnaireConcept.questionnaireId).label('codeId')
        response_query = ro_session.query(
            QuestionnaireResponse.questionnaireResponseId,
            QuestionnaireResponse.authored, QuestionnaireResponse.created,
            QuestionnaireResponse.language, code_id_column).filter(
                QuestionnaireResponse.participantId == p_id).order_by(
                    QuestionnaireResponse.questionnaireResponseId)
        responses = response_query.all()

        # Fall back to the default baseline modules when the config setting is unavailable.
        baseline_modules = ['TheBasics', 'OverallHealth', 'Lifestyle']
        try:
            baseline_modules = config.getSettingList(
                'baseline_ppi_questionnaire_fields')
        except (ValueError, AssertionError):
            # AssertionError: unittest errors because of GCP SDK
            pass

        # module: question code string
        consent_modules = {
            'DVEHRSharing': 'DVEHRSharing_AreYouInterested',
            'EHRConsentPII': 'EHRConsentPII_ConsentPermission',
        }

        data = dict()
        modules = list()
        consents = list()

        for row in responses:
            module_name = self._lookup_code_value(row.codeId, ro_session)
            is_baseline = 1 if module_name in baseline_modules else 0  # Boolean field
            modules.append({
                'mod_module': module_name,
                'mod_baseline_module': is_baseline,
                'mod_authored': row.authored,
                'mod_created': row.created,
                'mod_language': row.language,
                'mod_status': BQModuleStatusEnum.SUBMITTED.name,
                'mod_status_id': BQModuleStatusEnum.SUBMITTED.value,
            })

            # Only modules with consents need their answers fetched.
            question_code = consent_modules.get(module_name)
            if question_code is None:
                continue

            answers = self.ro_dao.call_proc('sp_get_questionnaire_answers',
                                            args=[module_name, p_id])
            if not answers or len(answers) <= 0:
                continue

            # use only most recent questionnaire.
            latest = BQRecord(schema=None, data=answers[0])
            consents.append({
                'consent': question_code,
                'consent_id': self._lookup_code_id(question_code, ro_session),
                'consent_date':
                    parser.parse(latest.authored).date()
                    if latest.authored else None,
                'consent_value': latest[question_code],
                'consent_value_id':
                    self._lookup_code_id(latest[question_code], ro_session),
            })

        if modules:
            data['modules'] = modules
            if consents:
                data['consents'] = consents

        return data

    def _prep_the_basics(self, p_id, ro_session):
        """
        Extract race, gender, education, income, sex and sexual orientation from the
        participant's TheBasics answers.

        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict; empty when the participant has no TheBasics answers.
        """
        answers = self.ro_dao.call_proc('sp_get_questionnaire_answers',
                                        args=['TheBasics', p_id])
        if not answers or len(answers) == 0:
            return {}

        # use only most recent questionnaire.
        latest = BQRecord(schema=None, data=answers[0])

        def coded(label, values):
            """ Pair each answer value with its code id, keyed as <label> / <label>_id. """
            return [{
                label: value,
                label + '_id': self._lookup_code_id(value, ro_session)
            } for value in values]

        data = {}

        # Multi-select answers arrive as a comma separated string.
        if latest.Race_WhatRaceEthnicity:
            data['races'] = coded('race',
                                  latest.Race_WhatRaceEthnicity.split(','))

        genders = list()
        if latest.Gender_GenderIdentity:
            # 'GenderIdentity_AdditionalOptions' is left out; the additional
            # answers are read from their own question just below.
            primary = [
                value for value in latest.Gender_GenderIdentity.split(',')
                if value != 'GenderIdentity_AdditionalOptions'
            ]
            genders.extend(coded('gender', primary))
        if latest.GenderIdentity_SexualityCloserDescription:
            genders.extend(
                coded(
                    'gender',
                    latest.GenderIdentity_SexualityCloserDescription.split(
                        ',')))
        if genders:
            data['genders'] = genders

        # Single-answer questions: summary key -> answer field name.
        single_answers = (
            ('education', 'EducationLevel_HighestGrade'),
            ('income', 'Income_AnnualIncome'),
            ('sex', 'BiologicalSexAtBirth_SexAtBirth'),
            ('sexual_orientation', 'TheBasics_SexualOrientation'),
        )
        for key, answer_field in single_answers:
            answer = getattr(latest, answer_field)
            data[key] = answer
            data[key + '_id'] = self._lookup_code_id(answer, ro_session)

        return data

    def _prep_physical_measurements(self, p_id, ro_session):
        """
        Collect the participant's physical measurement records, newest first.

        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict with a 'pm' list, or an empty dict when there are no records.
        """
        rows = ro_session.query(
            PhysicalMeasurements.created, PhysicalMeasurements.createdSiteId,
            PhysicalMeasurements.final, PhysicalMeasurements.finalized,
            PhysicalMeasurements.finalizedSiteId, PhysicalMeasurements.status,
            PhysicalMeasurements.cancelledTime).filter(
                PhysicalMeasurements.participantId == p_id).order_by(
                    desc(PhysicalMeasurements.created)).all()

        measurements = list()
        for row in rows:
            # row.status is not relied upon; the status is derived from whether
            # the record has a cancelled time.
            if row.cancelledTime:
                status = PhysicalMeasurementsStatus.CANCELLED
            else:
                status = PhysicalMeasurementsStatus.COMPLETED

            measurements.append({
                'pm_status': str(status),
                'pm_status_id': int(status),
                'pm_created': row.created,
                'pm_created_site':
                    self._lookup_site_name(row.createdSiteId, ro_session),
                'pm_created_site_id': row.createdSiteId,
                'pm_finalized': row.finalized,
                'pm_finalized_site':
                    self._lookup_site_name(row.finalizedSiteId, ro_session),
                'pm_finalized_site_id': row.finalizedSiteId,
            })

        if not measurements:
            return {}
        return {'pm': measurements}

    def _prep_biobank_info(self, p_id, ro_session):
        """
        Look up the participant's biobank orders and the samples of each order.

        The query returns one row per ordered sample. Rows are grouped into one
        order dict per biobank_order_id (in first-seen order), each holding its
        samples in a 'bbo_samples' list.

        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict with a 'biobank_orders' list, or an empty dict when there are no orders.
        """
        data = {}

        # Defaults are used when the config settings cannot be read.
        baseline_tests = [
            "1ED04", "1ED10", "1HEP4", "1PST8", "2PST8", "1SST8", "2SST8",
            "1PS08", "1SS08", "1UR10", "1CFD9", "1PXR2", "1UR90", "2ED10"
        ]
        try:
            baseline_tests = config.getSettingList(
                'baseline_sample_test_codes')
        except (ValueError, AssertionError):
            # AssertionError: unittest errors because of GCP SDK
            pass

        dna_tests = ["1ED10", "2ED10", "1ED04", "1SAL", "1SAL2"]
        try:
            dna_tests = config.getSettingList('dna_sample_test_codes')
        except (ValueError, AssertionError):
            # AssertionError: unittest errors because of GCP SDK
            pass

        sql = """
      select bo.biobank_order_id, bo.created, bo.collected_site_id, bo.processed_site_id, bo.finalized_site_id, 
              bos.test, bos.collected, bos.processed, bos.finalized, bo.order_status,
              bss.confirmed as bb_confirmed, bss.created as bb_created, bss.disposed as bb_disposed, 
              bss.status as bb_status, (
                select count(1) from biobank_dv_order bdo where bdo.biobank_order_id = bo.biobank_order_id
              ) as dv_order
        from biobank_order bo inner join biobank_ordered_sample bos on bo.biobank_order_id = bos.order_id
                inner join biobank_order_identifier boi on bo.biobank_order_id = boi.biobank_order_id
                left outer join 
                  biobank_stored_sample bss on boi.`value` = bss.biobank_order_identifier and bos.test = bss.test
        where boi.`system` = 'https://www.pmi-ops.org' and bo.participant_id = :pid
        order by bo.biobank_order_id, bos.test;
    """

        cursor = ro_session.execute(sql, {'pid': p_id})
        # Read every row up front: the site name lookups below use the same session.
        results = list(cursor)

        orders = list()
        # biobank_order_id -> order dict, so each row finds its order without rescanning
        # the list (and without relying on filter() returning a list, which is Python 2 only).
        orders_by_id = dict()

        for row in results:
            order = orders_by_id.get(row.biobank_order_id)
            if order is None:
                # First row seen for this order: create the order record.
                if row.order_status:
                    order_status = BiobankOrderStatus(row.order_status)
                else:
                    order_status = BiobankOrderStatus.UNSET
                order = {
                    'bbo_biobank_order_id': row.biobank_order_id,
                    'bbo_created': row.created,
                    'bbo_status': str(order_status),
                    'bbo_status_id': int(order_status),
                    'bbo_dv_order': 0 if row.dv_order == 0 else 1,  # Boolean field
                    'bbo_collected_site':
                        self._lookup_site_name(row.collected_site_id, ro_session),
                    'bbo_collected_site_id': row.collected_site_id,
                    'bbo_processed_site':
                        self._lookup_site_name(row.processed_site_id, ro_session),
                    'bbo_processed_site_id': row.processed_site_id,
                    'bbo_finalized_site':
                        self._lookup_site_name(row.finalized_site_id, ro_session),
                    'bbo_finalized_site_id': row.finalized_site_id,
                    # Every row carries one sample, so an order always has at least one.
                    'bbo_samples': list(),
                }
                orders.append(order)
                orders_by_id[row.biobank_order_id] = order

            # append the sample to its order
            order['bbo_samples'].append({
                'bbs_test': row.test,
                'bbs_baseline_test':
                    1 if row.test in baseline_tests else 0,  # Boolean field
                'bbs_dna_test': 1 if row.test in dna_tests else 0,  # Boolean field
                'bbs_collected': row.collected,
                'bbs_processed': row.processed,
                'bbs_finalized': row.finalized,
                'bbs_confirmed': row.bb_confirmed,
                # A stored sample with a confirmed time is reported as RECEIVED.
                'bbs_status':
                    str(SampleStatus.RECEIVED) if row.bb_confirmed else None,
                'bbs_status_id':
                    int(SampleStatus.RECEIVED) if row.bb_confirmed else None,
                'bbs_created': row.bb_created,
                'bbs_disposed': row.bb_disposed,
                'bbs_disposed_reason':
                    str(SampleStatus(row.bb_status)) if row.bb_status else None,
                'bbs_disposed_reason_id':
                    int(SampleStatus(row.bb_status)) if row.bb_status else None,
            })

        if orders:
            data['biobank_orders'] = orders
        return data

    def _calculate_enrollment_status(self, summary):
        """
        Calculate the participant's enrollment status from the summary gathered so far.

        Rules, later ones overriding earlier ones:
          * a ConsentPII consent                                  -> INTERESTED
          * an EHR consent of 'ConsentPermission_Yes' or a DV EHR
            sharing consent of 'DVEHRSharing_Yes'                 -> MEMBER
          * completed physical measurements, every baseline module
            submitted and at least one DNA sample                 -> FULL_PARTICIPANT

        :param summary: summary data
        :return: dict with 'enrollment_status' and 'enrollment_status_id' (both None when no
                 rule matched); empty dict when the summary has no 'consents'.
        """
        if 'consents' not in summary:
            return {}

        # Same fallback handling as the other config lookups in this class.
        baseline_modules = ['TheBasics', 'OverallHealth', 'Lifestyle']
        try:
            baseline_modules = config.getSettingList(
                'baseline_ppi_questionnaire_fields')
        except (ValueError, AssertionError):
            # AssertionError: unittest errors because of GCP SDK
            pass

        study_consent = ehr_consent = dvehr_consent = False
        status = None
        # iterate over consents
        for consent in summary['consents']:
            if consent['consent'] == 'ConsentPII':
                study_consent = True
            if consent['consent'] == 'EHRConsentPII_ConsentPermission' and \
                    consent['consent_value'] == 'ConsentPermission_Yes':
                ehr_consent = True
            if consent['consent'] == 'DVEHRSharing_AreYouInterested' and \
                    consent['consent_value'] == 'DVEHRSharing_Yes':
                dvehr_consent = True

        # check physical measurements
        completed_id = int(PhysicalMeasurementsStatus.COMPLETED)
        pm_complete = any(row['pm_status_id'] == completed_id
                          for row in summary.get('pm') or ())

        # The modules list holds one entry per submitted response, so a module answered
        # more than once appears several times. Count distinct baseline modules.
        baseline_module_count = len(
            set(module['mod_module']
                for module in summary.get('modules', ())
                if module['mod_baseline_module'] == 1))

        dna_sample_count = sum(
            1 for order in summary.get('biobank_orders', ())
            for sample in order.get('bbo_samples', ())
            if sample['bbs_dna_test'] == 1)

        if study_consent:
            status = EnrollmentStatus.INTERESTED
        if ehr_consent or dvehr_consent:
            status = EnrollmentStatus.MEMBER
        # NOTE(review): this rule does not look at the consent flags - confirm that is intended.
        if pm_complete and 'modules' in summary and \
                baseline_module_count == len(baseline_modules) and \
                dna_sample_count > 0:
            status = EnrollmentStatus.FULL_PARTICIPANT

        # TODO: Get Enrollment dates for additional fields -> participant_summary_dao.py:499

        # TODO: Calculate EHR status and dates -> participant_summary_dao.py:707

        data = {
            'enrollment_status': str(status) if status else None,
            'enrollment_status_id': int(status) if status else None,
        }
        return data

    def _calculate_distinct_visits(self, summary):
        """
        Count the distinct calendar days on which the participant had a visit.

        A visit day comes from the finalized time of a non-cancelled physical
        measurement, or from the finalized time of a sample that belongs to a
        non-cancelled biobank order.

        :param summary: summary data
        :return: dict with a single 'distinct_visits' count
        """
        def to_local_day(moment):
            """
            Treat the value as UTC and shift it to America/Denver before taking the date,
            so an early and a late visit on the same US day map to the same date.
            """
            shifted = moment.replace(tzinfo=tz.tzutc()).astimezone(
                tz.gettz('America/Denver'))
            return shifted.date()

        # A set de-duplicates the days as they are collected.
        visit_days = set()

        for pm in summary.get('pm', ()):
            if pm['pm_status_id'] == int(PhysicalMeasurementsStatus.CANCELLED):
                continue
            if pm['pm_finalized']:
                visit_days.add(to_local_day(pm['pm_finalized']))

        for order in summary.get('biobank_orders', ()):
            if order['bbo_status_id'] == int(BiobankOrderStatus.CANCELLED):
                continue
            for sample in order.get('bbo_samples', ()):
                finalized = sample.get('bbs_finalized')
                # Only real datetime values can be converted; anything else is skipped.
                if isinstance(finalized, datetime.datetime):
                    visit_days.add(to_local_day(finalized))

        return {'distinct_visits': len(visit_days)}