def test_schema_nested_data(self):
    """
    Round-trip the full fixture, nested records included, through a BQRecord built with a schema.
    """
    bqr = BQRecord(schema=BQTestSchema, data=self.full_data, convert_to_enum=False)
    exported = bqr.to_dict()
    self.assertEqual(self.full_data, exported)

    # Change a nested value in the exported dict and verify the two no longer compare equal.
    exported['nested'][0]['int_field'] = 55
    self.assertNotEqual(self.full_data, exported)
def make_bqrecord(self, p_id, convert_to_enum=False, ps_bqr=None):
    """
    Build a PDR Participant Summary BQRecord object for the given participant id.
    :param p_id: participant id
    :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
    :param ps_bqr: A BQParticipantSummary BQRecord object; generated from p_id when not supplied.
    :return: BQRecord object
    """
    # This record is primarily a subset of the full Participant Summary, so start from the
    # full record and keep what the PDR schema accepts.
    if not ps_bqr:
        ps_bqr = BQParticipantSummaryGenerator().make_bqrecord(p_id, convert_to_enum=convert_to_enum)

    pdr_bqr = BQRecord(schema=BQPDRParticipantSummarySchema, data=ps_bqr.to_dict(),
                       convert_to_enum=convert_to_enum)

    # Keep only the first three characters of the zip code.
    if hasattr(pdr_bqr, 'addr_zip') and pdr_bqr.addr_zip:
        pdr_bqr.addr_zip = pdr_bqr.addr_zip[:3]

    summary = pdr_bqr.to_dict()

    # Populate BQAnalyticsBiospecimenSchema if there are biobank orders.
    if hasattr(ps_bqr, 'biobank_orders'):
        biospecimens = list()
        for order in ps_bqr.biobank_orders:
            # Count the number of DNA tests in this order.
            dna_test_count = sum(
                1 for sample in order.get('bbo_samples', list()) if sample['bbs_dna_test'] == 1)
            biospecimens.append({
                'biosp_status': order.get('bbo_status', None),
                'biosp_status_id': order.get('bbo_status_id', None),
                'biosp_order_time': order.get('bbo_created', None),
                'biosp_isolate_dna': dna_test_count
            })
        summary = self._merge_schema_dicts(summary, {'biospec': biospecimens})

    # Calculate UBR
    summary = self._merge_schema_dicts(summary, self._calculate_ubr(ps_bqr))

    return BQRecord(schema=BQPDRParticipantSummarySchema, data=summary, convert_to_enum=convert_to_enum)
def _prep_the_basics(self, p_id, ro_session):
    """
    Collect race, gender, education, income, sex and sexual orientation answers from the
    participant's TheBasics questionnaire.
    :param p_id: participant id
    :param ro_session: Readonly DAO session object
    :return: dict; empty when no TheBasics answers are found.
    """
    responses = self.ro_dao.call_proc('sp_get_questionnaire_answers', args=['TheBasics', p_id])
    if not responses:
        return {}

    # use only most recent questionnaire.
    answers = BQRecord(schema=None, data=responses[0])
    result = {}

    # Race answers arrive as one comma separated string of codes.
    race_codes = answers.Race_WhatRaceEthnicity
    if race_codes:
        result['races'] = [
            {'race': code, 'race_id': self._lookup_code_id(code, ro_session)}
            for code in race_codes.split(',')
        ]

    # Gender answers; 'GenderIdentity_AdditionalOptions' is skipped and the additional
    # answers are read from a separate question below.
    genders = list()
    gender_codes = answers.Gender_GenderIdentity
    if gender_codes:
        genders.extend(
            {'gender': code, 'gender_id': self._lookup_code_id(code, ro_session)}
            for code in gender_codes.split(',')
            if code != 'GenderIdentity_AdditionalOptions'
        )

    # get additional gender answers, if any.
    additional_codes = answers.GenderIdentity_SexualityCloserDescription
    if additional_codes:
        genders.extend(
            {'gender': code, 'gender_id': self._lookup_code_id(code, ro_session)}
            for code in additional_codes.split(',')
        )

    if genders:
        result['genders'] = genders

    # Single-answer questions: store the code string and its looked-up code id.
    result['education'] = answers.EducationLevel_HighestGrade
    result['education_id'] = self._lookup_code_id(answers.EducationLevel_HighestGrade, ro_session)
    result['income'] = answers.Income_AnnualIncome
    result['income_id'] = self._lookup_code_id(answers.Income_AnnualIncome, ro_session)
    result['sex'] = answers.BiologicalSexAtBirth_SexAtBirth
    result['sex_id'] = self._lookup_code_id(answers.BiologicalSexAtBirth_SexAtBirth, ro_session)
    result['sexual_orientation'] = answers.TheBasics_SexualOrientation
    result['sexual_orientation_id'] = self._lookup_code_id(answers.TheBasics_SexualOrientation, ro_session)

    return result
def make_bqrecord(self, hpo_id, convert_to_enum=False, backup=True):
    """
    Build a BQRecord object for one row of the hpo table.
    :param hpo_id: Primary key value from hpo table.
    :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
    :param backup: if True, get from backup database
    :return: BQRecord object
    """
    dao = BigQuerySyncDao(backup=backup)
    statement = text('select * from hpo where hpo_id = :id')
    with dao.session() as session:
        # hpo_id is bound as a query parameter, not formatted into the SQL text.
        hpo_row = session.execute(statement, {'id': hpo_id}).first()
        hpo_data = dao.to_dict(hpo_row)
        return BQRecord(schema=BQHPOSchema, data=hpo_data, convert_to_enum=convert_to_enum)
def make_bqrecord(self, p_id, convert_to_enum=False):
    """
    Build a Participant Summary BQRecord object for the given participant id.
    :param p_id: participant id
    :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
    :return: BQRecord object
    """
    if not self.ro_dao:
        self.ro_dao = BigQuerySyncDao(backup=True)

    with self.ro_dao.session() as session:
        # Start from the Participant record, then merge each section in this fixed order.
        summary = self._prep_participant(p_id, session)

        section_builders = (
            self._prep_consentpii_answers,     # ConsentPII questionnaire information
            self._prep_modules,                # questionnaire modules, includes gathering extra consents
            self._prep_physical_measurements,  # physical measurements
            self._prep_the_basics,             # race and gender
            self._prep_biobank_info,           # biobank orders and samples
        )
        for build_section in section_builders:
            summary = self._merge_schema_dicts(summary, build_section(p_id, session))

        # Derived values are calculated from the summary merged so far.
        derived_calculators = (
            self._calculate_enrollment_status,  # enrollment status for participant
            self._calculate_distinct_visits,    # distinct visits
        )
        for calculate in derived_calculators:
            summary = self._merge_schema_dicts(summary, calculate(summary))

        return BQRecord(schema=BQParticipantSummarySchema, data=summary, convert_to_enum=convert_to_enum)
def test_record_from_bq_data(self):
    """
    Build a BQRecord from the bq_data fixture with a schema made from schemaFromBQ, and
    expect its dict form to equal the full_data fixture.
    """
    bq_schema = BQSchema(schemaFromBQ)
    bqr = BQRecord(schema=bq_schema, data=self.bq_data)
    self.assertEqual(self.full_data, bqr.to_dict())
def test_schema_with_data(self):
    """
    A BQRecord built from a schema and the partial fixture should export that same data.
    """
    bqr = BQRecord(schema=BQTestSchema, data=self.partial_data)
    exported = bqr.to_dict()
    self.assertEqual(self.partial_data, exported)
def test_schema_no_data(self):
    """
    A BQRecord built with only a schema should export the partial fixture once it has been
    applied through update_values().
    """
    bqr = BQRecord(schema=BQTestSchema, data=None)
    # add partial data
    bqr.update_values(self.partial_data)
    exported = bqr.to_dict()
    self.assertEqual(self.partial_data, exported)
def make_bqrecord(self, p_id, module_id, latest=False, convert_to_enum=False):
    """
    Generate a list of questionnaire module BQRecords for the given participant id.
    :param p_id: participant id
    :param module_id: A questionnaire module id, IE: 'TheBasics'.
    :param latest: only process the most recent response if True
    :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
    :return: BQTable object, List of BQRecord objects. (None, []) is returned when the module id
             is not supported or no answers are found for the participant.
    """
    if not self.ro_dao:
        self.ro_dao = BigQuerySyncDao(backup=True)

    # Supported questionnaire modules and the table class used for each one.
    module_tables = {
        'TheBasics': BQPDRTheBasics,
        'ConsentPII': BQPDRConsentPII,
        'Lifestyle': BQPDRLifestyle,
        'OverallHealth': BQPDROverallHealth,
        'DVEHRSharing': BQPDRDVEHRSharing,
        'EHRConsentPII': BQPDREHRConsentPII,
        'FamilyHistory': BQPDRFamilyHistory,
        'HealthcareAccess': BQPDRHealthcareAccess,
        'PersonalMedicalHistory': BQPDRPersonalMedicalHistory,
    }
    table = module_tables.get(module_id)
    if table is None:
        logging.info('Generator: ignoring questionnaire module id {0}.'.format(module_id))
        return None, list()

    qnans = self.ro_dao.call_proc('sp_get_questionnaire_answers', args=[module_id, p_id])
    if not qnans:
        return None, list()

    # Fields that are left untouched; every other field is coerced to a string below.
    passthrough_fields = ('id', 'created', 'modified', 'authored', 'language',
                          'participant_id', 'questionnaire_response_id')
    # Zip code fields are truncated to their first 3 characters.
    zip_fields = ('StreetAddress_PIIZIP', 'EmploymentWorkAddress_ZipCode')

    bqrs = list()
    for qnan in qnans:
        bqr = BQRecord(schema=table().get_schema(), data=qnan, convert_to_enum=convert_to_enum)
        bqr.participant_id = p_id  # reset participant_id.

        for field in bqr.get_fields():
            fld_name = field['name']
            if fld_name in passthrough_fields:
                continue

            fld_value = getattr(bqr, fld_name, None)
            if fld_value is None:  # Let empty strings pass.
                continue

            # question responses values need to be coerced to a String type.
            if isinstance(fld_value, (datetime.date, datetime.datetime)):
                answer = fld_value.isoformat()
            else:
                try:
                    answer = str(fld_value)
                except UnicodeEncodeError:
                    # NOTE(review): this fallback looks like Python 2 handling of non-ASCII
                    # text ('unicode' is not a Python 3 builtin) - confirm the target runtime.
                    answer = unicode(fld_value)

            # Truncate zip codes to 3 digits. This slices the coerced string, not the raw
            # value, so a non-string zip answer can no longer fail on len()/slicing.
            if fld_name in zip_fields:
                answer = answer[:3]

            setattr(bqr, fld_name, answer)

        bqrs.append(bqr)
        if latest:
            # Only the first record returned by the stored procedure is wanted.
            break

    return table, bqrs
def _prep_modules(self, p_id, ro_session):
    """
    Find all questionnaire modules the participant has completed and loop through them.
    :param p_id: participant id
    :param ro_session: Readonly DAO session object
    :return: dict with a 'modules' list (one entry per questionnaire response) and a 'consents'
             list (entries for the DVEHRSharing / EHRConsentPII modules); a key is omitted
             when its list is empty.
    """
    # Correlated sub-select: the max concept code id of the questionnaire behind each response.
    code_id_query = ro_session.query(func.max(QuestionnaireConcept.codeId)).\
        filter(QuestionnaireResponse.questionnaireId == QuestionnaireConcept.questionnaireId).\
        label('codeId')
    query = ro_session.query(
        QuestionnaireResponse.questionnaireResponseId,
        QuestionnaireResponse.authored,
        QuestionnaireResponse.created,
        QuestionnaireResponse.language,
        code_id_query).\
        filter(QuestionnaireResponse.participantId == p_id).\
        order_by(QuestionnaireResponse.questionnaireResponseId)
    results = query.all()

    data = dict()
    modules = list()
    consents = list()

    # Default baseline modules, replaced by the configured list when it can be read.
    baseline_modules = ['TheBasics', 'OverallHealth', 'Lifestyle']
    try:
        baseline_modules = config.getSettingList('baseline_ppi_questionnaire_fields')
    except (ValueError, AssertionError):
        # Keep the defaults. AssertionError: unittest errors because of GCP SDK.
        pass

    consent_modules = {
        # module: question code string
        'DVEHRSharing': 'DVEHRSharing_AreYouInterested',
        'EHRConsentPII': 'EHRConsentPII_ConsentPermission',
    }

    # Most recent answers per consent module (None when there are none). Cached so the
    # stored procedure runs once per module instead of once per response row.
    latest_answers = dict()

    for row in results:
        module_name = self._lookup_code_value(row.codeId, ro_session)
        modules.append({
            'mod_module': module_name,
            'mod_baseline_module': 1 if module_name in baseline_modules else 0,  # Boolean field
            'mod_authored': row.authored,
            'mod_created': row.created,
            'mod_language': row.language,
            'mod_status': BQModuleStatusEnum.SUBMITTED.name,
            'mod_status_id': BQModuleStatusEnum.SUBMITTED.value,
        })

        # check if this is a module with consents.
        if module_name not in consent_modules:
            continue

        if module_name not in latest_answers:
            qnans = self.ro_dao.call_proc('sp_get_questionnaire_answers', args=[module_name, p_id])
            # use only most recent questionnaire.
            latest_answers[module_name] = BQRecord(schema=None, data=qnans[0]) if qnans else None

        qnan = latest_answers[module_name]
        if qnan is None:
            continue

        # NOTE(review): one consent entry is appended per response row of a consent module,
        # each built from the same most recent answers - confirm the repeats are intended.
        question_code = consent_modules[module_name]
        consents.append({
            'consent': question_code,
            'consent_id': self._lookup_code_id(question_code, ro_session),
            'consent_date': parser.parse(qnan.authored).date() if qnan.authored else None,
            'consent_value': qnan[question_code],
            'consent_value_id': self._lookup_code_id(qnan[question_code], ro_session),
        })

    if modules:
        data['modules'] = modules
    if consents:
        data['consents'] = consents

    return data
def _prep_consentpii_answers(self, p_id, ro_session):
    """
    Get participant information from the ConsentPII questionnaire
    :param p_id: participant id
    :param ro_session: Readonly DAO session object
    :return: dict of name, contact, address and consent values; a minimal dict when the
             participant has no ConsentPII answers.
    """
    responses = self.ro_dao.call_proc('sp_get_questionnaire_answers', args=['ConsentPII', p_id])
    if not responses:
        # return the minimum data required when we don't have the questionnaire data.
        return {'email': None, 'is_ghost_id': 0}

    # use only most recent response.
    answers = BQRecord(schema=None, data=responses[0])
    # TODO: We may need to use the first response to set consent dates,
    #  unless the consent value changed across response records.

    # The state answer carries a 'PIIState_' prefix; strip it and upper-case what is left.
    if answers.StreetAddress_PIIState:
        state = answers.StreetAddress_PIIState.replace('PIIState_', '').upper()
    else:
        state = None

    residence = {
        'addr_type': BQStreetAddressTypeEnum.RESIDENCE.name,
        'addr_type_id': BQStreetAddressTypeEnum.RESIDENCE.value,
        'addr_street_address_1': answers.PIIAddress_StreetAddress,
        'addr_street_address_2': answers.PIIAddress_StreetAddress2,
        'addr_city': answers.StreetAddress_PIICity,
        'addr_state': state,
        'addr_zip': answers.StreetAddress_PIIZIP,
        'addr_country': 'US'
    }

    if answers.authored:
        consent_date = parser.parse(answers.authored).date()
    else:
        consent_date = None

    # The consent value is always recorded as 'ConsentPermission_Yes' here.
    primary_consent = {
        'consent': 'ConsentPII',
        'consent_id': self._lookup_code_id('ConsentPII', ro_session),
        'consent_date': consent_date,
        'consent_value': 'ConsentPermission_Yes',
        'consent_value_id': self._lookup_code_id('ConsentPermission_Yes', ro_session),
    }

    return {
        'first_name': answers.PIIName_First,
        'middle_name': answers.PIIName_Middle,
        'last_name': answers.PIIName_Last,
        'date_of_birth': answers.PIIBirthInformation_BirthDate,
        'primary_language': answers.language,
        'email': answers.ConsentPII_EmailAddress,
        'phone_number': answers.PIIContactInformation_Phone,
        'login_phone_number': answers.ConsentPII_VerifiedPrimaryPhoneNumber,
        'addresses': [residence],
        'consents': [primary_consent],
    }