def bq_hpo_update(project_id=None):
    """
    Generate all new HPO records for BQ. Since this is called from a tool, it is not deferred.
    :param project_id: Override the project_id
    """
    ro_dao = BigQuerySyncDao(backup=True)
    with ro_dao.session() as ro_session:
        gen = BQHPOGenerator()
        results = ro_session.query(HPO.hpoId).all()

        w_dao = BigQuerySyncDao()
        with w_dao.session() as w_session:
            logging.info('BQ HPO table: rebuilding {0} records...'.format(len(results)))
            for row in results:
                bqr = gen.make_bqrecord(row.hpoId)
                gen.save_bqrecord(row.hpoId, bqr, bqtable=BQHPO, w_dao=w_dao,
                                  w_session=w_session, project_id=project_id)
def bq_hpo_update_by_id(hpo_id):
    """ Generate a single updated HPO record for BQ. """
    gen = BQHPOGenerator()
    # get from main database in case the backup is not synced in time
    bqr = gen.make_bqrecord(hpo_id, backup=False)
    w_dao = BigQuerySyncDao()
    with w_dao.session() as w_session:
        gen.save_bqrecord(hpo_id, bqr, bqtable=BQHPO, w_dao=w_dao, w_session=w_session)
def bq_organization_update_by_id(org_id):
    """ Generate a single updated Organization record for BQ. """
    gen = BQOrganizationGenerator()
    # get from main database in case the backup is not synced in time
    bqr = gen.make_bqrecord(org_id, backup=False)
    w_dao = BigQuerySyncDao()
    with w_dao.session() as w_session:
        gen.save_bqrecord(org_id, bqr, bqtable=BQOrganization, w_dao=w_dao, w_session=w_session)
def bq_site_update_by_id(site_id):
    """ Generate a single updated Site record for BQ. """
    gen = BQSiteGenerator()
    # get from main database in case the backup is not synced in time
    bqr = gen.make_bqrecord(site_id, backup=False)
    w_dao = BigQuerySyncDao()
    with w_dao.session() as w_session:
        gen.save_bqrecord(site_id, bqr, bqtable=BQSite, w_dao=w_dao, w_session=w_session)
def deferrered_bq_codebook_update():
    """
    Generate all new Codebook records for BQ.
    """
    ro_dao = BigQuerySyncDao(backup=True)
    with ro_dao.session() as ro_session:
        gen = BQCodeGenerator()
        results = ro_session.query(Code.codeId).all()

        w_dao = BigQuerySyncDao()
        with w_dao.session() as w_session:
            logging.info('Code table: rebuilding {0} records...'.format(len(results)))
            for row in results:
                bqr = gen.make_bqrecord(row.codeId)
                gen.save_bqrecord(row.codeId, bqr, bqtable=BQCode, w_dao=w_dao, w_session=w_session)
def deferred_bq_questionnaire_update(p_id, qr_id):
    """
    Generate a BQ questionnaire response record from the given p_id and questionnaire response id.
    :param p_id: participant id
    :param qr_id: A questionnaire response id.
    """
    # determine what the module id is for the given questionnaire response id.
    sql = text("""
        select c.value from questionnaire_response qr
               inner join questionnaire_concept qc on qr.questionnaire_id = qc.questionnaire_id
               inner join code c on qc.code_id = c.code_id
        where qr.questionnaire_response_id = :qr_id
    """)

    ro_dao = BigQuerySyncDao(backup=True)
    w_dao = BigQuerySyncDao()
    qr_gen = BQPDRQuestionnaireResponseGenerator()
    module_id = None

    with ro_dao.session() as ro_session:
        results = ro_session.execute(sql, {'qr_id': qr_id})
        if results:
            for row in results:
                module_id = row.value
                break

        if not module_id:
            logging.warning(
                'No questionnaire module id found for questionnaire response id {0}'.format(qr_id))
            return

        table, bqrs = qr_gen.make_bqrecord(p_id, module_id, latest=True)
        with w_dao.session() as w_session:
            for bqr in bqrs:
                qr_gen.save_bqrecord(qr_id, bqr, bqtable=table, w_dao=w_dao, w_session=w_session)
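# Usage sketch (an assumption, not part of this module): the function name suggests it
# runs as a deferred task; with the standard App Engine deferred library, enqueueing it
# would look like this:
#
#     from google.appengine.ext import deferred
#     deferred.defer(deferred_bq_questionnaire_update, p_id, qr_id)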
def make_bqrecord(self, hpo_id, convert_to_enum=False, backup=True):
    """
    Build a BQRecord object from the given hpo id.
    :param hpo_id: Primary key value from hpo table.
    :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
    :param backup: if True, get from backup database
    :return: BQRecord object
    """
    ro_dao = BigQuerySyncDao(backup=backup)
    with ro_dao.session() as ro_session:
        row = ro_session.execute(text('select * from hpo where hpo_id = :id'), {'id': hpo_id}).first()
        data = ro_dao.to_dict(row)
        return BQRecord(schema=BQHPOSchema, data=data, convert_to_enum=convert_to_enum)
def rebuild_bigquery_handler():
    """
    Cron job handler; sets up queued tasks to rebuild the BigQuery data.
    # TODO: Future: Currently rebuild is synchronous, this could be asynchronous if we
    #       passed a set of participant ids to each task. GET requests are limited to 2,083
    #       characters, so we probably would have to create a temp MySQL table to store
    #       batches of participant ids and then we could pass a batch id in the GET request
    #       (see the sketch after this function).
    """
    timestamp = datetime.utcnow()
    batch_size = 300

    ro_dao = BigQuerySyncDao(backup=True)
    with ro_dao.session() as ro_session:
        total_rows = ro_session.query(func.count(Participant.participantId)).first()[0]
        count = int(math.ceil(float(total_rows) / float(batch_size)))
        logging.info('Calculated {0} tasks from {1} records and a batch size of {2}.'.format(
            count, total_rows, batch_size))

        while count > 0:
            task = taskqueue.add(
                queue_name='bigquery-rebuild',
                url='/rdr/v1/BQRebuildTaskApi',
                method='GET',
                target='worker',
                params={'timestamp': timestamp, 'limit': batch_size})
            logging.info('Task {} enqueued, ETA {}.'.format(task.name, task.eta))
            count -= 1

    #
    # Process tables that don't need to be broken up into smaller tasks.
    #
    # Code Table
    deferrered_bq_codebook_update()
    # HPO Table
    bq_hpo_update()
    # Organization Table
    bq_organization_update()
    # Site Table
    bq_site_update()
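# One possible shape of the batched variant the TODO above describes (a sketch, not the
# current implementation): stage participant ids in a temp MySQL table and hand each task
# only a batch id, keeping the GET request small. store_batch() is a hypothetical helper
# that would persist one batch of ids keyed by batch_no.
#
#     def enqueue_participant_batches(ro_session, batch_size=300):
#         p_ids = [r.participantId for r in ro_session.query(Participant.participantId).all()]
#         for batch_no, start in enumerate(range(0, len(p_ids), batch_size)):
#             store_batch(batch_no, p_ids[start:start + batch_size])  # hypothetical helper
#             taskqueue.add(queue_name='bigquery-rebuild', url='/rdr/v1/BQRebuildTaskApi',
#                           method='GET', target='worker', params={'batch_id': batch_no})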
def rebuild_bq_participant(p_id, ps_bqgen=None, pdr_bqgen=None):
    """
    Rebuild a BQ record for a specific participant
    :param p_id: participant id
    :param ps_bqgen: BQParticipantSummaryGenerator object
    :param pdr_bqgen: BQPDRParticipantSummaryGenerator object
    :return: BQRecord object
    """
    # Allow for batch requests to rebuild participant summary data.
    if not ps_bqgen:
        ps_bqgen = BQParticipantSummaryGenerator()
    if not pdr_bqgen:
        from dao.bq_pdr_participant_summary_dao import BQPDRParticipantSummaryGenerator
        pdr_bqgen = BQPDRParticipantSummaryGenerator()

    ps_bqr = ps_bqgen.make_bqrecord(p_id)
    # Since the PDR participant summary is primarily a subset of the Participant Summary, call the full
    # Participant Summary generator and take what we need from it.
    pdr_bqr = pdr_bqgen.make_bqrecord(p_id, ps_bqr=ps_bqr)

    w_dao = BigQuerySyncDao()
    with w_dao.session() as w_session:
        # save the participant summary record.
        ps_bqgen.save_bqrecord(p_id, ps_bqr, bqtable=BQParticipantSummary,
                               w_dao=w_dao, w_session=w_session)
        # save the PDR participant summary record
        pdr_bqgen.save_bqrecord(p_id, pdr_bqr, bqtable=BQPDRParticipantSummary,
                                w_dao=w_dao, w_session=w_session)
    return ps_bqr
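# Usage sketch: batch callers (see rebuild_bq_participant_task below) pass in shared
# generator instances so they are constructed once per batch rather than once per
# participant:
#
#     ps_gen = BQParticipantSummaryGenerator()
#     pdr_gen = BQPDRParticipantSummaryGenerator()
#     for p_id in participant_ids:
#         rebuild_bq_participant(p_id, ps_bqgen=ps_gen, pdr_bqgen=pdr_gen)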
def get_fields(self):
    """
    Look up a participant id who has submitted this module and then get the module response
    answers to use for creating the schema.
    :return: list of fields
    """
    fields = list()
    self._fields = list()
    # Standard fields that must be in every BigQuery table.
    fields.append({'name': 'id', 'type': BQFieldTypeEnum.INTEGER.name,
                   'mode': BQFieldModeEnum.REQUIRED.name})
    fields.append({'name': 'created', 'type': BQFieldTypeEnum.DATETIME.name,
                   'mode': BQFieldModeEnum.REQUIRED.name})
    fields.append({'name': 'modified', 'type': BQFieldTypeEnum.DATETIME.name,
                   'mode': BQFieldModeEnum.REQUIRED.name})
    fields.append({'name': 'authored', 'type': BQFieldTypeEnum.DATETIME.name,
                   'mode': BQFieldModeEnum.NULLABLE.name})
    fields.append({'name': 'language', 'type': BQFieldTypeEnum.STRING.name,
                   'mode': BQFieldModeEnum.NULLABLE.name})
    fields.append({'name': 'participant_id', 'type': BQFieldTypeEnum.INTEGER.name,
                   'mode': BQFieldModeEnum.REQUIRED.name})
    fields.append({'name': 'questionnaire_response_id', 'type': BQFieldTypeEnum.INTEGER.name,
                   'mode': BQFieldModeEnum.REQUIRED.name})

    dao = BigQuerySyncDao()
    _sql_term = text("""
        select convert(qh.resource using utf8) as resource
          from questionnaire_concept qc
               inner join code c on qc.code_id = c.code_id
               inner join questionnaire_history qh
                       on qc.questionnaire_id = qh.questionnaire_id
                          and qc.questionnaire_version = qh.version
        where c.value = :mod
        order by qh.created desc limit 1;
    """)

    with dao.session() as session:
        # get a participant id that has submitted the module
        result = session.execute(_sql_term, {'mod': self._module}).first()
        if not result:
            return fields

        qn_mod = json.loads(result[0])
        if 'resourceType' not in qn_mod or 'group' not in qn_mod:
            return fields

        for qn in qn_mod['group']['question']:
            # To support 1) a user-supplied answer, 2) a skipped question, or 3) a question the
            # user was never shown, we have to store all question responses as Strings in BigQuery.
            name = qn['concept'][0].get('code', None)
            if not name or name in self._excluded_fields:
                continue

            field = dict()
            field['name'] = name
            field['type'] = BQFieldTypeEnum.STRING.name
            field['mode'] = BQFieldModeEnum.NULLABLE.name
            field['enum'] = None
            fields.append(field)

    return fields
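# Illustrative result shape (the question code below is hypothetical): each dict maps
# directly to one BigQuery schema field, so every module answer column comes out as a
# nullable string alongside the fixed columns appended above, e.g.
#
#     {'name': 'ExampleQuestion_Code', 'type': 'STRING', 'mode': 'NULLABLE', 'enum': None}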
def rebuild_bq_participant_task(timestamp, limit=0):
    """
    Loop through all participants and generate the BQ participant summary data and
    store it in the bigquery_sync table.
    Warning: this will force a rebuild and eventually a re-sync for every participant record.
    :param timestamp: datetime: used to rebuild any records older than this.
    :param limit: integer: 0 = all, otherwise only process records until limit has been reached.
    """
    if not limit or not isinstance(limit, int) or limit < 0:
        raise ValueError('invalid limit value.')
    if not timestamp:
        timestamp = datetime.utcnow()

    # try:
    #     app_id = app_identity.get_application_id()
    # except AttributeError:
    #     app_id = 'localhost'

    ro_dao = BigQuerySyncDao(backup=True)
    ps_bqgen = BQParticipantSummaryGenerator()
    pdr_bqgen = BQPDRParticipantSummaryGenerator()
    mod_bqgen = BQPDRQuestionnaireResponseGenerator()

    with ro_dao.session() as ro_session:
        count = 0
        while limit:
            limit -= 1
            # Collect all participants who do not have a PS generated yet or where the
            # modified date is less than the timestamp.
            sq = ro_session.query(Participant.participantId, BigQuerySync.id, BigQuerySync.modified).\
                    outerjoin(BigQuerySync, and_(
                        BigQuerySync.pk_id == Participant.participantId,
                        BigQuerySync.tableId.in_(('participant_summary', 'pdr_participant')))).\
                    subquery()
            query = ro_session.query(sq.c.participant_id.label('participantId')).\
                        filter(or_(sq.c.id == None, sq.c.modified < timestamp)).\
                        order_by(sq.c.modified)
            query = query.limit(1)
            # sql = dao.query_to_text(query)
            results = query.all()

            for row in results:
                count += 1
                # All logic for generating a participant summary is here.
                rebuild_bq_participant(row.participantId, ps_bqgen=ps_bqgen, pdr_bqgen=pdr_bqgen)

                # Generate participant questionnaire module response data
                modules = (BQPDRConsentPII, BQPDRTheBasics, BQPDRLifestyle,
                           BQPDROverallHealth, BQPDREHRConsentPII, BQPDRDVEHRSharing)
                for module in modules:
                    mod = module()
                    table, mod_bqrs = mod_bqgen.make_bqrecord(
                        row.participantId, mod.get_schema().get_module_name())
                    if not table:
                        continue

                    w_dao = BigQuerySyncDao()
                    with w_dao.session() as w_session:
                        for mod_bqr in mod_bqrs:
                            mod_bqgen.save_bqrecord(
                                mod_bqr.questionnaire_response_id, mod_bqr, bqtable=table,
                                w_dao=w_dao, w_session=w_session)

    logging.info('Rebuilt BigQuery data for {0} participants.'.format(count))
def sync_bigquery_handler(dryrun=False):
    """
    Cron entry point, sync MySQL records to BigQuery.
    :param dryrun: Don't send to BigQuery if True
    Links for Streaming Inserts:
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery
        # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency
        # https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
        # https://cloud.google.com/bigquery/troubleshooting-errors#streaming
    """
    ro_dao = BigQuerySyncDao(backup=True)
    bq = build('bigquery', 'v2') if dryrun is False else None
    total_inserts = 0
    # Google says a maximum of 500 records in a batch, but they are most likely talking about
    # log shipping; one participant summary record is larger than their expected average log
    # record size, so we use a smaller batch.
    batch_size = 250
    # Only run for 110 seconds before exiting, so we don't have overlapping cron jobs.
    run_limit = (2 * 60) - 10
    limit_reached = False
    start_ts = datetime.now()
    table_list = list()

    with ro_dao.session() as ro_session:
        tables = ro_session.query(BigQuerySync.projectId, BigQuerySync.datasetId, BigQuerySync.tableId).\
                    distinct(BigQuerySync.projectId, BigQuerySync.datasetId, BigQuerySync.tableId).all()
        # Shuffle the table list so we don't always process it in the same order and get
        # stuck processing the same table each run.
        for table_row in tables:
            table_list.append((table_row.projectId, table_row.datasetId, table_row.tableId))
        random.shuffle(table_list)

        for item in table_list:
            project_id = item[0]
            dataset_id = item[1]
            table_id = item[2]
            count = 0
            errors = ''
            error_count = 0
            try:
                # pylint: disable=unused-variable
                max_created, max_modified = _get_remote_max_timestamps(project_id, dataset_id, table_id) \
                    if dryrun is False else (datetime.min, datetime.min)
            except httplib.HTTPException:
                logging.warning('Failed to retrieve max date values from bigquery, skipping this run.')
                return 0

            # figure out how many records need to be sync'd and divide into slices.
            total_rows = ro_session.query(BigQuerySync.id). \
                filter(BigQuerySync.projectId == project_id, BigQuerySync.tableId == table_id,
                       BigQuerySync.datasetId == dataset_id,
                       BigQuerySync.modified >= max_modified).count()
            if total_rows == 0:
                logging.info('No rows to sync for {0}.{1}.'.format(dataset_id, table_id))
                continue
            slices = int(math.ceil(float(total_rows) / float(batch_size)))
            slice_num = 0

            while slice_num < slices:
                results = ro_session.query(BigQuerySync.id, BigQuerySync.created, BigQuerySync.modified). \
                    filter(BigQuerySync.projectId == project_id, BigQuerySync.tableId == table_id,
                           BigQuerySync.datasetId == dataset_id, BigQuerySync.modified >= max_modified).\
                    order_by(BigQuerySync.modified).limit(batch_size).all()
                slice_num += 1
                batch = list()

                for row in results:
                    count += 1
                    max_modified = row.modified
                    rec = ro_session.query(BigQuerySync.resource).\
                            filter(BigQuerySync.id == row.id).first()
                    if isinstance(rec.resource, (str, unicode)):
                        rec_data = json.loads(rec.resource)
                    else:
                        rec_data = rec.resource
                    rec_data['id'] = row.id
                    rec_data['created'] = row.created.isoformat()
                    rec_data['modified'] = row.modified.isoformat()
                    data = {'insertId': str(row.id), 'json': rec_data}
                    batch.append(data)

                if len(batch) > 0:
                    result, resp = insert_batch_into_bq(
                        bq, project_id, dataset_id, table_id, batch, dryrun)
                    if result is False:
                        # errors are cumulative, so wait until the end of the while
                        # statement before printing errors.
                        errors = resp
                        error_count += len(resp['insertErrors'])

                # Don't exceed our execution time limit.
                if (datetime.now() - start_ts).seconds > run_limit:
                    logging.info('Hit {0} second time limit.'.format(run_limit))
                    limit_reached = True
                    break

            if errors:
                logging.error(errors)

            total_inserts += (count - error_count)
            msg = '{0} inserts and {1} errors for {2}.{3}.{4}'.format(
                count, error_count, project_id, dataset_id, table_id)
            if error_count == 0:
                logging.info(msg)
            else:
                logging.warning(msg)

            if limit_reached:
                break

    return total_inserts
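# A minimal sketch of what insert_batch_into_bq could look like (an assumption; the real
# helper is defined elsewhere in this module). It wraps the tabledata.insertAll streaming
# endpoint linked in the docstring above, honors dryrun, and surfaces any insertErrors to
# the caller as a (success, response) pair:
#
#     def insert_batch_into_bq(bq, project_id, dataset_id, table_id, batch, dryrun):
#         if dryrun:
#             return True, None
#         body = {'kind': 'bigquery#tableDataInsertAllRequest', 'rows': batch}
#         resp = bq.tabledata().insertAll(projectId=project_id, datasetId=dataset_id,
#                                         tableId=table_id, body=body).execute()
#         if 'insertErrors' in resp:
#             return False, resp
#         return True, resp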
class BQParticipantSummaryGenerator(BigQueryGenerator):
    """
    Generate a Participant Summary BQRecord object
    """
    ro_dao = None

    def make_bqrecord(self, p_id, convert_to_enum=False):
        """
        Build a Participant Summary BQRecord object for the given participant id.
        :param p_id: participant id
        :param convert_to_enum: If schema field description includes Enum class info, convert value to Enum.
        :return: BQRecord object
        """
        if not self.ro_dao:
            self.ro_dao = BigQuerySyncDao(backup=True)

        with self.ro_dao.session() as session:
            # prep participant info from Participant record
            summary = self._prep_participant(p_id, session)
            # prep ConsentPII questionnaire information
            summary = self._merge_schema_dicts(summary, self._prep_consentpii_answers(p_id, session))
            # prep questionnaire modules information, includes gathering extra consents.
            summary = self._merge_schema_dicts(summary, self._prep_modules(p_id, session))
            # prep physical measurements
            summary = self._merge_schema_dicts(summary, self._prep_physical_measurements(p_id, session))
            # prep race and gender
            summary = self._merge_schema_dicts(summary, self._prep_the_basics(p_id, session))
            # prep biobank orders and samples
            summary = self._merge_schema_dicts(summary, self._prep_biobank_info(p_id, session))
            # calculate enrollment status for participant
            summary = self._merge_schema_dicts(summary, self._calculate_enrollment_status(summary))
            # calculate distinct visits
            summary = self._merge_schema_dicts(summary, self._calculate_distinct_visits(summary))

            return BQRecord(schema=BQParticipantSummarySchema, data=summary,
                            convert_to_enum=convert_to_enum)

    def _prep_participant(self, p_id, session):
        """
        Get the information from the participant record
        :param p_id: participant id
        :param session: DAO session object
        :return: dict
        """
        p = session.query(Participant).filter(Participant.participantId == p_id).first()
        if not p:
            raise LookupError('participant lookup for P{0} failed.'.format(p_id))

        hpo = session.query(HPO.name).filter(HPO.hpoId == p.hpoId).first()
        organization = session.query(Organization.externalId).\
                filter(Organization.organizationId == p.organizationId).first()

        withdrawal_status = WithdrawalStatus(p.withdrawalStatus)
        withdrawal_reason = WithdrawalReason(p.withdrawalReason if p.withdrawalReason else 0)
        suspension_status = SuspensionStatus(p.suspensionStatus)

        data = {
            'participant_id': p_id,
            'biobank_id': p.biobankId,
            'last_modified': p.lastModified,
            'sign_up_time': p.signUpTime,
            'hpo': hpo.name if hpo else None,
            'hpo_id': p.hpoId,
            'organization': organization.externalId if organization else None,
            'organization_id': p.organizationId,
            'withdrawal_status': str(withdrawal_status),
            'withdrawal_status_id': int(withdrawal_status),
            'withdrawal_reason': str(withdrawal_reason),
            'withdrawal_reason_id': int(withdrawal_reason),
            'withdrawal_time': p.withdrawalTime,
            'withdrawal_authored': p.withdrawalAuthored,
            'withdrawal_reason_justification': p.withdrawalReasonJustification,
            'suspension_status': str(suspension_status),
            'suspension_status_id': int(suspension_status),
            'suspension_time': p.suspensionTime,
            'site': self._lookup_site_name(p.siteId, session),
            'site_id': p.siteId,
            'is_ghost_id': 1 if p.isGhostId is True else 0
        }
        return data

    def _prep_consentpii_answers(self, p_id, ro_session):
        """
        Get participant information from the ConsentPII questionnaire
        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict
        """
        qnans = self.ro_dao.call_proc('sp_get_questionnaire_answers', args=['ConsentPII', p_id])
        if not qnans or len(qnans) == 0:
            # return the minimum data required when we don't have the questionnaire data.
            return {'email': None, 'is_ghost_id': 0}
        qnan = BQRecord(schema=None, data=qnans[0])  # use only most recent response.

        # TODO: We may need to use the first response to set consent dates,
        #       unless the consent value changed across response records.
        data = {
            'first_name': qnan.PIIName_First,
            'middle_name': qnan.PIIName_Middle,
            'last_name': qnan.PIIName_Last,
            'date_of_birth': qnan.PIIBirthInformation_BirthDate,
            'primary_language': qnan.language,
            'email': qnan.ConsentPII_EmailAddress,
            'phone_number': qnan.PIIContactInformation_Phone,
            'login_phone_number': qnan.ConsentPII_VerifiedPrimaryPhoneNumber,
            'addresses': [{
                'addr_type': BQStreetAddressTypeEnum.RESIDENCE.name,
                'addr_type_id': BQStreetAddressTypeEnum.RESIDENCE.value,
                'addr_street_address_1': qnan.PIIAddress_StreetAddress,
                'addr_street_address_2': qnan.PIIAddress_StreetAddress2,
                'addr_city': qnan.StreetAddress_PIICity,
                'addr_state': qnan.StreetAddress_PIIState.replace('PIIState_', '').upper()
                              if qnan.StreetAddress_PIIState else None,
                'addr_zip': qnan.StreetAddress_PIIZIP,
                'addr_country': 'US'
            }],
            'consents': [
                {
                    'consent': 'ConsentPII',
                    'consent_id': self._lookup_code_id('ConsentPII', ro_session),
                    'consent_date': parser.parse(qnan.authored).date() if qnan.authored else None,
                    'consent_value': 'ConsentPermission_Yes',
                    'consent_value_id': self._lookup_code_id('ConsentPermission_Yes', ro_session),
                },
            ]
        }
        return data

    def _prep_modules(self, p_id, ro_session):
        """
        Find all questionnaire modules the participant has completed and loop through them.
        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict
        """
        code_id_query = ro_session.query(func.max(QuestionnaireConcept.codeId)).\
                filter(QuestionnaireResponse.questionnaireId ==
                       QuestionnaireConcept.questionnaireId).label('codeId')
        query = ro_session.query(
                    QuestionnaireResponse.questionnaireResponseId, QuestionnaireResponse.authored,
                    QuestionnaireResponse.created, QuestionnaireResponse.language, code_id_query).\
                filter(QuestionnaireResponse.participantId == p_id).\
                order_by(QuestionnaireResponse.questionnaireResponseId)
        # sql = self.dao.query_to_text(query)
        results = query.all()

        data = dict()
        modules = list()
        consents = list()

        baseline_modules = ['TheBasics', 'OverallHealth', 'Lifestyle']
        try:
            baseline_modules = config.getSettingList('baseline_ppi_questionnaire_fields')
        except ValueError:
            pass
        except AssertionError:  # unittest errors because of GCP SDK
            pass

        consent_modules = {
            # module: question code string
            'DVEHRSharing': 'DVEHRSharing_AreYouInterested',
            'EHRConsentPII': 'EHRConsentPII_ConsentPermission',
        }

        if results:
            for row in results:
                module_name = self._lookup_code_value(row.codeId, ro_session)
                modules.append({
                    'mod_module': module_name,
                    'mod_baseline_module': 1 if module_name in baseline_modules else 0,  # Boolean field
                    'mod_authored': row.authored,
                    'mod_created': row.created,
                    'mod_language': row.language,
                    'mod_status': BQModuleStatusEnum.SUBMITTED.name,
                    'mod_status_id': BQModuleStatusEnum.SUBMITTED.value,
                })

                # check if this is a module with consents.
                if module_name not in consent_modules:
                    continue

                qnans = self.ro_dao.call_proc('sp_get_questionnaire_answers',
                                              args=[module_name, p_id])
                if qnans and len(qnans) > 0:
                    qnan = BQRecord(schema=None, data=qnans[0])  # use only most recent questionnaire.
                    consents.append({
                        'consent': consent_modules[module_name],
                        'consent_id': self._lookup_code_id(consent_modules[module_name], ro_session),
                        'consent_date': parser.parse(qnan.authored).date() if qnan.authored else None,
                        'consent_value': qnan[consent_modules[module_name]],
                        'consent_value_id': self._lookup_code_id(
                            qnan[consent_modules[module_name]], ro_session),
                    })

        if len(modules) > 0:
            data['modules'] = modules
        if len(consents) > 0:
            data['consents'] = consents
        return data

    def _prep_the_basics(self, p_id, ro_session):
        """
        Get the participant's race and gender selections
        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict
        """
        qnans = self.ro_dao.call_proc('sp_get_questionnaire_answers', args=['TheBasics', p_id])
        if not qnans or len(qnans) == 0:
            return {}

        # get race question answers
        qnan = BQRecord(schema=None, data=qnans[0])  # use only most recent questionnaire.
        data = {}
        if qnan.Race_WhatRaceEthnicity:
            rl = list()
            for val in qnan.Race_WhatRaceEthnicity.split(','):
                rl.append({'race': val, 'race_id': self._lookup_code_id(val, ro_session)})
            data['races'] = rl

        # get gender question answers
        gl = list()
        if qnan.Gender_GenderIdentity:
            for val in qnan.Gender_GenderIdentity.split(','):
                if val == 'GenderIdentity_AdditionalOptions':
                    continue
                gl.append({'gender': val, 'gender_id': self._lookup_code_id(val, ro_session)})
        # get additional gender answers, if any.
        if qnan.GenderIdentity_SexualityCloserDescription:
            for val in qnan.GenderIdentity_SexualityCloserDescription.split(','):
                gl.append({'gender': val, 'gender_id': self._lookup_code_id(val, ro_session)})

        if len(gl) > 0:
            data['genders'] = gl

        data['education'] = qnan.EducationLevel_HighestGrade
        data['education_id'] = self._lookup_code_id(qnan.EducationLevel_HighestGrade, ro_session)
        data['income'] = qnan.Income_AnnualIncome
        data['income_id'] = self._lookup_code_id(qnan.Income_AnnualIncome, ro_session)
        data['sex'] = qnan.BiologicalSexAtBirth_SexAtBirth
        data['sex_id'] = self._lookup_code_id(qnan.BiologicalSexAtBirth_SexAtBirth, ro_session)
        data['sexual_orientation'] = qnan.TheBasics_SexualOrientation
        data['sexual_orientation_id'] = self._lookup_code_id(qnan.TheBasics_SexualOrientation,
                                                             ro_session)
        return data

    def _prep_physical_measurements(self, p_id, ro_session):
        """
        Get participant's physical measurements information
        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict
        """
        data = {}
        pm_list = list()

        query = ro_session.query(PhysicalMeasurements.created, PhysicalMeasurements.createdSiteId,
                                 PhysicalMeasurements.final, PhysicalMeasurements.finalized,
                                 PhysicalMeasurements.finalizedSiteId, PhysicalMeasurements.status,
                                 PhysicalMeasurements.cancelledTime).\
                filter(PhysicalMeasurements.participantId == p_id).\
                order_by(desc(PhysicalMeasurements.created))
        # sql = self.dao.query_to_text(query)
        results = query.all()

        for row in results:
            # row.status is not really used; we can only determine the status from cancelledTime.
            if not row.cancelledTime:
                status = PhysicalMeasurementsStatus.COMPLETED
            else:
                status = PhysicalMeasurementsStatus.CANCELLED

            pm_list.append({
                'pm_status': str(status),
                'pm_status_id': int(status),
                'pm_created': row.created,
                'pm_created_site': self._lookup_site_name(row.createdSiteId, ro_session),
                'pm_created_site_id': row.createdSiteId,
                'pm_finalized': row.finalized,
                'pm_finalized_site': self._lookup_site_name(row.finalizedSiteId, ro_session),
                'pm_finalized_site_id': row.finalizedSiteId,
            })

        if len(pm_list) > 0:
            data['pm'] = pm_list
        return data

    def _prep_biobank_info(self, p_id, ro_session):
        """
        Look up biobank orders
        :param p_id: participant id
        :param ro_session: Readonly DAO session object
        :return: dict
        """
        data = {}
        orders = list()

        baseline_tests = ["1ED04", "1ED10", "1HEP4", "1PST8", "2PST8", "1SST8", "2SST8",
                          "1PS08", "1SS08", "1UR10", "1CFD9", "1PXR2", "1UR90", "2ED10"]
        try:
            baseline_tests = config.getSettingList('baseline_sample_test_codes')
        except ValueError:
            pass
        except AssertionError:  # unittest errors because of GCP SDK
            pass

        dna_tests = ["1ED10", "2ED10", "1ED04", "1SAL", "1SAL2"]
        try:
            dna_tests = config.getSettingList('dna_sample_test_codes')
        except ValueError:
            pass
        except AssertionError:  # unittest errors because of GCP SDK
            pass

        sql = """
            select bo.biobank_order_id, bo.created, bo.collected_site_id, bo.processed_site_id,
                   bo.finalized_site_id, bos.test, bos.collected, bos.processed, bos.finalized,
                   bo.order_status,
                   bss.confirmed as bb_confirmed, bss.created as bb_created,
                   bss.disposed as bb_disposed, bss.status as bb_status,
                   (select count(1) from biobank_dv_order bdo
                     where bdo.biobank_order_id = bo.biobank_order_id) as dv_order
              from biobank_order bo
                   inner join biobank_ordered_sample bos on bo.biobank_order_id = bos.order_id
                   inner join biobank_order_identifier boi on bo.biobank_order_id = boi.biobank_order_id
                   left outer join biobank_stored_sample bss
                        on boi.`value` = bss.biobank_order_identifier and bos.test = bss.test
            where boi.`system` = 'https://www.pmi-ops.org' and bo.participant_id = :pid
            order by bo.biobank_order_id, bos.test;
        """

        cursor = ro_session.execute(sql, {'pid': p_id})
        results = [r for r in cursor]

        # loop through results and create one order record for each biobank_order_id value.
        for row in results:
            if not filter(lambda order: order['bbo_biobank_order_id'] == row.biobank_order_id, orders):
                orders.append({
                    'bbo_biobank_order_id': row.biobank_order_id,
                    'bbo_created': row.created,
                    'bbo_status': str(BiobankOrderStatus(row.order_status)
                                      if row.order_status else BiobankOrderStatus.UNSET),
                    'bbo_status_id': int(BiobankOrderStatus(row.order_status)
                                         if row.order_status else BiobankOrderStatus.UNSET),
                    'bbo_dv_order': 0 if row.dv_order == 0 else 1,  # Boolean field
                    'bbo_collected_site': self._lookup_site_name(row.collected_site_id, ro_session),
                    'bbo_collected_site_id': row.collected_site_id,
                    'bbo_processed_site': self._lookup_site_name(row.processed_site_id, ro_session),
                    'bbo_processed_site_id': row.processed_site_id,
                    'bbo_finalized_site': self._lookup_site_name(row.finalized_site_id, ro_session),
                    'bbo_finalized_site_id': row.finalized_site_id,
                })

        # loop through results again and add each sample to its order.
        for row in results:
            # get the order list index for this sample record
            try:
                idx = orders.index(filter(
                    lambda order: order['bbo_biobank_order_id'] == row.biobank_order_id, orders)[0])
            except IndexError:
                continue
            # if we haven't added any samples to this order, create an empty list.
            if 'bbo_samples' not in orders[idx]:
                orders[idx]['bbo_samples'] = list()
            # append the sample to the order
            orders[idx]['bbo_samples'].append({
                'bbs_test': row.test,
                'bbs_baseline_test': 1 if row.test in baseline_tests else 0,  # Boolean field
                'bbs_dna_test': 1 if row.test in dna_tests else 0,  # Boolean field
                'bbs_collected': row.collected,
                'bbs_processed': row.processed,
                'bbs_finalized': row.finalized,
                'bbs_confirmed': row.bb_confirmed,
                'bbs_status': str(SampleStatus.RECEIVED) if row.bb_confirmed else None,
                'bbs_status_id': int(SampleStatus.RECEIVED) if row.bb_confirmed else None,
                'bbs_created': row.bb_created,
                'bbs_disposed': row.bb_disposed,
                'bbs_disposed_reason': str(SampleStatus(row.bb_status)) if row.bb_status else None,
                'bbs_disposed_reason_id': int(SampleStatus(row.bb_status)) if row.bb_status else None,
            })

        if len(orders) > 0:
            data['biobank_orders'] = orders
        return data

    def _calculate_enrollment_status(self, summary):
        """
        Calculate the participant's enrollment status
        :param summary: summary data
        :return: dict
        """
        if 'consents' not in summary:
            return {}

        try:
            baseline_modules = config.getSettingList('baseline_ppi_questionnaire_fields')
        except ValueError:
            baseline_modules = ['TheBasics', 'OverallHealth', 'Lifestyle']

        study_consent = ehr_consent = dvehr_consent = pm_complete = False
        status = None

        # iterate over consents
        for consent in summary['consents']:
            if consent['consent'] == 'ConsentPII':
                study_consent = True
            if consent['consent'] == 'EHRConsentPII_ConsentPermission' and \
                    consent['consent_value'] == 'ConsentPermission_Yes':
                ehr_consent = True
            if consent['consent'] == 'DVEHRSharing_AreYouInterested' and \
                    consent['consent_value'] == 'DVEHRSharing_Yes':
                dvehr_consent = True

        # check physical measurements
        if 'pm' in summary and summary['pm']:
            for row in summary['pm']:
                if row['pm_status_id'] == int(PhysicalMeasurementsStatus.COMPLETED):
                    pm_complete = True
                    break

        baseline_module_count = dna_sample_count = 0
        if 'modules' in summary:
            baseline_module_count = len(
                filter(lambda module: module['mod_baseline_module'] == 1, summary['modules']))
        if 'biobank_orders' in summary:
            for order in summary['biobank_orders']:
                if 'bbo_samples' in order:
                    dna_sample_count += len(
                        filter(lambda sample: sample['bbs_dna_test'] == 1, order['bbo_samples']))

        if study_consent:
            status = EnrollmentStatus.INTERESTED
        if ehr_consent or dvehr_consent:
            status = EnrollmentStatus.MEMBER
        if pm_complete and 'modules' in summary and baseline_module_count == len(baseline_modules) and \
                dna_sample_count > 0:
            status = EnrollmentStatus.FULL_PARTICIPANT

        # TODO: Get Enrollment dates for additional fields -> participant_summary_dao.py:499
        # TODO: Calculate EHR status and dates -> participant_summary_dao.py:707
        data = {
            'enrollment_status': str(status) if status else None,
            'enrollment_status_id': int(status) if status else None,
        }
        return data

    def _calculate_distinct_visits(self, summary):  # pylint: disable=unused-argument
        """
        Calculate the distinct number of visits.
        :param summary: summary data
        :return: dict
        """
        def datetime_to_date(val):
            """
            Change from UTC to the middle of the US before extracting the date. That way if we
            have an early and a late visit they will end up as the same day.
            """
            tmp = val.replace(tzinfo=tz.tzutc()).astimezone(tz.gettz('America/Denver'))
            return datetime.date(tmp.year, tmp.month, tmp.day)

        data = {}
        dates = list()

        if 'pm' in summary:
            for pm in summary['pm']:
                if pm['pm_status_id'] != int(PhysicalMeasurementsStatus.CANCELLED) and \
                        pm['pm_finalized']:
                    dates.append(datetime_to_date(pm['pm_finalized']))

        if 'biobank_orders' in summary:
            for order in summary['biobank_orders']:
                if order['bbo_status_id'] != int(BiobankOrderStatus.CANCELLED) and \
                        'bbo_samples' in order:
                    for sample in order['bbo_samples']:
                        if 'bbs_finalized' in sample and sample['bbs_finalized'] and \
                                isinstance(sample['bbs_finalized'], datetime.datetime):
                            dates.append(datetime_to_date(sample['bbs_finalized']))

        dates = list(set(dates))  # de-dup list
        data['distinct_visits'] = len(dates)
        return data
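# Worked example of the visit-collapsing behavior in datetime_to_date above (values are
# assumed for illustration): two UTC timestamps that straddle midnight UTC can still land
# on the same Denver day and therefore count as one distinct visit.
#
#     early = datetime.datetime(2019, 1, 1, 15, 0)  # 08:00 Jan 1 in Denver (UTC-7 in winter)
#     late = datetime.datetime(2019, 1, 2, 2, 0)    # 19:00 Jan 1 in Denver
#     # datetime_to_date(early) == datetime_to_date(late) == datetime.date(2019, 1, 1)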