def _setup_data(self):
  organization_dao = OrganizationDao()
  site_dao = SiteDao()
  org_1 = organization_dao.insert(
      Organization(externalId='ORG_1',
                   displayName='Organization 1',
                   hpoId=PITT_HPO_ID))
  organization_dao.insert(
      Organization(externalId='AARDVARK_ORG',
                   displayName='Aardvarks Rock',
                   hpoId=PITT_HPO_ID))
  site_dao.insert(
      Site(siteName='Site 1',
           googleGroup='hpo-site-1',
           mayolinkClientNumber=123456,
           organizationId=org_1.organizationId,
           siteStatus=SiteStatus.ACTIVE,
           enrollingStatus=EnrollingStatus.ACTIVE,
           launchDate=datetime.datetime(2016, 1, 1),
           notes='notes',
           latitude=12.1,
           longitude=13.1,
           directions='directions',
           physicalLocationName='locationName',
           address1='address1',
           address2='address2',
           city='Austin',
           state='TX',
           zipCode='78751',
           phoneNumber='555-555-5555',
           adminEmails='[email protected], [email protected]',
           link='http://www.example.com'))
  site_dao.insert(
      Site(siteName='Zebras Rock',
           googleGroup='aaaaaaa',
           organizationId=org_1.organizationId,
           enrollingStatus=EnrollingStatus.INACTIVE,
           siteStatus=SiteStatus.INACTIVE))
def _to_json(model, inactive_sites=False):
  resource = _FhirAwardee()
  resource.id = model.name
  resource.display_name = model.displayName
  if model.organizationType:
    resource.type = str(model.organizationType)
  else:
    resource.type = UNSET
  resource.organizations = [
      OrganizationDao._to_json(organization, inactive_sites)
      for organization in model.organizations
  ]
  json = resource.as_json()
  del json['resourceType']
  return json
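# For illustration only: a hypothetical result of the serializer above. The
# actual keys come from _FhirAwardee.as_json() (with 'resourceType' removed)
# and the values here are made up.
example_awardee_json = {
    'id': 'PITT',
    'display_name': 'Pittsburgh',
    'type': 'HPO',
    'organizations': [
        {'id': 'PITT_BANNER_HEALTH', 'display_name': 'Banner Health'},
    ],
}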
def setUp(self, **kwargs):
  super(UpdateEhrStatusUpdatesTestCase, self).setUp(use_mysql=True, **kwargs)
  self.hpo_dao = HPODao()
  self.org_dao = OrganizationDao()
  self.participant_dao = ParticipantDao()
  self.summary_dao = ParticipantSummaryDao()
  self.ehr_receipt_dao = EhrReceiptDao()

  self.hpo_foo = self._make_hpo(int_id=10, string_id='hpo_foo')
  self.hpo_bar = self._make_hpo(int_id=11, string_id='hpo_bar')

  self.org_foo_a = self._make_org(hpo=self.hpo_foo, int_id=10, external_id='FOO_A')
  self.org_foo_b = self._make_org(hpo=self.hpo_foo, int_id=11, external_id='FOO_B')
  self.org_bar_a = self._make_org(hpo=self.hpo_bar, int_id=12, external_id='BAR_A')

  self.participants = [
      self._make_participant(hpo=self.hpo_foo, org=self.org_foo_a, int_id=11),
      self._make_participant(hpo=self.hpo_foo, org=self.org_foo_b, int_id=12),
      self._make_participant(hpo=self.hpo_bar, org=self.org_bar_a, int_id=13),
      self._make_participant(hpo=self.hpo_bar, org=self.org_bar_a, int_id=14),
  ]
def setUp(self):
  super(HierarchyContentApiTest, self).setUp(with_data=False)
  hpo_dao = HPODao()
  hpo_dao.insert(
      HPO(hpoId=UNSET_HPO_ID, name='UNSET', displayName='Unset',
          organizationType=OrganizationType.UNSET, resourceId='h123456'))
  hpo_dao.insert(
      HPO(hpoId=PITT_HPO_ID, name='PITT', displayName='Pittsburgh',
          organizationType=OrganizationType.HPO, resourceId='h123457'))
  hpo_dao.insert(
      HPO(hpoId=AZ_HPO_ID, name='AZ_TUCSON', displayName='Arizona',
          organizationType=OrganizationType.HPO, resourceId='h123458'))
  self.site_dao = SiteDao()
  self.org_dao = OrganizationDao()
class OrganizationImporter(CsvImporter):
  def __init__(self):
    super(OrganizationImporter, self).__init__(
        'organization', OrganizationDao(), 'organizationId', 'externalId', [
            ORGANIZATION_AWARDEE_ID_COLUMN,
            ORGANIZATION_ORGANIZATION_ID_COLUMN,
            ORGANIZATION_NAME_COLUMN
        ])
    self.hpo_dao = HPODao()
    self.environment = None

  def _entity_from_row(self, row):
    hpo = self.hpo_dao.get_by_name(row[ORGANIZATION_AWARDEE_ID_COLUMN].upper())
    if hpo is None:
      logging.warn('Invalid awardee ID %s importing organization %s',
                   row[ORGANIZATION_AWARDEE_ID_COLUMN],
                   row[ORGANIZATION_ORGANIZATION_ID_COLUMN])
      self.errors.append(
          'Invalid awardee ID {} importing organization {}'.format(
              row[ORGANIZATION_AWARDEE_ID_COLUMN],
              row[ORGANIZATION_ORGANIZATION_ID_COLUMN]))
      return None
    return Organization(
        externalId=row[ORGANIZATION_ORGANIZATION_ID_COLUMN].upper(),
        displayName=row[ORGANIZATION_NAME_COLUMN],
        hpoId=hpo.hpoId)

  def delete_sql_statement(self, session, str_list):
    sql = """
      DELETE FROM organization
      WHERE organization_id IN ({str_list})
      AND NOT EXISTS(
        SELECT * FROM participant
        WHERE organization_id = organization.organization_id)
      AND NOT EXISTS(
        SELECT * FROM participant_summary
        WHERE organization_id = organization.organization_id)
      AND NOT EXISTS(
        SELECT * FROM participant_history
        WHERE organization_id = organization.organization_id)
      AND NOT EXISTS(
        SELECT * FROM site
        WHERE organization_id = organization.organization_id)
      """.format(str_list=str_list)
    session.execute(sql)

  def _cleanup_old_entities(self, session, row_list, dry_run):
    log_prefix = '(dry run) ' if dry_run else ''
    self.org_dao = OrganizationDao()
    existing_orgs = set(str(org.externalId) for org in self.org_dao.get_all())
    org_group_list_from_sheet = [
        row[ORGANIZATION_ORGANIZATION_ID_COLUMN].upper() for row in row_list
    ]

    orgs_to_remove = existing_orgs - set(org_group_list_from_sheet)
    if orgs_to_remove:
      org_id_list = []
      for org in orgs_to_remove:
        old_org = self.org_dao.get_by_external_id(org)
        if old_org and old_org.isObsolete != ObsoleteStatus.OBSOLETE:
          org_id_list.append(old_org.organizationId)
          self.deletion_count += 1
        elif old_org and old_org.isObsolete == ObsoleteStatus.OBSOLETE:
          logging.info('Not attempting to delete org [%s] with existing obsolete status',
                       old_org.displayName)

      if org_id_list and not dry_run:
        str_list = ','.join([str(i) for i in org_id_list])
        logging.info(log_prefix + 'Marking old Organizations as obsolete: %s', str_list)
        sql = """ UPDATE organization
                  SET is_obsolete = 1
                  WHERE organization_id IN ({org_id_list})""".format(org_id_list=str_list)
        session.execute(sql)

        logging.info(log_prefix + 'Deleting old Organizations no longer in Google sheet: %s',
                     str_list)
        self.delete_sql_statement(session, str_list)
        self.org_dao._invalidate_cache()
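# A minimal sketch of how this importer might be driven. CsvImporter is
# internal to this codebase, so the run(filename, dry_run) entry point used
# here is an assumption based on the subclass constructors above.
import logging

def import_organizations(csv_path, dry_run=True):
  importer = OrganizationImporter()
  importer.run(csv_path, dry_run=dry_run)  # assumed signature
  if importer.errors:
    for error in importer.errors:
      logging.warn('organization import error: %s', error)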
class SyncConsentFilesTest(CloudStorageSqlTestBase, NdbTestBase):
  """Tests behavior of sync_consent_files."""

  def setUp(self, **kwargs):
    super(SyncConsentFilesTest, self).setUp(use_mysql=True, **kwargs)
    NdbTestBase.doSetUp(self)
    TestBase.setup_fake(self)
    self.org_dao = OrganizationDao()
    self.site_dao = SiteDao()
    self.participant_dao = ParticipantDao()
    self.summary_dao = ParticipantSummaryDao()

  def tearDown(self):
    super(SyncConsentFilesTest, self).tearDown()

  def _create_org(self, id_):
    org = Organization(organizationId=id_, externalId=id_,
                       displayName=id_, hpoId=UNSET_HPO_ID)
    self.org_dao.insert(org)
    return org

  def _create_site(self, id_, google_group):
    site = Site(siteId=id_, siteName=id_, googleGroup=google_group)
    self.site_dao.insert(site)
    return site

  def _create_participant(self, id_, org_id, site_id, consents=False,
                          ghost=None, email=None, null_email=False):
    participant = Participant(participantId=id_, biobankId=id_,
                              organizationId=org_id, siteId=site_id,
                              isGhostId=ghost)
    self.participant_dao.insert(participant)
    summary = self.participant_summary(participant)
    if consents:
      summary.consentForElectronicHealthRecords = 1
      summary.consentForStudyEnrollment = 1
    if email:
      summary.email = email
    if null_email:
      summary.email = None
    self.summary_dao.insert(summary)
    return participant

  def test_iter_participants_data(self):
    """should list consenting participants"""
    org1 = self._create_org(1)
    org2 = self._create_org(2)
    site1 = self._create_site(1001, 'group1')
    site2 = self._create_site(1002, 'group2')
    self._create_participant(1, org1.organizationId, site1.siteId,
                             consents=True, null_email=True)
    self._create_participant(2, org2.organizationId, site2.siteId)
    self._create_participant(3, org1.organizationId, None, consents=True, ghost=False)
    self._create_participant(4, org1.organizationId, None, consents=True, ghost=True)
    self._create_participant(5, org1.organizationId, None, consents=True,
                             email='*****@*****.**')
    participant_data_list = list(sync_consent_files._iter_participants_data())
    participant_ids = [d.participant_id for d in participant_data_list]
    self.assertEqual(len(participant_ids), 2, "finds correct number of results")
    self.assertEqual(participant_ids, [1, 3], "finds valid participants")
    self.assertEqual(participant_data_list[0].google_group, 'group1',
                     "includes google group")
    self.assertEqual(participant_data_list[1].google_group, None,
                     "allows None for google group")

  @mock.patch('cloudstorage.listbucket')
  @mock.patch('cloudstorage.copy2')
  def test_cloudstorage_copy_objects_api_calls(self, mock_copy2, mock_listbucket):
    """Makes the proper google cloudstorage API calls."""
    mock_listbucket.return_value = [
        cloudstorage.common.GCSFileStat('/fake_bucket1/prefix1/foo', 0, 'x', 0),
        cloudstorage.common.GCSFileStat('/fake_bucket1/prefix1/bar', 0, 'x', 0),
    ]
    # with trailing slashes
    sync_consent_files.cloudstorage_copy_objects('/fake_bucket1/prefix1/',
                                                 '/fake_bucket2/prefix2/')
    mock_copy2.assert_has_calls([
        mock.call('/fake_bucket1/prefix1/foo', '/fake_bucket2/prefix2/foo'),
        mock.call('/fake_bucket1/prefix1/bar', '/fake_bucket2/prefix2/bar'),
    ])
    # without trailing slashes
    sync_consent_files.cloudstorage_copy_objects('/fake_bucket1/prefix1',
                                                 '/fake_bucket2/prefix2')
    mock_copy2.assert_has_calls([
        mock.call('/fake_bucket1/prefix1/foo', '/fake_bucket2/prefix2/foo'),
        mock.call('/fake_bucket1/prefix1/bar', '/fake_bucket2/prefix2/bar'),
    ])

  @staticmethod
  def _write_cloud_object(cloud_filename, contents_str):
    with cloudstorage.cloudstorage_api.open(cloud_filename, mode='w') as cloud_file:
      cloud_file.write(contents_str.encode('utf-8'))

  def test_cloudstorage_copy_objects_actual(self):
    self._write_cloud_object('/fake_bucket1/prefix/x1/foo.txt', 'foo')
    self._write_cloud_object('/fake_bucket1/prefix/x1/bar.txt', 'bar')
    self._write_cloud_object('/fake_bucket1/prefix/x1/y1/foo.txt', 'foo')
    with cloudstorage.cloudstorage_api.open('/fake_bucket1/prefix/x1/foo.txt',
                                            mode='r') as f:
      self.assertEqual(f.read(), 'foo', 'wrote to cloud storage')
    sync_consent_files.cloudstorage_copy_objects('/fake_bucket1/prefix/x1/',
                                                 '/fake_bucket2/prefix/z/x1/')
    self.assertEqual([
        file_stat.filename
        for file_stat in cloudstorage.cloudstorage_api.listbucket('/fake_bucket2/prefix/z/x1/')
    ], [
        '/fake_bucket2/prefix/z/x1/bar.txt',
        '/fake_bucket2/prefix/z/x1/foo.txt',
        '/fake_bucket2/prefix/z/x1/y1/foo.txt',
    ], "copied all objects")
    with cloudstorage.cloudstorage_api.open('/fake_bucket2/prefix/z/x1/foo.txt',
                                            mode='r') as f:
      self.assertEqual(f.read(), 'foo', 'copied contents')

  @mock.patch('cloudstorage.copy2')
  def test_cloudstorage_copy_objects_only_new_and_changed(self, copy2):
    self._write_cloud_object('/fake_bucket1/prefix/x1/foo.txt', 'foo')
    self._write_cloud_object('/fake_bucket1/prefix/x1/bar.txt', 'bar')
    self._write_cloud_object('/fake_bucket2/prefix/z/x1/foo.txt', 'foo')
    self._write_cloud_object('/fake_bucket2/prefix/z/x1/bar.txt', 'baz')
    sync_consent_files.cloudstorage_copy_objects('/fake_bucket1/prefix/x1/',
                                                 '/fake_bucket2/prefix/z/x1/')
    copy2.assert_called_once_with('/fake_bucket1/prefix/x1/bar.txt',
                                  '/fake_bucket2/prefix/z/x1/bar.txt')
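# The last test implies the copy helper skips destination objects whose
# content already matches. A minimal sketch of how such a helper could work,
# assuming the GAE cloudstorage client's listbucket/copy2 API and using etags
# for the comparison (the real sync_consent_files implementation may differ):
import cloudstorage

def copy_objects_sketch(source, destination):
  """Copy objects under the source prefix, skipping unchanged files."""
  if not source.endswith('/'):
    source += '/'
  if not destination.endswith('/'):
    destination += '/'
  # Map existing destination object names to their etags.
  existing = {f.filename: f.etag for f in cloudstorage.listbucket(destination)}
  for file_stat in cloudstorage.listbucket(source):
    dest_name = destination + file_stat.filename[len(source):]
    if existing.get(dest_name) != file_stat.etag:
      cloudstorage.copy2(file_stat.filename, dest_name)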
class EhrReceiptDaoTest(SqlTestBase):
  def setUp(self, with_data=True, use_mysql=True):
    super(EhrReceiptDaoTest, self).setUp(with_data=with_data, use_mysql=use_mysql)
    self.setup_fake()
    self.calendar_dao = CalendarDao()
    self.org_dao = OrganizationDao()
    self.hpo_dao = HPODao()
    self.participant_dao = ParticipantDao()
    self.summary_dao = ParticipantSummaryDao()
    self.ehr_receipt_dao = EhrReceiptDao()
    self._setup_initial_data()

  @staticmethod
  def _iter_dates_in_range(start, end):
    current = start
    while current <= end:
      yield current
      current += datetime.timedelta(days=1)

  def _fill_calendar_range(self, start, end):
    for date in self._iter_dates_in_range(start, end):
      self.calendar_dao.insert(Calendar(day=date))

  def _make_hpo(self, int_id, string_id):
    hpo = HPO(hpoId=int_id, name=string_id)
    self.hpo_dao.insert(hpo)
    return hpo

  def _make_org(self, **kwargs):
    org = Organization(**kwargs)
    self.org_dao.insert(org)
    return org

  def _make_participant(self, org, int_id):
    participant = self._participant_with_defaults(participantId=int_id, biobankId=int_id)
    participant.hpoId = org.hpoId
    participant.organizationId = org.organizationId
    self.participant_dao.insert(participant)
    summary = self.participant_summary(participant)
    summary.hpoId = participant.hpoId
    summary.organizationId = participant.organizationId
    self.summary_dao.insert(summary)
    return participant, summary

  def _update_ehr(self, participant_summary, update_time):
    self.summary_dao.update_ehr_status(participant_summary, update_time)
    self.summary_dao.update(participant_summary)

  def _save_ehr_receipt(self, org, receipt_time):
    receipt = EhrReceipt(organizationId=org.organizationId, receiptTime=receipt_time)
    self.ehr_receipt_dao.insert(receipt)

  def _setup_initial_data(self):
    self.hpo_foo = self._make_hpo(int_id=10, string_id='hpo_foo')
    self.hpo_bar = self._make_hpo(int_id=11, string_id='hpo_bar')
    self.org_foo_a = self._make_org(organizationId=10, externalId='FOO_A',
                                    displayName='Foo A', hpoId=self.hpo_foo.hpoId)
    self.org_bar_a = self._make_org(organizationId=11, externalId='BAR_A',
                                    displayName='Bar A', hpoId=self.hpo_bar.hpoId)
    participant_and_summary_pairs = [
        self._make_participant(org=self.org_foo_a, int_id=11),
        self._make_participant(org=self.org_foo_a, int_id=12),
        self._make_participant(org=self.org_bar_a, int_id=13),
        self._make_participant(org=self.org_bar_a, int_id=14),
    ]
    self.participants = {
        participant.participantId: participant
        for participant, summary in participant_and_summary_pairs
    }
    self.summaries = {
        participant.participantId: summary
        for participant, summary in participant_and_summary_pairs
    }

  def test_get_active_organization_counts_in_interval_day(self):
    self._fill_calendar_range(datetime.date(2019, 1, 1), datetime.date(2019, 3, 1))
    self._save_ehr_receipt(org=self.org_foo_a, receipt_time=datetime.datetime(2019, 2, 2))
    self._save_ehr_receipt(org=self.org_bar_a, receipt_time=datetime.datetime(2019, 2, 2))
    self._save_ehr_receipt(org=self.org_foo_a, receipt_time=datetime.datetime(2019, 2, 4))
    results = self.ehr_receipt_dao.get_active_organization_counts_in_interval(
        start_date=datetime.datetime(2019, 2, 1),
        end_date=datetime.datetime(2019, 2, 7),
        interval=INTERVAL_DAY)
    self.assertEqual([(r['start_date'], r['active_organization_count']) for r in results], [
        (datetime.date(2019, 2, 1), 0L),
        (datetime.date(2019, 2, 2), 2L),
        (datetime.date(2019, 2, 3), 0L),
        (datetime.date(2019, 2, 4), 1L),
        (datetime.date(2019, 2, 5), 0L),
        (datetime.date(2019, 2, 6), 0L),
        (datetime.date(2019, 2, 7), 0L),
    ])

  def test_get_active_organization_counts_in_interval_week(self):
    self._fill_calendar_range(datetime.date(2019, 1, 1), datetime.date(2019, 3, 1))
    self._save_ehr_receipt(org=self.org_foo_a, receipt_time=datetime.datetime(2019, 2, 4))
    self._save_ehr_receipt(org=self.org_bar_a, receipt_time=datetime.datetime(2019, 2, 4))
    self._save_ehr_receipt(org=self.org_foo_a, receipt_time=datetime.datetime(2019, 2, 18))
    results = self.ehr_receipt_dao.get_active_organization_counts_in_interval(
        start_date=datetime.datetime(2019, 2, 1),
        end_date=datetime.datetime(2019, 3, 1),
        interval=INTERVAL_WEEK)
    self.assertEqual([(r['start_date'], r['active_organization_count']) for r in results], [
        (datetime.date(2019, 1, 27), 0L),
        (datetime.date(2019, 2, 3), 2L),
        (datetime.date(2019, 2, 10), 0L),
        (datetime.date(2019, 2, 17), 1L),
        (datetime.date(2019, 2, 24), 0L),
    ])

  def test_get_active_organization_counts_in_interval_month(self):
    self._fill_calendar_range(datetime.date(2018, 12, 1), datetime.date(2019, 7, 1))
    self._save_ehr_receipt(org=self.org_foo_a, receipt_time=datetime.datetime(2019, 2, 1))
    self._save_ehr_receipt(org=self.org_bar_a, receipt_time=datetime.datetime(2019, 2, 1))
    self._save_ehr_receipt(org=self.org_foo_a, receipt_time=datetime.datetime(2019, 4, 1))
    results = self.ehr_receipt_dao.get_active_organization_counts_in_interval(
        start_date=datetime.datetime(2019, 1, 1),
        end_date=datetime.datetime(2019, 5, 1),
        interval=INTERVAL_MONTH)
    self.assertEqual([(r['start_date'], r['active_organization_count']) for r in results], [
        (datetime.date(2019, 1, 1), 0L),
        (datetime.date(2019, 2, 1), 2L),
        (datetime.date(2019, 3, 1), 0L),
        (datetime.date(2019, 4, 1), 1L),
        (datetime.date(2019, 5, 1), 0L),
    ])

  def test_get_active_organization_counts_in_interval_quarter(self):
    self._fill_calendar_range(datetime.date(2018, 12, 1), datetime.date(2020, 1, 1))
    self._save_ehr_receipt(org=self.org_foo_a, receipt_time=datetime.datetime(2019, 5, 1))
    self._save_ehr_receipt(org=self.org_bar_a, receipt_time=datetime.datetime(2019, 5, 1))
    self._save_ehr_receipt(org=self.org_foo_a, receipt_time=datetime.datetime(2019, 11, 1))
    results = self.ehr_receipt_dao.get_active_organization_counts_in_interval(
        start_date=datetime.datetime(2019, 1, 1),
        end_date=datetime.datetime(2020, 1, 1),
        interval=INTERVAL_QUARTER)
    self.assertEqual([(r['start_date'], r['active_organization_count']) for r in results], [
        (datetime.date(2019, 1, 1), 0L),
        (datetime.date(2019, 4, 1), 2L),
        (datetime.date(2019, 7, 1), 0L),
        (datetime.date(2019, 10, 1), 1L),
        (datetime.date(2020, 1, 1), 0L),
    ])
class ParticipantSummaryDao(UpdatableDao):
  def __init__(self):
    super(ParticipantSummaryDao, self).__init__(ParticipantSummary,
                                                order_by_ending=_ORDER_BY_ENDING)
    self.hpo_dao = HPODao()
    self.code_dao = CodeDao()
    self.site_dao = SiteDao()
    self.organization_dao = OrganizationDao()

  def get_id(self, obj):
    return obj.participantId

  def get_by_email(self, email):
    with self.session() as session:
      return session.query(ParticipantSummary).filter(
          ParticipantSummary.email == email).all()

  def _validate_update(self, session, obj, existing_obj):  # pylint: disable=unused-argument
    """Participant summaries don't have a version value; drop it from validation logic."""
    if not existing_obj:
      raise NotFound('%s with id %s does not exist' %
                     (self.model_type.__name__, obj.participantId))

  def _has_withdrawn_filter(self, query):
    for field_filter in query.field_filters:
      if (field_filter.field_name == 'withdrawalStatus'
          and field_filter.value == WithdrawalStatus.NO_USE):
        return True
      if field_filter.field_name == 'withdrawalTime' and field_filter.value is not None:
        return True
    return False

  def _get_non_withdrawn_filter_field(self, query):
    """Returns the first field referenced in query filters which isn't in
    WITHDRAWN_PARTICIPANT_FIELDS."""
    for field_filter in query.field_filters:
      if not field_filter.field_name in WITHDRAWN_PARTICIPANT_FIELDS:
        return field_filter.field_name
    return None

  def _initialize_query(self, session, query_def):
    non_withdrawn_field = self._get_non_withdrawn_filter_field(query_def)
    if self._has_withdrawn_filter(query_def):
      if non_withdrawn_field:
        raise BadRequest("Can't query on %s for withdrawn participants" % non_withdrawn_field)
      # When querying for withdrawn participants, ensure that the only fields being
      # filtered on or ordered by are in WITHDRAWN_PARTICIPANT_FIELDS.
      return super(ParticipantSummaryDao, self)._initialize_query(session, query_def)
    else:
      query = super(ParticipantSummaryDao, self)._initialize_query(session, query_def)
      if non_withdrawn_field:
        # When querying on fields that aren't available for withdrawn participants,
        # ensure that we only return participants who have not withdrawn or withdrew
        # in the past 48 hours.
        withdrawn_visible_start = clock.CLOCK.now() - WITHDRAWN_PARTICIPANT_VISIBILITY_TIME
        return query.filter(
            or_(ParticipantSummary.withdrawalStatus != WithdrawalStatus.NO_USE,
                ParticipantSummary.withdrawalTime >= withdrawn_visible_start))
      else:
        # When querying on fields that are available for withdrawn participants, return
        # everybody; withdrawn participants will have all but WITHDRAWN_PARTICIPANT_FIELDS
        # cleared out 48 hours after withdrawing.
        return query

  def _get_order_by_ending(self, query):
    if self._has_withdrawn_filter(query):
      return _WITHDRAWN_ORDER_BY_ENDING
    return self.order_by_ending

  def _add_order_by(self, query, order_by, field_names, fields):
    if order_by.field_name in _CODE_FILTER_FIELDS:
      return super(ParticipantSummaryDao, self)._add_order_by(
          query, OrderBy(order_by.field_name + 'Id', order_by.ascending),
          field_names, fields)
    return super(ParticipantSummaryDao, self)._add_order_by(query, order_by,
                                                            field_names, fields)

  def make_query_filter(self, field_name, value):
    """Handle HPO and code values when parsing filter values."""
    if field_name == 'hpoId' or field_name == 'awardee':
      hpo = self.hpo_dao.get_by_name(value)
      if not hpo:
        raise BadRequest('No HPO found with name %s' % value)
      if field_name == 'awardee':
        field_name = 'hpoId'
      return super(ParticipantSummaryDao, self).make_query_filter(field_name, hpo.hpoId)
    if field_name == 'organization':
      organization = self.organization_dao.get_by_external_id(value)
      if not organization:
        raise BadRequest('No organization found with name %s' % value)
      return super(ParticipantSummaryDao, self).make_query_filter(
          field_name + 'Id', organization.organizationId)
    if field_name in _SITE_FIELDS:
      if value == UNSET:
        return super(ParticipantSummaryDao, self).make_query_filter(field_name + 'Id', None)
      site = self.site_dao.get_by_google_group(value)
      if not site:
        raise BadRequest('No site found with google group %s' % value)
      return super(ParticipantSummaryDao, self).make_query_filter(field_name + 'Id',
                                                                  site.siteId)
    if field_name in _CODE_FILTER_FIELDS:
      if value == UNSET:
        return super(ParticipantSummaryDao, self).make_query_filter(field_name + 'Id', None)
      # Note: we do not at present support querying for UNMAPPED code values.
      code = self.code_dao.get_code(PPI_SYSTEM, value)
      if not code:
        raise BadRequest('No code found: %s' % value)
      return super(ParticipantSummaryDao, self).make_query_filter(field_name + 'Id',
                                                                  code.codeId)
    return super(ParticipantSummaryDao, self).make_query_filter(field_name, value)

  def update_from_biobank_stored_samples(self, participant_id=None):
    """Rewrites sample-related summary data. Call this after updating BiobankStoredSamples.
    If participant_id is provided, only that participant will have their summary updated."""
    baseline_tests_sql, baseline_tests_params = get_sql_and_params_for_array(
        config.getSettingList(config.BASELINE_SAMPLE_TEST_CODES), 'baseline')
    dna_tests_sql, dna_tests_params = get_sql_and_params_for_array(
        config.getSettingList(config.DNA_SAMPLE_TEST_CODES), 'dna')
    sample_sql, sample_params = _get_sample_sql_and_params()
    sql = """
    UPDATE participant_summary SET
      num_baseline_samples_arrived = (
        SELECT COUNT(*) FROM biobank_stored_sample
        WHERE biobank_stored_sample.biobank_id = participant_summary.biobank_id
        AND biobank_stored_sample.test IN %s),
      samples_to_isolate_dna = (
        CASE WHEN EXISTS(
          SELECT * FROM biobank_stored_sample
          WHERE biobank_stored_sample.biobank_id = participant_summary.biobank_id
          AND biobank_stored_sample.test IN %s)
        THEN :received ELSE :unset END),
      last_modified = :now
      %s""" % (baseline_tests_sql, dna_tests_sql, sample_sql)
    params = {
        'received': int(SampleStatus.RECEIVED),
        'unset': int(SampleStatus.UNSET),
        'now': clock.CLOCK.now()
    }
    params.update(baseline_tests_params)
    params.update(dna_tests_params)
    params.update(sample_params)
    enrollment_status_params = {
        'submitted': int(QuestionnaireStatus.SUBMITTED),
        'num_baseline_ppi_modules': self._get_num_baseline_ppi_modules(),
        'completed': int(PhysicalMeasurementsStatus.COMPLETED),
        'received': int(SampleStatus.RECEIVED),
        'full_participant': int(EnrollmentStatus.FULL_PARTICIPANT),
        'member': int(EnrollmentStatus.MEMBER),
        'interested': int(EnrollmentStatus.INTERESTED)
    }
    enrollment_status_sql = _ENROLLMENT_STATUS_SQL
    # If participant_id is provided, add the participant ID filter to both update statements.
    if participant_id:
      sql += _PARTICIPANT_ID_FILTER
      params['participant_id'] = participant_id
      enrollment_status_sql += _PARTICIPANT_ID_FILTER
      enrollment_status_params['participant_id'] = participant_id

    sql = replace_null_safe_equals(sql)
    with self.session() as session:
      session.execute(sql, params)
      session.execute(enrollment_status_sql, enrollment_status_params)

  def _get_num_baseline_ppi_modules(self):
    return len(config.getSettingList(config.BASELINE_PPI_QUESTIONNAIRE_FIELDS))

  def update_enrollment_status(self, summary):
    """Updates the enrollment status field on the provided participant summary to the
    correct value based on the other fields on it. Called after a questionnaire response
    or physical measurements are submitted."""
    consent = (summary.consentForStudyEnrollment == QuestionnaireStatus.SUBMITTED
               and summary.consentForElectronicHealthRecords == QuestionnaireStatus.SUBMITTED)
    enrollment_status = self.calculate_enrollment_status(
        consent, summary.numCompletedBaselinePPIModules,
        summary.physicalMeasurementsStatus, summary.samplesToIsolateDNA)
    summary.enrollmentStatus = enrollment_status

  def calculate_enrollment_status(self, consent_for_study_enrollment_and_ehr,
                                  num_completed_baseline_ppi_modules,
                                  physical_measurements_status,
                                  samples_to_isolate_dna):
    if consent_for_study_enrollment_and_ehr:
      if (num_completed_baseline_ppi_modules == self._get_num_baseline_ppi_modules()
          and physical_measurements_status == PhysicalMeasurementsStatus.COMPLETED
          and samples_to_isolate_dna == SampleStatus.RECEIVED):
        return EnrollmentStatus.FULL_PARTICIPANT
      return EnrollmentStatus.MEMBER
    return EnrollmentStatus.INTERESTED

  def to_client_json(self, model):
    result = model.asdict()
    # Participants that withdrew more than 48 hours ago should have fields other than
    # WITHDRAWN_PARTICIPANT_FIELDS cleared.
    if (model.withdrawalStatus == WithdrawalStatus.NO_USE
        and model.withdrawalTime < clock.CLOCK.now() - WITHDRAWN_PARTICIPANT_VISIBILITY_TIME):
      result = {k: result.get(k) for k in WITHDRAWN_PARTICIPANT_FIELDS}
    result['participantId'] = to_client_participant_id(model.participantId)
    biobank_id = result.get('biobankId')
    if biobank_id:
      result['biobankId'] = to_client_biobank_id(biobank_id)
    date_of_birth = result.get('dateOfBirth')
    if date_of_birth:
      result['ageRange'] = get_bucketed_age(date_of_birth, clock.CLOCK.now())
    else:
      result['ageRange'] = UNSET
    if 'organizationId' in result:
      result['organization'] = result['organizationId']
      del result['organizationId']
      format_json_org(result, self.organization_dao, 'organization')
    format_json_hpo(result, self.hpo_dao, 'hpoId')
    result['awardee'] = result['hpoId']
    _initialize_field_type_sets()
    for fieldname in _DATE_FIELDS:
      format_json_date(result, fieldname)
    for fieldname in _CODE_FIELDS:
      format_json_code(result, self.code_dao, fieldname)
    for fieldname in _ENUM_FIELDS:
      format_json_enum(result, fieldname)
    for fieldname in _SITE_FIELDS:
      format_json_site(result, self.site_dao, fieldname)
    if (model.withdrawalStatus == WithdrawalStatus.NO_USE
        or model.suspensionStatus == SuspensionStatus.NO_CONTACT):
      result['recontactMethod'] = 'NO_CONTACT'
    # Strip None values.
    result = {k: v for k, v in result.iteritems() if v is not None}
    return result

  def _decode_token(self, query_def, fields):
    """If a token exists in the participant_summary API, decode it and use lastModified to
    add a buffer of 60 seconds. This ensures that when a _sync link is used, no one is
    missed. This will return, at a minimum, the last participant and any more that have
    been modified in the previous 60 seconds. Duplicate participants returned should be
    handled on the client side."""
    decoded_vals = super(ParticipantSummaryDao, self)._decode_token(query_def, fields)
    if query_def.order_by and (query_def.order_by.field_name == 'lastModified'
                               and query_def.always_return_token == True):
      decoded_vals[0] = decoded_vals[0] - datetime.timedelta(
          seconds=config.LAST_MODIFIED_BUFFER_SECONDS)
    return decoded_vals
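# To make the _decode_token buffer concrete, a small illustration; the
# 60-second value is assumed to match config.LAST_MODIFIED_BUFFER_SECONDS.
import datetime

LAST_MODIFIED_BUFFER_SECONDS = 60  # assumed config value

# If the client's sync token decodes to this lastModified watermark...
token_last_modified = datetime.datetime(2019, 2, 1, 12, 0, 30)

# ...the query actually resumes 60 seconds earlier, so a record written at
# e.g. 11:59:45 by a slightly lagging writer is not silently skipped.
resume_from = token_last_modified - datetime.timedelta(seconds=LAST_MODIFIED_BUFFER_SECONDS)
assert resume_from == datetime.datetime(2019, 2, 1, 11, 59, 30)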
class SiteImporter(CsvImporter):
  def __init__(self):
    super(SiteImporter, self).__init__('site', SiteDao(), 'siteId', 'googleGroup', [
        SITE_ORGANIZATION_ID_COLUMN, SITE_SITE_ID_COLUMN, SITE_SITE_COLUMN,
        SITE_STATUS_COLUMN, ENROLLING_STATUS_COLUMN
    ])
    self.organization_dao = OrganizationDao()
    args = parser.parse_args()
    self.geocode_flag = args.geocode_flag
    self.ACTIVE = SiteStatus.ACTIVE
    self.status_exception_list = ['hpo-site-walgreensphoenix']

  def _entity_from_row(self, row):
    google_group = row[SITE_SITE_ID_COLUMN].lower()
    organization = self.organization_dao.get_by_external_id(
        row[SITE_ORGANIZATION_ID_COLUMN].upper())
    if organization is None:
      logging.warn('Invalid organization ID %s importing site %s',
                   row[SITE_ORGANIZATION_ID_COLUMN].upper(), google_group)
      self.errors.append('Invalid organization ID {} importing site {}'.format(
          row[SITE_ORGANIZATION_ID_COLUMN].upper(), google_group))
      return None

    launch_date = None
    launch_date_str = row.get(SITE_LAUNCH_DATE_COLUMN)
    if launch_date_str:
      try:
        launch_date = parse(launch_date_str).date()
      except ValueError:
        logging.warn('Invalid launch date %s for site %s', launch_date_str, google_group)
        self.errors.append('Invalid launch date {} for site {}'.format(
            launch_date_str, google_group))
        return None

    name = row[SITE_SITE_COLUMN]
    mayolink_client_number = None
    mayolink_client_number_str = row.get(SITE_MAYOLINK_CLIENT_NUMBER_COLUMN)
    if mayolink_client_number_str:
      try:
        mayolink_client_number = int(mayolink_client_number_str)
      except ValueError:
        logging.warn('Invalid Mayolink Client # %s for site %s',
                     mayolink_client_number_str, google_group)
        self.errors.append('Invalid Mayolink Client # {} for site {}'.format(
            mayolink_client_number_str, google_group))
        return None

    notes = row.get(SITE_NOTES_COLUMN)
    try:
      site_status = SiteStatus(row[SITE_STATUS_COLUMN].upper())
    except TypeError:
      logging.warn('Invalid site status %s for site %s',
                   row[SITE_STATUS_COLUMN], google_group)
      self.errors.append('Invalid site status {} for site {}'.format(
          row[SITE_STATUS_COLUMN], google_group))
      return None
    try:
      enrolling_status = EnrollingStatus(row[ENROLLING_STATUS_COLUMN].upper())
    except TypeError:
      logging.warn('Invalid enrollment site status %s for site %s',
                   row[ENROLLING_STATUS_COLUMN], google_group)
      self.errors.append('Invalid enrollment site status {} for site {}'.format(
          row[ENROLLING_STATUS_COLUMN], google_group))
      return None

    directions = row.get(SITE_DIRECTIONS_COLUMN)
    physical_location_name = row.get(SITE_PHYSICAL_LOCATION_NAME_COLUMN)
    address_1 = row.get(SITE_ADDRESS_1_COLUMN)
    address_2 = row.get(SITE_ADDRESS_2_COLUMN)
    city = row.get(SITE_CITY_COLUMN)
    state = row.get(SITE_STATE_COLUMN)
    zip_code = row.get(SITE_ZIP_COLUMN)
    phone = row.get(SITE_PHONE_COLUMN)
    admin_email_addresses = row.get(SITE_ADMIN_EMAIL_ADDRESSES_COLUMN)
    link = row.get(SITE_LINK_COLUMN)
    return Site(siteName=name,
                googleGroup=google_group,
                mayolinkClientNumber=mayolink_client_number,
                organizationId=organization.organizationId,
                hpoId=organization.hpoId,
                siteStatus=site_status,
                enrollingStatus=enrolling_status,
                launchDate=launch_date,
                notes=notes,
                directions=directions,
                physicalLocationName=physical_location_name,
                address1=address_1,
                address2=address_2,
                city=city,
                state=state,
                zipCode=zip_code,
                phoneNumber=phone,
                adminEmails=admin_email_addresses,
                link=link)

  def _update_entity(self, entity, existing_entity, session, dry_run):
    self._populate_lat_lng_and_time_zone(entity, existing_entity)
    if entity.siteStatus == self.ACTIVE and (entity.latitude is None
                                             or entity.longitude is None):
      self.errors.append('Skipped active site without geocoding: {}'.format(
          entity.googleGroup))
      return None, True
    return super(SiteImporter, self)._update_entity(entity, existing_entity, session, dry_run)

  def _insert_entity(self, entity, existing_map, session, dry_run):
    self._populate_lat_lng_and_time_zone(entity, None)
    if entity.siteStatus == self.ACTIVE and (entity.latitude is None
                                             or entity.longitude is None):
      self.errors.append('Skipped active site without geocoding: {}'.format(
          entity.googleGroup))
      return False
    super(SiteImporter, self)._insert_entity(entity, existing_map, session, dry_run)

  def _populate_lat_lng_and_time_zone(self, site, existing_site):
    if site.address1 and site.city and site.state:
      if existing_site:
        if (existing_site.address1 == site.address1
            and existing_site.city == site.city
            and existing_site.state == site.state
            and existing_site.latitude is not None
            and existing_site.longitude is not None
            and existing_site.timeZoneId is not None):
          # Address didn't change; use the existing lat/lng and time zone.
          site.latitude = existing_site.latitude
          site.longitude = existing_site.longitude
          site.timeZoneId = existing_site.timeZoneId
          return
      if self.geocode_flag:
        latitude, longitude = self._get_lat_long_for_site(site.address1, site.city,
                                                          site.state)
        site.latitude = latitude
        site.longitude = longitude
        if latitude and longitude:
          site.timeZoneId = self._get_time_zone(latitude, longitude)
    else:
      if site.googleGroup not in self.status_exception_list:
        if site.siteStatus == self.ACTIVE:
          self.errors.append('Active site must have valid address. '
                             'Site: {}, Group: {}'.format(site.siteName, site.googleGroup))

  def _get_lat_long_for_site(self, address_1, city, state):
    self.full_address = address_1 + ' ' + city + ' ' + state
    try:
      self.api_key = os.environ.get('API_KEY')
      self.gmaps = googlemaps.Client(key=self.api_key)
      try:
        geocode_result = self.gmaps.geocode(self.full_address)[0]
      except IndexError:
        self.errors.append('Bad address for {}, could not geocode.'.format(
            self.full_address))
        return None, None
      if geocode_result:
        geometry = geocode_result.get('geometry')
        if geometry:
          location = geometry.get('location')
          if location:
            latitude = location.get('lat')
            longitude = location.get('lng')
            return latitude, longitude
        logging.warn('Can not find lat/long for %s', self.full_address)
        self.errors.append('Can not find lat/long for {}'.format(self.full_address))
        return None, None
      else:
        logging.warn('Geocode results failed for %s.', self.full_address)
        self.errors.append('Geocode results failed for {}'.format(self.full_address))
        return None, None
    except ValueError as e:
      logging.exception('Invalid geocode key: %s. ERROR: %s', self.api_key, e)
      self.errors.append('Invalid geocode key: {}. ERROR: {}'.format(self.api_key, e))
      return None, None
    except IndexError as e:
      logging.exception('Geocoding failure. Check that address is correct. ERROR: %s', e)
      self.errors.append('Geocoding failure. Check that address is correct. '
                         'ERROR: {}'.format(e))
      return None, None

  def _get_time_zone(self, latitude, longitude):
    time_zone = self.gmaps.timezone(location=(latitude, longitude))
    if time_zone['status'] == 'OK':
      time_zone_id = time_zone['timeZoneId']
      return time_zone_id
    else:
      logging.info('Can not retrieve time zone from %s', self.full_address)
      self.errors.append('Can not retrieve time zone from {}'.format(self.full_address))
      return None
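# The geocoding path above uses the public googlemaps client. As a standalone
# illustration of the two calls involved (geocode, then timezone); the API key
# is a placeholder and the address is made up.
import googlemaps

gmaps = googlemaps.Client(key='YOUR_API_KEY')  # placeholder key

results = gmaps.geocode('101 Main St Austin TX')
if results:
  location = results[0]['geometry']['location']
  # timezone() accepts a (lat, lng) tuple and returns a dict with 'status'
  # and, on success, 'timeZoneId' (e.g. 'America/Chicago').
  tz = gmaps.timezone(location=(location['lat'], location['lng']))
  if tz['status'] == 'OK':
    print(tz['timeZoneId'])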
class ParticipantDao(UpdatableDao):
  def __init__(self):
    super(ParticipantDao, self).__init__(Participant)
    self.hpo_dao = HPODao()
    self.organization_dao = OrganizationDao()
    self.site_dao = SiteDao()

  def get_id(self, obj):
    return obj.participantId

  def insert_with_session(self, session, obj):
    obj.hpoId = self._get_hpo_id(obj)
    obj.version = 1
    obj.signUpTime = clock.CLOCK.now().replace(microsecond=0)
    obj.lastModified = obj.signUpTime
    if obj.withdrawalStatus is None:
      obj.withdrawalStatus = WithdrawalStatus.NOT_WITHDRAWN
    if obj.suspensionStatus is None:
      obj.suspensionStatus = SuspensionStatus.NOT_SUSPENDED
    super(ParticipantDao, self).insert_with_session(session, obj)
    history = ParticipantHistory()
    history.fromdict(obj.asdict(), allow_pk=True)
    session.add(history)
    return obj

  def insert(self, obj):
    if obj.participantId:
      assert obj.biobankId
      return super(ParticipantDao, self).insert(obj)
    assert not obj.biobankId
    return self._insert_with_random_id(obj, ('participantId', 'biobankId'))

  def update_ghost_participant(self, session, pid):
    if not pid:
      raise Forbidden('Can not update participant without id')
    participant = self.get_for_update(session, pid)
    if participant is None:
      logging.warn('Tried to mark participant with id: [%r] as ghost but participant '
                   'does not exist. Wrong environment?' % pid)
    else:
      participant.isGhostId = 1
      participant.dateAddedGhost = clock.CLOCK.now()
      self._update_history(session, participant, participant)
      super(ParticipantDao, self)._do_update(session, participant, participant)

  def _check_if_external_id_exists(self, obj):
    with self.session() as session:
      return session.query(Participant).filter_by(externalId=obj.externalId).first()

  def _update_history(self, session, obj, existing_obj):
    # Increment the version and add a new history entry.
    obj.version = existing_obj.version + 1
    history = ParticipantHistory()
    history.fromdict(obj.asdict(), allow_pk=True)
    session.add(history)

  def _validate_update(self, session, obj, existing_obj):
    # Withdrawal and suspension have default values assigned on insert, so they should
    # always have explicit values in updates.
    if obj.withdrawalStatus is None:
      raise BadRequest('missing withdrawal status in update')
    if obj.suspensionStatus is None:
      raise BadRequest('missing suspension status in update')
    if obj.withdrawalReason != WithdrawalReason.UNSET and obj.withdrawalReason is not None \
        and obj.withdrawalReasonJustification is None:
      raise BadRequest('missing withdrawalReasonJustification in update')

    super(ParticipantDao, self)._validate_update(session, obj, existing_obj)

    # Once a participant marks their withdrawal status as NO_USE, it can't be changed back.
    # TODO: Consider the future ability to un-withdraw.
    if (existing_obj.withdrawalStatus == WithdrawalStatus.NO_USE
        and obj.withdrawalStatus != WithdrawalStatus.NO_USE):
      raise Forbidden('Participant %d has withdrawn, cannot unwithdraw' % obj.participantId)

  def get_for_update(self, session, obj_id):
    # Fetch the participant summary at the same time as the participant, as we are
    # potentially updating both.
    return self.get_with_session(session, obj_id, for_update=True,
                                 options=joinedload(Participant.participantSummary))

  def _do_update(self, session, obj, existing_obj):
    """Updates the associated ParticipantSummary, and extracts the HPO ID from the provider
    link or sets pairing at another level (site/organization/awardee) with parent/child
    enforcement."""
    obj.lastModified = clock.CLOCK.now()
    obj.signUpTime = existing_obj.signUpTime
    obj.biobankId = existing_obj.biobankId
    obj.withdrawalTime = existing_obj.withdrawalTime
    obj.suspensionTime = existing_obj.suspensionTime

    need_new_summary = False
    if obj.withdrawalStatus != existing_obj.withdrawalStatus:
      obj.withdrawalTime = (obj.lastModified
                            if obj.withdrawalStatus == WithdrawalStatus.NO_USE else None)
      obj.withdrawalAuthored = (obj.withdrawalAuthored
                                if obj.withdrawalStatus == WithdrawalStatus.NO_USE else None)
      need_new_summary = True
    if obj.suspensionStatus != existing_obj.suspensionStatus:
      obj.suspensionTime = (obj.lastModified
                            if obj.suspensionStatus == SuspensionStatus.NO_CONTACT else None)
      need_new_summary = True

    update_pairing = True
    if obj.siteId is None and obj.organizationId is None and obj.hpoId is None \
        and obj.providerLink == 'null':
      # Prevent unpairing if a PUT is sent with no pairing levels.
      update_pairing = False

    if update_pairing is True:
      has_id = False
      if obj.organizationId or obj.siteId or (obj.hpoId >= 0):
        has_id = True

      provider_link_unchanged = True
      if obj.providerLink is not None:
        if existing_obj.providerLink:
          provider_link_unchanged = (json.loads(obj.providerLink) ==
                                     json.loads(existing_obj.providerLink))
        else:
          provider_link_unchanged = False

      null_provider_link = obj.providerLink == 'null'
      # Site, org, or awardee was sent in the request: get relationships and try to set
      # the provider link.
      if has_id and (provider_link_unchanged or null_provider_link):
        site, organization, awardee = self.get_pairing_level(obj)
        obj.organizationId = organization
        obj.siteId = site
        obj.hpoId = awardee
        if awardee is not None and (obj.hpoId != existing_obj.hpoId):
          # Get the provider link for hpo_id (awardee).
          obj.providerLink = make_primary_provider_link_for_id(awardee)
        need_new_summary = True
      else:  # providerLink has changed
        # If the provider link changes, update the HPO ID on the participant and its summary.
        if obj.hpoId is None:
          obj.hpoId = existing_obj.hpoId
        new_hpo_id = self._get_hpo_id(obj)
        if new_hpo_id != existing_obj.hpoId:
          obj.hpoId = new_hpo_id
          obj.siteId = None
          obj.organizationId = None
          need_new_summary = True

    # No pairing updates sent; keep existing values.
    if update_pairing == False:
      obj.siteId = existing_obj.siteId
      obj.organizationId = existing_obj.organizationId
      obj.hpoId = existing_obj.hpoId
      obj.providerLink = existing_obj.providerLink

    if need_new_summary and existing_obj.participantSummary:
      # Copy the existing participant summary, and mutate the fields that come from
      # participant.
      summary = existing_obj.participantSummary
      summary.hpoId = obj.hpoId
      summary.organizationId = obj.organizationId
      summary.siteId = obj.siteId
      summary.withdrawalStatus = obj.withdrawalStatus
      summary.withdrawalReason = obj.withdrawalReason
      summary.withdrawalReasonJustification = obj.withdrawalReasonJustification
      summary.withdrawalTime = obj.withdrawalTime
      summary.withdrawalAuthored = obj.withdrawalAuthored
      summary.suspensionStatus = obj.suspensionStatus
      summary.suspensionTime = obj.suspensionTime
      summary.lastModified = clock.CLOCK.now()
      make_transient(summary)
      make_transient(obj)
      obj.participantSummary = summary
    self._update_history(session, obj, existing_obj)
    super(ParticipantDao, self)._do_update(session, obj, existing_obj)

  def get_pairing_level(self, obj):
    organization_id = obj.organizationId
    site_id = obj.siteId
    awardee_id = obj.hpoId
    # TODO: Do we want to prevent pairing if the existing site has PM/biospecimens?
    if site_id != UNSET and site_id is not None:
      site = self.site_dao.get(site_id)
      if site is None:
        raise BadRequest('Site with site id %s does not exist.' % site_id)
      organization_id = site.organizationId
      awardee_id = site.hpoId
      return site_id, organization_id, awardee_id
    elif organization_id != UNSET and organization_id is not None:
      organization = self.organization_dao.get(organization_id)
      if organization is None:
        raise BadRequest('Organization with id %s does not exist.' % organization_id)
      awardee_id = organization.hpoId
      return None, organization_id, awardee_id
    return None, None, awardee_id

  @staticmethod
  def create_summary_for_participant(obj):
    return ParticipantSummary(
        participantId=obj.participantId,
        lastModified=obj.lastModified,
        biobankId=obj.biobankId,
        signUpTime=obj.signUpTime,
        hpoId=obj.hpoId,
        organizationId=obj.organizationId,
        siteId=obj.siteId,
        withdrawalStatus=obj.withdrawalStatus,
        withdrawalReason=obj.withdrawalReason,
        withdrawalReasonJustification=obj.withdrawalReasonJustification,
        suspensionStatus=obj.suspensionStatus,
        enrollmentStatus=EnrollmentStatus.INTERESTED,
        ehrStatus=EhrStatus.NOT_PRESENT)

  @staticmethod
  def _get_hpo_id(obj):
    hpo_name = _get_hpo_name_from_participant(obj)
    if hpo_name:
      hpo = HPODao().get_by_name(hpo_name)
      if not hpo:
        raise BadRequest('No HPO found with name %s' % hpo_name)
      return hpo.hpoId
    else:
      return UNSET_HPO_ID

  def validate_participant_reference(self, session, obj):
    """Raises BadRequest if an object has a missing or invalid participantId reference,
    or if the participant has a withdrawal status of NO_USE."""
    if obj.participantId is None:
      raise BadRequest('%s.participantId required.' % obj.__class__.__name__)
    return self.validate_participant_id(session, obj.participantId)

  def validate_participant_id(self, session, participant_id):
    """Raises BadRequest if a participant ID is invalid, or if the participant has a
    withdrawal status of NO_USE."""
    participant = self.get_with_session(session, participant_id)
    if participant is None:
      raise BadRequest('Participant with ID %d is not found.' % participant_id)
    raise_if_withdrawn(participant)
    return participant

  def get_biobank_ids_sample(self, session, percentage, batch_size):
    """Returns the biobank ID and signUpTime for a percentage of participants.
    Used in generating fake biobank samples."""
    return (session.query(Participant.biobankId, Participant.signUpTime)
            .filter(Participant.biobankId % 100 <= percentage * 100)
            .yield_per(batch_size))

  def to_client_json(self, model):
    client_json = {
        'participantId': to_client_participant_id(model.participantId),
        'externalId': model.externalId,
        'hpoId': model.hpoId,
        'awardee': model.hpoId,
        'organization': model.organizationId,
        'siteId': model.siteId,
        'biobankId': to_client_biobank_id(model.biobankId),
        'lastModified': model.lastModified.isoformat(),
        'signUpTime': model.signUpTime.isoformat(),
        'providerLink': json.loads(model.providerLink),
        'withdrawalStatus': model.withdrawalStatus,
        'withdrawalReason': model.withdrawalReason,
        'withdrawalReasonJustification': model.withdrawalReasonJustification,
        'withdrawalTime': model.withdrawalTime,
        'withdrawalAuthored': model.withdrawalAuthored,
        'suspensionStatus': model.suspensionStatus,
        'suspensionTime': model.suspensionTime
    }
    format_json_hpo(client_json, self.hpo_dao, 'hpoId')
    format_json_org(client_json, self.organization_dao, 'organization')
    format_json_site(client_json, self.site_dao, 'site')
    format_json_enum(client_json, 'withdrawalStatus')
    format_json_enum(client_json, 'withdrawalReason')
    format_json_enum(client_json, 'suspensionStatus')
    format_json_date(client_json, 'withdrawalTime')
    format_json_date(client_json, 'suspensionTime')
    client_json['awardee'] = client_json['hpoId']
    if 'siteId' in client_json:
      del client_json['siteId']
    return client_json

  def from_client_json(self, resource_json, id_=None, expected_version=None, client_id=None):
    parse_json_enum(resource_json, 'withdrawalStatus', WithdrawalStatus)
    parse_json_enum(resource_json, 'withdrawalReason', WithdrawalReason)
    parse_json_enum(resource_json, 'suspensionStatus', SuspensionStatus)
    if 'withdrawalTimeStamp' in resource_json \
        and resource_json['withdrawalTimeStamp'] is not None:
      try:
        resource_json['withdrawalTimeStamp'] = datetime.datetime.utcfromtimestamp(
            float(resource_json['withdrawalTimeStamp']) / 1000)
      except (ValueError, TypeError):
        raise ValueError('Could not parse {} as TIMESTAMP'.format(
            resource_json['withdrawalTimeStamp']))
    # biobankId, lastModified, signUpTime are set by the DAO.
    return Participant(
        participantId=id_,
        externalId=resource_json.get('externalId'),
        version=expected_version,
        providerLink=json.dumps(resource_json.get('providerLink')),
        clientId=client_id,
        withdrawalStatus=resource_json.get('withdrawalStatus'),
        withdrawalReason=resource_json.get('withdrawalReason'),
        withdrawalAuthored=resource_json.get('withdrawalTimeStamp'),
        withdrawalReasonJustification=resource_json.get('withdrawalReasonJustification'),
        suspensionStatus=resource_json.get('suspensionStatus'),
        organizationId=get_organization_id_from_external_id(resource_json,
                                                            self.organization_dao),
        hpoId=get_awardee_id_from_name(resource_json, self.hpo_dao),
        siteId=get_site_id_from_google_group(resource_json, self.site_dao))

  def add_missing_hpo_from_site(self, session, participant_id, site_id):
    if site_id is None:
      raise BadRequest('No site ID given for auto-pairing participant.')
    site = SiteDao().get_with_session(session, site_id)
    if site is None:
      raise BadRequest('Invalid siteId reference %r.' % site_id)

    participant = self.get_for_update(session, participant_id)
    if participant is None:
      raise BadRequest('No participant %r for HPO ID update.' % participant_id)
    if participant.siteId == site.siteId:
      return
    participant.hpoId = site.hpoId
    participant.organizationId = site.organizationId
    participant.siteId = site.siteId
    participant.providerLink = make_primary_provider_link_for_id(site.hpoId)
    if participant.participantSummary is None:
      raise RuntimeError('No ParticipantSummary available for P%d.' % participant_id)
    participant.participantSummary.hpoId = site.hpoId
    participant.lastModified = clock.CLOCK.now()
    # Update the version and add a history row.
    self._do_update(session, participant, participant)

  def switch_to_test_account(self, session, participant):
    test_hpo_id = HPODao().get_by_name(TEST_HPO_NAME).hpoId
    if participant is None:
      raise BadRequest('No participant for HPO ID update.')
    if participant.hpoId == test_hpo_id:
      return
    participant.hpoId = test_hpo_id
    participant.organizationId = None
    participant.siteId = None
    # Update the version and add a history row.
    self._do_update(session, participant, participant)

  def handle_integrity_error(self, tried_ids, e, obj):
    if 'external_id' in e.message:
      existing_participant = self._check_if_external_id_exists(obj)
      if existing_participant:
        return existing_participant
    return super(ParticipantDao, self).handle_integrity_error(tried_ids, e, obj)
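# For reference, the primary provider link produced by
# make_primary_provider_link_for_id is a small FHIR-style JSON structure. The
# shape shown here follows the awardee-reference convention used elsewhere in
# this codebase and should be treated as illustrative, not authoritative.
import json

provider_link = [{
    'primary': True,
    'organization': {'reference': 'Organization/PITT'},  # hypothetical awardee
}]
print(json.dumps(provider_link))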
class MetricsEhrApiTestBase(FlaskTestBase):
  def setUp(self, **kwargs):
    super(MetricsEhrApiTestBase, self).setUp(use_mysql=True, **kwargs)
    self.dao = ParticipantDao()
    self.ps_dao = ParticipantSummaryDao()
    self.ehr_receipt_dao = EhrReceiptDao()
    self.ps = ParticipantSummary()
    self.calendar_dao = CalendarDao()
    self.site_dao = SiteDao()
    self.hpo_dao = HPODao()
    self.org_dao = OrganizationDao()

    self.hpo_test = self._make_hpo(hpoId=TEST_HPO_ID, name=TEST_HPO_NAME,
                                   displayName='Test',
                                   organizationType=OrganizationType.UNSET)
    self.hpo_foo = self._make_hpo(hpoId=10, name='FOO', displayName='Foo')
    self.hpo_bar = self._make_hpo(hpoId=11, name='BAR', displayName='Bar')

    self.org_foo_a = self._make_org(organizationId=10, externalId='FOO_A',
                                    displayName='Foo A', hpoId=self.hpo_foo.hpoId)
    self.org_bar_a = self._make_org(organizationId=11, externalId='BAR_A',
                                    displayName='Bar A', hpoId=self.hpo_bar.hpoId)

  def _make_hpo(self, **kwargs):
    hpo = HPO(**kwargs)
    self.hpo_dao.insert(hpo)
    return hpo

  def _make_org(self, **kwargs):
    org = Organization(**kwargs)
    self.org_dao.insert(org)
    return org

  def _make_participant(self, participant, first_name=None, last_name=None, hpo=None,
                        organization=None, unconsented=False, time_int=None, time_study=None,
                        time_mem=None, time_fp=None, time_fp_stored=None, gender_id=None,
                        dob=None, state_id=None):
    """Create a participant in a transient test database.

    Note: copied from ParticipantCountsOverTimeApiTest

    :param participant: Participant object
    :param first_name: First name
    :param last_name: Last name
    :param time_int: Time that participant fulfilled INTERESTED criteria
    :param time_mem: Time that participant fulfilled MEMBER criteria
    :param time_fp: Time that participant fulfilled FULL_PARTICIPANT criteria
    :return: Participant object
    """
    participant.hpoId = hpo.hpoId
    participant.organizationId = organization.organizationId

    if unconsented is True:
      enrollment_status = None
    elif time_mem is None:
      enrollment_status = EnrollmentStatus.INTERESTED
    elif time_fp is None:
      enrollment_status = EnrollmentStatus.MEMBER
    else:
      enrollment_status = EnrollmentStatus.FULL_PARTICIPANT

    with FakeClock(time_int):
      self.dao.insert(participant)

    participant.providerLink = make_primary_provider_link_for_name(hpo.name)
    with FakeClock(time_mem):
      self.dao.update(participant)

    if enrollment_status is None:
      return None

    summary = self.participant_summary(participant)
    if first_name:
      summary.firstName = first_name
    if last_name:
      summary.lastName = last_name
    if gender_id:
      summary.genderIdentityId = gender_id
    if dob:
      summary.dateOfBirth = dob
    else:
      summary.dateOfBirth = datetime.date(1978, 10, 10)
    if state_id:
      summary.stateId = state_id

    summary.enrollmentStatus = enrollment_status
    summary.enrollmentStatusMemberTime = time_mem
    summary.enrollmentStatusCoreOrderedSampleTime = time_fp
    summary.enrollmentStatusCoreStoredSampleTime = time_fp_stored
    summary.hpoId = hpo.hpoId
    summary.organizationId = organization.organizationId

    if time_study is not None:
      with FakeClock(time_mem):
        summary.consentForStudyEnrollment = QuestionnaireStatus.SUBMITTED
        summary.consentForStudyEnrollmentTime = time_study
    if time_mem is not None:
      with FakeClock(time_mem):
        summary.consentForElectronicHealthRecords = QuestionnaireStatus.SUBMITTED
        summary.consentForElectronicHealthRecordsTime = time_mem
    if time_fp is not None:
      with FakeClock(time_fp):
        if not summary.consentForElectronicHealthRecords:
          summary.consentForElectronicHealthRecords = QuestionnaireStatus.SUBMITTED
          summary.consentForElectronicHealthRecordsTime = time_fp
        summary.questionnaireOnTheBasicsTime = time_fp
        summary.questionnaireOnLifestyleTime = time_fp
        summary.questionnaireOnOverallHealthTime = time_fp
        summary.physicalMeasurementsFinalizedTime = time_fp
        summary.physicalMeasurementsTime = time_fp
        summary.sampleOrderStatus1ED04Time = time_fp
        summary.sampleOrderStatus1SALTime = time_fp
        summary.sampleStatus1ED04Time = time_fp
        summary.sampleStatus1SALTime = time_fp
        summary.biospecimenOrderTime = time_fp
        summary.numCompletedBaselinePPIModules = REQUIRED_PPI_MODULE_COUNT
    self.ps_dao.insert(summary)
    return summary

  def _update_ehr(self, participant_summary, update_time):
    receipt = EhrReceipt(organizationId=participant_summary.organizationId,
                         receiptTime=update_time)
    self.ehr_receipt_dao.insert(receipt)
    self.ps_dao.update_ehr_status(participant_summary, update_time)
    self.ps_dao.update(participant_summary)
class ParticipantSummaryDao(UpdatableDao):
  def __init__(self):
    super(ParticipantSummaryDao, self).__init__(ParticipantSummary,
                                                order_by_ending=_ORDER_BY_ENDING)
    self.hpo_dao = HPODao()
    self.code_dao = CodeDao()
    self.site_dao = SiteDao()
    self.organization_dao = OrganizationDao()

  def get_id(self, obj):
    return obj.participantId

  def _validate_update(self, session, obj, existing_obj):  # pylint: disable=unused-argument
    """Participant summaries don't have a version value; drop it from validation logic."""
    if not existing_obj:
      raise NotFound('%s with id %s does not exist' %
                     (self.model_type.__name__, obj.participantId))

  def _has_withdrawn_filter(self, query):
    for field_filter in query.field_filters:
      if (field_filter.field_name == 'withdrawalStatus'
          and field_filter.value == WithdrawalStatus.NO_USE):
        return True
      if field_filter.field_name == 'withdrawalTime' and field_filter.value is not None:
        return True
    return False

  def _get_non_withdrawn_filter_field(self, query):
    """Returns the first field referenced in query filters which isn't in
    WITHDRAWN_PARTICIPANT_FIELDS."""
    for field_filter in query.field_filters:
      if not field_filter.field_name in WITHDRAWN_PARTICIPANT_FIELDS:
        return field_filter.field_name
    return None

  def _initialize_query(self, session, query_def):
    non_withdrawn_field = self._get_non_withdrawn_filter_field(query_def)
    if self._has_withdrawn_filter(query_def):
      if non_withdrawn_field:
        raise BadRequest("Can't query on %s for withdrawn participants" % non_withdrawn_field)
      # When querying for withdrawn participants, ensure that the only fields being
      # filtered on or ordered by are in WITHDRAWN_PARTICIPANT_FIELDS.
      return super(ParticipantSummaryDao, self)._initialize_query(session, query_def)
    else:
      query = super(ParticipantSummaryDao, self)._initialize_query(session, query_def)
      if non_withdrawn_field:
        # When querying on fields that aren't available for withdrawn participants,
        # ensure that we only return participants who have not withdrawn or withdrew
        # in the past 48 hours.
        withdrawn_visible_start = clock.CLOCK.now() - WITHDRAWN_PARTICIPANT_VISIBILITY_TIME
        return query.filter(
            or_(ParticipantSummary.withdrawalStatus != WithdrawalStatus.NO_USE,
                ParticipantSummary.withdrawalTime >= withdrawn_visible_start))
      else:
        # When querying on fields that are available for withdrawn participants, return
        # everybody; withdrawn participants will have all but WITHDRAWN_PARTICIPANT_FIELDS
        # cleared out 48 hours after withdrawing.
        return query

  def _get_order_by_ending(self, query):
    if self._has_withdrawn_filter(query):
      return _WITHDRAWN_ORDER_BY_ENDING
    return self.order_by_ending

  def _add_order_by(self, query, order_by, field_names, fields):
    if order_by.field_name in _CODE_FILTER_FIELDS:
      return super(ParticipantSummaryDao, self)._add_order_by(
          query, OrderBy(order_by.field_name + 'Id', order_by.ascending),
          field_names, fields)
    return super(ParticipantSummaryDao, self)._add_order_by(query, order_by,
                                                            field_names, fields)

  def make_query_filter(self, field_name, value):
    """Handle HPO and code values when parsing filter values."""
    if field_name == 'biobankId':
      value = from_client_biobank_id(value, log_exception=True)
    if field_name == 'hpoId' or field_name == 'awardee':
      hpo = self.hpo_dao.get_by_name(value)
      if not hpo:
        raise BadRequest('No HPO found with name %s' % value)
      if field_name == 'awardee':
        field_name = 'hpoId'
      return super(ParticipantSummaryDao, self).make_query_filter(field_name, hpo.hpoId)
    if field_name == 'organization':
      organization = self.organization_dao.get_by_external_id(value)
      if not organization:
        raise BadRequest('No organization found with name %s' % value)
      return super(ParticipantSummaryDao, self).make_query_filter(
          field_name + 'Id', organization.organizationId)
    if field_name in _SITE_FIELDS:
      if value == UNSET:
        return super(ParticipantSummaryDao, self).make_query_filter(field_name + 'Id', None)
      site = self.site_dao.get_by_google_group(value)
      if not site:
        raise BadRequest('No site found with google group %s' % value)
      return super(ParticipantSummaryDao, self).make_query_filter(field_name + 'Id',
                                                                  site.siteId)
    if field_name in _CODE_FILTER_FIELDS:
      if value == UNSET:
        return super(ParticipantSummaryDao, self).make_query_filter(field_name + 'Id', None)
      # Note: we do not at present support querying for UNMAPPED code values.
      code = self.code_dao.get_code(PPI_SYSTEM, value)
      if not code:
        raise BadRequest('No code found: %s' % value)
      return super(ParticipantSummaryDao, self).make_query_filter(field_name + 'Id',
                                                                  code.codeId)
    return super(ParticipantSummaryDao, self).make_query_filter(field_name, value)

  def update_from_biobank_stored_samples(self, participant_id=None):
    """Rewrites sample-related summary data. Call this after updating BiobankStoredSamples.
    If participant_id is provided, only that participant will have their summary updated."""
    now = clock.CLOCK.now()
    sample_sql, sample_params = _get_sample_sql_and_params(now)
    baseline_tests_sql, baseline_tests_params = _get_baseline_sql_and_params()
    dna_tests_sql, dna_tests_params = _get_dna_isolates_sql_and_params()
    sample_status_time_sql = _get_sample_status_time_sql_and_params()
    sample_status_time_params = {}

    counts_sql = """
    UPDATE participant_summary SET
      num_baseline_samples_arrived = {baseline_tests_sql},
      samples_to_isolate_dna = {dna_tests_sql},
      last_modified = :now
    WHERE
      num_baseline_samples_arrived != {baseline_tests_sql} OR
      samples_to_isolate_dna != {dna_tests_sql}
    """.format(baseline_tests_sql=baseline_tests_sql, dna_tests_sql=dna_tests_sql)
    counts_params = {'now': now}
    counts_params.update(baseline_tests_params)
    counts_params.update(dna_tests_params)

    enrollment_status_sql = _ENROLLMENT_STATUS_SQL
    enrollment_status_params = {
        'submitted': int(QuestionnaireStatus.SUBMITTED),
        'unset': int(QuestionnaireStatus.UNSET),
        'num_baseline_ppi_modules': self._get_num_baseline_ppi_modules(),
        'completed': int(PhysicalMeasurementsStatus.COMPLETED),
        'received': int(SampleStatus.RECEIVED),
        'full_participant': int(EnrollmentStatus.FULL_PARTICIPANT),
        'member': int(EnrollmentStatus.MEMBER),
        'interested': int(EnrollmentStatus.INTERESTED),
        'now': now
    }

    # If participant_id is provided, add the participant ID filter to all update statements.
    if participant_id:
      sample_sql += ' AND participant_id = :participant_id'
      sample_params['participant_id'] = participant_id
      counts_sql += ' AND participant_id = :participant_id'
      counts_params['participant_id'] = participant_id
      enrollment_status_sql += ' AND participant_id = :participant_id'
      enrollment_status_params['participant_id'] = participant_id
      sample_status_time_sql += ' AND a.participant_id = :participant_id'
      sample_status_time_params['participant_id'] = participant_id

    sample_sql = replace_null_safe_equals(sample_sql)
    counts_sql = replace_null_safe_equals(counts_sql)
    with self.session() as session:
      session.execute(sample_sql, sample_params)
      session.execute(counts_sql, counts_params)
      session.execute(enrollment_status_sql, enrollment_status_params)
      # TODO: Change this to the optimized sql in _update_dv_stored_samples()
      session.execute(sample_status_time_sql, sample_status_time_params)

  def _get_num_baseline_ppi_modules(self):
    return len(config.getSettingList(config.BASELINE_PPI_QUESTIONNAIRE_FIELDS))

  def update_enrollment_status(self, summary):
    """Updates the enrollment status field on the provided participant summary to the
    correct value based on the other fields on it. Called after a questionnaire response
    or physical measurements are submitted."""
    consent = ((summary.consentForStudyEnrollment == QuestionnaireStatus.SUBMITTED
                and summary.consentForElectronicHealthRecords == QuestionnaireStatus.SUBMITTED)
               or (summary.consentForStudyEnrollment == QuestionnaireStatus.SUBMITTED
                   and summary.consentForElectronicHealthRecords is None
                   and summary.consentForDvElectronicHealthRecordsSharing ==
                   QuestionnaireStatus.SUBMITTED))
    enrollment_status = self.calculate_enrollment_status(
        consent, summary.numCompletedBaselinePPIModules,
        summary.physicalMeasurementsStatus, summary.samplesToIsolateDNA)
    summary.enrollmentStatusMemberTime = self.calculate_member_time(consent, summary)
    summary.enrollmentStatusCoreOrderedSampleTime = self.calculate_core_ordered_sample_time(
        consent, summary)
    summary.enrollmentStatusCoreStoredSampleTime = self.calculate_core_stored_sample_time(
        consent, summary)
    # Update the last modified date if the status changes.
    if summary.enrollmentStatus != enrollment_status:
      summary.lastModified = clock.CLOCK.now()
    summary.enrollmentStatus = enrollment_status

  def calculate_enrollment_status(self, consent, num_completed_baseline_ppi_modules,
                                  physical_measurements_status, samples_to_isolate_dna):
    if consent:
      if (num_completed_baseline_ppi_modules == self._get_num_baseline_ppi_modules()
          and physical_measurements_status == PhysicalMeasurementsStatus.COMPLETED
          and samples_to_isolate_dna == SampleStatus.RECEIVED):
        return EnrollmentStatus.FULL_PARTICIPANT
      return EnrollmentStatus.MEMBER
    return EnrollmentStatus.INTERESTED

  def calculate_member_time(self, consent, participant_summary):
    if consent and participant_summary.enrollmentStatusMemberTime is not None:
      return participant_summary.enrollmentStatusMemberTime
    elif consent:
      if participant_summary.consentForElectronicHealthRecords is None and \
          participant_summary.consentForDvElectronicHealthRecordsSharing == \
          QuestionnaireStatus.SUBMITTED:
        return participant_summary.consentForDvElectronicHealthRecordsSharingTime
      return participant_summary.consentForElectronicHealthRecordsTime
    else:
      return None

  def calculate_core_stored_sample_time(self, consent, participant_summary):
    if consent and \
        participant_summary.numCompletedBaselinePPIModules == \
        self._get_num_baseline_ppi_modules() and \
        participant_summary.physicalMeasurementsStatus == \
        PhysicalMeasurementsStatus.COMPLETED and \
        participant_summary.samplesToIsolateDNA == SampleStatus.RECEIVED:
      max_core_sample_time = self.calculate_max_core_sample_time(
          participant_summary, field_name_prefix='sampleStatus')
      if max_core_sample_time and participant_summary.enrollmentStatusCoreStoredSampleTime:
        return participant_summary.enrollmentStatusCoreStoredSampleTime
      else:
        return max_core_sample_time
    else:
      return None

  def calculate_core_ordered_sample_time(self, consent, participant_summary):
    if consent and \
        participant_summary.numCompletedBaselinePPIModules == \
        self._get_num_baseline_ppi_modules() and \
        participant_summary.physicalMeasurementsStatus == \
        PhysicalMeasurementsStatus.COMPLETED:
      max_core_sample_time = self.calculate_max_core_sample_time(
          participant_summary, field_name_prefix='sampleOrderStatus')
      if max_core_sample_time and participant_summary.enrollmentStatusCoreOrderedSampleTime:
        return participant_summary.enrollmentStatusCoreOrderedSampleTime
      else:
        return max_core_sample_time
    else:
      return None

  def calculate_max_core_sample_time(self, participant_summary,
                                     field_name_prefix='sampleStatus'):
    keys = [field_name_prefix + '%sTime' % test for test in
config.getSettingList(config.DNA_SAMPLE_TEST_CODES) ] sample_time_list = \ [v for k, v in participant_summary if k in keys and v is not None] sample_time = min(sample_time_list) if sample_time_list else None if sample_time is not None: return max([ sample_time, participant_summary.enrollmentStatusMemberTime, participant_summary.questionnaireOnTheBasicsTime, participant_summary.questionnaireOnLifestyleTime, participant_summary.questionnaireOnOverallHealthTime, participant_summary.physicalMeasurementsFinalizedTime ]) else: return None def calculate_distinct_visits(self, pid, finalized_time, id_, amendment=False): """ Participants may get PM or biobank samples on same day. This should be considered as a single visit in terms of program payment to participant. return Boolean: true if there has not been an order on same date.""" from dao.biobank_order_dao import BiobankOrderDao from dao.physical_measurements_dao import PhysicalMeasurementsDao day_has_order, day_has_measurement = False, False existing_orders = BiobankOrderDao().get_biobank_orders_for_participant( pid) ordered_samples = BiobankOrderDao( ).get_ordered_samples_for_participant(pid) existing_measurements = PhysicalMeasurementsDao( ).get_measuremnets_for_participant(pid) order_id_to_finalized_date = { sample.biobankOrderId: sample.finalized.date() for sample in ordered_samples if sample.finalized } if existing_orders and finalized_time: for order in existing_orders: order_finalized_date = order_id_to_finalized_date.get( order.biobankOrderId) if order_finalized_date == finalized_time.date() and order.biobankOrderId != id_ and \ order.orderStatus != BiobankOrderStatus.CANCELLED: day_has_order = True elif order.biobankOrderId == id_ and order.orderStatus == BiobankOrderStatus.AMENDED: day_has_order = True elif not finalized_time and amendment: day_has_order = True if existing_measurements and finalized_time: for measurement in existing_measurements: if not measurement.finalized: continue if measurement.finalized.date() == finalized_time.date() and measurement.physicalMeasurementsId\ != id_: day_has_measurement = True is_distinct_visit = not (day_has_order or day_has_measurement) return is_distinct_visit def to_client_json(self, model): result = model.asdict() # Participants that withdrew more than 48 hours ago should have fields other than # WITHDRAWN_PARTICIPANT_FIELDS cleared. 
if (model.withdrawalStatus == WithdrawalStatus.NO_USE and (model.withdrawalTime is None or model.withdrawalTime < clock.CLOCK.now() - WITHDRAWN_PARTICIPANT_VISIBILITY_TIME)): result = {k: result.get(k) for k in WITHDRAWN_PARTICIPANT_FIELDS} elif model.withdrawalStatus != WithdrawalStatus.NO_USE and \ model.suspensionStatus == SuspensionStatus.NO_CONTACT: for i in SUSPENDED_PARTICIPANT_FIELDS: result[i] = UNSET result['participantId'] = to_client_participant_id(model.participantId) biobank_id = result.get('biobankId') if biobank_id: result['biobankId'] = to_client_biobank_id(biobank_id) date_of_birth = result.get('dateOfBirth') if date_of_birth: result['ageRange'] = get_bucketed_age(date_of_birth, clock.CLOCK.now()) else: result['ageRange'] = UNSET if result.get('primaryLanguage') is None: result['primaryLanguage'] = UNSET if 'organizationId' in result: result['organization'] = result['organizationId'] del result['organizationId'] format_json_org(result, self.organization_dao, 'organization') format_json_hpo(result, self.hpo_dao, 'hpoId') result['awardee'] = result['hpoId'] _initialize_field_type_sets() for fieldname in _DATE_FIELDS: format_json_date(result, fieldname) for fieldname in _CODE_FIELDS: format_json_code(result, self.code_dao, fieldname) for fieldname in _ENUM_FIELDS: format_json_enum(result, fieldname) for fieldname in _SITE_FIELDS: format_json_site(result, self.site_dao, fieldname) if (model.withdrawalStatus == WithdrawalStatus.NO_USE or model.suspensionStatus == SuspensionStatus.NO_CONTACT): result['recontactMethod'] = 'NO_CONTACT' # Strip None values. result = {k: v for k, v in result.iteritems() if v is not None} return result def _decode_token(self, query_def, fields): """ If token exists in participant_summary api, decode and use lastModified to add a buffer of 60 seconds. This ensures when a _sync link is used no one is missed. This will return at a minimum, the last participant and any more that have been modified in the previous 60 seconds. Duplicate participants returned should be handled on the client side.""" decoded_vals = super(ParticipantSummaryDao, self)._decode_token(query_def, fields) if query_def.order_by and (query_def.order_by.field_name == 'lastModified' and query_def.always_return_token is True and query_def.backfill_sync is True): decoded_vals[0] = decoded_vals[0] - datetime.timedelta( seconds=config.LAST_MODIFIED_BUFFER_SECONDS) return decoded_vals @staticmethod def update_ehr_status(summary, update_time): summary.ehrStatus = EhrStatus.PRESENT if not summary.ehrReceiptTime: summary.ehrReceiptTime = update_time summary.ehrUpdateTime = update_time return summary
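# A minimal standalone sketch of the enrollment ladder implemented by
# calculate_enrollment_status above, with plain strings standing in for the
# EnrollmentStatus / PhysicalMeasurementsStatus / SampleStatus enums so it
# runs without the model imports. The threshold of 3 baseline PPI modules is
# an illustrative assumption; the real count comes from
# config.getSettingList(config.BASELINE_PPI_QUESTIONNAIRE_FIELDS).
def _sketch_enrollment_status(consented, num_baseline_modules,
                              measurements_done, dna_samples_received,
                              num_required_modules=3):
    if not consented:
        # No qualifying consent pair yet.
        return 'INTERESTED'
    if (num_baseline_modules == num_required_modules and measurements_done
            and dna_samples_received):
        # Consent + all baseline modules + completed physical measurements
        # + received DNA samples promotes to the top tier.
        return 'FULL_PARTICIPANT'
    return 'MEMBER'

assert _sketch_enrollment_status(False, 0, False, False) == 'INTERESTED'
assert _sketch_enrollment_status(True, 2, True, True) == 'MEMBER'
assert _sketch_enrollment_status(True, 3, True, True) == 'FULL_PARTICIPANT'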
class SiteImporter(CsvImporter): def __init__(self): args = parser.parse_args() self.organization_dao = OrganizationDao() self.stub_geocoding = args.stub_geocoding self.ACTIVE = SiteStatus.ACTIVE self.status_exception_list = ['hpo-site-walgreensphoenix'] self.instance = args.instance self.creds_file = args.creds_file self.new_sites_list = [] self.project = None if args.project: self.project = args.project if self.project in ENV_LIST: self.environment = ' ' + self.project.split('-')[-1].upper() else: self.environment = ' ' + ENV_TEST.split('-')[-1].upper() super(SiteImporter, self).__init__('site', SiteDao(), 'siteId', 'googleGroup', [ SITE_ORGANIZATION_ID_COLUMN, SITE_SITE_ID_COLUMN, SITE_SITE_COLUMN, SITE_STATUS_COLUMN + self.environment, ENROLLING_STATUS_COLUMN + self.environment, DIGITAL_SCHEDULING_STATUS_COLUMN + self.environment ]) def run(self, filename, dry_run): super(SiteImporter, self).run(filename, dry_run) insert_participants = False if not dry_run: if self.environment: current_env = ENV_STABLE if self.environment.strip() == 'STABLE' and len( self.new_sites_list) > 0: from googleapiclient.discovery import build logging.info( 'Starting reboot of app instances to insert new test participants' ) service = build('appengine', 'v1', cache_discovery=False) request = service.apps().services().versions().list( appsId=current_env, servicesId='default') versions = request.execute() for version in versions['versions']: if version['servingStatus'] == 'SERVING': _id = version['id'] request = service.apps().services().versions( ).instances().list(servicesId='default', versionsId=_id, appsId=current_env) instances = request.execute() try: for instance in instances['instances']: sha = instance['name'].split('/')[-1] delete_instance = service.apps().services( ).versions().instances().delete( appsId=current_env, servicesId='default', versionsId=_id, instancesId=sha) response = delete_instance.execute() if response['done']: insert_participants = True logging.info( 'Reboot of instance: %s in stable complete.', instance['name']) else: logging.warn( 'Not able to reboot instance on server, Error: %s', response) except KeyError: logging.warn('No running instance for %s', version['name']) if insert_participants: logging.info('Starting import of test participants.') self._insert_new_participants(self.new_sites_list) def delete_sql_statement(self, session, str_list): sql = """ DELETE FROM site WHERE site_id IN ({str_list}) AND NOT EXISTS( SELECT * FROM participant WHERE site_id = site.site_id) AND NOT EXISTS( SELECT * FROM participant_summary WHERE site_id = site.site_id OR physical_measurements_finalized_site_id = site.site_id OR physical_measurements_created_site_id = site.site_id OR biospecimen_source_site_id = site.site_id OR biospecimen_collected_site_id = site.site_id OR biospecimen_processed_site_id = site.site_id OR biospecimen_finalized_site_id = site.site_id ) AND NOT EXISTS( SELECT * FROM participant_history WHERE site_id = site.site_id) AND NOT EXISTS( SELECT * FROM physical_measurements WHERE created_site_id = site.site_id OR finalized_site_id = site.site_id) AND NOT EXISTS( SELECT * FROM biobank_order WHERE finalized_site_id = site.site_id OR source_site_id = site.site_id OR collected_site_id = site.site_id OR processed_site_id = site.site_id ) """.format(str_list=str_list) session.execute(sql) def _cleanup_old_entities(self, session, row_list, dry_run): log_prefix = '(dry run) ' if dry_run else '' self.site_dao = SiteDao() existing_sites = set(site.googleGroup for site in 
self.site_dao.get_all()) site_group_list_from_sheet = [ str(row[SITE_SITE_ID_COLUMN].lower()) for row in row_list ] sites_to_remove = existing_sites - set(site_group_list_from_sheet) if sites_to_remove: site_id_list = [] for site in sites_to_remove: logging.info( log_prefix + 'Deleting old Site no longer in Google sheet: %s', site) old_site = self.site_dao.get_by_google_group(site) if old_site and old_site.isObsolete != ObsoleteStatus.OBSOLETE: site_id_list.append(old_site.siteId) self.deletion_count += 1 elif old_site and old_site.isObsolete == ObsoleteStatus.OBSOLETE: logging.info( 'Not attempting to delete site [%s] with existing obsolete status', old_site.googleGroup) if site_id_list and not dry_run: str_list = ','.join([str(i) for i in site_id_list]) logging.info(log_prefix + 'Marking old site as obsolete : %s', old_site) sql = """ UPDATE site SET is_obsolete = 1 WHERE site_id in ({site_id_list})""".format(site_id_list=str_list) session.execute(sql) self.site_dao._invalidate_cache() # Try to delete old sites. self.delete_sql_statement(session, str_list) def _insert_new_participants(self, entity): num_participants = 0 participants = { 'zip_code': '20001', 'date_of_birth': '1933-3-3', 'gender_identity': 'GenderIdentity_Woman', 'withdrawalStatus': 'NOT_WITHDRAWN', 'suspensionStatus': 'NOT_SUSPENDED' } client = Client('rdr/v1', False, self.creds_file, self.instance) client_log.setLevel(logging.WARN) questionnaire_to_questions, consent_questionnaire_id_and_version = _setup_questionnaires( client) consent_questions = questionnaire_to_questions[ consent_questionnaire_id_and_version] for site in entity: for participant, v in enumerate(range(1, 21), 1): num_participants += 1 participant = participants participant.update( {'last_name': site.googleGroup.split('-')[-1]}) participant.update({'first_name': 'Participant {}'.format(v)}) participant.update({'site': site.googleGroup}) import_participant(participant, client, consent_questionnaire_id_and_version, questionnaire_to_questions, consent_questions, num_participants) logging.info('%d participants imported.' 
% num_participants) def _entity_from_row(self, row): google_group = row[SITE_SITE_ID_COLUMN].lower() organization = self.organization_dao.get_by_external_id( row[SITE_ORGANIZATION_ID_COLUMN].upper()) if organization is None: logging.warn('Invalid organization ID %s importing site %s', row[SITE_ORGANIZATION_ID_COLUMN].upper(), google_group) self.errors.append( 'Invalid organization ID {} importing site {}'.format( row[SITE_ORGANIZATION_ID_COLUMN].upper(), google_group)) return None launch_date = None launch_date_str = row.get(SITE_LAUNCH_DATE_COLUMN) if launch_date_str: try: launch_date = parse(launch_date_str).date() except ValueError: logging.warn('Invalid launch date %s for site %s', launch_date_str, google_group) self.errors.append('Invalid launch date {} for site {}'.format( launch_date_str, google_group)) return None name = row[SITE_SITE_COLUMN] mayolink_client_number = None mayolink_client_number_str = row.get( SITE_MAYOLINK_CLIENT_NUMBER_COLUMN) if mayolink_client_number_str: try: mayolink_client_number = int(mayolink_client_number_str) except ValueError: logging.warn('Invalid Mayolink Client # %s for site %s', mayolink_client_number_str, google_group) self.errors.append( 'Invalid Mayolink Client # {} for site {}'.format( mayolink_client_number_str, google_group)) return None notes = row.get(SITE_NOTES_COLUMN) notes_es = row.get(SITE_NOTES_COLUMN_ES) try: site_status = SiteStatus(row[SITE_STATUS_COLUMN + self.environment].upper()) except TypeError: logging.warn('Invalid site status %s for site %s', row[SITE_STATUS_COLUMN + self.environment], google_group) self.errors.append('Invalid site status {} for site {}'.format( row[SITE_STATUS_COLUMN + self.environment], google_group)) return None try: enrolling_status = EnrollingStatus(row[ENROLLING_STATUS_COLUMN + self.environment].upper()) except TypeError: logging.warn('Invalid enrollment site status %s for site %s', row[ENROLLING_STATUS_COLUMN + self.environment], google_group) self.errors.append( 'Invalid enrollment site status {} for site {}'.format( row[ENROLLING_STATUS_COLUMN + self.environment], google_group)) return None directions = row.get(SITE_DIRECTIONS_COLUMN) physical_location_name = row.get(SITE_PHYSICAL_LOCATION_NAME_COLUMN) address_1 = row.get(SITE_ADDRESS_1_COLUMN) address_2 = row.get(SITE_ADDRESS_2_COLUMN) city = row.get(SITE_CITY_COLUMN) state = row.get(SITE_STATE_COLUMN) zip_code = row.get(SITE_ZIP_COLUMN) phone = row.get(SITE_PHONE_COLUMN) admin_email_addresses = row.get(SITE_ADMIN_EMAIL_ADDRESSES_COLUMN) link = row.get(SITE_LINK_COLUMN) try: digital_scheduling_status = DigitalSchedulingStatus( row[DIGITAL_SCHEDULING_STATUS_COLUMN + self.environment].upper()) except TypeError: logging.warn('Invalid digital scheduling status %s for site %s', row[DIGITAL_SCHEDULING_STATUS_COLUMN + self.environment], google_group) self.errors.append( 'Invalid digital scheduling status {} for site {}'.format( row[DIGITAL_SCHEDULING_STATUS_COLUMN + self.environment], google_group)) return None schedule_instructions = row.get(SCHEDULING_INSTRUCTIONS) schedule_instructions_es = row.get(SCHEDULING_INSTRUCTIONS_ES) return Site(siteName=name, googleGroup=google_group, mayolinkClientNumber=mayolink_client_number, organizationId=organization.organizationId, hpoId=organization.hpoId, siteStatus=site_status, enrollingStatus=enrolling_status, digitalSchedulingStatus=digital_scheduling_status, scheduleInstructions=schedule_instructions, scheduleInstructions_ES=schedule_instructions_es, launchDate=launch_date, notes=notes, notes_ES=notes_es, directions=directions, physicalLocationName=physical_location_name, address1=address_1, address2=address_2, city=city, state=state, zipCode=zip_code, phoneNumber=phone, adminEmails=admin_email_addresses, link=link) def _update_entity(self, entity, existing_entity, session, dry_run): self._populate_lat_lng_and_time_zone(entity, existing_entity)
if entity.siteStatus == self.ACTIVE and (entity.latitude is None or entity.longitude is None): self.errors.append( 'Skipped active site without geocoding: {}'.format( entity.googleGroup)) return None, True return super(SiteImporter, self)._update_entity(entity, existing_entity, session, dry_run) def _insert_entity(self, entity, existing_map, session, dry_run): self._populate_lat_lng_and_time_zone(entity, None) if entity.siteStatus == self.ACTIVE and (entity.latitude is None or entity.longitude is None): self.errors.append( 'Skipped active site without geocoding: {}'.format( entity.googleGroup)) return False self.new_sites_list.append(entity) super(SiteImporter, self)._insert_entity(entity, existing_map, session, dry_run) def _populate_lat_lng_and_time_zone(self, site, existing_site): if site.address1 and site.city and site.state: if existing_site: if (existing_site.address1 == site.address1 and existing_site.city == site.city and existing_site.state == site.state and existing_site.latitude is not None and existing_site.longitude is not None and existing_site.timeZoneId is not None): # Address didn't change, use the existing lat/lng and time zone. site.latitude = existing_site.latitude site.longitude = existing_site.longitude site.timeZoneId = existing_site.timeZoneId return if self.stub_geocoding: # Set dummy latitude and longitude when importing sites locally / on a CircleCI box. site.latitude = 32.176 site.longitude = -110.93 site.timeZoneId = 'America/Phoenix' else: latitude, longitude = self._get_lat_long_for_site( site.address1, site.city, site.state) site.latitude = latitude site.longitude = longitude if latitude and longitude: site.timeZoneId = self._get_time_zone(latitude, longitude) else: if site.googleGroup not in self.status_exception_list: if site.siteStatus == self.ACTIVE: self.errors.append( 'Active site must have valid address. Site: {}, Group: {}' .format(site.siteName, site.googleGroup)) def _get_lat_long_for_site(self, address_1, city, state): self.full_address = address_1 + ' ' + city + ' ' + state try: self.api_key = os.environ.get('API_KEY') self.gmaps = googlemaps.Client(key=self.api_key) try: geocode_result = self.gmaps.geocode(address_1 + ' ' + city + ' ' + state)[0] except IndexError: self.errors.append( 'Bad address for {}, could not geocode.'.format( self.full_address)) return None, None if geocode_result: geometry = geocode_result.get('geometry') if geometry: location = geometry.get('location') if location: latitude = location.get('lat') longitude = location.get('lng') return latitude, longitude else: logging.warn('Cannot find lat/long for %s', self.full_address) self.errors.append('Cannot find lat/long for {}'.format( self.full_address)) return None, None else: logging.warn('Geocode results failed for %s.', self.full_address) self.errors.append('Geocode results failed for {}'.format( self.full_address)) return None, None except ValueError as e: logging.exception('Invalid geocode key: %s. ERROR: %s', self.api_key, e) self.errors.append('Invalid geocode key: {}. ERROR: {}'.format( self.api_key, e)) return None, None except IndexError as e: logging.exception( 'Geocoding failure. Check that address is correct. ERROR: %s', e) self.errors.append( 'Geocoding failure. Check that address is correct. ERROR: {}'.format(e)) return None, None def _get_time_zone(self, latitude, longitude): time_zone = self.gmaps.timezone(location=(latitude, longitude)) if time_zone['status'] == 'OK': time_zone_id = time_zone['timeZoneId'] return time_zone_id else: logging.info('Cannot retrieve time zone from %s', self.full_address) self.errors.append('Cannot retrieve time zone from {}'.format( self.full_address)) return None
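# A dependency-free sketch of the reuse-or-regeocode decision made in
# _populate_lat_lng_and_time_zone above: when the address is unchanged and the
# existing row already carries coordinates and a time zone, they are copied
# forward instead of calling the Google Maps APIs again. SiteStub is a
# stand-in namedtuple, not the real Site model.
import collections

SiteStub = collections.namedtuple(
    'SiteStub',
    ['address1', 'city', 'state', 'latitude', 'longitude', 'timeZoneId'])

def _should_reuse_geocoding(new_site, existing_site):
    # Reuse only if a previous row exists, the address matches exactly, and
    # all three derived fields were populated successfully before.
    return (existing_site is not None
            and existing_site.address1 == new_site.address1
            and existing_site.city == new_site.city
            and existing_site.state == new_site.state
            and existing_site.latitude is not None
            and existing_site.longitude is not None
            and existing_site.timeZoneId is not None)

_old = SiteStub('address1', 'Austin', 'TX', 30.28, -97.73, 'America/Chicago')
assert _should_reuse_geocoding(
    SiteStub('address1', 'Austin', 'TX', None, None, None), _old)
assert not _should_reuse_geocoding(
    SiteStub('address2', 'Austin', 'TX', None, None, None), _old)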
class OrganizationDaoTest(SqlTestBase): def setUp(self): super(OrganizationDaoTest, self).setUp() self.organization_dao = OrganizationDao() self.participant_dao = ParticipantDao() self.ps_dao = ParticipantSummaryDao() self.ps_history = ParticipantHistoryDao() def test_insert(self): organization = Organization(externalId='myorg', displayName='myorg_display', hpoId=PITT_HPO_ID, isObsolete=1) created_organization = self.organization_dao.insert(organization) new_organization = self.organization_dao.get(created_organization.organizationId) organization.organizationId = created_organization.organizationId organization.isObsolete = new_organization.isObsolete self.assertEquals(organization.asdict(), new_organization.asdict()) def test_participant_pairing_updates_onchange(self): provider_link = '[{"organization": {"reference": "Organization/AZ_TUCSON"}, "primary": true}]' TIME = datetime.datetime(2018, 1, 1) TIME2 = datetime.datetime(2018, 1, 2) insert_org = self.organization_dao.insert( Organization(externalId='tardis', displayName='bluebox', hpoId=PITT_HPO_ID)) with FakeClock(TIME): self.participant_dao.insert(Participant(participantId=1, biobankId=2)) participant = self.participant_dao.get(1) participant.organizationId = insert_org.organizationId self.participant_dao.update(participant) self.assertEquals(participant.hpoId, insert_org.hpoId) participant = self.participant_dao.get(1) p_summary = self.ps_dao.insert(self.participant_summary(participant)) with FakeClock(TIME2): insert_org.hpoId = AZ_HPO_ID self.organization_dao.update(insert_org) new_org = self.organization_dao.get_by_external_id('tardis') ps = self.ps_dao.get(p_summary.participantId) ph = self.ps_history.get([participant.participantId, 2]) participant = self.participant_dao.get(1) self.assertEquals(ps.lastModified, TIME2) self.assertEquals(ps.hpoId, new_org.hpoId) self.assertEquals(ph.hpoId, insert_org.hpoId) self.assertEquals(ph.organizationId, insert_org.organizationId) self.assertEquals(new_org.hpoId, participant.hpoId) self.assertEquals(new_org.organizationId, participant.organizationId) self.assertIsNone(participant.siteId) self.assertEquals(participant.providerLink, provider_link) def test_participant_different_hpo_does_not_change(self): insert_org = self.organization_dao.insert( Organization(externalId='stark_industries', displayName='ironman', hpoId=PITT_HPO_ID)) self.participant_dao.insert(Participant(participantId=1, biobankId=2)) participant = self.participant_dao.get(1) participant.hpoId = UNSET_HPO_ID self.participant_dao.update(participant) insert_org.hpoId = AZ_HPO_ID self.organization_dao.update(insert_org) new_org = self.organization_dao.get_by_external_id('stark_industries') participant = self.participant_dao.get(1) self.assertNotEqual(new_org.hpoId, participant.hpoId) self.assertEqual(new_org.hpoId, AZ_HPO_ID) self.assertEqual(participant.hpoId, UNSET_HPO_ID)
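# The re-pairing behavior test_participant_pairing_updates_onchange exercises,
# restated as a tiny self-contained sketch: when an organization is re-pointed
# at a new HPO, participants paired to that organization follow it, while
# participants paired differently (e.g. UNSET) are untouched. Plain dicts
# stand in for the ORM models.
def _repoint_organization(org, new_hpo_id, participants):
    org['hpoId'] = new_hpo_id
    for participant in participants:
        # Only participants actually paired to this organization move.
        if participant['organizationId'] == org['organizationId']:
            participant['hpoId'] = new_hpo_id

_org = {'organizationId': 1, 'hpoId': 10}
_paired = {'organizationId': 1, 'hpoId': 10}
_unpaired = {'organizationId': None, 'hpoId': 0}
_repoint_organization(_org, 11, [_paired, _unpaired])
assert _paired['hpoId'] == 11 and _unpaired['hpoId'] == 0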
class OrganizationHierarchySyncDao(BaseDao): def __init__(self): super(OrganizationHierarchySyncDao, self).__init__(HPO) self.hpo_dao = HPODao() self.organization_dao = OrganizationDao() self.site_dao = SiteDao() def from_client_json(self, resource_json, id_=None, expected_version=None, client_id=None): # pylint: disable=unused-argument try: fhir_org = lib_fhir.fhirclient_3_0_0.models.organization.Organization( resource_json) except FHIRValidationError: raise BadRequest('Invalid FHIR format in payload data.') if not fhir_org.meta or not fhir_org.meta.versionId: raise BadRequest('No versionId info found in payload data.') try: fhir_org.version = int(fhir_org.meta.versionId) except ValueError: raise BadRequest('Invalid versionId in payload data.') return fhir_org def to_client_json(self, hierarchy_org_obj): return hierarchy_org_obj.as_json() def get_etag(self, id_, pid): # pylint: disable=unused-argument return None def update(self, hierarchy_org_obj): obj_type = self._get_type(hierarchy_org_obj) operation_funcs = { 'AWARDEE': self._update_awardee, 'ORGANIZATION': self._update_organization, 'SITE': self._update_site } if obj_type not in operation_funcs: raise BadRequest('No awardee-type info found in payload data.') operation_funcs[obj_type](hierarchy_org_obj) def _update_awardee(self, hierarchy_org_obj): if hierarchy_org_obj.id is None: raise BadRequest('No id found in payload data.') awardee_id = self._get_value_from_identifier( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'awardee-id') if awardee_id is None: raise BadRequest( 'No organization-identifier info found in payload data.') is_obsolete = ObsoleteStatus( 'OBSOLETE') if not hierarchy_org_obj.active else None awardee_type = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'awardee-type') try: organization_type = OrganizationType(awardee_type) if organization_type == OrganizationType.UNSET: organization_type = None except TypeError: raise BadRequest( 'Invalid organization type {} for awardee {}'.format( awardee_type, awardee_id)) entity = HPO(name=awardee_id.upper(), displayName=hierarchy_org_obj.name, organizationType=organization_type, isObsolete=is_obsolete, resourceId=hierarchy_org_obj.id) existing_map = { entity.name: entity for entity in self.hpo_dao.get_all() } existing_entity = existing_map.get(entity.name) with self.hpo_dao.session() as session: if existing_entity: hpo_id = existing_entity.hpoId new_dict = entity.asdict() new_dict['hpoId'] = None existing_dict = existing_entity.asdict() existing_dict['hpoId'] = None if existing_dict == new_dict: logging.info('Not updating {}.'.format(new_dict['name'])) else: existing_entity.displayName = entity.displayName existing_entity.organizationType = entity.organizationType existing_entity.isObsolete = entity.isObsolete existing_entity.resourceId = entity.resourceId self.hpo_dao.update_with_session(session, existing_entity) else: entity.hpoId = len(existing_map) hpo_id = entity.hpoId self.hpo_dao.insert_with_session(session, entity) bq_hpo_update_by_id(hpo_id) def _update_organization(self, hierarchy_org_obj): if hierarchy_org_obj.id is None: raise BadRequest('No id found in payload data.') organization_id = self._get_value_from_identifier( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'organization-id') if organization_id is None: raise BadRequest( 'No organization-identifier info found in payload data.') is_obsolete = ObsoleteStatus( 'OBSOLETE') if not hierarchy_org_obj.active else None resource_id = 
self._get_reference(hierarchy_org_obj) hpo = self.hpo_dao.get_by_resource_id(resource_id) if hpo is None: raise BadRequest( 'Invalid partOf reference {} importing organization {}'.format( resource_id, organization_id)) entity = Organization(externalId=organization_id.upper(), displayName=hierarchy_org_obj.name, hpoId=hpo.hpoId, isObsolete=is_obsolete, resourceId=hierarchy_org_obj.id) existing_map = { entity.externalId: entity for entity in self.organization_dao.get_all() } existing_entity = existing_map.get(entity.externalId) with self.organization_dao.session() as session: if existing_entity: new_dict = entity.asdict() new_dict['organizationId'] = None existing_dict = existing_entity.asdict() existing_dict['organizationId'] = None if existing_dict == new_dict: logging.info('Not updating {}.'.format( new_dict['externalId'])) else: existing_entity.displayName = entity.displayName existing_entity.hpoId = entity.hpoId existing_entity.isObsolete = entity.isObsolete existing_entity.resourceId = entity.resourceId self.organization_dao.update_with_session( session, existing_entity) else: self.organization_dao.insert_with_session(session, entity) org_id = self.organization_dao.get_by_external_id( organization_id.upper()).organizationId bq_organization_update_by_id(org_id) def _update_site(self, hierarchy_org_obj): if hierarchy_org_obj.id is None: raise BadRequest('No id found in payload data.') google_group = self._get_value_from_identifier( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'site-id') if google_group is None: raise BadRequest( 'No organization-identifier info found in payload data.') google_group = google_group.lower() is_obsolete = ObsoleteStatus( 'OBSOLETE') if not hierarchy_org_obj.active else None resource_id = self._get_reference(hierarchy_org_obj) organization = self.organization_dao.get_by_resource_id(resource_id) if organization is None: raise BadRequest( 'Invalid partOf reference {} importing site {}'.format( resource_id, google_group)) launch_date = None launch_date_str = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'anticipatedLaunchDate', 'valueDate') if launch_date_str: try: launch_date = parse(launch_date_str).date() except ValueError: raise BadRequest('Invalid launch date {} for site {}'.format( launch_date_str, google_group)) name = hierarchy_org_obj.name mayolink_client_number = None mayolink_client_number_str = self._get_value_from_identifier( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'mayo-link-identifier') if mayolink_client_number_str: try: mayolink_client_number = int(mayolink_client_number_str) except ValueError: raise BadRequest( 'Invalid Mayolink Client # {} for site {}'.format( mayolink_client_number_str, google_group)) notes = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'notes') site_status_bool = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'schedulingStatusActive', 'valueBoolean') try: site_status = SiteStatus( 'ACTIVE' if site_status_bool else 'INACTIVE') except TypeError: raise BadRequest('Invalid site status {} for site {}'.format( site_status, google_group)) enrolling_status_bool = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'enrollmentStatusActive', 'valueBoolean') try: enrolling_status = EnrollingStatus( 'ACTIVE' if enrolling_status_bool else 'INACTIVE') except TypeError: raise BadRequest( 'Invalid enrollment site status {} for site {}'.format( enrolling_status_bool, 
google_group)) digital_scheduling_bool = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'digitalSchedulingStatusActive', 'valueBoolean') try: digital_scheduling_status = DigitalSchedulingStatus( 'ACTIVE' if digital_scheduling_bool else 'INACTIVE') except TypeError: raise BadRequest( 'Invalid digital scheduling status {} for site {}'.format( digital_scheduling_bool, google_group)) directions = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'directions') physical_location_name = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'locationName') address_1, address_2, city, state, zip_code = self._get_address( hierarchy_org_obj) phone = self._get_contact_point(hierarchy_org_obj, 'phone') admin_email_addresses = self._get_contact_point( hierarchy_org_obj, 'email') link = self._get_contact_point(hierarchy_org_obj, 'url') schedule_instructions = self._get_value_from_extention( hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'schedulingInstructions') entity = Site(siteName=name, googleGroup=google_group, mayolinkClientNumber=mayolink_client_number, organizationId=organization.organizationId, hpoId=organization.hpoId, siteStatus=site_status, enrollingStatus=enrolling_status, digitalSchedulingStatus=digital_scheduling_status, scheduleInstructions=schedule_instructions, scheduleInstructions_ES='', launchDate=launch_date, notes=notes, notes_ES='', directions=directions, physicalLocationName=physical_location_name, address1=address_1, address2=address_2, city=city, state=state, zipCode=zip_code, phoneNumber=phone, adminEmails=admin_email_addresses, link=link, isObsolete=is_obsolete, resourceId=hierarchy_org_obj.id) existing_map = { entity.googleGroup: entity for entity in self.site_dao.get_all() } existing_entity = existing_map.get(entity.googleGroup) with self.site_dao.session() as session: if existing_entity: self._populate_lat_lng_and_time_zone(entity, existing_entity) if entity.siteStatus == SiteStatus.ACTIVE and \ (entity.latitude is None or entity.longitude is None): raise BadRequest( 'Active site without geocoding: {}'.format( entity.googleGroup)) new_dict = entity.asdict() new_dict['siteId'] = None existing_dict = existing_entity.asdict() existing_dict['siteId'] = None if existing_dict == new_dict: logging.info('Not updating {}.'.format( new_dict['googleGroup'])) else: for k, v in entity.asdict().iteritems(): if k != 'siteId' and k != 'googleGroup': setattr(existing_entity, k, v) self.site_dao.update_with_session(session, existing_entity) else: self._populate_lat_lng_and_time_zone(entity, None) if entity.siteStatus == SiteStatus.ACTIVE and \ (entity.latitude is None or entity.longitude is None): raise BadRequest( 'Active site without geocoding: {}'.format( entity.googleGroup)) self.site_dao.insert_with_session(session, entity) site_id = self.site_dao.get_by_google_group(google_group).siteId bq_site_update_by_id(site_id) def _get_type(self, hierarchy_org_obj): obj_type = None type_arr = hierarchy_org_obj.type for type_item in type_arr: code_arr = type_item.coding for code_item in code_arr: if code_item.system == HIERARCHY_CONTENT_SYSTEM_PREFIX + 'type': obj_type = code_item.code break return obj_type def _get_value_from_identifier(self, hierarchy_org_obj, system): identifier_arr = hierarchy_org_obj.identifier for identifier in identifier_arr: if identifier.system == system: return identifier.value else: return None def _get_value_from_extention(self, hierarchy_org_obj, url, 
value_key='valueString'): extension_arr = hierarchy_org_obj.extension for extension in extension_arr: if extension.url == url: ext_json = extension.as_json() return ext_json[value_key] else: return None def _get_contact_point(self, hierarchy_org_obj, code): contact_arr = hierarchy_org_obj.contact for contact in contact_arr: telecom_arr = contact.telecom for telecom in telecom_arr: if telecom.system == code: return telecom.value else: return None def _get_address(self, hierarchy_org_obj): address = hierarchy_org_obj.address[0] address_1 = address.line[0] if len(address.line) > 0 else '' address_2 = address.line[1] if len(address.line) > 1 else '' city = address.city state = address.state postal_code = address.postalCode return address_1, address_2, city, state, postal_code def _get_reference(self, hierarchy_org_obj): try: return hierarchy_org_obj.partOf.reference.split('/')[1] except IndexError: return None def _populate_lat_lng_and_time_zone(self, site, existing_site): if site.address1 and site.city and site.state: if existing_site: if (existing_site.address1 == site.address1 and existing_site.city == site.city and existing_site.state == site.state and existing_site.latitude is not None and existing_site.longitude is not None and existing_site.timeZoneId is not None): # Address didn't change, use the existing lat/lng and time zone. site.latitude = existing_site.latitude site.longitude = existing_site.longitude site.timeZoneId = existing_site.timeZoneId return latitude, longitude = self._get_lat_long_for_site( site.address1, site.city, site.state) site.latitude = latitude site.longitude = longitude if latitude and longitude: site.timeZoneId = self._get_time_zone(latitude, longitude) else: # Unlike SiteImporter, this DAO keeps no status exception list, so warn for every active site. if site.siteStatus == SiteStatus.ACTIVE: logging.warn( 'Active site must have valid address. Site: {}, Group: {}' .format(site.siteName, site.googleGroup)) def _get_lat_long_for_site(self, address_1, city, state): self.full_address = address_1 + ' ' + city + ' ' + state try: self.api_key = os.environ.get('API_KEY') self.gmaps = googlemaps.Client(key=self.api_key) try: geocode_result = self.gmaps.geocode(address_1 + ' ' + city + ' ' + state)[0] except IndexError: logging.warn('Bad address for {}, could not geocode.'.format( self.full_address)) return None, None if geocode_result: geometry = geocode_result.get('geometry') if geometry: location = geometry.get('location') if location: latitude = location.get('lat') longitude = location.get('lng') return latitude, longitude else: logging.warn('Cannot find lat/long for %s', self.full_address) return None, None else: logging.warn('Geocode results failed for %s.', self.full_address) return None, None except ValueError as e: logging.exception('Invalid geocode key: %s. ERROR: %s', self.api_key, e) return None, None except IndexError as e: logging.exception( 'Geocoding failure. Check that address is correct. ERROR: %s', e) return None, None def _get_time_zone(self, latitude, longitude): time_zone = self.gmaps.timezone(location=(latitude, longitude)) if time_zone['status'] == 'OK': time_zone_id = time_zone['timeZoneId'] return time_zone_id else: logging.info('Cannot retrieve time zone from %s', self.full_address) return None
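# A hand-written sketch of the minimal resource shape that update() above
# dispatches on. The real HIERARCHY_CONTENT_SYSTEM_PREFIX value is defined
# elsewhere in the codebase; the URL below is an assumed placeholder. The
# helper mirrors _get_type / _get_value_from_identifier over a plain dict
# rather than a parsed fhirclient model.
_ASSUMED_PREFIX = 'http://example.org/fhir/sites/'  # placeholder, not the real system prefix

_payload = {
    'type': [{'coding': [{'system': _ASSUMED_PREFIX + 'type',
                          'code': 'AWARDEE'}]}],
    'identifier': [{'system': _ASSUMED_PREFIX + 'awardee-id',
                    'value': 'PITT'}],
}

def _get_type_from_dict(resource):
    # Walk type[].coding[] and return the code whose system matches the
    # hierarchy 'type' system, as _get_type does on the FHIR model.
    for type_item in resource.get('type', []):
        for coding in type_item.get('coding', []):
            if coding.get('system') == _ASSUMED_PREFIX + 'type':
                return coding.get('code')
    return None

assert _get_type_from_dict(_payload) == 'AWARDEE'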
def main(args): HPOImporter().run(args.awardee_file, args.dry_run) HPODao()._invalidate_cache() OrganizationImporter().run(args.organization_file, args.dry_run) OrganizationDao()._invalidate_cache() SiteImporter().run(args.site_file, args.dry_run)
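# The argument parser feeding main() is defined elsewhere in this tool; a
# plausible sketch is below. The flag names are assumptions inferred from the
# attributes main() and the importers read off args (awardee_file,
# organization_file, site_file, dry_run), not confirmed definitions.
import argparse

def _make_parser_sketch():
    parser = argparse.ArgumentParser(
        description='Import awardees, organizations and sites from CSV files.')
    parser.add_argument('--awardee_file', required=True)
    parser.add_argument('--organization_file', required=True)
    parser.add_argument('--site_file', required=True)
    # A dry run reports what would change without writing to the database.
    parser.add_argument('--dry_run', action='store_true')
    return parser

# Example: main(_make_parser_sketch().parse_args(
#     ['--awardee_file', 'a.csv', '--organization_file', 'o.csv',
#      '--site_file', 's.csv']))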
class UpdateEhrStatusUpdatesTestCase(SqlTestBase): def setUp(self, **kwargs): super(UpdateEhrStatusUpdatesTestCase, self).setUp(use_mysql=True, **kwargs) self.hpo_dao = HPODao() self.org_dao = OrganizationDao() self.participant_dao = ParticipantDao() self.summary_dao = ParticipantSummaryDao() self.ehr_receipt_dao = EhrReceiptDao() self.hpo_foo = self._make_hpo(int_id=10, string_id='hpo_foo') self.hpo_bar = self._make_hpo(int_id=11, string_id='hpo_bar') self.org_foo_a = self._make_org(hpo=self.hpo_foo, int_id=10, external_id='FOO_A') self.org_foo_b = self._make_org(hpo=self.hpo_foo, int_id=11, external_id='FOO_B') self.org_bar_a = self._make_org(hpo=self.hpo_bar, int_id=12, external_id='BAR_A') self.participants = [ self._make_participant(hpo=self.hpo_foo, org=self.org_foo_a, int_id=11), self._make_participant(hpo=self.hpo_foo, org=self.org_foo_b, int_id=12), self._make_participant(hpo=self.hpo_bar, org=self.org_bar_a, int_id=13), self._make_participant(hpo=self.hpo_bar, org=self.org_bar_a, int_id=14), ] def _make_hpo(self, int_id, string_id): hpo = HPO(hpoId=int_id, name=string_id) self.hpo_dao.insert(hpo) return hpo def _make_org(self, hpo, int_id, external_id): org = Organization(organizationId=int_id, externalId=external_id, displayName='SOME ORG', hpoId=hpo.hpoId) self.org_dao.insert(org) return org def _make_participant(self, hpo, org, int_id): participant = self._participant_with_defaults(participantId=int_id, biobankId=int_id) participant.hpoId = hpo.hpoId participant.organizationId = org.organizationId self.participant_dao.insert(participant) summary = self.participant_summary(participant) self.summary_dao.insert(summary) return participant, summary # Mock BigQuery result types EhrUpdatePidRow = collections.namedtuple('EhrUpdatePidRow', [ 'person_id', ]) TableCountsRow = collections.namedtuple('TableCountsRow', [ 'org_id', 'person_upload_time', ]) @mock.patch('offline.update_ehr_status.update_organizations_from_job') @mock.patch( 'offline.update_ehr_status.update_participant_summaries_from_job') @mock.patch('offline.update_ehr_status.make_update_organizations_job') @mock.patch( 'offline.update_ehr_status.make_update_participant_summaries_job') def test_skips_when_no_job(self, mock_summary_job, mock_organization_job, mock_update_summaries, mock_update_organizations): mock_summary_job.return_value = None mock_organization_job.return_value = None with FakeClock(datetime.datetime(2019, 1, 1)): offline.update_ehr_status.update_ehr_status() self.assertFalse(mock_update_summaries.called) self.assertFalse(mock_update_organizations.called) @mock.patch('offline.update_ehr_status.make_update_organizations_job') @mock.patch( 'offline.update_ehr_status.make_update_participant_summaries_job') def test_updates_participant_summaries(self, mock_summary_job, mock_organization_job): mock_summary_job.return_value.__iter__.return_value = [[ self.EhrUpdatePidRow(11), ]] mock_organization_job.return_value.__iter__.return_value = [] with FakeClock(datetime.datetime(2019, 1, 1)): offline.update_ehr_status.update_ehr_status() mock_summary_job.return_value.__iter__.return_value = [[ self.EhrUpdatePidRow(11), self.EhrUpdatePidRow(12), ]] mock_organization_job.return_value.__iter__.return_value = [] with FakeClock(datetime.datetime(2019, 1, 2)): offline.update_ehr_status.update_ehr_status() summary = self.summary_dao.get(11) self.assertEqual(summary.ehrStatus, EhrStatus.PRESENT) self.assertEqual(summary.ehrReceiptTime, datetime.datetime(2019, 1, 1)) self.assertEqual(summary.ehrUpdateTime, 
datetime.datetime(2019, 1, 2)) summary = self.summary_dao.get(12) self.assertEqual(summary.ehrStatus, EhrStatus.PRESENT) self.assertEqual(summary.ehrReceiptTime, datetime.datetime(2019, 1, 2)) self.assertEqual(summary.ehrUpdateTime, datetime.datetime(2019, 1, 2)) @mock.patch('offline.update_ehr_status.make_update_organizations_job') @mock.patch( 'offline.update_ehr_status.make_update_participant_summaries_job') def test_creates_receipts(self, mock_summary_job, mock_organization_job): mock_summary_job.return_value.__iter__.return_value = [] mock_organization_job.return_value.__iter__.return_value = [ [ self.TableCountsRow(org_id='FOO_A', person_upload_time=datetime.datetime( 2019, 1, 1).replace(tzinfo=pytz.UTC)), ], ] with FakeClock(datetime.datetime(2019, 1, 1)): offline.update_ehr_status.update_ehr_status() foo_a_receipts = self.ehr_receipt_dao.get_by_organization_id( self.org_foo_a.organizationId) self.assertEqual(len(foo_a_receipts), 1) self.assertEqual(foo_a_receipts[0].receiptTime, datetime.datetime(2019, 1, 1)) foo_b_receipts = self.ehr_receipt_dao.get_by_organization_id( self.org_foo_b.organizationId) self.assertEqual(len(foo_b_receipts), 0) mock_summary_job.return_value.__iter__.return_value = [] mock_organization_job.return_value.__iter__.return_value = [ [ self.TableCountsRow(org_id='FOO_A', person_upload_time=datetime.datetime( 2019, 1, 1).replace(tzinfo=pytz.UTC)), self.TableCountsRow(org_id='FOO_A', person_upload_time=datetime.datetime( 2019, 1, 2).replace(tzinfo=pytz.UTC)), self.TableCountsRow(org_id='FOO_B', person_upload_time=datetime.datetime( 2019, 1, 2).replace(tzinfo=pytz.UTC)), ], ] with FakeClock(datetime.datetime(2019, 1, 2)): offline.update_ehr_status.update_ehr_status() foo_a_receipts = self.ehr_receipt_dao.get_by_organization_id( self.org_foo_a.organizationId) self.assertEqual(len(foo_a_receipts), 2) self.assertEqual(foo_a_receipts[0].receiptTime, datetime.datetime(2019, 1, 1)) self.assertEqual(foo_a_receipts[1].receiptTime, datetime.datetime(2019, 1, 2)) foo_b_receipts = self.ehr_receipt_dao.get_by_organization_id( self.org_foo_b.organizationId) self.assertEqual(len(foo_b_receipts), 1) self.assertEqual(foo_b_receipts[0].receiptTime, datetime.datetime(2019, 1, 2)) @mock.patch('offline.update_ehr_status.make_update_organizations_job') @mock.patch( 'offline.update_ehr_status.make_update_participant_summaries_job') def test_ignores_bad_data(self, mock_summary_job, mock_organization_job): invalid_participant_id = -1 mock_summary_job.return_value.__iter__.return_value = [[ self.EhrUpdatePidRow(invalid_participant_id), ]] mock_organization_job.return_value.__iter__.return_value = [ [ self.TableCountsRow( org_id='FOO_A', person_upload_time="an invalid date string"), self.TableCountsRow(org_id='AN_ORG_THAT_DOESNT_EXIST', person_upload_time=datetime.datetime( 2019, 1, 1).replace(tzinfo=pytz.UTC)), self.TableCountsRow(org_id='AN_ORG_THAT_DOESNT_EXIST', person_upload_time=None), ], ] with FakeClock(datetime.datetime(2019, 1, 1)): offline.update_ehr_status.update_ehr_status() foo_a_receipts = self.ehr_receipt_dao.get_all() self.assertEqual(len(foo_a_receipts), 0)
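# A standalone restatement of ParticipantSummaryDao.update_ehr_status (defined
# earlier in this file), which these tests exercise end to end: the receipt
# time is stamped only on the first sighting of EHR data, while the update
# time advances on every run. _SummaryStub stands in for the real model.
import datetime

class _SummaryStub(object):
    def __init__(self):
        self.ehrStatus = None
        self.ehrReceiptTime = None
        self.ehrUpdateTime = None

def _apply_ehr_update(summary, update_time):
    summary.ehrStatus = 'PRESENT'  # EhrStatus.PRESENT in the real model
    if not summary.ehrReceiptTime:
        # First time EHR data was seen for this participant.
        summary.ehrReceiptTime = update_time
    summary.ehrUpdateTime = update_time
    return summary

_s = _SummaryStub()
_apply_ehr_update(_s, datetime.datetime(2019, 1, 1))
_apply_ehr_update(_s, datetime.datetime(2019, 1, 2))
assert _s.ehrReceiptTime == datetime.datetime(2019, 1, 1)
assert _s.ehrUpdateTime == datetime.datetime(2019, 1, 2)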