class SiteDaoTest(SqlTestBase):
    """Tests SiteDao get / insert / update and lookup by Google group."""

    def setUp(self):
        super(SiteDaoTest, self).setUp()
        self.site_dao = SiteDao()

    def test_get_no_sites(self):
        """Lookups by id and by Google group return None when nothing matches."""
        self.assertIsNone(self.site_dao.get(9999))
        self.assertIsNone(
            self.site_dao.get_by_google_group('*****@*****.**'))

    def test_insert(self):
        """An inserted site can be read back by id and by Google group."""
        google_group = '*****@*****.**'
        site = Site(siteName='site',
                    googleGroup=google_group,
                    consortiumName='consortium',
                    mayolinkClientNumber=12345,
                    hpoId=PITT_HPO_ID)
        created_site = self.site_dao.insert(site)
        new_site = self.site_dao.get(created_site.siteId)
        # Copy the generated id so the two dicts are comparable.
        site.siteId = created_site.siteId
        self.assertEqual(site.asdict(), new_site.asdict())
        self.assertEqual(
            site.asdict(),
            self.site_dao.get_by_google_group(google_group).asdict())

    def test_update(self):
        """Updating a site replaces its fields, including its Google group."""
        # The original and the updated site must use different Google groups;
        # with identical values the final assertIsNone contradicts the lookup
        # asserted immediately before it.
        original_group = 'site@googlegroups.com'
        updated_group = 'site2@googlegroups.com'
        site = Site(siteName='site',
                    googleGroup=original_group,
                    consortiumName='consortium',
                    mayolinkClientNumber=12345,
                    hpoId=PITT_HPO_ID)
        created_site = self.site_dao.insert(site)
        new_site = Site(siteId=created_site.siteId,
                        siteName='site2',
                        googleGroup=updated_group,
                        consortiumName='consortium2',
                        mayolinkClientNumber=123456,
                        hpoId=UNSET_HPO_ID)
        self.site_dao.update(new_site)
        fetched_site = self.site_dao.get(created_site.siteId)
        self.assertEqual(new_site.asdict(), fetched_site.asdict())
        self.assertEqual(
            new_site.asdict(),
            self.site_dao.get_by_google_group(updated_group).asdict())
        # The old group must no longer resolve to any site.
        self.assertIsNone(self.site_dao.get_by_google_group(original_group))
Example #2
0
def get_site_id_by_site_value(obj):
    """Resolve the site id for the Google group stored under obj['site']['value'].

    Returns the matching site's `siteId`, or None when `obj` has no 'site'
    key or no site is found for that Google group.
    """
    if 'site' not in obj:
        return None

    # Kept as a function-scope import (presumably to avoid a circular
    # import at module load time -- TODO confirm).
    from dao.site_dao import SiteDao
    matched = SiteDao().get_by_google_group(obj['site']['value'])
    return None if matched is None else matched.siteId
class SiteImporter(CsvImporter):
    def __init__(self):
        """Configure the importer from the parsed command-line arguments.

        Derives `self.environment` (a leading space plus the upper-cased
        last '-'-separated token of the project name, falling back to
        ENV_TEST when the project is not in ENV_LIST) and uses it as the
        suffix of the per-environment status column names.
        """
        args = parser.parse_args()
        self.organization_dao = OrganizationDao()
        self.stub_geocoding = args.stub_geocoding
        self.instance = args.instance
        self.creds_file = args.creds_file
        self.project = args.project if args.project else None

        self.ACTIVE = SiteStatus.ACTIVE
        self.status_exception_list = ['hpo-site-walgreensphoenix']
        self.new_sites_list = []

        env_source = self.project if self.project in ENV_LIST else ENV_TEST
        self.environment = ' ' + env_source.split('-')[-1].upper()

        # Column names handed to the base importer; the three status
        # columns are environment specific.
        columns = [
            SITE_ORGANIZATION_ID_COLUMN,
            SITE_SITE_ID_COLUMN,
            SITE_SITE_COLUMN,
            SITE_STATUS_COLUMN + self.environment,
            ENROLLING_STATUS_COLUMN + self.environment,
            DIGITAL_SCHEDULING_STATUS_COLUMN + self.environment,
        ]
        super(SiteImporter, self).__init__('site', SiteDao(), 'siteId',
                                           'googleGroup', columns)

    def run(self, filename, dry_run):
        """Run the base import, then seed test participants for new sites.

        After the base importer has processed `filename`, and only when this
        is not a dry run, the environment is STABLE and at least one new site
        was inserted, every instance of each SERVING version of the 'default'
        App Engine service is deleted (the log messages call this a reboot).
        If at least one deletion reports done, test participants are inserted
        for the new sites.

        Args:
            filename: passed through to the base importer.
            dry_run: when true, nothing beyond the base importer's run happens.
        """
        super(SiteImporter, self).run(filename, dry_run)
        if dry_run or not self.environment:
            return

        current_env = ENV_STABLE
        if self.environment.strip() != 'STABLE' or not self.new_sites_list:
            return

        from googleapiclient.discovery import build
        logging.info(
            'Starting reboot of app instances to insert new test participants'
        )
        service = build('appengine', 'v1', cache_discovery=False)
        request = service.apps().services().versions().list(
            appsId=current_env, servicesId='default')
        versions = request.execute()

        insert_participants = False
        # An empty listing may omit the key entirely, so do not index it.
        for version in versions.get('versions', []):
            if version['servingStatus'] != 'SERVING':
                continue
            _id = version['id']
            request = service.apps().services().versions().instances().list(
                servicesId='default', versionsId=_id, appsId=current_env)
            instances = request.execute()

            # Only a missing/empty instance list means "nothing running".
            # Previously a KeyError raised anywhere in the loop below was
            # reported this way and aborted the remaining instances.
            running = instances.get('instances')
            if not running:
                logging.warning('No running instance for %s',
                                version.get('name', _id))
                continue

            for instance in running:
                sha = instance['name'].split('/')[-1]
                delete_instance = service.apps().services().versions(
                ).instances().delete(appsId=current_env,
                                     servicesId='default',
                                     versionsId=_id,
                                     instancesId=sha)

                response = delete_instance.execute()
                # `done` can be absent from the response; treat that the
                # same as "not done" instead of raising KeyError.
                if response.get('done'):
                    insert_participants = True
                    logging.info(
                        'Reboot of instance: %s in stable complete.',
                        instance['name'])
                else:
                    logging.warning(
                        'Not able to reboot instance on server, Error: %s',
                        response)

        if insert_participants:
            logging.info('Starting import of test participants.')
            self._insert_new_participants(self.new_sites_list)

    def delete_sql_statement(self, session, str_list):
        sql = """
          DELETE FROM site
          WHERE site_id IN ({str_list})
          AND NOT EXISTS(
          SELECT * FROM participant WHERE site_id = site.site_id)
          AND NOT EXISTS(
          SELECT * FROM participant_summary WHERE site_id = site.site_id
          OR physical_measurements_finalized_site_id = site.site_id
          OR physical_measurements_created_site_id = site.site_id
          OR biospecimen_source_site_id = site.site_id
          OR biospecimen_collected_site_id = site.site_id
          OR biospecimen_processed_site_id = site.site_id
          OR biospecimen_finalized_site_id = site.site_id
          )
          AND NOT EXISTS(
          SELECT * FROM participant_history WHERE site_id = site.site_id)
          AND NOT EXISTS(
          SELECT * FROM physical_measurements WHERE created_site_id = site.site_id
          OR finalized_site_id = site.site_id)
          AND NOT EXISTS(
          SELECT * FROM biobank_order WHERE finalized_site_id = site.site_id
          OR source_site_id = site.site_id
          OR collected_site_id = site.site_id
          OR processed_site_id = site.site_id
          )
          """.format(str_list=str_list)

        session.execute(sql)

    def _cleanup_old_entities(self, session, row_list, dry_run):
        """Retire sites that exist in the database but not in the sheet.

        Every existing site whose Google group is absent from `row_list` and
        which is not already obsolete is counted in `self.deletion_count`.
        Unless `dry_run` is set, those sites are marked obsolete and then a
        delete is attempted for the ones nothing else references.

        Args:
            session: object whose `execute` receives the SQL statements.
            row_list: sheet rows, each indexable by SITE_SITE_ID_COLUMN.
            dry_run: when true, only log and count; do not modify data.
        """
        log_prefix = '(dry run) ' if dry_run else ''
        self.site_dao = SiteDao()
        existing_sites = set(site.googleGroup
                             for site in self.site_dao.get_all())
        sheet_groups = set(
            str(row[SITE_SITE_ID_COLUMN].lower()) for row in row_list)

        sites_to_remove = existing_sites - sheet_groups
        if not sites_to_remove:
            return

        site_id_list = []
        for site in sites_to_remove:
            logging.info(
                log_prefix +
                'Deleting old Site no longer in Google sheet: %s', site)
            old_site = self.site_dao.get_by_google_group(site)
            # This check must run for every site. It used to sit outside
            # the loop, so only the last site iterated was ever considered.
            if not old_site:
                continue
            if old_site.isObsolete == ObsoleteStatus.OBSOLETE:
                logging.info(
                    'Not attempting to delete site [%s] with existing obsolete status',
                    old_site.googleGroup)
                continue
            site_id_list.append(old_site.siteId)
            self.deletion_count += 1

        if site_id_list and not dry_run:
            str_list = ','.join(str(i) for i in site_id_list)
            # Log the ids actually being updated rather than whichever site
            # happened to be looked up last.
            logging.info(log_prefix + 'Marking old site as obsolete : %s',
                         str_list)
            sql = """ UPDATE site
            SET is_obsolete = 1
            WHERE site_id in ({site_id_list})""".format(site_id_list=str_list)

            session.execute(sql)

            self.site_dao._invalidate_cache()
            # Try to delete old sites.
            self.delete_sql_statement(session, str_list)

    def _insert_new_participants(self, entity):
        """Import 20 test participants for each site in `entity`.

        Each participant shares the same fixed demographic values; the last
        name is the final '-'-separated token of the site's Google group and
        the first name is 'Participant <n>' for n in 1..20.

        Args:
            entity: iterable of site objects exposing `googleGroup`.
        """
        base_participant = {
            'zip_code': '20001',
            'date_of_birth': '1933-3-3',
            'gender_identity': 'GenderIdentity_Woman',
            'withdrawalStatus': 'NOT_WITHDRAWN',
            'suspensionStatus': 'NOT_SUSPENDED'
        }

        client = Client('rdr/v1', False, self.creds_file, self.instance)
        client_log.setLevel(logging.WARN)
        questionnaire_to_questions, consent_questionnaire_id_and_version = _setup_questionnaires(
            client)
        consent_questions = questionnaire_to_questions[
            consent_questionnaire_id_and_version]

        num_participants = 0
        for site in entity:
            last_name = site.googleGroup.split('-')[-1]
            for number in range(1, 21):
                num_participants += 1
                # Build a fresh dict per participant so one import cannot
                # leak state into the next through a shared, mutated dict.
                participant = dict(base_participant)
                participant['last_name'] = last_name
                participant['first_name'] = 'Participant {}'.format(number)
                participant['site'] = site.googleGroup

                import_participant(participant, client,
                                   consent_questionnaire_id_and_version,
                                   questionnaire_to_questions,
                                   consent_questions, num_participants)

        logging.info('%d participants imported.', num_participants)

    def _entity_from_row(self, row):
        """Build a Site from one sheet row, or return None if the row is invalid.

        Every validation failure is logged as a warning and appended to
        `self.errors` before None is returned. Validated fields are the
        organization id, the optional launch date and Mayolink client number,
        and the three per-environment status columns.

        Args:
            row: mapping of column name to cell value.

        Returns:
            A new Site, or None when any validated field is invalid.
        """
        google_group = row[SITE_SITE_ID_COLUMN].lower()
        organization_id = row[SITE_ORGANIZATION_ID_COLUMN].upper()
        organization = self.organization_dao.get_by_external_id(
            organization_id)
        if organization is None:
            logging.warning('Invalid organization ID %s importing site %s',
                            organization_id, google_group)
            self.errors.append(
                'Invalid organization ID {} importing site {}'.format(
                    organization_id, google_group))
            return None

        launch_date = None
        launch_date_str = row.get(SITE_LAUNCH_DATE_COLUMN)
        if launch_date_str:
            try:
                launch_date = parse(launch_date_str).date()
            except ValueError:
                logging.warning('Invalid launch date %s for site %s',
                                launch_date_str, google_group)
                self.errors.append('Invalid launch date {} for site {}'.format(
                    launch_date_str, google_group))
                return None

        name = row[SITE_SITE_COLUMN]

        mayolink_client_number = None
        mayolink_client_number_str = row.get(
            SITE_MAYOLINK_CLIENT_NUMBER_COLUMN)
        if mayolink_client_number_str:
            try:
                mayolink_client_number = int(mayolink_client_number_str)
            except ValueError:
                logging.warning('Invalid Mayolink Client # %s for site %s',
                                mayolink_client_number_str, google_group)
                self.errors.append(
                    'Invalid Mayolink Client # {} for site {}'.format(
                        mayolink_client_number_str, google_group))
                return None

        notes = row.get(SITE_NOTES_COLUMN)
        notes_es = row.get(SITE_NOTES_COLUMN_ES)

        # The handlers below catch TypeError, as the original code did for
        # the status enums (presumably what they raise for an unknown name
        # -- TODO confirm).
        site_status_column = SITE_STATUS_COLUMN + self.environment
        try:
            site_status = SiteStatus(row[site_status_column].upper())
        except TypeError:
            logging.warning('Invalid site status %s for site %s',
                            row[site_status_column], google_group)
            self.errors.append('Invalid site status {} for site {}'.format(
                row[site_status_column], google_group))
            return None

        enrolling_column = ENROLLING_STATUS_COLUMN + self.environment
        try:
            enrolling_status = EnrollingStatus(row[enrolling_column].upper())
        except TypeError:
            logging.warning('Invalid enrollment site status %s for site %s',
                            row[enrolling_column], google_group)
            self.errors.append(
                'Invalid enrollment site status {} for site {}'.format(
                    row[enrolling_column], google_group))
            # Without this return, `enrolling_status` stayed unbound and the
            # Site(...) call below raised UnboundLocalError.
            return None

        scheduling_column = DIGITAL_SCHEDULING_STATUS_COLUMN + self.environment
        try:
            digital_scheduling_status = DigitalSchedulingStatus(
                row[scheduling_column].upper())
        except TypeError:
            # Handled like the other two status columns so one bad cell is
            # reported as a row error instead of aborting the import.
            logging.warning(
                'Invalid digital scheduling status %s for site %s',
                row[scheduling_column], google_group)
            self.errors.append(
                'Invalid digital scheduling status {} for site {}'.format(
                    row[scheduling_column], google_group))
            return None

        directions = row.get(SITE_DIRECTIONS_COLUMN)
        physical_location_name = row.get(SITE_PHYSICAL_LOCATION_NAME_COLUMN)
        address_1 = row.get(SITE_ADDRESS_1_COLUMN)
        address_2 = row.get(SITE_ADDRESS_2_COLUMN)
        city = row.get(SITE_CITY_COLUMN)
        state = row.get(SITE_STATE_COLUMN)
        zip_code = row.get(SITE_ZIP_COLUMN)
        phone = row.get(SITE_PHONE_COLUMN)
        admin_email_addresses = row.get(SITE_ADMIN_EMAIL_ADDRESSES_COLUMN)
        link = row.get(SITE_LINK_COLUMN)
        schedule_instructions = row.get(SCHEDULING_INSTRUCTIONS)
        schedule_instructions_es = row.get(SCHEDULING_INSTRUCTIONS_ES)
        return Site(siteName=name,
                    googleGroup=google_group,
                    mayolinkClientNumber=mayolink_client_number,
                    organizationId=organization.organizationId,
                    hpoId=organization.hpoId,
                    siteStatus=site_status,
                    enrollingStatus=enrolling_status,
                    digitalSchedulingStatus=digital_scheduling_status,
                    scheduleInstructions=schedule_instructions,
                    scheduleInstructions_ES=schedule_instructions_es,
                    launchDate=launch_date,
                    notes=notes,
                    notes_ES=notes_es,
                    directions=directions,
                    physicalLocationName=physical_location_name,
                    address1=address_1,
                    address2=address_2,
                    city=city,
                    state=state,
                    zipCode=zip_code,
                    phoneNumber=phone,
                    adminEmails=admin_email_addresses,
                    link=link)

    def _update_entity(self, entity, existing_entity, session, dry_run):
        self._populate_lat_lng_and_time_zone(entity, existing_entity)
        if entity.siteStatus == self.ACTIVE and (entity.latitude == None
                                                 or entity.longitude == None):
            self.errors.append(
                'Skipped active site without geocoding: {}'.format(
                    entity.googleGroup))
            return None, True
        return super(SiteImporter,
                     self)._update_entity(entity, existing_entity, session,
                                          dry_run)

    def _insert_entity(self, entity, existing_map, session, dry_run):
        self._populate_lat_lng_and_time_zone(entity, None)
        if entity.siteStatus == self.ACTIVE and (entity.latitude == None
                                                 or entity.longitude == None):
            self.errors.append(
                'Skipped active site without geocoding: {}'.format(
                    entity.googleGroup))
            return False
        self.new_sites_list.append(entity)
        super(SiteImporter, self)._insert_entity(entity, existing_map, session,
                                                 dry_run)

    def _populate_lat_lng_and_time_zone(self, site, existing_site):
        if site.address1 and site.city and site.state:
            if existing_site:
                if (existing_site.address1 == site.address1
                        and existing_site.city == site.city
                        and existing_site.state == site.state
                        and existing_site.latitude is not None
                        and existing_site.longitude is not None
                        and existing_site.timeZoneId is not None):
                    # Address didn't change, use the existing lat/lng and time zone.
                    site.latitude = existing_site.latitude
                    site.longitude = existing_site.longitude
                    site.timeZoneId = existing_site.timeZoneId
                    return
            if self.stub_geocoding:
                # Set dummy latitude and longitude when importing sites locally / on a CircleCI box.
                site.latitude = 32.176
                site.longitude = -110.93
                site.timeZoneId = 'America/Phoenix'
            else:
                latitude, longitude = self._get_lat_long_for_site(
                    site.address1, site.city, site.state)
                site.latitude = latitude
                site.longitude = longitude
                if latitude and longitude:
                    site.timeZoneId = self._get_time_zone(latitude, longitude)
        else:
            if site.googleGroup not in self.status_exception_list:
                if site.siteStatus == self.ACTIVE:
                    self.errors.append(
                        'Active site must have valid address. Site: {}, Group: {}'
                        .format(site.siteName, site.googleGroup))

    def _get_lat_long_for_site(self, address_1, city, state):
        """Geocode an address through the Google Maps client.

        Side effects: sets `self.full_address`, `self.api_key` (read from the
        API_KEY environment variable) and `self.gmaps`, which
        `_get_time_zone` uses afterwards.

        Args:
            address_1: street address line.
            city: city name.
            state: state name or abbreviation.

        Returns:
            `(latitude, longitude)` of the first geocoding result, or
            `(None, None)` on any failure; failures are also appended to
            `self.errors`.
        """
        self.full_address = address_1 + ' ' + city + ' ' + state
        try:
            self.api_key = os.environ.get('API_KEY')
            self.gmaps = googlemaps.Client(key=self.api_key)
            try:
                # Query with the space-separated address. The original query
                # joined address and city with '' and so ran them together.
                geocode_result = self.gmaps.geocode(self.full_address)[0]
            except IndexError:
                self.errors.append(
                    'Bad address for {}, could not geocode.'.format(
                        self.full_address))
                return None, None

            if not geocode_result:
                logging.warning('Geocode results failed for %s.',
                                self.full_address)
                self.errors.append('Geocode results failed for {}'.format(
                    self.full_address))
                return None, None

            geometry = geocode_result.get('geometry')
            # Default to None so a result without 'geometry' is reported as
            # a missing location instead of raising UnboundLocalError.
            location = geometry.get('location') if geometry else None
            if not location:
                logging.warning('Can not find lat/long for %s',
                                self.full_address)
                self.errors.append('Can not find lat/long for {}'.format(
                    self.full_address))
                return None, None
            return location.get('lat'), location.get('lng')
        except ValueError as e:
            # NOTE(review): this writes the API key to the log and to the
            # error list; consider redacting it.
            logging.exception('Invalid geocode key: %s. ERROR: %s',
                              self.api_key, e)
            self.errors.append('Invalid geocode key: {}. ERROR: {}'.format(
                self.api_key, e))
            return None, None
        except IndexError as e:
            logging.exception(
                'Geocoding failure Check that address is correct. ERROR: %s',
                e)
            # The message has a single placeholder: pass only the error.
            # Previously the API key was formatted in its place.
            self.errors.append(
                'Geocoding failure Check that address is correct. ERROR: {}'.
                format(e))
            return None, None

    def _get_time_zone(self, latitude, longitude):
        time_zone = self.gmaps.timezone(location=(latitude, longitude))
        if time_zone['status'] == 'OK':
            time_zone_id = time_zone['timeZoneId']
            return time_zone_id
        else:
            logging.info('can not retrieve time zone from %s',
                         self.full_address)
            self.errors.append('Can not retrieve time zone from {}'.format(
                self.full_address))
            return None
Example #4
0
class SiteDaoTest(SqlTestBase):
    """Tests SiteDao CRUD and the re-pairing of participants on site update."""

    def setUp(self):
        super(SiteDaoTest, self).setUp()
        self.site_dao = SiteDao()
        self.participant_dao = ParticipantDao()
        self.ps_dao = ParticipantSummaryDao()
        self.ps_history = ParticipantHistoryDao()

    def test_get_no_sites(self):
        """Lookups by id and by Google group return None when nothing matches."""
        self.assertIsNone(self.site_dao.get(9999))
        self.assertIsNone(
            self.site_dao.get_by_google_group('*****@*****.**'))

    def test_insert(self):
        """An inserted site can be read back by id and by Google group."""
        google_group = '*****@*****.**'
        site = Site(siteName='site',
                    googleGroup=google_group,
                    mayolinkClientNumber=12345,
                    hpoId=PITT_HPO_ID)
        created_site = self.site_dao.insert(site)
        new_site = self.site_dao.get(created_site.siteId)
        # Copy the generated id so the two dicts are comparable.
        site.siteId = created_site.siteId
        self.assertEqual(site.asdict(), new_site.asdict())
        self.assertEqual(
            site.asdict(),
            self.site_dao.get_by_google_group(google_group).asdict())

    def test_update(self):
        """Updating a site replaces its fields, including its Google group."""
        # The original and the updated site must use different Google groups;
        # with identical values the final assertIsNone contradicts the lookup
        # asserted immediately before it.
        original_group = 'site@googlegroups.com'
        updated_group = 'site2@googlegroups.com'
        site = Site(siteName='site',
                    googleGroup=original_group,
                    mayolinkClientNumber=12345,
                    hpoId=PITT_HPO_ID)
        created_site = self.site_dao.insert(site)
        new_site = Site(siteId=created_site.siteId,
                        siteName='site2',
                        googleGroup=updated_group,
                        mayolinkClientNumber=123456,
                        hpoId=UNSET_HPO_ID)
        self.site_dao.update(new_site)
        fetched_site = self.site_dao.get(created_site.siteId)
        self.assertEqual(new_site.asdict(), fetched_site.asdict())
        self.assertEqual(
            new_site.asdict(),
            self.site_dao.get_by_google_group(updated_group).asdict())
        # The old group must no longer resolve to any site.
        self.assertIsNone(self.site_dao.get_by_google_group(original_group))

    def test_participant_pairing_updates_on_change(self):
        """Moving a site to another HPO/organization re-pairs its participant.

        The participant, its summary and its history row are all expected to
        carry the site's new organization, and the summary's lastModified is
        expected to equal the time of the site update.
        """
        TIME = datetime.datetime(2018, 1, 1)
        TIME2 = datetime.datetime(2018, 1, 2)
        provider_link = '[{"organization": {"reference": "Organization/AZ_TUCSON"}, "primary": true}]'
        site = Site(siteName='site',
                    googleGroup='*****@*****.**',
                    mayolinkClientNumber=12345,
                    hpoId=PITT_HPO_ID,
                    organizationId=PITT_ORG_ID)
        created_site = self.site_dao.insert(site)

        with FakeClock(TIME):
            p = Participant(participantId=1,
                            biobankId=2,
                            siteId=created_site.siteId)
            self.participant_dao.insert(p)
            fetch_p = self.participant_dao.get(p.participantId)
            updated_p = self.participant_dao.get(fetch_p.participantId)
            p_summary = self.ps_dao.insert(self.participant_summary(updated_p))

        with FakeClock(TIME2):
            update_site_parent = Site(siteId=created_site.siteId,
                                      siteName='site2',
                                      googleGroup='*****@*****.**',
                                      mayolinkClientNumber=123456,
                                      hpoId=AZ_HPO_ID,
                                      organizationId=AZ_ORG_ID)
            self.site_dao.update(update_site_parent)

        updated_p = self.participant_dao.get(fetch_p.participantId)
        ps = self.ps_dao.get(p_summary.participantId)
        # Composite key passed to the history DAO: [participant id, 1].
        ph = self.ps_history.get([updated_p.participantId, 1])

        self.assertEqual(update_site_parent.hpoId, updated_p.hpoId)
        self.assertEqual(update_site_parent.organizationId,
                         updated_p.organizationId)
        self.assertEqual(ps.organizationId, update_site_parent.organizationId)
        self.assertEqual(ps.hpoId, update_site_parent.hpoId)
        self.assertEqual(ph.organizationId, update_site_parent.organizationId)
        self.assertEqual(updated_p.providerLink, provider_link)
        self.assertEqual(ps.lastModified, TIME2)
class ParticipantSummaryDao(UpdatableDao):
    def __init__(self):
        """Bind this DAO to ParticipantSummary and create its helper DAOs."""
        # _ORDER_BY_ENDING is handed to the base DAO as the order_by_ending.
        base = super(ParticipantSummaryDao, self)
        base.__init__(ParticipantSummary, order_by_ending=_ORDER_BY_ENDING)

        # Lookup DAOs used when translating filter values and JSON fields.
        self.hpo_dao = HPODao()
        self.code_dao = CodeDao()
        self.site_dao = SiteDao()
        self.organization_dao = OrganizationDao()

    def get_id(self, obj):
        """Return the identifier of a summary object: its participantId."""
        participant_id = obj.participantId
        return participant_id

    def _validate_update(self, session, obj, existing_obj):  # pylint: disable=unused-argument
        """Participant summaries don't have a version value; drop it from validation logic."""
        if not existing_obj:
            raise NotFound('%s with id %s does not exist' %
                           (self.model_type.__name__, id))

    def _has_withdrawn_filter(self, query):
        for field_filter in query.field_filters:
            if (field_filter.field_name == 'withdrawalStatus'
                    and field_filter.value == WithdrawalStatus.NO_USE):
                return True
            if field_filter.field_name == 'withdrawalTime' and field_filter.value is not None:
                return True
        return False

    def _get_non_withdrawn_filter_field(self, query):
        """Returns the first field referenced in query filters which isn't in
    WITHDRAWN_PARTICIPANT_FIELDS."""
        for field_filter in query.field_filters:
            if not field_filter.field_name in WITHDRAWN_PARTICIPANT_FIELDS:
                return field_filter.field_name
        return None

    def _initialize_query(self, session, query_def):
        """Build the base query, applying the withdrawn-participant rules.

        * A query that filters for withdrawn participants may only filter on
          fields in WITHDRAWN_PARTICIPANT_FIELDS; otherwise BadRequest is raised.
        * A query that filters on any other field only sees participants who
          have not withdrawn, or who withdrew within
          WITHDRAWN_PARTICIPANT_VISIBILITY_TIME of now.
        * Any other query is returned unrestricted.

        Raises:
          BadRequest: when a withdrawn filter is combined with a field that is
            not in WITHDRAWN_PARTICIPANT_FIELDS.
        """
        restricted_field = self._get_non_withdrawn_filter_field(query_def)
        wants_withdrawn = self._has_withdrawn_filter(query_def)

        if wants_withdrawn and restricted_field:
            raise BadRequest(
                "Can't query on %s for withdrawn participants" %
                restricted_field)

        base_query = super(ParticipantSummaryDao,
                           self)._initialize_query(session, query_def)

        if wants_withdrawn or not restricted_field:
            # Either the caller asked for withdrawn participants using only
            # permitted fields, or every filtered field is available for
            # withdrawn participants: return everybody.
            return base_query

        # Filtering on fields that aren't available for withdrawn participants:
        # hide anyone whose withdrawal is older than the visibility window.
        visible_since = clock.CLOCK.now() - WITHDRAWN_PARTICIPANT_VISIBILITY_TIME
        not_withdrawn = (
            ParticipantSummary.withdrawalStatus != WithdrawalStatus.NO_USE)
        recently_withdrawn = ParticipantSummary.withdrawalTime >= visible_since
        return base_query.filter(or_(not_withdrawn, recently_withdrawn))

    def _get_order_by_ending(self, query):
        if self._has_withdrawn_filter(query):
            return _WITHDRAWN_ORDER_BY_ENDING
        return self.order_by_ending

    def _add_order_by(self, query, order_by, field_names, fields):
        """Apply an ordering, redirecting code fields to their 'Id' column.

        When the ordered field is one of _CODE_FILTER_FIELDS, the ordering is
        rebuilt against the field name with 'Id' appended (same direction)
        before delegating to the base implementation.
        """
        effective_order = order_by
        if order_by.field_name in _CODE_FILTER_FIELDS:
            effective_order = OrderBy(order_by.field_name + 'Id',
                                      order_by.ascending)
        parent = super(ParticipantSummaryDao, self)
        return parent._add_order_by(query, effective_order, field_names,
                                    fields)

    def make_query_filter(self, field_name, value):
        """Translate client-facing filter values into column filters.

        Handles HPO, organization, site and code values when parsing filter
        values: names supplied by the client are resolved to the matching
        database ID and the filter is built on the corresponding ID field.

        Raises:
          BadRequest: when the named HPO, organization, site or code does not
            exist.
        """
        build_filter = super(ParticipantSummaryDao, self).make_query_filter

        if field_name == 'biobankId':
            value = from_client_biobank_id(value, log_exception=True)

        if field_name in ('hpoId', 'awardee'):
            # 'awardee' is an alias: both are filtered on the hpoId field.
            hpo = self.hpo_dao.get_by_name(value)
            if not hpo:
                raise BadRequest('No HPO found with name %s' % value)
            return build_filter('hpoId', hpo.hpoId)

        if field_name == 'organization':
            organization = self.organization_dao.get_by_external_id(value)
            if not organization:
                raise BadRequest('No organization found with name %s' % value)
            return build_filter(field_name + 'Id',
                                organization.organizationId)

        if field_name in _SITE_FIELDS:
            id_field = field_name + 'Id'
            if value == UNSET:
                return build_filter(id_field, None)
            site = self.site_dao.get_by_google_group(value)
            if not site:
                raise BadRequest('No site found with google group %s' % value)
            return build_filter(id_field, site.siteId)

        if field_name in _CODE_FILTER_FIELDS:
            id_field = field_name + 'Id'
            if value == UNSET:
                return build_filter(id_field, None)
            # Note: we do not at present support querying for UNMAPPED code values.
            code = self.code_dao.get_code(PPI_SYSTEM, value)
            if not code:
                raise BadRequest('No code found: %s' % value)
            return build_filter(id_field, code.codeId)

        return build_filter(field_name, value)

    def update_from_biobank_stored_samples(self, participant_id=None):
        """Rewrite sample-related summary data.

        Call this after updating BiobankStoredSamples. Four SQL statements are
        executed in a single session: the sample status update, the baseline /
        DNA sample counts update, the enrollment status update and the sample
        status time update.

        Args:
          participant_id: if provided, a participant ID filter is appended to
            every statement so only that participant's summary is updated.
        """
        now = clock.CLOCK.now()
        sample_sql, sample_params = _get_sample_sql_and_params(now)

        baseline_tests_sql, baseline_tests_params = _get_baseline_sql_and_params(
        )
        dna_tests_sql, dna_tests_params = _get_dna_isolates_sql_and_params()

        sample_status_time_sql = _get_sample_status_time_sql_and_params()
        sample_status_time_params = {}

        # The OR group is parenthesised so that the optional
        # "AND participant_id = ..." suffix appended below restricts the whole
        # condition; without the parentheses AND would bind only to the second
        # comparison and other participants' rows could still be rewritten.
        counts_sql = """
    UPDATE
      participant_summary
    SET
      num_baseline_samples_arrived = {baseline_tests_sql},
      samples_to_isolate_dna = {dna_tests_sql},
      last_modified = :now
    WHERE
      (num_baseline_samples_arrived != {baseline_tests_sql} OR
       samples_to_isolate_dna != {dna_tests_sql})
    """.format(baseline_tests_sql=baseline_tests_sql,
               dna_tests_sql=dna_tests_sql)
        counts_params = {'now': now}
        counts_params.update(baseline_tests_params)
        counts_params.update(dna_tests_params)

        enrollment_status_sql = _ENROLLMENT_STATUS_SQL
        enrollment_status_params = {
            'submitted': int(QuestionnaireStatus.SUBMITTED),
            'unset': int(QuestionnaireStatus.UNSET),
            'num_baseline_ppi_modules': self._get_num_baseline_ppi_modules(),
            'completed': int(PhysicalMeasurementsStatus.COMPLETED),
            'received': int(SampleStatus.RECEIVED),
            'full_participant': int(EnrollmentStatus.FULL_PARTICIPANT),
            'member': int(EnrollmentStatus.MEMBER),
            'interested': int(EnrollmentStatus.INTERESTED),
            'now': now
        }

        # If participant_id is provided, add the participant ID filter to all update statements.
        if participant_id:
            sample_sql += ' AND participant_id = :participant_id'
            sample_params['participant_id'] = participant_id
            counts_sql += ' AND participant_id = :participant_id'
            counts_params['participant_id'] = participant_id
            enrollment_status_sql += ' AND participant_id = :participant_id'
            enrollment_status_params['participant_id'] = participant_id
            sample_status_time_sql += ' AND a.participant_id = :participant_id'
            sample_status_time_params['participant_id'] = participant_id

        sample_sql = replace_null_safe_equals(sample_sql)
        counts_sql = replace_null_safe_equals(counts_sql)

        with self.session() as session:
            session.execute(sample_sql, sample_params)
            session.execute(counts_sql, counts_params)
            session.execute(enrollment_status_sql, enrollment_status_params)
            # TODO: Change this to the optimized sql in _update_dv_stored_samples()
            session.execute(sample_status_time_sql, sample_status_time_params)

    def _get_num_baseline_ppi_modules(self):
        """Count the entries of the BASELINE_PPI_QUESTIONNAIRE_FIELDS setting."""
        baseline_fields = config.getSettingList(
            config.BASELINE_PPI_QUESTIONNAIRE_FIELDS)
        return len(baseline_fields)

    def update_enrollment_status(self, summary):
        """Recompute the enrollment status fields on a participant summary.

        Called after a questionnaire response or physical measurements are
        submitted. Sets enrollmentStatus and the member / core-ordered /
        core-stored time fields from the other fields on the summary, and
        bumps lastModified when the enrollment status changes.
        """
        submitted = QuestionnaireStatus.SUBMITTED
        # Consent means study enrollment consent plus either EHR consent, or
        # (when EHR consent is absent) DV EHR sharing consent.
        consent = summary.consentForStudyEnrollment == submitted and (
            summary.consentForElectronicHealthRecords == submitted
            or (summary.consentForElectronicHealthRecords is None
                and summary.consentForDvElectronicHealthRecordsSharing
                == submitted))

        new_status = self.calculate_enrollment_status(
            consent,
            summary.numCompletedBaselinePPIModules,
            summary.physicalMeasurementsStatus,
            summary.samplesToIsolateDNA)

        # The member time is stored first because the core sample time
        # calculations below read it back from the summary.
        summary.enrollmentStatusMemberTime = \
            self.calculate_member_time(consent, summary)
        summary.enrollmentStatusCoreOrderedSampleTime = \
            self.calculate_core_ordered_sample_time(consent, summary)
        summary.enrollmentStatusCoreStoredSampleTime = \
            self.calculate_core_stored_sample_time(consent, summary)

        # Update last modified date if status changes
        status_changed = summary.enrollmentStatus != new_status
        if status_changed:
            summary.lastModified = clock.CLOCK.now()

        summary.enrollmentStatus = new_status

    def calculate_enrollment_status(self, consent,
                                    num_completed_baseline_ppi_modules,
                                    physical_measurements_status,
                                    samples_to_isolate_dna):
        """Derive the enrollment status from consent and completion state.

        Returns INTERESTED without consent; FULL_PARTICIPANT when, with
        consent, all baseline PPI modules are complete, physical measurements
        are COMPLETED and DNA samples are RECEIVED; MEMBER otherwise.
        """
        if not consent:
            return EnrollmentStatus.INTERESTED

        meets_core_requirements = (
            num_completed_baseline_ppi_modules
            == self._get_num_baseline_ppi_modules()
            and physical_measurements_status
            == PhysicalMeasurementsStatus.COMPLETED
            and samples_to_isolate_dna == SampleStatus.RECEIVED)
        if meets_core_requirements:
            return EnrollmentStatus.FULL_PARTICIPANT
        return EnrollmentStatus.MEMBER

    def calculate_member_time(self, consent, participant_summary):
        """Work out when the participant reached member status.

        Returns None without consent. With consent, an already recorded
        enrollmentStatusMemberTime is kept; otherwise the DV EHR sharing
        consent time is used when EHR consent is absent and DV EHR sharing was
        submitted, and the EHR consent time in every other case.
        """
        if not consent:
            return None

        recorded_time = participant_summary.enrollmentStatusMemberTime
        if recorded_time is not None:
            return recorded_time

        consented_via_dv_sharing = (
            participant_summary.consentForElectronicHealthRecords is None
            and participant_summary.consentForDvElectronicHealthRecordsSharing
            == QuestionnaireStatus.SUBMITTED)
        if consented_via_dv_sharing:
            return participant_summary.consentForDvElectronicHealthRecordsSharingTime
        return participant_summary.consentForElectronicHealthRecordsTime

    def calculate_core_stored_sample_time(self, consent, participant_summary):
        """Work out the core-participant time based on stored samples.

        Returns None unless the participant has consent, all baseline PPI
        modules, COMPLETED physical measurements and RECEIVED DNA samples.
        Otherwise an already recorded enrollmentStatusCoreStoredSampleTime is
        kept when a core sample time can be computed; failing that, the
        computed time (possibly None) is returned.
        """
        qualifies = (
            consent
            and participant_summary.numCompletedBaselinePPIModules
            == self._get_num_baseline_ppi_modules()
            and participant_summary.physicalMeasurementsStatus
            == PhysicalMeasurementsStatus.COMPLETED
            and participant_summary.samplesToIsolateDNA
            == SampleStatus.RECEIVED)
        if not qualifies:
            return None

        computed_time = self.calculate_max_core_sample_time(
            participant_summary, field_name_prefix='sampleStatus')
        recorded_time = participant_summary.enrollmentStatusCoreStoredSampleTime
        if computed_time and recorded_time:
            return recorded_time
        return computed_time

    def calculate_core_ordered_sample_time(self, consent, participant_summary):
        """Work out the core-participant time based on ordered samples.

        Returns None unless the participant has consent, all baseline PPI
        modules and COMPLETED physical measurements. Otherwise an already
        recorded enrollmentStatusCoreOrderedSampleTime is kept when a core
        sample time can be computed; failing that, the computed time (possibly
        None) is returned.
        """
        qualifies = (
            consent
            and participant_summary.numCompletedBaselinePPIModules
            == self._get_num_baseline_ppi_modules()
            and participant_summary.physicalMeasurementsStatus
            == PhysicalMeasurementsStatus.COMPLETED)
        if not qualifies:
            return None

        computed_time = self.calculate_max_core_sample_time(
            participant_summary, field_name_prefix='sampleOrderStatus')
        recorded_time = participant_summary.enrollmentStatusCoreOrderedSampleTime
        if computed_time and recorded_time:
            return recorded_time
        return computed_time

    def calculate_max_core_sample_time(self,
                                       participant_summary,
                                       field_name_prefix='sampleStatus'):
        """Return the latest of the milestones needed for core status.

        The earliest non-None '<prefix><test code>Time' value across the
        configured DNA sample test codes is combined with the member time, the
        three baseline questionnaire times and the physical measurements
        finalized time; the maximum of those is returned. Returns None when no
        DNA sample time is set.

        Args:
          participant_summary: summary to inspect; iterated as (name, value)
            pairs.
          field_name_prefix: prefix of the per-test time fields to consider.
        """
        dna_test_codes = config.getSettingList(config.DNA_SAMPLE_TEST_CODES)
        time_field_names = [
            field_name_prefix + '%sTime' % code for code in dna_test_codes
        ]

        recorded_times = []
        for attr_name, attr_value in participant_summary:
            if attr_value is not None and attr_name in time_field_names:
                recorded_times.append(attr_value)

        if not recorded_times:
            return None

        earliest_sample_time = min(recorded_times)
        # NOTE(review): the other timestamps may be None here; presumably this
        # relies on Python 2 ordering None below every other value - confirm.
        milestones = [
            earliest_sample_time,
            participant_summary.enrollmentStatusMemberTime,
            participant_summary.questionnaireOnTheBasicsTime,
            participant_summary.questionnaireOnLifestyleTime,
            participant_summary.questionnaireOnOverallHealthTime,
            participant_summary.physicalMeasurementsFinalizedTime,
        ]
        return max(milestones)

    def calculate_distinct_visits(self,
                                  pid,
                                  finalized_time,
                                  id_,
                                  amendment=False):
        """Decide whether an order or measurement counts as a distinct visit.

        Participants may get PM or biobank samples on same day. This should be
        considered as a single visit in terms of program payment to
        participant.

        Args:
          pid: participant ID whose orders and measurements are examined.
          finalized_time: finalized time of the item being evaluated.
          id_: ID of the biobank order or physical measurement being evaluated.
          amendment: whether the item is an amendment.

        Returns:
          True if no other non-cancelled order and no other finalized
          measurement was finalized on the same date (and the order itself is
          not amended); False otherwise.
        """
        from dao.biobank_order_dao import BiobankOrderDao
        from dao.physical_measurements_dao import PhysicalMeasurementsDao

        existing_orders = BiobankOrderDao().get_biobank_orders_for_participant(
            pid)
        ordered_samples = BiobankOrderDao(
        ).get_ordered_samples_for_participant(pid)
        existing_measurements = PhysicalMeasurementsDao(
        ).get_measuremnets_for_participant(pid)

        # Map each order to the finalized date of its (finalized) samples.
        finalized_date_by_order = {}
        for sample in ordered_samples:
            if sample.finalized:
                finalized_date_by_order[
                    sample.biobankOrderId] = sample.finalized.date()

        if existing_orders and finalized_time:
            # Another live order on the same day, or this very order having
            # been amended, means the day already has an order.
            day_has_order = any(
                (finalized_date_by_order.get(order.biobankOrderId)
                 == finalized_time.date()
                 and order.biobankOrderId != id_
                 and order.orderStatus != BiobankOrderStatus.CANCELLED)
                or (order.biobankOrderId == id_
                    and order.orderStatus == BiobankOrderStatus.AMENDED)
                for order in existing_orders)
        else:
            day_has_order = bool(not finalized_time and amendment)

        day_has_measurement = False
        if existing_measurements and finalized_time:
            day_has_measurement = any(
                measurement.finalized
                and measurement.finalized.date() == finalized_time.date()
                and measurement.physicalMeasurementsId != id_
                for measurement in existing_measurements)

        return not (day_has_order or day_has_measurement)

    def to_client_json(self, model):
        """Convert a participant summary model into its client JSON dict.

        Starts from model.asdict(), redacts fields for withdrawn or suspended
        participants, converts IDs to their client form, adds the derived
        ageRange / awardee / organization entries, runs the field formatters
        and finally drops every entry whose value is None.
        """
        result = model.asdict()
        # Participants that withdrew more than 48 hours ago should have fields other than
        # WITHDRAWN_PARTICIPANT_FIELDS cleared.
        if (model.withdrawalStatus == WithdrawalStatus.NO_USE and
            (model.withdrawalTime is None or model.withdrawalTime <
             clock.CLOCK.now() - WITHDRAWN_PARTICIPANT_VISIBILITY_TIME)):
            result = {k: result.get(k) for k in WITHDRAWN_PARTICIPANT_FIELDS}

        # Non-withdrawn participants who must not be contacted get every field
        # in SUSPENDED_PARTICIPANT_FIELDS overwritten with UNSET.
        elif model.withdrawalStatus != WithdrawalStatus.NO_USE and \
          model.suspensionStatus == SuspensionStatus.NO_CONTACT:
            for i in SUSPENDED_PARTICIPANT_FIELDS:
                result[i] = UNSET

        # IDs are always emitted in their client-facing representation.
        result['participantId'] = to_client_participant_id(model.participantId)
        biobank_id = result.get('biobankId')
        if biobank_id:
            result['biobankId'] = to_client_biobank_id(biobank_id)
        # ageRange is derived from the date of birth relative to the current time.
        date_of_birth = result.get('dateOfBirth')
        if date_of_birth:
            result['ageRange'] = get_bucketed_age(date_of_birth,
                                                  clock.CLOCK.now())
        else:
            result['ageRange'] = UNSET

        if result.get('primaryLanguage') is None:
            result['primaryLanguage'] = UNSET

        # The organization is exposed under 'organization' rather than
        # 'organizationId'; the key is renamed before it is formatted.
        if 'organizationId' in result:
            result['organization'] = result['organizationId']
            del result['organizationId']
            format_json_org(result, self.organization_dao, 'organization')

        # 'awardee' mirrors 'hpoId' as it stands after format_json_hpo has run.
        format_json_hpo(result, self.hpo_dao, 'hpoId')
        result['awardee'] = result['hpoId']
        # NOTE(review): presumably populates _DATE_FIELDS / _CODE_FIELDS /
        # _ENUM_FIELDS used below - confirm against its definition.
        _initialize_field_type_sets()
        for fieldname in _DATE_FIELDS:
            format_json_date(result, fieldname)
        for fieldname in _CODE_FIELDS:
            format_json_code(result, self.code_dao, fieldname)
        for fieldname in _ENUM_FIELDS:
            format_json_enum(result, fieldname)
        for fieldname in _SITE_FIELDS:
            format_json_site(result, self.site_dao, fieldname)
        # Withdrawn or suspended participants are always reported as NO_CONTACT.
        if (model.withdrawalStatus == WithdrawalStatus.NO_USE
                or model.suspensionStatus == SuspensionStatus.NO_CONTACT):
            result['recontactMethod'] = 'NO_CONTACT'
        # Strip None values.
        result = {k: v for k, v in result.iteritems() if v is not None}

        return result

    def _decode_token(self, query_def, fields):
        """Decode a pagination token, widening lastModified sync windows.

        If token exists in participant_summary api, decode and use lastModified
        to add a buffer (config.LAST_MODIFIED_BUFFER_SECONDS, documented as 60
        seconds). This ensures when a _sync link is used no one is missed: at
        a minimum the last participant, and any more that have been modified
        within the buffer, are returned again. Duplicate participants returned
        should be handled on the client side.

        The buffer is only applied when the query is ordered by lastModified
        and both always_return_token and backfill_sync are True.
        """
        decoded_vals = super(ParticipantSummaryDao,
                             self)._decode_token(query_def, fields)

        ordering = query_def.order_by
        needs_buffer = ordering and (
            ordering.field_name == 'lastModified'
            and query_def.always_return_token is True
            and query_def.backfill_sync is True)
        if needs_buffer:
            sync_buffer = datetime.timedelta(
                seconds=config.LAST_MODIFIED_BUFFER_SECONDS)
            decoded_vals[0] = decoded_vals[0] - sync_buffer

        return decoded_vals

    @staticmethod
    def update_ehr_status(summary, update_time):
        """Mark a summary as having EHR data as of update_time.

        Sets ehrStatus to PRESENT and ehrUpdateTime to update_time; the
        ehrReceiptTime is only set when it has no value yet. Returns the same
        summary object.
        """
        summary.ehrStatus = EhrStatus.PRESENT
        summary.ehrUpdateTime = update_time
        # Keep the first receipt time once it has been recorded.
        if not summary.ehrReceiptTime:
            summary.ehrReceiptTime = update_time
        return summary
class OrganizationHierarchySyncDao(BaseDao):
    def __init__(self):
        """Bind this DAO to the HPO model and create the per-level DAOs."""
        base = super(OrganizationHierarchySyncDao, self)
        base.__init__(HPO)

        # One DAO per hierarchy level: awardee (HPO), organization and site.
        self.hpo_dao = HPODao()
        self.organization_dao = OrganizationDao()
        self.site_dao = SiteDao()

    def from_client_json(self,
                         resource_json,
                         id_=None,
                         expected_version=None,
                         client_id=None):  # pylint: disable=unused-argument
        """Parse a FHIR Organization payload and attach its integer version.

        Returns:
          The parsed FHIR Organization, with a `version` attribute holding
          int(meta.versionId).

        Raises:
          BadRequest: if the payload is not valid FHIR, carries no
            meta.versionId, or its versionId is not an integer.
        """
        organization_cls = \
            lib_fhir.fhirclient_3_0_0.models.organization.Organization
        try:
            fhir_org = organization_cls(resource_json)
        except FHIRValidationError:
            raise BadRequest('Invalid FHIR format in payload data.')

        meta = fhir_org.meta
        if not (meta and meta.versionId):
            raise BadRequest('No versionId info found in payload data.')

        try:
            fhir_org.version = int(fhir_org.meta.versionId)
        except ValueError:
            raise BadRequest('Invalid versionId in payload data.')

        return fhir_org

    def to_client_json(self, hierarchy_org_obj):
        """Return the object's own JSON form, as produced by its as_json()."""
        client_json = hierarchy_org_obj.as_json()
        return client_json

    def get_etag(self, id_, pid):  # pylint: disable=unused-argument
        """Return None: no ETag is produced for hierarchy sync objects."""
        return None

    def update(self, hierarchy_org_obj):
        """Dispatch a hierarchy payload to the handler for its type.

        The type reported by _get_type selects the awardee, organization or
        site handler.

        Raises:
          BadRequest: if the type is not AWARDEE, ORGANIZATION or SITE.
        """
        entity_kind = self._get_type(hierarchy_org_obj)

        handlers = {
            'AWARDEE': self._update_awardee,
            'ORGANIZATION': self._update_organization,
            'SITE': self._update_site
        }
        handler = handlers.get(entity_kind)
        if handler is None:
            raise BadRequest('No awardee-type info found in payload data.')

        handler(hierarchy_org_obj)

    def _update_awardee(self, hierarchy_org_obj):
        """Insert or update the HPO described by an awardee payload.

        The awardee is matched to existing HPOs by upper-cased awardee ID. An
        existing HPO is updated only when one of its fields differs; a new one
        is inserted with hpoId set to the number of existing HPOs. Afterwards
        bq_hpo_update_by_id is called with the HPO's ID.

        Raises:
          BadRequest: if the payload has no id, no awardee-id identifier, or an
            awardee type that OrganizationType rejects.
        """
        payload_resource_id = hierarchy_org_obj.id
        if payload_resource_id is None:
            raise BadRequest('No id found in payload data.')

        awardee_id = self._get_value_from_identifier(
            hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'awardee-id')
        if awardee_id is None:
            raise BadRequest(
                'No organization-identifier info found in payload data.')

        is_obsolete = (None if hierarchy_org_obj.active else
                       ObsoleteStatus('OBSOLETE'))
        awardee_type = self._get_value_from_extention(
            hierarchy_org_obj,
            HIERARCHY_CONTENT_SYSTEM_PREFIX + 'awardee-type')

        try:
            organization_type = OrganizationType(awardee_type)
        except TypeError:
            raise BadRequest(
                'Invalid organization type {} for awardee {}'.format(
                    awardee_type, awardee_id))
        # An UNSET type is stored as no type at all.
        if organization_type == OrganizationType.UNSET:
            organization_type = None

        incoming = HPO(name=awardee_id.upper(),
                       displayName=hierarchy_org_obj.name,
                       organizationType=organization_type,
                       isObsolete=is_obsolete,
                       resourceId=payload_resource_id)

        hpos_by_name = {hpo.name: hpo for hpo in self.hpo_dao.get_all()}
        stored = hpos_by_name.get(incoming.name)

        def comparable(hpo):
            # Ignore the primary key when checking for changes.
            as_dict = hpo.asdict()
            as_dict['hpoId'] = None
            return as_dict

        with self.hpo_dao.session() as session:
            if not stored:
                incoming.hpoId = len(hpos_by_name)
                hpo_id = incoming.hpoId
                self.hpo_dao.insert_with_session(session, incoming)
            else:
                hpo_id = stored.hpoId
                new_dict = comparable(incoming)
                if comparable(stored) == new_dict:
                    logging.info('Not updating {}.'.format(new_dict['name']))
                else:
                    stored.displayName = incoming.displayName
                    stored.organizationType = incoming.organizationType
                    stored.isObsolete = incoming.isObsolete
                    stored.resourceId = incoming.resourceId
                    self.hpo_dao.update_with_session(session, stored)
        bq_hpo_update_by_id(hpo_id)

    def _update_organization(self, hierarchy_org_obj):
        """Insert or update the Organization described by a payload.

        The parent HPO is resolved from the payload's reference (via
        _get_reference). The organization is matched to existing ones by
        upper-cased organization ID; an existing row is updated only when one
        of its fields differs, otherwise a new row is inserted. Afterwards
        bq_organization_update_by_id is called with the organization's ID.

        Raises:
          BadRequest: if the payload has no id, no organization-id identifier,
            or references an HPO that cannot be found.
        """
        payload_resource_id = hierarchy_org_obj.id
        if payload_resource_id is None:
            raise BadRequest('No id found in payload data.')

        organization_id = self._get_value_from_identifier(
            hierarchy_org_obj,
            HIERARCHY_CONTENT_SYSTEM_PREFIX + 'organization-id')
        if organization_id is None:
            raise BadRequest(
                'No organization-identifier info found in payload data.')

        is_obsolete = (None if hierarchy_org_obj.active else
                       ObsoleteStatus('OBSOLETE'))
        parent_resource_id = self._get_reference(hierarchy_org_obj)

        hpo = self.hpo_dao.get_by_resource_id(parent_resource_id)
        if hpo is None:
            raise BadRequest(
                'Invalid partOf reference {} importing organization {}'.format(
                    parent_resource_id, organization_id))

        external_id = organization_id.upper()
        incoming = Organization(externalId=external_id,
                                displayName=hierarchy_org_obj.name,
                                hpoId=hpo.hpoId,
                                isObsolete=is_obsolete,
                                resourceId=payload_resource_id)

        organizations_by_external_id = {
            org.externalId: org
            for org in self.organization_dao.get_all()
        }
        stored = organizations_by_external_id.get(incoming.externalId)

        def comparable(org):
            # Ignore the primary key when checking for changes.
            as_dict = org.asdict()
            as_dict['organizationId'] = None
            return as_dict

        with self.organization_dao.session() as session:
            if not stored:
                self.organization_dao.insert_with_session(session, incoming)
            else:
                new_dict = comparable(incoming)
                if comparable(stored) == new_dict:
                    logging.info('Not updating {}.'.format(
                        new_dict['externalId']))
                else:
                    stored.displayName = incoming.displayName
                    stored.hpoId = incoming.hpoId
                    stored.isObsolete = incoming.isObsolete
                    stored.resourceId = incoming.resourceId
                    self.organization_dao.update_with_session(session, stored)

        # Re-read the row so the ID is available for newly inserted rows too.
        org_id = self.organization_dao.get_by_external_id(
            external_id).organizationId
        bq_organization_update_by_id(org_id)

    def _update_site(self, hierarchy_org_obj):
        if hierarchy_org_obj.id is None:
            raise BadRequest('No id found in payload data.')
        google_group = self._get_value_from_identifier(
            hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'site-id')
        if google_group is None:
            raise BadRequest(
                'No organization-identifier info found in payload data.')
        google_group = google_group.lower()
        is_obsolete = ObsoleteStatus(
            'OBSOLETE') if not hierarchy_org_obj.active else None
        resource_id = self._get_reference(hierarchy_org_obj)

        organization = self.organization_dao.get_by_resource_id(resource_id)
        if organization is None:
            raise BadRequest(
                'Invalid partOf reference {} importing site {}'.format(
                    resource_id, google_group))

        launch_date = None
        launch_date_str = self._get_value_from_extention(
            hierarchy_org_obj,
            HIERARCHY_CONTENT_SYSTEM_PREFIX + 'anticipatedLaunchDate',
            'valueDate')
        if launch_date_str:
            try:
                launch_date = parse(launch_date_str).date()
            except ValueError:
                raise BadRequest('Invalid launch date {} for site {}'.format(
                    launch_date_str, google_group))

        name = hierarchy_org_obj.name
        mayolink_client_number = None
        mayolink_client_number_str = self._get_value_from_identifier(
            hierarchy_org_obj,
            HIERARCHY_CONTENT_SYSTEM_PREFIX + 'mayo-link-identifier')
        if mayolink_client_number_str:
            try:
                mayolink_client_number = int(mayolink_client_number_str)
            except ValueError:
                raise BadRequest(
                    'Invalid Mayolink Client # {} for site {}'.format(
                        mayolink_client_number_str, google_group))

        notes = self._get_value_from_extention(
            hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'notes')

        site_status_bool = self._get_value_from_extention(
            hierarchy_org_obj,
            HIERARCHY_CONTENT_SYSTEM_PREFIX + 'schedulingStatusActive',
            'valueBoolean')
        try:
            site_status = SiteStatus(
                'ACTIVE' if site_status_bool else 'INACTIVE')
        except TypeError:
            raise BadRequest('Invalid site status {} for site {}'.format(
                site_status, google_group))

        enrolling_status_bool = self._get_value_from_extention(
            hierarchy_org_obj,
            HIERARCHY_CONTENT_SYSTEM_PREFIX + 'enrollmentStatusActive',
            'valueBoolean')
        try:
            enrolling_status = EnrollingStatus(
                'ACTIVE' if enrolling_status_bool else 'INACTIVE')
        except TypeError:
            raise BadRequest(
                'Invalid enrollment site status {} for site {}'.format(
                    enrolling_status_bool, google_group))

        digital_scheduling_bool = self._get_value_from_extention(
            hierarchy_org_obj,
            HIERARCHY_CONTENT_SYSTEM_PREFIX + 'digitalSchedulingStatusActive',
            'valueBoolean')
        try:
            digital_scheduling_status = DigitalSchedulingStatus(
                'ACTIVE' if digital_scheduling_bool else 'INACTIVE')
        except TypeError:
            raise BadRequest(
                'Invalid digital scheduling status {} for site {}'.format(
                    digital_scheduling_bool, google_group))

        directions = self._get_value_from_extention(
            hierarchy_org_obj, HIERARCHY_CONTENT_SYSTEM_PREFIX + 'directions')
        physical_location_name = self._get_value_from_extention(
            hierarchy_org_obj,
            HIERARCHY_CONTENT_SYSTEM_PREFIX + 'locationName')
        address_1, address_2, city, state, zip_code = self._get_address(
            hierarchy_org_obj)

        phone = self._get_contact_point(hierarchy_org_obj, 'phone')
        admin_email_addresses = self._get_contact_point(
            hierarchy_org_obj, 'email')
        link = self._get_contact_point(hierarchy_org_obj, 'url')

        schedule_instructions = self._get_value_from_extention(
            hierarchy_org_obj,
            HIERARCHY_CONTENT_SYSTEM_PREFIX + 'schedulingInstructions')

        entity = Site(siteName=name,
                      googleGroup=google_group,
                      mayolinkClientNumber=mayolink_client_number,
                      organizationId=organization.organizationId,
                      hpoId=organization.hpoId,
                      siteStatus=site_status,
                      enrollingStatus=enrolling_status,
                      digitalSchedulingStatus=digital_scheduling_status,
                      scheduleInstructions=schedule_instructions,
                      scheduleInstructions_ES='',
                      launchDate=launch_date,
                      notes=notes,
                      notes_ES='',
                      directions=directions,
                      physicalLocationName=physical_location_name,
                      address1=address_1,
                      address2=address_2,
                      city=city,
                      state=state,
                      zipCode=zip_code,
                      phoneNumber=phone,
                      adminEmails=admin_email_addresses,
                      link=link,
                      isObsolete=is_obsolete,
                      resourceId=hierarchy_org_obj.id)

        existing_map = {
            entity.googleGroup: entity
            for entity in self.site_dao.get_all()
        }

        existing_entity = existing_map.get(entity.googleGroup)
        with self.site_dao.session() as session:
            if existing_entity:
                self._populate_lat_lng_and_time_zone(entity, existing_entity)
                if entity.siteStatus == SiteStatus.ACTIVE and \
                  (entity.latitude is None or entity.longitude is None):
                    raise BadRequest(
                        'Active site without geocoding: {}'.format(
                            entity.googleGroup))

                new_dict = entity.asdict()
                new_dict['siteId'] = None
                existing_dict = existing_entity.asdict()
                existing_dict['siteId'] = None
                if existing_dict == new_dict:
                    logging.info('Not updating {}.'.format(
                        new_dict['googleGroup']))
                else:
                    for k, v in entity.asdict().iteritems():
                        if k != 'siteId' and k != 'googleGroup':
                            setattr(existing_entity, k, v)
                    self.site_dao.update_with_session(session, existing_entity)
            else:
                self._populate_lat_lng_and_time_zone(entity, None)
                if entity.siteStatus == SiteStatus.ACTIVE and \
                  (entity.latitude is None or entity.longitude is None):
                    raise BadRequest(
                        'Active site without geocoding: {}'.format(
                            entity.googleGroup))
                self.site_dao.insert_with_session(session, entity)
        site_id = self.site_dao.get_by_google_group(google_group).siteId
        bq_site_update_by_id(site_id)

    def _get_type(self, hierarchy_org_obj):
        """Return the hierarchy type code of an organization resource.

        Walks every coding of every entry in ``hierarchy_org_obj.type`` and
        returns the ``code`` of the first coding whose ``system`` equals
        ``HIERARCHY_CONTENT_SYSTEM_PREFIX + 'type'``.

        Returns None when nothing matches, or when the ``type`` list (or an
        entry's ``coding`` list) is None or empty.
        """
        type_system = HIERARCHY_CONTENT_SYSTEM_PREFIX + 'type'
        for type_item in hierarchy_org_obj.type or []:
            for code_item in type_item.coding or []:
                if code_item.system == type_system:
                    # Return right away: a ``break`` here would only leave the
                    # inner loop, letting a later type entry overwrite the
                    # match that was already found.
                    return code_item.code
        return None

    def _get_value_from_identifier(self, hierarchy_org_obj, system):
        identifier_arr = hierarchy_org_obj.identifier
        for identifier in identifier_arr:
            if identifier.system == system:
                return identifier.value
        else:
            return None

    def _get_value_from_extention(self,
                                  hierarchy_org_obj,
                                  url,
                                  value_key='valueString'):
        extension_arr = hierarchy_org_obj.extension
        for extension in extension_arr:
            if extension.url == url:
                ext_json = extension.as_json()
                return ext_json[value_key]
        else:
            return None

    def _get_contact_point(self, hierarchy_org_obj, code):
        contact_arr = hierarchy_org_obj.contact
        for contact in contact_arr:
            telecom_arr = contact.telecom
            for telecom in telecom_arr:
                if telecom.system == code:
                    return telecom.value
        else:
            return None

    def _get_address(self, hierarchy_org_obj):
        address = hierarchy_org_obj.address[0]
        address_1 = address.line[0] if len(address.line) > 0 else ''
        address_2 = address.line[1] if len(address.line) > 1 else ''
        city = address.city
        state = address.state
        postal_code = address.postalCode

        return address_1, address_2, city, state, postal_code

    def _get_reference(self, hierarchy_org_obj):
        try:
            return hierarchy_org_obj.partOf.reference.split('/')[1]
        except IndexError:
            return None

    def _populate_lat_lng_and_time_zone(self, site, existing_site):
        if site.address1 and site.city and site.state:
            if existing_site:
                if (existing_site.address1 == site.address1
                        and existing_site.city == site.city
                        and existing_site.state == site.state
                        and existing_site.latitude is not None
                        and existing_site.longitude is not None
                        and existing_site.timeZoneId is not None):
                    # Address didn't change, use the existing lat/lng and time zone.
                    site.latitude = existing_site.latitude
                    site.longitude = existing_site.longitude
                    site.timeZoneId = existing_site.timeZoneId
                    return
            latitude, longitude = self._get_lat_long_for_site(
                site.address1, site.city, site.state)
            site.latitude = latitude
            site.longitude = longitude
            if latitude and longitude:
                site.timeZoneId = self._get_time_zone(latitude, longitude)
        else:
            if site.googleGroup not in self.status_exception_list:
                if site.siteStatus == self.ACTIVE:
                    logging.warn(
                        'Active site must have valid address. Site: {}, Group: {}'
                        .format(site.siteName, site.googleGroup))

    def _get_lat_long_for_site(self, address_1, city, state):
        """Geocode a street address through the Google Maps client.

        Side effects: stores the space-joined address in ``self.full_address``,
        the ``API_KEY`` environment value in ``self.api_key`` and the created
        client in ``self.gmaps`` (``_get_time_zone`` reads ``self.gmaps`` and
        ``self.full_address`` afterwards).

        Returns ``(latitude, longitude)`` taken from the first geocode result,
        or ``(None, None)`` when the address yields no result, the result has
        no geometry/location, or a ValueError/IndexError is raised.
        """
        self.full_address = address_1 + ' ' + city + ' ' + state
        try:
            self.api_key = os.environ.get('API_KEY')
            # NOTE(review): the ValueError handler below suggests the client
            # constructor rejects a missing/malformed key - confirm.
            self.gmaps = googlemaps.Client(key=self.api_key)
            try:
                # Query with the same space-separated string that is logged;
                # joining street and city without a separator produced
                # addresses such as "1 Main StPittsburgh PA".
                geocode_result = self.gmaps.geocode(self.full_address)[0]
            except IndexError:
                logging.warning('Bad address for {}, could not geocode.'.format(
                    self.full_address))
                return None, None
            if not geocode_result:
                logging.warning('Geocode results failed for %s.',
                                self.full_address)
                return None, None
            geometry = geocode_result.get('geometry')
            # Always bind ``location`` so a result without geometry is reported
            # as "not found" instead of raising UnboundLocalError.
            location = geometry.get('location') if geometry else None
            if not location:
                logging.warning('Can not find lat/long for %s',
                                self.full_address)
                return None, None
            return location.get('lat'), location.get('lng')
        except ValueError as e:
            # Never write the key itself to the logs; it is a credential.
            logging.exception('Invalid geocode key (API_KEY %s). ERROR: %s',
                              'is set' if self.api_key else 'is not set', e)
            return None, None
        except IndexError as e:
            logging.exception(
                'Geocoding failure Check that address is correct. ERROR: %s',
                e)
            return None, None

    def _get_time_zone(self, latitude, longitude):
        time_zone = self.gmaps.timezone(location=(latitude, longitude))
        if time_zone['status'] == 'OK':
            time_zone_id = time_zone['timeZoneId']
            return time_zone_id
        else:
            logging.info('can not retrieve time zone from %s',
                         self.full_address)
            return None
class ParticipantSummaryDao(UpdatableDao):
    def __init__(self):
        """Create the DAO for ParticipantSummary plus the lookup DAOs it uses."""
        super(ParticipantSummaryDao, self).__init__(
            ParticipantSummary, order_by_ending=_ORDER_BY_ENDING)
        # Helper DAOs used by this class to translate HPO, code, site and
        # organization references.
        self.hpo_dao = HPODao()
        self.code_dao = CodeDao()
        self.site_dao = SiteDao()
        self.organization_dao = OrganizationDao()

    def get_id(self, obj):
        """Return the identifier of a summary object: its ``participantId``."""
        participant_id = obj.participantId
        return participant_id

    def get_by_email(self, email):
        """Fetch all participant summaries whose ``email`` equals ``email``.

        Opens a session via ``self.session()`` and returns the result of
        ``.all()`` on the filtered ParticipantSummary query.
        """
        with self.session() as session:
            by_email = session.query(ParticipantSummary).filter(
                ParticipantSummary.email == email)
            return by_email.all()

    def _validate_update(self, session, obj, existing_obj):  # pylint: disable=unused-argument
        """Participant summaries don't have a version value; drop it from validation logic."""
        if not existing_obj:
            raise NotFound('%s with id %s does not exist' %
                           (self.model_type.__name__, id))

    def _has_withdrawn_filter(self, query):
        for field_filter in query.field_filters:
            if (field_filter.field_name == 'withdrawalStatus'
                    and field_filter.value == WithdrawalStatus.NO_USE):
                return True
            if field_filter.field_name == 'withdrawalTime' and field_filter.value is not None:
                return True
        return False

    def _get_non_withdrawn_filter_field(self, query):
        """Returns the first field referenced in query filters which isn't in
    WITHDRAWN_PARTICIPANT_FIELDS."""
        for field_filter in query.field_filters:
            if not field_filter.field_name in WITHDRAWN_PARTICIPANT_FIELDS:
                return field_filter.field_name
        return None

    def _initialize_query(self, session, query_def):
        """Build the base query, applying the withdrawn-participant rules.

        * A query that filters for withdrawn participants may only filter on
          WITHDRAWN_PARTICIPANT_FIELDS; otherwise BadRequest is raised.
        * A query that filters on any other field is restricted to
          participants who have not withdrawn, or whose withdrawal time is
          within WITHDRAWN_PARTICIPANT_VISIBILITY_TIME of now.
        * All other queries are returned as built by the parent class.
        """
        restricted_field = self._get_non_withdrawn_filter_field(query_def)
        targets_withdrawn = self._has_withdrawn_filter(query_def)
        if targets_withdrawn and restricted_field:
            raise BadRequest(
                "Can't query on %s for withdrawn participants" %
                restricted_field)

        base_query = super(ParticipantSummaryDao,
                           self)._initialize_query(session, query_def)
        if targets_withdrawn or not restricted_field:
            # Either the caller asked for withdrawn participants explicitly,
            # or only fields that remain available after withdrawal are
            # filtered on: no extra restriction is needed.
            return base_query

        # Fields that are not kept for withdrawn participants are being
        # queried: hide anyone whose withdrawal is older than the visibility
        # window.
        visible_since = (clock.CLOCK.now() -
                         WITHDRAWN_PARTICIPANT_VISIBILITY_TIME)
        not_withdrawn = (ParticipantSummary.withdrawalStatus !=
                         WithdrawalStatus.NO_USE)
        recently_withdrawn = ParticipantSummary.withdrawalTime >= visible_since
        return base_query.filter(or_(not_withdrawn, recently_withdrawn))

    def _get_order_by_ending(self, query):
        if self._has_withdrawn_filter(query):
            return _WITHDRAWN_ORDER_BY_ENDING
        return self.order_by_ending

    def _add_order_by(self, query, order_by, field_names, fields):
        """Apply ``order_by`` to ``query`` via the parent implementation.

        When the ordering field is one of _CODE_FILTER_FIELDS, the parent is
        given an OrderBy on the field name with 'Id' appended (same direction)
        instead of the original one.
        """
        effective_order = order_by
        if order_by.field_name in _CODE_FILTER_FIELDS:
            effective_order = OrderBy(order_by.field_name + 'Id',
                                      order_by.ascending)
        return super(ParticipantSummaryDao, self)._add_order_by(
            query, effective_order, field_names, fields)

    def make_query_filter(self, field_name, value):
        """Handle HPO and code values when parsing filter values.

        Client-facing names are translated to stored ids before delegating to
        the parent implementation:

        * ``hpoId`` / ``awardee``: HPO name -> filter on ``hpoId``.
        * ``organization``: external id -> filter on ``organizationId``.
        * fields in _SITE_FIELDS: google group -> filter on ``<field>Id``.
        * fields in _CODE_FILTER_FIELDS: PPI code value -> ``<field>Id``.

        For site and code fields the UNSET value becomes a filter on None.
        Any other field is passed through unchanged.

        Raises:
          BadRequest: when the referenced HPO, organization, site or code
            cannot be found.
        """
        if field_name in ('hpoId', 'awardee'):
            hpo = self.hpo_dao.get_by_name(value)
            if not hpo:
                raise BadRequest('No HPO found with name %s' % value)
            target_field, target_value = 'hpoId', hpo.hpoId
        elif field_name == 'organization':
            organization = self.organization_dao.get_by_external_id(value)
            if not organization:
                raise BadRequest('No organization found with name %s' % value)
            target_field = field_name + 'Id'
            target_value = organization.organizationId
        elif field_name in _SITE_FIELDS:
            target_field = field_name + 'Id'
            if value == UNSET:
                target_value = None
            else:
                site = self.site_dao.get_by_google_group(value)
                if not site:
                    raise BadRequest(
                        'No site found with google group %s' % value)
                target_value = site.siteId
        elif field_name in _CODE_FILTER_FIELDS:
            target_field = field_name + 'Id'
            if value == UNSET:
                target_value = None
            else:
                # Note: we do not at present support querying for UNMAPPED
                # code values.
                code = self.code_dao.get_code(PPI_SYSTEM, value)
                if not code:
                    raise BadRequest('No code found: %s' % value)
                target_value = code.codeId
        else:
            target_field, target_value = field_name, value
        return super(ParticipantSummaryDao,
                     self).make_query_filter(target_field, target_value)

    def update_from_biobank_stored_samples(self, participant_id=None):
        """Rewrite sample-related summary data from biobank_stored_sample.

        Call this after updating BiobankStoredSamples. Two statements run in
        one session: the first recomputes the baseline sample count, the DNA
        sample status and last_modified; the second is _ENROLLMENT_STATUS_SQL.
        When ``participant_id`` is provided, both statements get
        _PARTICIPANT_ID_FILTER appended so only that participant is updated.
        """
        baseline_sql, baseline_params = get_sql_and_params_for_array(
            config.getSettingList(config.BASELINE_SAMPLE_TEST_CODES),
            'baseline')
        dna_sql, dna_params = get_sql_and_params_for_array(
            config.getSettingList(config.DNA_SAMPLE_TEST_CODES), 'dna')
        sample_sql, sample_params = _get_sample_sql_and_params()

        samples_sql = """
    UPDATE
      participant_summary
    SET
      num_baseline_samples_arrived = (
        SELECT
          COUNT(*)
        FROM
          biobank_stored_sample
        WHERE
          biobank_stored_sample.biobank_id = participant_summary.biobank_id
          AND biobank_stored_sample.test IN %s
      ),
      samples_to_isolate_dna = (
          CASE WHEN EXISTS(SELECT * FROM biobank_stored_sample
                           WHERE biobank_stored_sample.biobank_id = participant_summary.biobank_id
                           AND biobank_stored_sample.test IN %s)
          THEN :received ELSE :unset END
      ),
      last_modified = :now
       %s""" % (baseline_sql, dna_sql, sample_sql)

        samples_params = {
            'received': int(SampleStatus.RECEIVED),
            'unset': int(SampleStatus.UNSET),
            'now': clock.CLOCK.now()
        }
        # Merge in the bind parameters that belong to the generated fragments.
        for fragment_params in (baseline_params, dna_params, sample_params):
            samples_params.update(fragment_params)

        enrollment_sql = _ENROLLMENT_STATUS_SQL
        enrollment_params = {
            'submitted': int(QuestionnaireStatus.SUBMITTED),
            'num_baseline_ppi_modules': self._get_num_baseline_ppi_modules(),
            'completed': int(PhysicalMeasurementsStatus.COMPLETED),
            'received': int(SampleStatus.RECEIVED),
            'full_participant': int(EnrollmentStatus.FULL_PARTICIPANT),
            'member': int(EnrollmentStatus.MEMBER),
            'interested': int(EnrollmentStatus.INTERESTED)
        }

        # If participant_id is provided, restrict both updates to that
        # participant.
        if participant_id:
            samples_sql += _PARTICIPANT_ID_FILTER
            samples_params['participant_id'] = participant_id
            enrollment_sql += _PARTICIPANT_ID_FILTER
            enrollment_params['participant_id'] = participant_id

        samples_sql = replace_null_safe_equals(samples_sql)
        with self.session() as session:
            session.execute(samples_sql, samples_params)
            session.execute(enrollment_sql, enrollment_params)

    def _get_num_baseline_ppi_modules(self):
        """Return how many entries the BASELINE_PPI_QUESTIONNAIRE_FIELDS setting lists."""
        baseline_fields = config.getSettingList(
            config.BASELINE_PPI_QUESTIONNAIRE_FIELDS)
        return len(baseline_fields)

    def update_enrollment_status(self, summary):
        """Recompute ``summary.enrollmentStatus`` from the summary's other fields.

        Called after a questionnaire response or physical measurements are
        submitted. Consent counts as given only when both the study
        enrollment consent and the electronic health records consent are
        SUBMITTED. The summary is modified in place.
        """
        submitted = QuestionnaireStatus.SUBMITTED
        has_both_consents = (
            summary.consentForStudyEnrollment == submitted
            and summary.consentForElectronicHealthRecords == submitted)
        summary.enrollmentStatus = self.calculate_enrollment_status(
            has_both_consents,
            summary.numCompletedBaselinePPIModules,
            summary.physicalMeasurementsStatus,
            summary.samplesToIsolateDNA)

    def calculate_enrollment_status(self, consent_for_study_enrollment_and_ehr,
                                    num_completed_baseline_ppi_modules,
                                    physical_measurements_status,
                                    samples_to_isolate_dna):
        """Derive the enrollment status from consent and completion data.

        * INTERESTED: consent has not been given.
        * FULL_PARTICIPANT: consent given, the number of completed baseline
          PPI modules equals the configured number, physical measurements are
          COMPLETED and the DNA sample status is RECEIVED.
        * MEMBER: consent given but at least one of the above is missing.
        """
        if not consent_for_study_enrollment_and_ehr:
            return EnrollmentStatus.INTERESTED

        completed_everything = (
            num_completed_baseline_ppi_modules
            == self._get_num_baseline_ppi_modules()
            and physical_measurements_status
            == PhysicalMeasurementsStatus.COMPLETED
            and samples_to_isolate_dna == SampleStatus.RECEIVED)
        if completed_everything:
            return EnrollmentStatus.FULL_PARTICIPANT
        return EnrollmentStatus.MEMBER

    def to_client_json(self, model):
        """Convert a participant summary model into the dict sent to clients.

        Starts from ``model.asdict()``, reduces it to
        WITHDRAWN_PARTICIPANT_FIELDS for participants whose withdrawal is
        older than WITHDRAWN_PARTICIPANT_VISIBILITY_TIME, rewrites ids and
        typed fields through the ``to_client_*`` / ``format_json_*`` helpers,
        and finally drops every key whose value is None.

        Returns the resulting dict.
        """
        result = model.asdict()
        # Participants that withdrew more than 48 hours ago should have fields other than
        # WITHDRAWN_PARTICIPANT_FIELDS cleared.
        # NOTE(review): if withdrawalTime can be None for a NO_USE participant,
        # this comparison relies on how None orders against a datetime - confirm.
        if (model.withdrawalStatus == WithdrawalStatus.NO_USE
                and model.withdrawalTime <
                clock.CLOCK.now() - WITHDRAWN_PARTICIPANT_VISIBILITY_TIME):
            # Keys missing from the original dict come back as None here and
            # are removed by the None-stripping step at the end.
            result = {k: result.get(k) for k in WITHDRAWN_PARTICIPANT_FIELDS}

        # participantId is taken from the model itself, so it is set even
        # after the reduction above.
        result['participantId'] = to_client_participant_id(model.participantId)
        biobank_id = result.get('biobankId')
        if biobank_id:
            result['biobankId'] = to_client_biobank_id(biobank_id)
        # ageRange is derived from dateOfBirth as of the current clock time;
        # UNSET when the result carries no date of birth.
        date_of_birth = result.get('dateOfBirth')
        if date_of_birth:
            result['ageRange'] = get_bucketed_age(date_of_birth,
                                                  clock.CLOCK.now())
        else:
            result['ageRange'] = UNSET

        # Expose the organization under the key 'organization' instead of
        # 'organizationId' before handing it to the formatter.
        if 'organizationId' in result:
            result['organization'] = result['organizationId']
            del result['organizationId']
            format_json_org(result, self.organization_dao, 'organization')

        # The format_json_* helpers are called for their effect on ``result``;
        # their return values are not used.
        format_json_hpo(result, self.hpo_dao, 'hpoId')
        # 'awardee' is copied from 'hpoId' after format_json_hpo has run.
        result['awardee'] = result['hpoId']
        # Presumably populates the _DATE_FIELDS / _CODE_FIELDS / _ENUM_FIELDS
        # sets iterated below - defined elsewhere, TODO confirm.
        _initialize_field_type_sets()
        for fieldname in _DATE_FIELDS:
            format_json_date(result, fieldname)
        for fieldname in _CODE_FIELDS:
            format_json_code(result, self.code_dao, fieldname)
        for fieldname in _ENUM_FIELDS:
            format_json_enum(result, fieldname)
        for fieldname in _SITE_FIELDS:
            format_json_site(result, self.site_dao, fieldname)
        # Withdrawn or no-contact participants always report NO_CONTACT,
        # overriding whatever recontactMethod was stored.
        if (model.withdrawalStatus == WithdrawalStatus.NO_USE
                or model.suspensionStatus == SuspensionStatus.NO_CONTACT):
            result['recontactMethod'] = 'NO_CONTACT'
        # Strip None values.
        result = {k: v for k, v in result.iteritems() if v is not None}

        return result

    def _decode_token(self, query_def, fields):
        """Decode a pagination token, adding a look-back buffer for sync queries.

        The token is decoded by the parent class. When the query is ordered
        by ``lastModified`` and ``always_return_token`` is set (as for a _sync
        link), the first decoded value is moved back by
        ``config.LAST_MODIFIED_BUFFER_SECONDS`` seconds so that no recently
        modified participant is missed. This returns, at a minimum, the last
        participant plus any modified within the buffer; duplicates should be
        handled on the client side.

        Returns the decoded values, with the first one adjusted when the
        buffer applies.
        """
        decoded_vals = super(ParticipantSummaryDao,
                             self)._decode_token(query_def, fields)
        order_by = query_def.order_by
        # Test the flag by truthiness rather than comparing it to True.
        if (order_by and order_by.field_name == 'lastModified'
                and query_def.always_return_token):
            decoded_vals[0] = decoded_vals[0] - datetime.timedelta(
                seconds=config.LAST_MODIFIED_BUFFER_SECONDS)

        return decoded_vals