def populate(
    dal: DalClinicalTrials,
    num_days: Optional[int] = None,
    chunk_size: Optional[int] = 1000,
    skip_populated: Optional[bool] = False,
    dry_run: Optional[bool] = False,
):
    """Write a `StudyFacility` record for every location of recent studies.

    Studies are obtained in chunks from `find_recent_studies`. For each
    location of each study the study ID, facility ID and the canonical
    facility ID of the location's facility are logged and then passed to
    `dal.iodu_study_facility`.

    Args:
        dal: Data-access layer providing `session_scope` and
            `iodu_study_facility`.
        num_days: Forwarded unchanged to `find_recent_studies`
            (presumably the look-back window in days; confirm against
            that function).
        chunk_size: Forwarded unchanged to `find_recent_studies`.
        skip_populated: Forwarded unchanged to `find_recent_studies`.
        dry_run: When truthy, every intended write is only logged (with a
            "[DRY RUN] " prefix) and `dal.iodu_study_facility` is not
            called.
    """
    # The prefix depends solely on `dry_run`, so build it a single time.
    log_prefix = "[DRY RUN] " if dry_run else ""

    with dal.session_scope() as session:  # type: sqlalchemy.orm.Session
        chunks = find_recent_studies(
            session=session,
            num_days=num_days,
            chunk_size=chunk_size,
            skip_populated=skip_populated,
        )

        for chunk in chunks:
            # Materialize the chunk so that its size can be reported.
            chunk_studies = list(chunk)  # type: List[Study]
            logger.warning(len(chunk_studies))

            for study in chunk_studies:
                logger.info(f"Processing study with ID {study.study_id}")

                for loc in study.locations:
                    logger.info(
                        f"Processing location with ID {loc.location_id}"
                    )

                    fac_id = loc.facility_id
                    canonical_id = loc.facility.facility_canonical_id

                    message = (
                        f"{log_prefix}IODUing `StudyFacility` with a `study_id`"
                        f" of '{study.study_id}', `facility_id` of "
                        f"{fac_id}, and `facility_canonical_id` of "
                        f"'{canonical_id}'."
                    )
                    logger.info(message)

                    # Dry runs stop after logging the intended write.
                    if dry_run:
                        continue

                    dal.iodu_study_facility(
                        study_id=study.study_id,
                        facility_id=fac_id,
                        facility_canonical_id=canonical_id,
                    )
def find_recent_unmatched_facilities(dal: DalClinicalTrials, num_days: int = 2):
    """Return unmatched facilities linked to recently updated studies.

    A facility is selected when its `facility_canonical_id` is NULL and it
    is joined (via `Facility.studies` and `Study.study_dates`) to at least
    one study whose `StudyDates.last_update_posted` is later than
    `num_days` days before today. Results are grouped by
    `Facility.facility_id` so each facility appears once.

    Args:
        dal: Data-access layer providing `session_scope`.
        num_days: Size of the look-back window in days. Defaults to 2,
            which was the previously hard-coded value.

    Returns:
        The list of `Facility` objects produced by the query.
        NOTE(review): the objects are loaded inside the session scope and
        returned after it exits; whether they stay fully usable depends on
        how `session_scope` is configured - confirm.
    """
    # Compute the cutoff once so the query filter stays easy to read.
    cutoff = datetime.date.today() - datetime.timedelta(days=num_days)

    with dal.session_scope() as session:  # type: sqlalchemy.orm.Session
        query = (
            session.query(Facility)
            .join(Facility.studies)
            .join(Study.study_dates)
            # Keep only facilities not matched with a canonical facility.
            .filter(Facility.facility_canonical_id.is_(None))
            # Keep only studies updated within the look-back window.
            .filter(StudyDates.last_update_posted > cutoff)
            .group_by(Facility.facility_id)
        )
        facilities_unmatched = query.all()

    return facilities_unmatched
def match(dal: DalClinicalTrials, retriever: RetrieverGoogleMaps):
    """Link recently seen unmatched facilities to canonical facilities.

    For every facility returned by `find_recent_unmatched_facilities`:

    1. Look the facility up via `find_facility_google_place`; skip it when
       the response is empty or carries no candidates.
    2. Take the `place_id` of the first candidate.
    3. If a `FacilityCanonical` with that `google_place_id` is already
       stored, reuse its ID. Otherwise fetch the place details via
       `get_place_details` (skipping the facility when none are returned)
       and store them through `iodu_canonical_facility_from_google`, which
       yields the canonical facility ID.
    4. Set `facility_canonical_id` on the facility via
       `dal.update_attr_value`.

    Args:
        dal: Data-access layer used for lookups and updates.
        retriever: Google Maps retriever passed through to the lookup
            helpers.
    """
    logger.info(
        "Retrieving unmatched facilities linked to studies updated in the "
        "past 2 days."
    )
    facilities_unmatched = find_recent_unmatched_facilities(dal=dal)
    logger.info(
        f"Retrieved {len(facilities_unmatched)} unmatched facilities linked "
        f"to studies updated in the past 2 days."
    )

    for facility in facilities_unmatched:
        logger.info(f"Processing facility {facility}.")

        logger.info(f"Matching facility {facility} against a Google Place.")
        place_response = find_facility_google_place(
            retriever=retriever,
            facility=facility,
        )

        # Skip empty responses.
        if not place_response or not place_response.get("candidates"):
            logger.warning(
                f"No Google Place match found for facility {facility}."
                f" Skipping."
            )
            continue

        # Retrieving Google Place ID from the first candidate.
        google_place_id = place_response["candidates"][0]["place_id"]
        logger.info(
            f"Google Place with ID '{google_place_id}' found matching "
            f" facility {facility}."
        )

        with dal.session_scope() as session:  # type: sqlalchemy.orm.Session
            # noinspection PyTypeChecker
            facility_canonical = dal.get_by_attr(
                orm_class=FacilityCanonical,
                attr_name="google_place_id",
                attr_value=google_place_id,
                session=session,
            )  # type: FacilityCanonical

            if facility_canonical:
                facility_canonical_id = facility_canonical.facility_canonical_id
                logger.info(
                    f"Google Place with ID '{google_place_id}' previously "
                    f"stored as canonical facility {facility_canonical}."
                )
            else:
                logger.info(
                    f"Google Place with ID '{google_place_id}' not previously "
                    f"stored as a canonical facility."
                )
                logger.info(
                    f"Retrieving Google Place details for Google Place with "
                    f"ID '{google_place_id}'."
                )
                details_response = get_place_details(
                    google_place_id=google_place_id,
                    retriever=retriever,
                )

                if not details_response:
                    logger.warning(
                        f"No Google Place details retrieved for Google Place "
                        f"with ID '{google_place_id}'. Skipping."
                    )
                    continue

                facility_canonical_id = iodu_canonical_facility_from_google(
                    dal=dal,
                    google_place_id=google_place_id,
                    google_response=details_response,
                )

            # Log the resolved ID rather than the `facility_canonical`
            # object: the object is None whenever the canonical facility
            # was created in the branch above.
            logger.info(
                f"Linking facility {facility} with canonical facility with "
                f"ID {facility_canonical_id}."
            )
            dal.update_attr_value(
                orm_class=Facility,
                pk=facility.facility_id,
                attr_name="facility_canonical_id",
                attr_value=facility_canonical_id,
                session=session,
            )