def fetch(self, request: ScraperRequest) -> Optional[str]:
    """Find the first availability of the Doctolib center targeted by ``request``.

    The booking payload comes from ``request.input_data`` when it is truthy;
    otherwise it is downloaded from the ``"booking"`` URL template of
    ``DOCTOLIB_API``. Along the way the request is updated with the
    practitioner type, the center internal id and the appointment schedules.

    Args:
        request: Scraper request describing the center to inspect.

    Returns:
        The smallest truthy value returned by ``self.get_timetables`` over all
        visit motives, or ``None`` when online appointments are not allowed,
        when no visit motive is found, or when nothing is available.

    Raises:
        BlockedByDoctolibError: If the booking endpoint answers HTTP 403.
    """
    center_slug = _parse_centre(request.get_url())

    # One Doctolib page may list several vaccination centers; when the URL
    # carries a practice id, only the agendas tied to it should be selected.
    wanted_practice = _parse_practice_id(request.get_url())
    shares_address = False

    # Reuse the payload attached to the request when there is one.
    booking = request.input_data
    if not booking:
        booking_url = DOCTOLIB_API.get("booking", "").format(centre=center_slug)
        request.increase_request_count("booking")
        reply = self._client.get(booking_url, headers=DOCTOLIB_HEADERS)
        if reply.status_code == 403:
            raise BlockedByDoctolibError(booking_url)
        reply.raise_for_status()
        time.sleep(self._cooldown_interval)
        booking = reply.json().get("data", {})

    if not self.is_practice_id_valid(request, booking):
        logger.warning(f"Invalid practice ID for this Doctolib center: {request.get_url()}")
        wanted_practice = None
        self.pop_practice_id(request)

    if wanted_practice:
        wanted_practice, shares_address = link_practice_ids(wanted_practice, booking)

    # With several places and no usable practice id, fall back to the
    # practice ids of the first listed place.
    if len(booking.get("places", [])) > 1 and wanted_practice is None:
        wanted_practice = booking.get("places")[0].get("practice_ids")

    request.update_practitioner_type(parse_practitioner_type(center_slug, booking))
    set_doctolib_center_internal_id(request, booking, wanted_practice, shares_address)

    # Check if appointments are allowed
    if not is_allowing_online_appointments(booking):
        request.set_appointments_only_by_phone(True)
        return None

    # visit_motive_categories
    # example: https://partners.doctolib.fr/hopital-public/tarbes/centre-de-vaccination-tarbes-ayguerote?speciality_id=5494&enable_cookies_consent=1
    category_id = _find_visit_motive_category_id(booking)

    # visit_motive_id
    visit_motive_ids = _find_visit_motive_id(booking, visit_motive_category_id=category_id)
    if visit_motive_ids is None:
        return None

    all_agendas = parse_agenda_ids(booking)

    # Register one schedule entry with a zero count for every interval.
    schedules = request.get_appointment_schedules()
    start_date = request.get_start_date()
    for interval in INTERVAL_SPLIT_DAYS:
        is_chronodose = interval == CHRONODOSES["Interval"]
        schedules = build_appointment_schedules(
            request,
            interval,
            append_date_days(start_date, 0),
            append_date_days(start_date, days=interval, seconds=-1),
            0,
            schedules,
            is_chronodose,
        )
    request.update_appointment_schedules(schedules)

    # NOTE: flagged by the original author -- shouldn't be datetime.now()!!
    timetable_start = datetime.now()

    earliest = None
    for visit_motive_id in visit_motive_ids:
        agenda_ids, practice_ids = _find_agenda_and_practice_ids(
            booking, visit_motive_id, practice_id_filter=wanted_practice
        )
        if not (agenda_ids and practice_ids):
            continue

        agenda_query = "-".join(self.sort_agenda_ids(all_agendas, agenda_ids))
        practice_query = "-".join(practice_ids)
        found = self.get_timetables(
            request,
            visit_motive_ids,
            visit_motive_id,
            agenda_query,
            practice_query,
            timetable_start,
            schedules,
        )
        if not found:
            continue
        if not earliest or found < earliest:
            earliest = found

    return earliest
def get_appointments(
    self,
    request: ScraperRequest,
    start_date: str,
    visit_motive_ids,
    motive_id: str,
    agenda_ids_q: str,
    practice_ids_q: str,
    limit: int,
    start_date_original: str,
    appointment_schedules: list,
):
    """Query the Doctolib slots endpoint once and summarise the answer.

    Args:
        request: Scraper request; its request counter and vaccine types are
            updated here.
        start_date: Value substituted for ``start_date`` in the slots URL.
        visit_motive_ids: Mapping indexed by ``motive_id`` whose value is
            passed to ``request.add_vaccine_type`` (presumably the vaccine
            name -- confirm against the caller).
        motive_id: Visit motive to query.
        agenda_ids_q: Agenda ids, already joined for the query string.
        practice_ids_q: Practice ids, already joined for the query string.
        limit: Value substituted for ``limit`` in the slots URL.
        start_date_original: Reference date from which every interval of
            ``INTERVAL_SPLIT_DAYS`` is measured.
        appointment_schedules: Schedules accumulated so far; threaded through
            ``build_appointment_schedules`` and returned.

    Returns:
        A 5-tuple ``(first_availability, appointment_count,
        appointment_schedules, stop, next_slot)``. ``first_availability`` is
        the smallest slot start found (or ``None``), ``appointment_count`` is
        the ``total`` field of the response as an int (0 when absent or
        falsy), ``stop`` is true when no slot was found and the response has
        no ``next_slot``, and ``next_slot`` is that response field.

    Raises:
        BlockedByDoctolibError: On an ``httpx.ReadTimeout`` or when the
            endpoint answers HTTP 403 or 400.
    """
    slots_api_url = DOCTOLIB_API.get("slots", "").format(
        start_date=start_date,
        motive_id=motive_id,
        agenda_ids_q=agenda_ids_q,
        practice_ids_q=practice_ids_q,
        limit=limit,
    )
    request.increase_request_count("slots")

    try:
        response = self._client.get(slots_api_url, headers=DOCTOLIB_HEADERS)
    except httpx.ReadTimeout as timeout_error:
        logger.warning(f"Doctolib returned error ReadTimeout for url {request.get_url()}")
        # Chain explicitly so the original timeout stays visible as __cause__.
        raise BlockedByDoctolibError(request.get_url()) from timeout_error
    if response.status_code in (403, 400):
        raise BlockedByDoctolibError(request.get_url())
    response.raise_for_status()
    time.sleep(self._cooldown_interval)

    slots = response.json()
    total = slots.get("total")
    appointment_count = int(total) if total else 0

    first_availability = None
    motive_availability = False
    for availability in slots["availabilities"]:
        slot_list = availability.get("slots", None)
        if not slot_list:
            continue

        # Slots come either as plain start-date strings or as objects
        # carrying a "start_date" field; both shapes are handled.
        first_slot = slot_list[0]
        if isinstance(first_slot, str) and (
            not first_availability or first_slot < first_availability
        ):
            first_availability = first_slot
            motive_availability = True
        for slot_info in slot_list:
            if isinstance(slot_info, str):
                continue
            sdate = slot_info.get("start_date", None)
            if sdate and (not first_availability or sdate < first_availability):
                first_availability = sdate
                motive_availability = True

        visite_motive_vaccine = visit_motive_ids[motive_id] or None
        availability_date = availability.get("date")
        for interval in INTERVAL_SPLIT_DAYS:
            chronodose = (
                visite_motive_vaccine in CHRONODOSES["Vaccine"]
                and interval == CHRONODOSES["Interval"]
            )
            # Each bound is computed once per interval and reused below.
            # NOTE(review): assumes append_date_days returns the same value
            # for identical arguments -- confirm.
            window_start = append_date_days(start_date_original, 0)
            window_end = append_date_days(start_date_original, days=interval, seconds=-1)

            # Make sure the interval has an entry even when it gets no slot.
            appointment_schedules = build_appointment_schedules(
                request,
                interval,
                window_start,
                window_end,
                0,
                appointment_schedules,
                chronodose,
            )

            interval_limit = append_date_days(start_date_original, interval)
            if (
                window_start <= interval_limit
                and availability_date
                and append_date_days(availability_date, 0) < interval_limit
            ):
                # This day falls inside the interval: add its slot count.
                appointment_schedules = build_appointment_schedules(
                    request,
                    interval,
                    window_start,
                    window_end,
                    len(slot_list),
                    appointment_schedules,
                    chronodose,
                )

    if motive_availability:
        request.add_vaccine_type(visit_motive_ids[motive_id])

    # Sometimes Doctolib does not allow to see slots for next weeks
    # which is a weird move, but still, we have to stop here.
    next_slot = slots.get("next_slot")
    stop = not first_availability and not next_slot

    return first_availability, appointment_count, appointment_schedules, stop, next_slot
def fetch(self, request: ScraperRequest) -> Optional[str]:
    """Find the first availability of the Doctolib center targeted by ``request``.

    Downloads the booking payload of the center, updates the request with
    the practitioner type, internal id and appointment schedules, then polls
    ``self.get_appointments`` week by week for every visit motive.

    Args:
        request: Scraper request describing the center to inspect.

    Returns:
        The smallest slot start reported by ``self.get_appointments``, or
        ``None`` when online appointments are not allowed, when no visit
        motive is found, or when nothing is available.

    Raises:
        BlockedByDoctolibError: If the booking endpoint answers HTTP 403.
    """
    centre = _parse_centre(request.get_url())

    # One Doctolib page may list several vaccination centers; when the URL
    # carries a practice id, only the agendas tied to it should be selected.
    wanted_practice = _parse_practice_id(request.get_url())
    shares_address = False

    centre_api_url = f"https://partners.doctolib.fr/booking/{centre}.json"
    reply = self._client.get(centre_api_url, headers=DOCTOLIB_HEADERS)
    if reply.status_code == 403:
        raise BlockedByDoctolibError(centre_api_url)
    reply.raise_for_status()
    time.sleep(self._cooldown_interval)

    # Both the full payload and its "data" member are used below.
    data = reply.json()
    rdata = data.get("data", {})

    if not self.is_practice_id_valid(request, rdata):
        logger.warning(f"Invalid practice ID for this Doctolib center: {request.get_url()}")
        wanted_practice = None
        self.pop_practice_id(request)

    if wanted_practice:
        wanted_practice, shares_address = link_practice_ids(wanted_practice, rdata)

    # With several places and no usable practice id, fall back to the
    # practice ids of the first listed place.
    if len(rdata.get("places", [])) > 1 and wanted_practice is None:
        wanted_practice = rdata.get("places")[0].get("practice_ids")

    request.update_practitioner_type(parse_practitioner_type(centre, rdata))
    set_doctolib_center_internal_id(request, rdata, wanted_practice, shares_address)

    # Check if appointments are allowed
    if not is_allowing_online_appointments(rdata):
        request.set_appointments_only_by_phone(True)
        return None

    # visit_motive_categories
    # example: https://partners.doctolib.fr/hopital-public/tarbes/centre-de-vaccination-tarbes-ayguerote?speciality_id=5494&enable_cookies_consent=1
    category_id = _find_visit_motive_category_id(data)

    # visit_motive_id
    visit_motive_ids = _find_visit_motive_id(data, visit_motive_category_id=category_id)
    if visit_motive_ids is None:
        return None

    all_agendas = parse_agenda_ids(rdata)

    # Register one schedule entry with a zero count for every interval.
    schedules = request.get_appointment_schedules()
    start_date = request.get_start_date()
    for interval in INTERVAL_SPLIT_DAYS:
        is_chronodose = interval == CHRONODOSES["Interval"]
        schedules = build_appointment_schedules(
            request,
            interval,
            append_date_days(start_date, 0),
            append_date_days(start_date, interval, 1),
            0,
            schedules,
            is_chronodose,
        )
    request.update_appointment_schedules(schedules)

    earliest = None
    for visit_motive_id in visit_motive_ids:
        agenda_ids, practice_ids = _find_agenda_and_practice_ids(
            data, visit_motive_id, practice_id_filter=wanted_practice
        )
        if agenda_ids == [] or practice_ids == []:
            continue

        agenda_query = "-".join(self.sort_agenda_ids(all_agendas, agenda_ids))
        practice_query = "-".join(practice_ids)

        # Poll week after week, starting from today.
        for week in range(DOCTOLIB_ITERATIONS):
            week_start = (datetime.now() + timedelta(weeks=week)).strftime("%Y-%m-%d")
            sdate, appt, schedules, stop = self.get_appointments(
                request,
                week_start,
                visit_motive_ids,
                visit_motive_id,
                agenda_query,
                practice_query,
                DOCTOLIB_SLOT_LIMIT,
                start_date,
                schedules,
            )
            if stop:
                break
            if not sdate:
                continue
            if not earliest or sdate < earliest:
                earliest = sdate
            request.update_appointment_count(request.appointment_count + appt)
            if schedules:
                request.update_appointment_schedules(schedules)

    return earliest
def test_append_days_date():
    """Check ``append_date_days`` against every case listed in ``TEST_DATES``."""
    for case in TEST_DATES:
        # Each case holds the call arguments under "item" and the expected
        # value under "result".
        arguments = case["item"]
        shifted = append_date_days(arguments[0], arguments[1])
        assert shifted == case["result"]
def get_appointments(
    self,
    request: ScraperRequest,
    start_date: str,
    visit_motive_ids,
    motive_id: str,
    agenda_ids_q: str,
    practice_ids_q: str,
    limit: int,
    start_date_original: str,
    appointment_schedules: list,
):
    """Query the Doctolib availabilities endpoint once and summarise the answer.

    Args:
        request: Scraper request; its vaccine types are updated here.
        start_date: Value of the ``start_date`` query parameter.
        visit_motive_ids: Mapping indexed by ``motive_id`` whose value is
            passed to ``request.add_vaccine_type`` (presumably the vaccine
            name -- confirm against the caller).
        motive_id: Visit motive to query.
        agenda_ids_q: Agenda ids, already joined for the query string.
        practice_ids_q: Practice ids, already joined for the query string.
        limit: Value of the ``limit`` query parameter.
        start_date_original: Reference date from which every interval of
            ``INTERVAL_SPLIT_DAYS`` is measured.
        appointment_schedules: Schedules accumulated so far; threaded through
            ``build_appointment_schedules`` and returned.

    Returns:
        A 4-tuple ``(first_availability, appointment_count,
        appointment_schedules, stop)``. ``first_availability`` is the smallest
        slot start found (or ``None``), ``appointment_count`` is the ``total``
        field of the response as an int (0 when absent or falsy) and ``stop``
        is true when no slot was found and the response has no ``next_slot``.

    Raises:
        BlockedByDoctolibError: If the endpoint answers HTTP 403.
    """
    slots_api_url = f"https://partners.doctolib.fr/availabilities.json?start_date={start_date}&visit_motive_ids={motive_id}&agenda_ids={agenda_ids_q}&insurance_sector=public&practice_ids={practice_ids_q}&destroy_temporary=true&limit={limit}"
    reply = self._client.get(slots_api_url, headers=DOCTOLIB_HEADERS)
    if reply.status_code == 403:
        raise BlockedByDoctolibError(request.get_url())
    reply.raise_for_status()
    time.sleep(self._cooldown_interval)

    slots = reply.json()
    total = slots.get("total")
    appointment_count = int(total) if total else 0

    earliest = None
    vaccine_seen = False
    for day in slots["availabilities"]:
        day_slots = day.get("slots", None)
        if not day_slots:
            continue

        # Slots come either as plain start-date strings or as objects
        # carrying a "start_date" field; both shapes are handled.
        head = day_slots[0]
        if isinstance(head, str) and (not earliest or head < earliest):
            earliest = head
            vaccine_seen = True
        for entry in day_slots:
            if isinstance(entry, str):
                continue
            entry_start = entry.get("start_date", None)
            if entry_start and (not earliest or entry_start < earliest):
                earliest = entry_start
                vaccine_seen = True

        vaccine = visit_motive_ids[motive_id] or None
        for interval in INTERVAL_SPLIT_DAYS:
            is_chronodose = (
                vaccine in CHRONODOSES["Vaccine"] and interval == CHRONODOSES["Interval"]
            )

            # Make sure the interval has an entry even when it gets no slot.
            appointment_schedules = build_appointment_schedules(
                request,
                interval,
                append_date_days(start_date_original, 0),
                append_date_days(start_date_original, interval, 1),
                0,
                appointment_schedules,
                is_chronodose,
            )

            # Add this day's slot count when the day falls inside the interval.
            if (
                append_date_days(start_date_original, 0)
                <= append_date_days(start_date_original, interval)
                and day.get("date")
                and append_date_days(day.get("date"), 0)
                < append_date_days(start_date_original, interval)
            ):
                appointment_schedules = build_appointment_schedules(
                    request,
                    interval,
                    append_date_days(start_date_original, 0),
                    append_date_days(start_date_original, interval, 1),
                    len(day.get("slots", [])),
                    appointment_schedules,
                    is_chronodose,
                )

    if vaccine_seen:
        request.add_vaccine_type(visit_motive_ids[motive_id])

    # Sometimes Doctolib does not allow to see slots for next weeks
    # which is a weird move, but still, we have to stop here.
    stop = not earliest and not slots.get("next_slot", None)

    return earliest, appointment_count, appointment_schedules, stop