def get_timetables(
    self,
    request: ScraperRequest,
    visit_motive_ids,
    visit_motive_id,
    agenda_ids_q: str,
    practice_ids_q: str,
    start_date: datetime,
    appointment_schedules: list,
    page: int = 1,
    first_availability: Optional[str] = None,
) -> Optional[str]:
    """
    Recursively page through the slots API and return the earliest slot found.

    Each call queries `doctolib.pagination.days` days starting at
    `start_date` through `self.get_appointments`. The recursion stops once
    `page` goes past `doctolib.pagination.pages`, when `get_appointments`
    reports that nothing more can be fetched, or when a page yields no slot.

    When the API answers with a `next_slot` date, the next call starts
    directly at that date and `page` is advanced by the number of windows
    that were skipped, so the page budget is still respected.

    Side effects: the appointment count and the appointment schedules stored
    on `request` are updated for every page that contains a slot.

    :param request: scraper request being filled in.
    :param visit_motive_ids: forwarded unchanged to `get_appointments`.
    :param visit_motive_id: the visit motive queried by this call.
    :param agenda_ids_q: agenda ids, already joined for the query string.
    :param practice_ids_q: practice ids, already joined for the query string.
    :param start_date: first day of the window to query.
    :param appointment_schedules: schedules forwarded to `get_appointments`.
    :param page: 1-based index of the current page.
    :param first_availability: earliest slot found by previous pages, if any.
    :return: the earliest slot seen so far, or None when there is none.
    """
    max_pages = DOCTOLIB_CONF.pagination["pages"]
    if page > max_pages:
        return first_availability
    days_per_page = DOCTOLIB_CONF.pagination["days"]

    sdate, appt, schedules, ended, next_slot = self.get_appointments(
        request,
        start_date.strftime("%Y-%m-%d"),
        visit_motive_ids,
        visit_motive_id,
        agenda_ids_q,
        practice_ids_q,
        days_per_page,
        request.get_start_date(),
        appointment_schedules,
    )
    if ended:
        return first_availability

    if next_slot:
        # Optimize the query count by jumping directly to the first
        # availability date given by the 'next_slot' key.
        next_expected_date = start_date + timedelta(days=days_per_page)
        next_fetch_date = datetime.strptime(next_slot, "%Y-%m-%d")
        # Both sides are made naive so they can be subtracted even when
        # start_date carries a timezone.
        diff = next_fetch_date.replace(tzinfo=None) - next_expected_date.replace(tzinfo=None)
        skipped_pages = max(0, floor(diff.days / days_per_page))
        return self.get_timetables(
            request,
            visit_motive_ids,
            visit_motive_id,
            agenda_ids_q,
            practice_ids_q,
            next_fetch_date,
            appointment_schedules,
            page=1 + skipped_pages + page,
            first_availability=first_availability,
        )

    if not sdate:
        return first_availability
    if not first_availability or sdate < first_availability:
        first_availability = sdate
    request.update_appointment_count(request.appointment_count + appt)
    if schedules:
        request.update_appointment_schedules(schedules)
    if page >= max_pages:
        return first_availability
    return self.get_timetables(
        request,
        visit_motive_ids,
        visit_motive_id,
        agenda_ids_q,
        practice_ids_q,
        start_date + timedelta(days=days_per_page),
        appointment_schedules,
        1 + page,
        first_availability=first_availability,
    )
def fetch(self, request: ScraperRequest) -> Optional[str]:
    """
    Query a Doctolib center and return its earliest availability.

    The booking endpoint is fetched first to resolve the practice, the
    practitioner type, the internal id and the visit motives. Every visit
    motive that has both agendas and practices is then explored with
    `get_timetables`, and the smallest availability is kept.

    :param request: scraper request; it is updated along the way (request
        count, practitioner type, phone-only flag, appointment schedules).
    :return: the earliest availability, or None when the center only takes
        appointments by phone, has no visit motive, or has no slot.
    :raises BlockedByDoctolibError: when the booking endpoint answers 403.
    """
    center_slug = _parse_centre(request.get_url())
    # A single Doctolib page sometimes covers several vaccination centers, so
    # when the query carries a practice id only the related agendas are kept.
    practice_id = _parse_practice_id(request.get_url())
    same_address = False

    booking_url = DOCTOLIB_API.get("booking", "").format(centre=center_slug)
    request.increase_request_count("booking")
    response = self._client.get(booking_url, headers=DOCTOLIB_HEADERS)
    if response.status_code == 403:
        raise BlockedByDoctolibError(booking_url)
    response.raise_for_status()
    time.sleep(self._cooldown_interval)

    data = response.json()
    rdata = data.get("data", {})

    if not self.is_practice_id_valid(request, rdata):
        logger.warning(f"Invalid practice ID for this Doctolib center: {request.get_url()}")
        practice_id = None
        self.pop_practice_id(request)
    if practice_id:
        practice_id, same_address = link_practice_ids(practice_id, rdata)
    if len(rdata.get("places", [])) > 1 and practice_id is None:
        # Several places and no usable practice id: use the first place.
        practice_id = rdata.get("places")[0].get("practice_ids", None)

    request.update_practitioner_type(parse_practitioner_type(center_slug, rdata))
    set_doctolib_center_internal_id(request, rdata, practice_id, same_address)

    # Check if appointments are allowed
    if not is_allowing_online_appointments(rdata):
        request.set_appointments_only_by_phone(True)
        return None

    # visit_motive_categories
    # example: https://partners.doctolib.fr/hopital-public/tarbes/centre-de-vaccination-tarbes-ayguerote?speciality_id=5494&enable_cookies_consent=1
    category_id = _find_visit_motive_category_id(data)

    # visit_motive_id
    visit_motive_ids = _find_visit_motive_id(data, visit_motive_category_id=category_id)
    if visit_motive_ids is None:
        return None

    all_agendas = parse_agenda_ids(rdata)
    earliest = None

    # Seed every schedule window with a zero count before counting slots.
    appointment_schedules = request.get_appointment_schedules()
    start_date = request.get_start_date()
    for interval in INTERVAL_SPLIT_DAYS:
        is_chronodose = interval == CHRONODOSES["Interval"]
        appointment_schedules = build_appointment_schedules(
            request,
            interval,
            append_date_days(start_date, 0),
            append_date_days(start_date, days=interval, seconds=-1),
            0,
            appointment_schedules,
            is_chronodose,
        )
    request.update_appointment_schedules(appointment_schedules)

    # TODO: flagged by the original author - this shouldn't be datetime.now()!!
    timetable_start_date = datetime.now()
    for visit_motive_id in visit_motive_ids:
        agenda_ids, practice_ids = _find_agenda_and_practice_ids(
            data, visit_motive_id, practice_id_filter=practice_id
        )
        if not (agenda_ids and practice_ids):
            continue
        sorted_agenda_ids = self.sort_agenda_ids(all_agendas, agenda_ids)
        availability = self.get_timetables(
            request,
            visit_motive_ids,
            visit_motive_id,
            "-".join(sorted_agenda_ids),
            "-".join(practice_ids),
            timetable_start_date,
            appointment_schedules,
        )
        if availability and (not earliest or availability < earliest):
            earliest = availability
    return earliest
def test_center_info_fill():
    """Check that a CenterInfo copies every field from a filled ScraperResult."""
    center = CenterInfo("Paris", "Centre 1", "https://.../centre")
    location = CenterLocation(1.122, 2.391, "Ok")
    request = ScraperRequest(center.url, "2021-05-04")
    result = ScraperResult(request, "Doctolib", "2021-05-06")

    # Populate the center and the request the way a scraper run would.
    center.fill_localization(location)
    request.update_appointment_count(42)
    for vaccine in (Vaccine.PFIZER, Vaccine.ASTRAZENECA, Vaccine.MODERNA):
        request.add_vaccine_type(vaccine)
    request.update_internal_id("doctolibcentre1")
    request.update_practitioner_type(DRUG_STORE)
    request.set_appointments_only_by_phone(False)
    center.fill_result(result)

    # Attribute-level checks.
    assert center.location == location
    assert center.prochain_rdv == "2021-05-06"
    assert center.plateforme == "Doctolib"
    assert center.type == "drugstore"
    assert center.appointment_count == 42
    assert center.internal_id == "doctolibcentre1"
    assert center.vaccine_type == ["Pfizer-BioNTech", "AstraZeneca", "Moderna"]
    assert not center.appointment_by_phone_only

    # Serialized form.
    expected = {
        "departement": "Paris",
        "nom": "Centre 1",
        "url": "https://.../centre",
        "location": {"longitude": 1.122, "latitude": 2.391, "city": "Ok"},
        "metadata": None,
        "prochain_rdv": "2021-05-06",
        "plateforme": "Doctolib",
        "type": "drugstore",
        "appointment_count": 42,
        "internal_id": "doctolibcentre1",
        "vaccine_type": ["Pfizer-BioNTech", "AstraZeneca", "Moderna"],
        "appointment_by_phone_only": False,
        "erreur": None,
        "last_scan_with_availabilities": None,
        "appointment_schedules": None,
    }
    assert center.default() == expected
def get_appointments(
    self,
    request: ScraperRequest,
    start_date: str,
    visit_motive_ids,
    motive_id: str,
    agenda_ids_q: str,
    practice_ids_q: str,
    limit: int,
    start_date_original: str,
    appointment_schedules: list,
):
    """
    Fetch one page of slots for a visit motive and summarize it.

    :param request: scraper request; its request counter is incremented and
        the motive's vaccine type is added when a slot is found.
    :param start_date: first day of the queried window, as sent to the API.
    :param visit_motive_ids: mapping indexed by motive id; the value is used
        as the vaccine of the motive.
    :param motive_id: the visit motive to query.
    :param agenda_ids_q: agenda ids, already joined for the query string.
    :param practice_ids_q: practice ids, already joined for the query string.
    :param limit: value substituted for `limit` in the slots URL template.
    :param start_date_original: start date the schedule windows are based on.
    :param appointment_schedules: schedules to update with the slot counts.
    :return: a tuple `(first_availability, appointment_count,
        appointment_schedules, stop, next_slot)` where `stop` is True when the
        page has neither a slot nor a `next_slot` hint.
    :raises BlockedByDoctolibError: on a read timeout or a 403/400 answer.
        Other HTTP errors are raised by `response.raise_for_status()`.
    """
    stop = False
    motive_availability = False
    first_availability = None
    appointment_count = 0

    slots_api_url = DOCTOLIB_API.get("slots", "").format(
        start_date=start_date,
        motive_id=motive_id,
        agenda_ids_q=agenda_ids_q,
        practice_ids_q=practice_ids_q,
        limit=limit,
    )
    request.increase_request_count("slots")
    try:
        response = self._client.get(slots_api_url, headers=DOCTOLIB_HEADERS)
    except httpx.ReadTimeout as exc:
        logger.warning(f"Doctolib returned error ReadTimeout for url {request.get_url()}")
        # Keep the timeout as the cause so the traceback shows what happened.
        raise BlockedByDoctolibError(request.get_url()) from exc
    if response.status_code in (403, 400):
        raise BlockedByDoctolibError(request.get_url())
    response.raise_for_status()
    time.sleep(self._cooldown_interval)

    slots = response.json()
    if slots.get("total"):
        appointment_count += int(slots.get("total", 0))

    for availability in slots["availabilities"]:
        slot_list = availability.get("slots", None)
        if not slot_list:
            continue

        # Slots come either as plain date strings or as dicts holding a
        # "start_date" key; both shapes are handled.
        if isinstance(slot_list[0], str):
            if not first_availability or slot_list[0] < first_availability:
                first_availability = slot_list[0]
                motive_availability = True
        for slot_info in slot_list:
            if isinstance(slot_info, str):
                continue
            sdate = slot_info.get("start_date", None)
            if not sdate:
                continue
            if not first_availability or sdate < first_availability:
                first_availability = sdate
                motive_availability = True

        visite_motive_vaccine = visit_motive_ids[motive_id] or None
        availability_date = availability.get("date")

        for interval in INTERVAL_SPLIT_DAYS:
            chronodose = (
                visite_motive_vaccine in CHRONODOSES["Vaccine"]
                and interval == CHRONODOSES["Interval"]
            )
            window_start = append_date_days(start_date_original, 0)
            window_end = append_date_days(start_date_original, days=interval, seconds=-1)
            interval_limit = append_date_days(start_date_original, interval)

            # First call adds 0 so the window is passed through
            # build_appointment_schedules even without a matching slot.
            appointment_schedules = build_appointment_schedules(
                request,
                interval,
                window_start,
                window_end,
                0,
                appointment_schedules,
                chronodose,
            )
            # Count the slots of this day only when it falls in the window.
            if (
                window_start <= interval_limit
                and availability_date
                and append_date_days(availability_date, 0) < interval_limit
            ):
                appointment_schedules = build_appointment_schedules(
                    request,
                    interval,
                    window_start,
                    window_end,
                    len(availability.get("slots", [])),
                    appointment_schedules,
                    chronodose,
                )

    if motive_availability:
        request.add_vaccine_type(visit_motive_ids[motive_id])

    # Sometimes Doctolib does not allow to see slots for next weeks
    # which is a weird move, but still, we have to stop here.
    if not first_availability and not slots.get("next_slot", None):
        stop = True
    return first_availability, appointment_count, appointment_schedules, stop, slots.get("next_slot")
def test_fetch_slots():
    """Run `fetch_slots` against mocked Ordoclic endpoints for five scenarios."""
    center_url = "https://app.ordoclic.fr/app/pharmacie/pharmacie-oceane-paris"
    profile_path = "/v1/public/entities/profile/pharmacie-oceane-paris"
    reasons_path = "/v1/solar/entities/03674d71-b200-4682-8e0a-3ab9687b2b59/reasons"
    slots_path = "/v1/solar/slots/availableSlots"

    def serve(fixture_file):
        """Return a route answering 200 with the JSON stored in `fixture_file`."""

        def route(_request: httpx.Request) -> httpx.Response:
            # The fixture is read when the request is handled, not before.
            return httpx.Response(200, json=json.loads(Path(fixture_file).read_text()))

        return route

    def time_out(request: httpx.Request) -> httpx.Response:
        raise httpx.TimeoutException(message="Timeout", request=request)

    def run(routes):
        """Call `fetch_slots` with a client that answers 403 off the routes."""

        def handler(request: httpx.Request) -> httpx.Response:
            route = routes.get(request.url.path)
            if route is None:
                return httpx.Response(403, json={})
            return route(request)

        client = httpx.Client(transport=httpx.MockTransport(handler))
        scraper_request = ScraperRequest(center_url, "2021-05-08")
        return fetch_slots(scraper_request, client)

    # Basic full working test
    res = run(
        {
            profile_path: serve("tests/fixtures/ordoclic/fetchslot-profile.json"),
            reasons_path: serve("tests/fixtures/ordoclic/fetchslot-reasons.json"),
            slots_path: serve("tests/fixtures/ordoclic/fetchslot-slots.json"),
        }
    )
    assert res == "2021-05-12T16:00:00+00:00"

    # Timeout test
    res = run(
        {
            profile_path: serve("tests/fixtures/ordoclic/fetchslot-profile.json"),
            reasons_path: serve("tests/fixtures/ordoclic/fetchslot-reasons.json"),
            slots_path: time_out,
        }
    )
    assert res is None

    # HTTP error test (available slots)
    res = run(
        {
            profile_path: serve("tests/fixtures/ordoclic/fetchslot-profile.json"),
            reasons_path: serve("tests/fixtures/ordoclic/fetchslot-reasons.json"),
        }
    )
    assert res is None

    # HTTP error test (profile)
    res = run({})
    assert res is None

    # Only appointments by phone test
    res = run(
        {
            profile_path: serve("tests/fixtures/ordoclic/fetchslot-profile2.json"),
            reasons_path: serve("tests/fixtures/ordoclic/fetchslot-reasons.json"),
            slots_path: serve("tests/fixtures/ordoclic/fetchslot-slots.json"),
        }
    )
    assert res is None
def fetch_slots(request: ScraperRequest, client: httpx.Client = DEFAULT_CLIENT):
    """
    Return the first Ordoclic availability for the center of `request`.

    The profile is fetched first. For every public professional and every
    valid reason, slots are requested for the 50 days following the request
    start date, the reason's vaccine is recorded on the request, and the
    slots are counted into the appointment schedules (one "chronodose"
    window plus one window per entry of `INTERVAL_SPLIT_DAYS`).

    :param request: scraper request; vaccine types, the phone-only flag and
        the appointment schedules are updated on it.
    :param client: HTTP client forwarded to `get_profile` and `get_slots`.
    :return: ISO-formatted first availability, or None when the profile is
        missing, the center only books by phone, or no slot was found.
    """
    first_availability = None
    profile = get_profile(request, client)
    if not profile:
        return None

    entity_id = profile["entityId"]
    # "attributeValues" may be missing or null; treat that as no settings.
    for settings in profile.get("attributeValues") or []:
        if settings["label"] == "booking_settings" and settings["value"].get("option", "any") == "any":
            request.set_appointments_only_by_phone(True)
            return None

    # create appointment_schedules array with names and dates
    window_start = paris_tz.localize(isoparse(request.get_start_date()))
    windows = [("chronodose", CHRONODOSES["Interval"])]
    windows.extend((f"{n}_days", n) for n in INTERVAL_SPLIT_DAYS)
    appointment_schedules = [
        {
            "name": window_name,
            "from": window_start.isoformat(),
            "to": (window_start + timedelta(days=window_days, seconds=-1)).isoformat(),
            "total": 0,
        }
        for window_name, window_days in windows
    ]

    for professional in profile["publicProfessionals"]:
        medical_staff_id = professional["id"]
        # NOTE(review): `client` is not forwarded here, unlike get_profile and
        # get_slots - confirm whether get_reasons accepts one.
        reasons = get_reasons(entity_id)
        for reason in reasons["reasons"]:
            if not is_reason_valid(reason):
                continue
            vaccine_name = get_vaccine_name(reason.get("name", ""))
            request.add_vaccine_type(vaccine_name)

            search_start = request.get_start_date()
            search_end = (datetime.strptime(search_start, "%Y-%m-%d") + timedelta(days=50)).strftime("%Y-%m-%d")
            slots = get_slots(entity_id, medical_staff_id, reason["id"], search_start, search_end, client)
            date = parse_ordoclic_slots(request, slots)
            if date is None:
                continue

            # add counts to appointment_schedules
            availabilities = slots.get("slots", None)
            for schedule in appointment_schedules:
                # do not count chronodose if wrong vaccine
                if schedule["name"] == "chronodose" and vaccine_name not in CHRONODOSES["Vaccine"]:
                    continue
                schedule["total"] += count_appointements(
                    availabilities,
                    isoparse(schedule["from"]),
                    isoparse(schedule["to"]),
                )
            request.update_appointment_schedules(appointment_schedules)
            logger.debug(f"appointment_schedules: {appointment_schedules}")

            if first_availability is None or date < first_availability:
                first_availability = date

    # Store the schedules even when no slot was found at all.
    request.update_appointment_schedules(appointment_schedules)
    if first_availability is None:
        return None
    logger.debug(f"appointment_schedules: {request.appointment_schedules}")
    return first_availability.isoformat()
def test_fetch_slots():
    """Run `fetch_slots` against mocked Ordoclic endpoints for five scenarios."""
    center_url = "https://app.ordoclic.fr/app/pharmacie/pharmacie-oceane-paris"
    profile_path = "/v1/public/entities/profile/pharmacie-oceane-paris"
    reasons_path = "/v1/solar/entities/03674d71-b200-4682-8e0a-3ab9687b2b59/reasons"
    slots_path = "/v1/solar/slots/availableSlots"

    # One center description is shared by every scenario below.
    center_info = CenterInfo(
        departement="51",
        nom="Pharmacie Croix Dampierre",
        url="https://app.ordoclic.fr/app/pharmacie/pharmacie-croix-dampierre-chalons-en-champagne",
        location=CenterLocation(longitude=4.3858888, latitude=48.9422828, city="chalons-en-champagne", cp="51000"),
        metadata={
            "address": "AV DU PRESIDENT ROOSEVELT CC CROIX DAMPIERRE, 51000 CHALONS EN CHAMPAGNE",
            "phone_number": "+33326219000",
            "business_hours": None,
        },
    )

    def serve(fixture_file):
        """Return a route answering 200 with the JSON stored in `fixture_file`."""

        def route(_request: httpx.Request) -> httpx.Response:
            # The fixture is read when the request is handled, not before.
            return httpx.Response(200, json=json.loads(Path(fixture_file).read_text()))

        return route

    def time_out(request: httpx.Request) -> httpx.Response:
        raise httpx.TimeoutException(message="Timeout", request=request)

    def run(routes):
        """Call `fetch_slots` with a client that answers 403 off the routes."""

        def handler(request: httpx.Request) -> httpx.Response:
            route = routes.get(request.url.path)
            if route is None:
                return httpx.Response(403, json={})
            return route(request)

        client = httpx.Client(transport=httpx.MockTransport(handler))
        scraper_request = ScraperRequest(center_url, "2021-05-08", center_info)
        return fetch_slots(scraper_request, client=client)

    # Basic full working test
    res = run(
        {
            profile_path: serve("tests/fixtures/ordoclic/fetchslot-profile.json"),
            reasons_path: serve("tests/fixtures/ordoclic/fetchslot-reasons.json"),
            slots_path: serve("tests/fixtures/ordoclic/fetchslot-slots.json"),
        }
    )
    assert res == "2021-05-12T16:00:00+00:00"

    # Timeout test
    res = run(
        {
            profile_path: serve("tests/fixtures/ordoclic/fetchslot-profile.json"),
            reasons_path: serve("tests/fixtures/ordoclic/fetchslot-reasons.json"),
            slots_path: time_out,
        }
    )
    assert res is None

    # HTTP error test (available slots)
    res = run(
        {
            profile_path: serve("tests/fixtures/ordoclic/fetchslot-profile.json"),
            reasons_path: serve("tests/fixtures/ordoclic/fetchslot-reasons.json"),
        }
    )
    assert res is None

    # HTTP error test (profile)
    res = run({})
    assert res is None

    # Only appointments by phone test
    res = run(
        {
            profile_path: serve("tests/fixtures/ordoclic/fetchslot-profile2.json"),
            reasons_path: serve("tests/fixtures/ordoclic/fetchslot-reasons.json"),
            slots_path: serve("tests/fixtures/ordoclic/fetchslot-slots.json"),
        }
    )
    assert res is None
def get_appointments(
    self,
    request: ScraperRequest,
    start_date: str,
    visit_motive_ids,
    motive_id: str,
    agenda_ids_q: str,
    practice_ids_q: str,
    limit: int,
    start_date_original: str,
    appointment_schedules: list,
):
    """
    Fetch one page of Doctolib availabilities for a visit motive.

    Returns a tuple `(first_availability, appointment_count,
    appointment_schedules, stop)`. `stop` is True when the page holds no
    slot and the answer carries no `next_slot` hint. The motive's vaccine
    type is added to `request` when at least one slot was found.

    Raises BlockedByDoctolibError on a 403 answer; other HTTP errors are
    raised by `response.raise_for_status()`.
    """
    slots_api_url = f"https://partners.doctolib.fr/availabilities.json?start_date={start_date}&visit_motive_ids={motive_id}&agenda_ids={agenda_ids_q}&insurance_sector=public&practice_ids={practice_ids_q}&destroy_temporary=true&limit={limit}"
    response = self._client.get(slots_api_url, headers=DOCTOLIB_HEADERS)
    if response.status_code == 403:
        raise BlockedByDoctolibError(request.get_url())
    response.raise_for_status()
    time.sleep(self._cooldown_interval)
    slots = response.json()

    appointment_count = 0
    if slots.get("total"):
        appointment_count += int(slots.get("total", 0))

    first_availability = None
    motive_availability = False
    for availability in slots["availabilities"]:
        slot_list = availability.get("slots", None)
        if not slot_list:
            continue

        # Collect the candidate dates of this day in the order they are
        # examined: the leading plain-string slot, then each dict slot.
        candidates = []
        if isinstance(slot_list[0], str):
            candidates.append(slot_list[0])
        for slot_info in slot_list:
            if isinstance(slot_info, str):
                continue
            sdate = slot_info.get("start_date", None)
            if sdate:
                candidates.append(sdate)
        for candidate in candidates:
            if not first_availability or candidate < first_availability:
                first_availability = candidate
                motive_availability = True

        visite_motive_vaccine = visit_motive_ids[motive_id] or None

        for interval in INTERVAL_SPLIT_DAYS:
            chronodose = (
                visite_motive_vaccine in CHRONODOSES["Vaccine"]
                and interval == CHRONODOSES["Interval"]
            )
            # First register the window with a zero count...
            appointment_schedules = build_appointment_schedules(
                request,
                interval,
                append_date_days(start_date_original, 0),
                append_date_days(start_date_original, days=interval, seconds=-1),
                0,
                appointment_schedules,
                chronodose,
            )
            # ...then add this day's slots when the day falls in the window.
            if (
                append_date_days(start_date_original, 0) <= append_date_days(start_date_original, interval)
                and availability.get("date")
                and append_date_days(availability.get("date"), 0)
                < append_date_days(start_date_original, interval)
            ):
                appointment_schedules = build_appointment_schedules(
                    request,
                    interval,
                    append_date_days(start_date_original, 0),
                    append_date_days(start_date_original, days=interval, seconds=-1),
                    len(availability.get("slots", [])),
                    appointment_schedules,
                    chronodose,
                )

    if motive_availability:
        request.add_vaccine_type(visit_motive_ids[motive_id])

    # Sometimes Doctolib does not allow to see slots for next weeks
    # which is a weird move, but still, we have to stop here.
    stop = not first_availability and not slots.get("next_slot", None)
    return first_availability, appointment_count, appointment_schedules, stop