def get_appointments(self, request: ScraperRequest, start_date: str, visit_motive_ids, motive_id: str,
                     agenda_ids_q: str, practice_ids_q: str, limit: int):
    """Query the Doctolib availabilities endpoint for a single visit motive.

    Args:
        request: Scraper request being processed. Its URL is reported when
            Doctolib blocks the call, and it receives the vaccine type when
            at least one slot is found.
        start_date: Start of the queried window, inserted as-is in the URL.
        visit_motive_ids: Container indexed by motive id; the indexed value
            is handed to ``request.add_vaccine_type``.
        motive_id: Visit motive to query.
        agenda_ids_q: Agenda ids, already joined for the query string.
        practice_ids_q: Practice ids, already joined for the query string.
        limit: Value of the ``limit`` query parameter.

    Returns:
        A ``(first_availability, appointment_count, stop)`` tuple:
        the smallest slot date seen (``None`` when there is none), the
        ``total`` reported by the response (0 when absent), and ``True``
        when no slot was found and the response has no ``next_slot``.

    Raises:
        BlockedByDoctolibError: If Doctolib answers with HTTP 403.
        Other HTTP error statuses are raised by ``response.raise_for_status()``.
    """
    stop = False
    motive_availability = False
    first_availability = None
    appointment_count = 0

    slots_api_url = f'https://partners.doctolib.fr/availabilities.json?start_date={start_date}&visit_motive_ids={motive_id}&agenda_ids={agenda_ids_q}&insurance_sector=public&practice_ids={practice_ids_q}&destroy_temporary=true&limit={limit}'
    response = self._client.get(slots_api_url, headers=DOCTOLIB_HEADERS)
    if response.status_code == 403:
        raise BlockedByDoctolibError(request.get_url())
    response.raise_for_status()
    time.sleep(self._cooldown_interval)

    slots = response.json()
    if slots.get('total'):
        appointment_count += int(slots.get('total', 0))

    for availability in slots['availabilities']:
        slot_list = availability.get('slots', None)
        if not slot_list:
            continue

        # Slots may be plain date strings; only the first entry is compared.
        if isinstance(slot_list[0], str):
            if not first_availability or slot_list[0] < first_availability:
                first_availability = slot_list[0]
                motive_availability = True

        for slot_info in slot_list:
            # String slots were handled above and have no ``.get``; calling
            # it on them used to raise AttributeError.
            if isinstance(slot_info, str):
                continue
            sdate = slot_info.get('start_date', None)
            if not sdate:
                continue
            if not first_availability or sdate < first_availability:
                first_availability = sdate
                motive_availability = True

    if motive_availability:
        request.add_vaccine_type(visit_motive_ids[motive_id])

    # Sometimes Doctolib does not allow seeing slots for the following weeks;
    # when nothing was found and no next slot is advertised, tell the caller
    # to stop iterating.
    if not first_availability and not slots.get('next_slot', None):
        stop = True
    return first_availability, appointment_count, stop
def test_export_data_when_blocked(tmp_path):
    """Check the per-department export when one center is blocked by Doctolib."""
    reachable_center = CenterInfo("59", "Clinique du Cambresis", "https://example.com/clinique-du-cambresis")
    reachable_center.plateforme = "Maiia"
    reachable_center.prochain_rdv = "2021-04-12:00:00"
    reachable_center.erreur = None
    reachable_center.appointment_count = 1

    blocked_center = CenterInfo("14", "Hôpital magique", "https://example.com/hopital-magique")
    blocked_center.plateforme = "Doctolib"
    blocked_center.prochain_rdv = None
    blocked_center.erreur = BlockedByDoctolibError("https://example.com/hopital-magique")

    searched_centers = [reachable_center, blocked_center]

    export_dir = tmp_path / "out"
    export_dir.mkdir()
    path_template = str(export_dir / "{}.json")

    frozen_now = dt.datetime(2021, 4, 4)
    with mock_datetime_now(frozen_now):
        total, active, blocked = export_data(searched_centers, outpath_format=path_template)

    # The totals must be right.
    assert total == 2
    assert active == 1
    assert blocked == 1

    # Departments 14 and 59 should contain the expected data.
    expected_dept_14 = {
        "version": 1,
        "last_updated": "2021-04-04T00:00:00",
        "centres_disponibles": [],
        "centres_indisponibles": [
            {
                "departement": "14",
                "nom": "Hôpital magique",
                "url": "https://example.com/hopital-magique",
                "location": None,
                "metadata": None,
                "prochain_rdv": None,
                "type": None,
                "plateforme": "Doctolib",
                "appointment_count": 0,
                "internal_id": None,
                "vaccine_type": None,
                "appointment_by_phone_only": False,
                "erreur": "ERREUR DE SCRAPPING (Doctolib): Doctolib bloque nos appels: 403 https://example.com/hopital-magique",
            }
        ],
        "doctolib_bloqué": True,
    }
    expected_dept_59 = {
        "version": 1,
        "centres_disponibles": [
            {
                "departement": "59",
                "nom": "Clinique du Cambresis",
                "url": "https://example.com/clinique-du-cambresis",
                "plateforme": "Maiia",
                "prochain_rdv": "2021-04-12:00:00",
                "location": None,
                "metadata": None,
                "type": None,
                "appointment_count": 1,
                "internal_id": None,
                "appointment_by_phone_only": False,
                "vaccine_type": None,
                "erreur": None,
            },
        ],
        "centres_indisponibles": [],
        "last_updated": "2021-04-04T00:00:00",
    }

    exported_14 = json.loads((export_dir / "14.json").read_text())
    assert exported_14 == expected_dept_14

    exported_59 = json.loads((export_dir / "59.json").read_text())
    assert exported_59 == expected_dept_59
def fetch(self, request: ScraperRequest) -> Optional[str]:
    """Return the earliest slot found for the Doctolib center behind ``request``.

    Loads the center's booking JSON, resolves the practice, visit motives,
    agendas and practices to query, then calls ``self.get_appointments`` for
    up to ``DOCTOLIB_ITERATIONS`` windows per visit motive.

    Args:
        request: Scraper request; it is updated in place (practitioner type,
            internal id, appointment count).

    Returns:
        The smallest slot date returned by ``get_appointments``, or ``None``
        when no visit motive, agenda, practice or slot is found.

    Raises:
        BlockedByDoctolibError: If the booking endpoint answers HTTP 403.
        Other HTTP error statuses are raised by ``response.raise_for_status()``.
    """
    centre = _parse_centre(request.get_url())

    # Doctolib sometimes returns several vaccination centers at once, so if a
    # practice id is present in the query only the related agendas should be
    # selected.
    practice_id = _parse_practice_id(request.get_url())

    centre_api_url = f'https://partners.doctolib.fr/booking/{centre}.json'
    response = self._client.get(centre_api_url, headers=DOCTOLIB_HEADERS)
    if response.status_code == 403:
        raise BlockedByDoctolibError(centre_api_url)
    response.raise_for_status()
    time.sleep(self._cooldown_interval)

    data = response.json()
    rdata = data.get('data', {})

    if not self.is_practice_id_valid(request, rdata):
        logger.warning(
            f"Invalid practice ID for this Doctolib center: {request.get_url()}"
        )
        practice_id = None
        self.pop_practice_id(request)
    if practice_id:
        practice_id = link_practice_ids(practice_id, rdata)
    if len(rdata.get('places', [])) > 1 and practice_id is None:
        practice_id = rdata.get('places')[0].get('practice_ids', None)

    request.update_practitioner_type(parse_practitioner_type(centre, rdata))
    set_doctolib_center_internal_id(request, rdata, practice_id)

    # visit_motive_categories
    # example: https://partners.doctolib.fr/hopital-public/tarbes/centre-de-vaccination-tarbes-ayguerote?speciality_id=5494&enable_cookies_consent=1
    visit_motive_category_id = _find_visit_motive_category_id(data)

    # visit_motive_id
    visit_motive_ids = _find_visit_motive_id(
        data, visit_motive_category_id=visit_motive_category_id)
    if visit_motive_ids is None:
        return None

    # practice_ids / agenda_ids
    agenda_ids, practice_ids = _find_agenda_and_practice_ids(
        data, visit_motive_ids, practice_id_filter=practice_id)
    if not agenda_ids or not practice_ids:
        return None

    all_agendas = parse_agenda_ids(rdata)
    agenda_ids = self.sort_agenda_ids(all_agendas, agenda_ids)

    # temporary_booking_disabled ??
    agenda_ids_q = "-".join(agenda_ids)
    practice_ids_q = "-".join(practice_ids)

    start_date = request.get_start_date()
    first_availability = None
    for motive_id in visit_motive_ids:
        for i in range(DOCTOLIB_ITERATIONS):
            # The window start is computed BEFORE each call. Computing it
            # after the call (with ``7 * i``) made iteration 1 query the
            # current week a second time, double-counting its appointments,
            # and let the last window of one motive leak into the next.
            if i == 0:
                window_start = start_date
            else:
                window_start = (datetime.now() + timedelta(days=7 * i)).strftime("%Y-%m-%d")

            sdate, appt, stop = self.get_appointments(
                request, window_start, visit_motive_ids, motive_id,
                agenda_ids_q, practice_ids_q, DOCTOLIB_SLOT_LIMIT)
            if stop:
                break
            if not sdate:
                continue
            if not first_availability or sdate < first_availability:
                first_availability = sdate
            request.update_appointment_count(request.appointment_count + appt)
    return first_availability
def fetch(self, request: ScraperRequest) -> Optional[str]:
    """Return the earliest availability for the Doctolib center of ``request``.

    The booking payload is taken from ``request.input_data`` when present and
    fetched from the booking endpoint otherwise. Empty appointment schedules
    are initialised for every interval of ``INTERVAL_SPLIT_DAYS``, then
    ``self.get_timetables`` is called once per visit motive that has both
    agendas and practices.

    Returns ``None`` when the center only takes appointments by phone, when
    no visit motive is found, or when no timetable yields an availability.
    Raises ``BlockedByDoctolibError`` when the booking endpoint answers 403.
    """
    url = request.get_url()
    centre = _parse_centre(url)

    # Doctolib sometimes returns several vaccination centers at once, so if a
    # practice id is present in the query only the related agendas should be
    # selected.
    practice_id = _parse_practice_id(request.get_url())
    practice_same_adress = False

    # Reuse the payload carried by the request when there is one.
    rdata = request.input_data
    if not rdata:
        centre_api_url = DOCTOLIB_API.get("booking", "").format(centre=centre)
        request.increase_request_count("booking")
        response = self._client.get(centre_api_url, headers=DOCTOLIB_HEADERS)
        if response.status_code == 403:
            raise BlockedByDoctolibError(centre_api_url)
        response.raise_for_status()
        time.sleep(self._cooldown_interval)
        rdata = response.json().get("data", {})

    if not self.is_practice_id_valid(request, rdata):
        logger.warning(
            f"Invalid practice ID for this Doctolib center: {request.get_url()}"
        )
        practice_id = None
        self.pop_practice_id(request)

    if practice_id:
        practice_id, practice_same_adress = link_practice_ids(practice_id, rdata)

    if practice_id is None and len(rdata.get("places", [])) > 1:
        practice_id = rdata.get("places")[0].get("practice_ids", None)

    practitioner_type = parse_practitioner_type(centre, rdata)
    request.update_practitioner_type(practitioner_type)
    set_doctolib_center_internal_id(request, rdata, practice_id, practice_same_adress)

    # Check if appointments are allowed
    if not is_allowing_online_appointments(rdata):
        request.set_appointments_only_by_phone(True)
        return None

    # visit_motive_categories
    # example: https://partners.doctolib.fr/hopital-public/tarbes/centre-de-vaccination-tarbes-ayguerote?speciality_id=5494&enable_cookies_consent=1
    category_id = _find_visit_motive_category_id(rdata)

    # visit_motive_id
    visit_motive_ids = _find_visit_motive_id(rdata, visit_motive_category_id=category_id)
    if visit_motive_ids is None:
        return None

    all_agendas = parse_agenda_ids(rdata)
    schedules = request.get_appointment_schedules()
    start_date = request.get_start_date()

    # Register every interval with a zero count before querying timetables.
    for interval in INTERVAL_SPLIT_DAYS:
        is_chronodose = bool(interval == CHRONODOSES["Interval"])
        schedules = build_appointment_schedules(
            request,
            interval,
            append_date_days(start_date, 0),
            append_date_days(start_date, days=interval, seconds=-1),
            0,
            schedules,
            is_chronodose,
        )
    request.update_appointment_schedules(schedules)

    timetable_start_date = datetime.now()  # shouldn't be datetime.now()!!
    earliest = None
    for visit_motive_id in visit_motive_ids:
        agenda_ids, practice_ids = _find_agenda_and_practice_ids(
            rdata, visit_motive_id, practice_id_filter=practice_id)
        if not (agenda_ids and practice_ids):
            continue

        ordered_agendas = self.sort_agenda_ids(all_agendas, agenda_ids)
        candidate = self.get_timetables(
            request,
            visit_motive_ids,
            visit_motive_id,
            "-".join(ordered_agendas),
            "-".join(practice_ids),
            timetable_start_date,
            schedules,
        )
        if not candidate:
            continue
        if not earliest or candidate < earliest:
            earliest = candidate
    return earliest
def get_appointments(
    self,
    request: ScraperRequest,
    start_date: str,
    visit_motive_ids,
    motive_id: str,
    agenda_ids_q: str,
    practice_ids_q: str,
    limit: int,
    start_date_original: str,
    appointment_schedules: list,
):
    """Query Doctolib slots for one visit motive and update the schedules.

    Args:
        request: Scraper request; its request counter is increased, its URL
            is reported on failure, and it receives the vaccine type when a
            slot is found.
        start_date: Start of the queried window.
        visit_motive_ids: Container indexed by motive id; the indexed value
            is used as the vaccine for this motive.
        motive_id: Visit motive to query.
        agenda_ids_q: Agenda ids, already joined for the query string.
        practice_ids_q: Practice ids, already joined for the query string.
        limit: Value passed as ``limit`` to the slots URL template.
        start_date_original: Reference date from which every interval of
            ``INTERVAL_SPLIT_DAYS`` is measured.
        appointment_schedules: Schedules accumulated so far; passed through
            ``build_appointment_schedules`` and returned.

    Returns:
        ``(first_availability, appointment_count, appointment_schedules,
        stop, next_slot)`` where ``stop`` is ``True`` when no slot was found
        and the response has no ``next_slot``.

    Raises:
        BlockedByDoctolibError: On a read timeout or an HTTP 403/400 answer.
        Other HTTP error statuses are raised by ``response.raise_for_status()``.
    """
    stop = False
    motive_availability = False
    first_availability = None
    appointment_count = 0

    slots_api_url = DOCTOLIB_API.get("slots", "").format(
        start_date=start_date,
        motive_id=motive_id,
        agenda_ids_q=agenda_ids_q,
        practice_ids_q=practice_ids_q,
        limit=limit,
    )
    request.increase_request_count("slots")
    try:
        response = self._client.get(slots_api_url, headers=DOCTOLIB_HEADERS)
    except httpx.ReadTimeout as err:
        logger.warning(
            f"Doctolib returned error ReadTimeout for url {request.get_url()}"
        )
        # Chain explicitly so the timeout stays attached as the cause.
        raise BlockedByDoctolibError(request.get_url()) from err
    if response.status_code in (403, 400):
        raise BlockedByDoctolibError(request.get_url())
    response.raise_for_status()
    time.sleep(self._cooldown_interval)

    slots = response.json()
    if slots.get("total"):
        appointment_count += int(slots.get("total", 0))

    for availability in slots["availabilities"]:
        slot_list = availability.get("slots", None)
        if not slot_list:
            continue

        # Slots may be plain date strings; only the first entry is compared.
        if isinstance(slot_list[0], str):
            if not first_availability or slot_list[0] < first_availability:
                first_availability = slot_list[0]
                motive_availability = True

        for slot_info in slot_list:
            if isinstance(slot_info, str):
                continue
            sdate = slot_info.get("start_date", None)
            if not sdate:
                continue
            if not first_availability or sdate < first_availability:
                first_availability = sdate
                motive_availability = True

        # A falsy vaccine value is normalised to None.
        visite_motive_vaccine = visit_motive_ids[motive_id] or None

        for interval in INTERVAL_SPLIT_DAYS:
            chronodose = bool(
                visite_motive_vaccine in CHRONODOSES["Vaccine"]
                and interval == CHRONODOSES["Interval"]
            )
            # First register the interval with a zero count ...
            appointment_schedules = build_appointment_schedules(
                request,
                interval,
                append_date_days(start_date_original, 0),
                append_date_days(start_date_original, days=interval, seconds=-1),
                0,
                appointment_schedules,
                chronodose,
            )
            # ... then add this day's slots when its date falls before the
            # end of the interval.
            if append_date_days(start_date_original, 0) <= append_date_days(
                    start_date_original, interval):
                if availability.get("date"):
                    if append_date_days(availability.get("date"), 0) < append_date_days(
                            start_date_original, interval):
                        appointment_schedules = build_appointment_schedules(
                            request,
                            interval,
                            append_date_days(start_date_original, 0),
                            append_date_days(start_date_original, days=interval, seconds=-1),
                            len(availability.get("slots", [])),
                            appointment_schedules,
                            chronodose,
                        )

    if motive_availability:
        request.add_vaccine_type(visit_motive_ids[motive_id])

    # Sometimes Doctolib does not allow seeing slots for the following weeks;
    # when nothing was found and no next slot is advertised, tell the caller
    # to stop iterating.
    if not first_availability and not slots.get("next_slot", None):
        stop = True
    return first_availability, appointment_count, appointment_schedules, stop, slots.get(
        "next_slot")
def fetch(self, request: ScraperRequest) -> Optional[str]:
    """Return the earliest slot found for the Doctolib center behind ``request``.

    Loads the center's booking JSON, initialises empty appointment schedules
    for every interval of ``INTERVAL_SPLIT_DAYS``, then calls
    ``self.get_appointments`` for up to ``DOCTOLIB_ITERATIONS`` weekly windows
    per visit motive that has both agendas and practices.

    Args:
        request: Scraper request; it is updated in place (practitioner type,
            internal id, phone-only flag, appointment count and schedules).

    Returns:
        The smallest slot date returned by ``get_appointments``, or ``None``
        when the center only takes appointments by phone, no visit motive is
        found, or no slot is found.

    Raises:
        BlockedByDoctolibError: If the booking endpoint answers HTTP 403.
        Other HTTP error statuses are raised by ``response.raise_for_status()``.
    """
    centre = _parse_centre(request.get_url())

    # Doctolib sometimes returns several vaccination centers at once, so if a
    # practice id is present in the query only the related agendas should be
    # selected.
    practice_id = _parse_practice_id(request.get_url())
    practice_same_adress = False

    centre_api_url = f"https://partners.doctolib.fr/booking/{centre}.json"
    response = self._client.get(centre_api_url, headers=DOCTOLIB_HEADERS)
    if response.status_code == 403:
        raise BlockedByDoctolibError(centre_api_url)
    response.raise_for_status()
    time.sleep(self._cooldown_interval)

    data = response.json()
    rdata = data.get("data", {})

    if not self.is_practice_id_valid(request, rdata):
        logger.warning(f"Invalid practice ID for this Doctolib center: {request.get_url()}")
        practice_id = None
        self.pop_practice_id(request)
    if practice_id:
        practice_id, practice_same_adress = link_practice_ids(practice_id, rdata)
    if len(rdata.get("places", [])) > 1 and practice_id is None:
        practice_id = rdata.get("places")[0].get("practice_ids", None)

    request.update_practitioner_type(parse_practitioner_type(centre, rdata))
    set_doctolib_center_internal_id(request, rdata, practice_id, practice_same_adress)

    # Check if appointments are allowed
    if not is_allowing_online_appointments(rdata):
        request.set_appointments_only_by_phone(True)
        return None

    # visit_motive_categories
    # example: https://partners.doctolib.fr/hopital-public/tarbes/centre-de-vaccination-tarbes-ayguerote?speciality_id=5494&enable_cookies_consent=1
    visit_motive_category_id = _find_visit_motive_category_id(data)

    # visit_motive_id
    visit_motive_ids = _find_visit_motive_id(data, visit_motive_category_id=visit_motive_category_id)
    if visit_motive_ids is None:
        return None

    all_agendas = parse_agenda_ids(rdata)
    first_availability = None

    appointment_schedules = request.get_appointment_schedules()
    start_date = request.get_start_date()

    # Register every interval with a zero count before querying slots.
    for interval in INTERVAL_SPLIT_DAYS:
        chronodose = bool(interval == CHRONODOSES["Interval"])
        appointment_schedules = build_appointment_schedules(
            request,
            interval,
            append_date_days(start_date, 0),
            append_date_days(start_date, interval, 1),
            0,
            appointment_schedules,
            chronodose,
        )
    request.update_appointment_schedules(appointment_schedules)

    for visit_motive_id in visit_motive_ids:
        agenda_ids, practice_ids = _find_agenda_and_practice_ids(
            data, visit_motive_id, practice_id_filter=practice_id
        )
        # Truthiness rather than ``!= []``: None or any other empty container
        # must also be skipped, otherwise the joins below fail or build a
        # query with empty ids.
        if not agenda_ids or not practice_ids:
            continue

        agenda_ids = self.sort_agenda_ids(all_agendas, agenda_ids)
        agenda_ids_q = "-".join(agenda_ids)
        practice_ids_q = "-".join(practice_ids)

        for i in range(DOCTOLIB_ITERATIONS):
            # One window per week, counted from the current date.
            start_date_tmp = datetime.now() + timedelta(days=7 * i)
            start_date_tmp = start_date_tmp.strftime("%Y-%m-%d")
            sdate, appt, appointment_schedules, stop = self.get_appointments(
                request,
                start_date_tmp,
                visit_motive_ids,
                visit_motive_id,
                agenda_ids_q,
                practice_ids_q,
                DOCTOLIB_SLOT_LIMIT,
                start_date,
                appointment_schedules,
            )
            if stop:
                break
            if not sdate:
                continue
            if not first_availability or sdate < first_availability:
                first_availability = sdate
            request.update_appointment_count(request.appointment_count + appt)
            if appointment_schedules:
                request.update_appointment_schedules(appointment_schedules)
    return first_availability
def get_appointments(
    self,
    request: ScraperRequest,
    start_date: str,
    visit_motive_ids,
    motive_id: str,
    agenda_ids_q: str,
    practice_ids_q: str,
    limit: int,
    start_date_original: str,
    appointment_schedules: list,
):
    """Fetch Doctolib slots for one visit motive and fold them into the schedules.

    Returns ``(first_availability, appointment_count, appointment_schedules,
    stop)``: the smallest slot date seen (``None`` when there is none), the
    ``total`` reported by the response (0 when absent), the schedules after
    going through ``build_appointment_schedules``, and a flag that is ``True``
    when no slot was found and the response has no ``next_slot``.

    Raises ``BlockedByDoctolibError`` when Doctolib answers HTTP 403; other
    HTTP error statuses are raised by ``response.raise_for_status()``.
    """
    slots_api_url = f"https://partners.doctolib.fr/availabilities.json?start_date={start_date}&visit_motive_ids={motive_id}&agenda_ids={agenda_ids_q}&insurance_sector=public&practice_ids={practice_ids_q}&destroy_temporary=true&limit={limit}"
    response = self._client.get(slots_api_url, headers=DOCTOLIB_HEADERS)
    if response.status_code == 403:
        raise BlockedByDoctolibError(request.get_url())
    response.raise_for_status()
    time.sleep(self._cooldown_interval)

    payload = response.json()
    reported_total = payload.get("total")
    appointment_count = int(reported_total) if reported_total else 0

    earliest = None
    found_slot = False
    for day in payload["availabilities"]:
        day_slots = day.get("slots", None)
        if not day_slots:
            continue

        # Slots given as plain strings: only the first entry is compared.
        head = day_slots[0]
        if isinstance(head, str) and (not earliest or head < earliest):
            earliest = head
            found_slot = True

        # Slots given as mappings: compare each ``start_date``.
        for entry in day_slots:
            if isinstance(entry, str):
                continue
            slot_start = entry.get("start_date", None)
            if slot_start and (not earliest or slot_start < earliest):
                earliest = slot_start
                found_slot = True

        # A falsy vaccine value is normalised to None.
        vaccine = visit_motive_ids[motive_id] or None

        for interval in INTERVAL_SPLIT_DAYS:
            is_chronodose = bool(
                vaccine in CHRONODOSES["Vaccine"] and interval == CHRONODOSES["Interval"]
            )
            # Register the interval with a zero count first.
            appointment_schedules = build_appointment_schedules(
                request,
                interval,
                append_date_days(start_date_original, 0),
                append_date_days(start_date_original, interval, 1),
                0,
                appointment_schedules,
                is_chronodose,
            )

            interval_is_forward = append_date_days(start_date_original, 0) <= append_date_days(
                start_date_original, interval
            )
            if not interval_is_forward:
                continue
            if not day.get("date"):
                continue
            day_is_in_interval = append_date_days(day.get("date"), 0) < append_date_days(
                start_date_original, interval
            )
            if not day_is_in_interval:
                continue

            # The day falls inside the interval: add its slot count.
            appointment_schedules = build_appointment_schedules(
                request,
                interval,
                append_date_days(start_date_original, 0),
                append_date_days(start_date_original, interval, 1),
                len(day.get("slots", [])),
                appointment_schedules,
                is_chronodose,
            )

    if found_slot:
        request.add_vaccine_type(visit_motive_ids[motive_id])

    # Sometimes Doctolib does not allow seeing slots for the following weeks;
    # when nothing was found and no next slot is advertised, the caller has
    # to stop here.
    stop = not earliest and not payload.get("next_slot", None)
    return earliest, appointment_count, appointment_schedules, stop