コード例 #1
0
    def get_appointments(self, request: ScraperRequest, start_date: str,
                         visit_motive_ids, motive_id: str, agenda_ids_q: str,
                         practice_ids_q: str, limit: int):
        """Query the Doctolib availabilities endpoint for one visit motive.

        Args:
            request: Scraper request. Its URL is reported when Doctolib
                blocks the call, and it receives the vaccine type when a
                slot is found.
            start_date: Value sent as the ``start_date`` query parameter.
            visit_motive_ids: Mapping indexed by ``motive_id``; the value
                found there is passed to ``request.add_vaccine_type``.
            motive_id: Visit motive sent as ``visit_motive_ids``.
            agenda_ids_q: Value sent as ``agenda_ids``.
            practice_ids_q: Value sent as ``practice_ids``.
            limit: Value sent as ``limit``.

        Returns:
            A ``(first_availability, appointment_count, stop)`` tuple:
            the smallest slot date seen (or ``None``), the ``total``
            reported by the response (0 when absent), and a flag telling
            the caller that neither a slot nor a ``next_slot`` was found.

        Raises:
            BlockedByDoctolibError: If the endpoint answers with HTTP 403.
        """
        slots_api_url = f'https://partners.doctolib.fr/availabilities.json?start_date={start_date}&visit_motive_ids={motive_id}&agenda_ids={agenda_ids_q}&insurance_sector=public&practice_ids={practice_ids_q}&destroy_temporary=true&limit={limit}'
        response = self._client.get(slots_api_url, headers=DOCTOLIB_HEADERS)
        if response.status_code == 403:
            raise BlockedByDoctolibError(request.get_url())

        response.raise_for_status()
        time.sleep(self._cooldown_interval)

        slots = response.json()
        appointment_count = int(slots.get('total') or 0)

        motive_availability = False
        first_availability = None
        for availability in slots['availabilities']:
            slot_list = availability.get('slots', None)
            if not slot_list:
                continue
            # Slots may be plain date strings; in that case only the first
            # entry of the list is considered.
            if isinstance(slot_list[0], str):
                if not first_availability or slot_list[0] < first_availability:
                    first_availability = slot_list[0]
                    motive_availability = True
            for slot_info in slot_list:
                # String slots carry no ``start_date`` key: calling ``.get``
                # on them would raise AttributeError, so skip them here.
                if isinstance(slot_info, str):
                    continue
                sdate = slot_info.get('start_date', None)
                if not sdate:
                    continue
                if not first_availability or sdate < first_availability:
                    first_availability = sdate
                    motive_availability = True

        if motive_availability:
            request.add_vaccine_type(visit_motive_ids[motive_id])
        # Sometimes Doctolib does not allow to see slots for next weeks
        # which is a weird move, but still, we have to stop here.
        stop = not first_availability and not slots.get('next_slot', None)
        return first_availability, appointment_count, stop
コード例 #2
0
def test_export_data_when_blocked(tmp_path):
    """Check export_data output when one centre failed with a Doctolib block."""
    # One centre with an appointment, one whose scrape was blocked.
    available_center = CenterInfo("59", "Clinique du Cambresis",
                                  "https://example.com/clinique-du-cambresis")
    available_center.plateforme = "Maiia"
    available_center.prochain_rdv = "2021-04-12:00:00"
    available_center.erreur = None
    available_center.appointment_count = 1

    blocked_center = CenterInfo("14", "Hôpital magique",
                                "https://example.com/hopital-magique")
    blocked_center.plateforme = "Doctolib"
    blocked_center.prochain_rdv = None
    blocked_center.erreur = BlockedByDoctolibError(
        "https://example.com/hopital-magique")
    searched_centers = [available_center, blocked_center]

    out_dir = tmp_path / "out"
    out_dir.mkdir()
    outpath_format = str(out_dir / "{}.json")

    def load_department(code):
        # Parse the JSON file written for the given department code.
        return json.loads((out_dir / f"{code}.json").read_text())

    with mock_datetime_now(dt.datetime(2021, 4, 4)):
        counts = export_data(searched_centers, outpath_format=outpath_format)
    total_count, active_count, blocked_count = counts

    # The totals must be correct.
    assert total_count == 2
    assert active_count == 1
    assert blocked_count == 1

    # Department 14 only holds the blocked centre.
    blocked_entry = {
        "departement": "14",
        "nom": "Hôpital magique",
        "url": "https://example.com/hopital-magique",
        "location": None,
        "metadata": None,
        "prochain_rdv": None,
        "type": None,
        "plateforme": "Doctolib",
        "appointment_count": 0,
        "internal_id": None,
        "vaccine_type": None,
        "appointment_by_phone_only": False,
        "erreur": "ERREUR DE SCRAPPING (Doctolib): Doctolib bloque nos appels: 403 https://example.com/hopital-magique",
    }
    expected_14 = {
        "version": 1,
        "last_updated": "2021-04-04T00:00:00",
        "centres_disponibles": [],
        "centres_indisponibles": [blocked_entry],
        "doctolib_bloqué": True,
    }
    assert load_department("14") == expected_14

    # Department 59 only holds the available centre.
    available_entry = {
        "departement": "59",
        "nom": "Clinique du Cambresis",
        "url": "https://example.com/clinique-du-cambresis",
        "plateforme": "Maiia",
        "prochain_rdv": "2021-04-12:00:00",
        "location": None,
        "metadata": None,
        "type": None,
        "appointment_count": 1,
        "internal_id": None,
        "appointment_by_phone_only": False,
        "vaccine_type": None,
        "erreur": None,
    }
    expected_59 = {
        "version": 1,
        "centres_disponibles": [available_entry],
        "centres_indisponibles": [],
        "last_updated": "2021-04-04T00:00:00",
    }
    assert load_department("59") == expected_59
コード例 #3
0
    def fetch(self, request: ScraperRequest) -> Optional[str]:
        """Return the earliest slot date found for the centre of ``request``.

        The centre's booking description is downloaded first, then the
        availabilities endpoint is queried for every visit motive over up
        to ``DOCTOLIB_ITERATIONS`` successive windows. The request object
        is updated along the way (practitioner type, internal id,
        appointment count).

        Args:
            request: Scraper request carrying the centre URL and start date.

        Returns:
            The smallest slot date reported by ``get_appointments``, or
            ``None`` when no visit motive, agenda, practice or slot is found.

        Raises:
            BlockedByDoctolibError: If the booking endpoint answers 403.
        """
        centre = _parse_centre(request.get_url())

        # Doctolib fetches multiple vaccination centers sometimes
        # so if a practice id is present in query, only related agendas
        # should be selected.
        practice_id = _parse_practice_id(request.get_url())

        centre_api_url = f'https://partners.doctolib.fr/booking/{centre}.json'
        response = self._client.get(centre_api_url, headers=DOCTOLIB_HEADERS)
        if response.status_code == 403:
            raise BlockedByDoctolibError(centre_api_url)

        response.raise_for_status()
        time.sleep(self._cooldown_interval)
        data = response.json()
        rdata = data.get('data', {})

        if not self.is_practice_id_valid(request, rdata):
            logger.warning(
                f"Invalid practice ID for this Doctolib center: {request.get_url()}"
            )
            practice_id = None
            self.pop_practice_id(request)

        if practice_id:
            practice_id = link_practice_ids(practice_id, rdata)
        if len(rdata.get('places', [])) > 1 and practice_id is None:
            practice_id = rdata.get('places')[0].get('practice_ids', None)

        request.update_practitioner_type(parse_practitioner_type(
            centre, rdata))
        set_doctolib_center_internal_id(request, rdata, practice_id)
        # visit_motive_categories
        # example: https://partners.doctolib.fr/hopital-public/tarbes/centre-de-vaccination-tarbes-ayguerote?speciality_id=5494&enable_cookies_consent=1
        visit_motive_category_id = _find_visit_motive_category_id(data)
        # visit_motive_id
        visit_motive_ids = _find_visit_motive_id(
            data, visit_motive_category_id=visit_motive_category_id)

        if visit_motive_ids is None:
            return None
        # practice_ids / agenda_ids
        agenda_ids, practice_ids = _find_agenda_and_practice_ids(
            data, visit_motive_ids, practice_id_filter=practice_id)
        if not agenda_ids or not practice_ids:
            return None
        all_agendas = parse_agenda_ids(rdata)
        agenda_ids = self.sort_agenda_ids(all_agendas, agenda_ids)

        # temporary_booking_disabled ??

        agenda_ids_q = "-".join(agenda_ids)
        practice_ids_q = "-".join(practice_ids)
        start_date = request.get_start_date()

        first_availability = None
        for motive_id in visit_motive_ids:
            # Every motive restarts from the requested start date; reusing
            # the date left over by the previous motive would skip that
            # motive's first windows.
            start_date_tmp = start_date
            for i in range(DOCTOLIB_ITERATIONS):
                sdate, appt, stop = self.get_appointments(
                    request, start_date_tmp, visit_motive_ids, motive_id,
                    agenda_ids_q, practice_ids_q, DOCTOLIB_SLOT_LIMIT)
                if stop:
                    break
                # The next request targets the following week. Offsetting by
                # ``i + 1`` avoids querying the current week a second time
                # and counting its appointments twice.
                next_start = datetime.now() + timedelta(days=7 * (i + 1))
                start_date_tmp = next_start.strftime("%Y-%m-%d")
                if not sdate:
                    continue
                if not first_availability or sdate < first_availability:
                    first_availability = sdate
                request.update_appointment_count(request.appointment_count +
                                                 appt)

        return first_availability
コード例 #4
0
    def fetch(self, request: ScraperRequest) -> Optional[str]:
        """Return the earliest availability reported by the centre timetables.

        Booking data comes from ``request.input_data`` when it is set,
        otherwise from the ``booking`` endpoint. Empty appointment schedules
        are built for every interval of ``INTERVAL_SPLIT_DAYS`` and stored on
        the request, then ``get_timetables`` is called for each visit motive
        that has agendas and practices.

        Returns:
            The smallest value returned by ``get_timetables``, or ``None``
            when online booking is not allowed, no visit motive exists, or
            no timetable yields an availability.

        Raises:
            BlockedByDoctolibError: If the booking endpoint answers 403.
        """
        url_centre = _parse_centre(request.get_url())

        # A Doctolib page can expose several vaccination centers; when the
        # URL carries a practice id, only the matching agendas are kept.
        selected_practice = _parse_practice_id(request.get_url())
        same_address = False

        if request.input_data:
            # The booking data was already provided with the request.
            booking_data = request.input_data
        else:
            booking_url = DOCTOLIB_API.get("booking", "").format(
                centre=url_centre)
            request.increase_request_count("booking")
            response = self._client.get(booking_url, headers=DOCTOLIB_HEADERS)
            if response.status_code == 403:
                raise BlockedByDoctolibError(booking_url)

            response.raise_for_status()
            time.sleep(self._cooldown_interval)
            booking_data = response.json().get("data", {})

        if not self.is_practice_id_valid(request, booking_data):
            logger.warning(
                f"Invalid practice ID for this Doctolib center: {request.get_url()}"
            )
            selected_practice = None
            self.pop_practice_id(request)
        if selected_practice:
            selected_practice, same_address = link_practice_ids(
                selected_practice, booking_data)
        if len(booking_data.get("places", [])) > 1 and selected_practice is None:
            first_place = booking_data.get("places")[0]
            selected_practice = first_place.get("practice_ids", None)

        practitioner_type = parse_practitioner_type(url_centre, booking_data)
        request.update_practitioner_type(practitioner_type)
        set_doctolib_center_internal_id(request, booking_data,
                                        selected_practice, same_address)

        # Centres that do not allow online booking are flagged and skipped.
        if not is_allowing_online_appointments(booking_data):
            request.set_appointments_only_by_phone(True)
            return None

        # visit_motive_categories
        # example: https://partners.doctolib.fr/hopital-public/tarbes/centre-de-vaccination-tarbes-ayguerote?speciality_id=5494&enable_cookies_consent=1
        category_id = _find_visit_motive_category_id(booking_data)
        motives = _find_visit_motive_id(booking_data,
                                        visit_motive_category_id=category_id)
        if motives is None:
            return None

        every_agenda = parse_agenda_ids(booking_data)
        schedules = request.get_appointment_schedules()
        requested_start = request.get_start_date()

        # Register one schedule with a zero count per interval.
        for split_days in INTERVAL_SPLIT_DAYS:
            is_chronodose = split_days == CHRONODOSES["Interval"]
            window_start = append_date_days(requested_start, 0)
            window_end = append_date_days(requested_start,
                                          days=split_days,
                                          seconds=-1)
            schedules = build_appointment_schedules(
                request,
                split_days,
                window_start,
                window_end,
                0,
                schedules,
                is_chronodose,
            )
        request.update_appointment_schedules(schedules)

        # NOTE(original author): shouldn't be datetime.now()!!
        timetable_start = datetime.now()
        earliest = None
        for motive in motives:
            agendas, practices = _find_agenda_and_practice_ids(
                booking_data, motive, practice_id_filter=selected_practice)
            if not (agendas and practices):
                continue
            agendas = self.sort_agenda_ids(every_agenda, agendas)

            found = self.get_timetables(
                request,
                motives,
                motive,
                "-".join(agendas),
                "-".join(practices),
                timetable_start,
                schedules,
            )
            if not found:
                continue
            if not earliest or found < earliest:
                earliest = found
        return earliest
コード例 #5
0
    def get_appointments(
        self,
        request: ScraperRequest,
        start_date: str,
        visit_motive_ids,
        motive_id: str,
        agenda_ids_q: str,
        practice_ids_q: str,
        limit: int,
        start_date_original: str,
        appointment_schedules: list,
    ):
        """Query the ``slots`` endpoint for one motive and update schedules.

        Args:
            request: Scraper request. Its request counter is increased, its
                URL is reported on failure and it receives the vaccine type
                when a slot is found.
            start_date: Start date formatted into the slots URL.
            visit_motive_ids: Mapping indexed by ``motive_id`` giving the
                vaccine associated with the motive.
            motive_id: Visit motive formatted into the slots URL.
            agenda_ids_q: Agenda ids formatted into the slots URL.
            practice_ids_q: Practice ids formatted into the slots URL.
            limit: Limit formatted into the slots URL.
            start_date_original: Date from which every schedule interval
                is measured.
            appointment_schedules: Schedules accumulated so far; passed to
                ``build_appointment_schedules`` and rebound to its result.

        Returns:
            A 5-tuple ``(first_availability, appointment_count,
            appointment_schedules, stop, next_slot)`` where ``stop`` is true
            when neither a slot nor a ``next_slot`` was found.

        Raises:
            BlockedByDoctolibError: On a read timeout or an HTTP 403/400
                answer.
        """
        slots_api_url = DOCTOLIB_API.get("slots", "").format(
            start_date=start_date,
            motive_id=motive_id,
            agenda_ids_q=agenda_ids_q,
            practice_ids_q=practice_ids_q,
            limit=limit,
        )
        request.increase_request_count("slots")
        try:
            response = self._client.get(slots_api_url,
                                        headers=DOCTOLIB_HEADERS)
        except httpx.ReadTimeout as exc:
            logger.warning(
                f"Doctolib returned error ReadTimeout for url {request.get_url()}"
            )
            # A timeout is reported like a block; chaining keeps the
            # original timeout visible in the traceback.
            raise BlockedByDoctolibError(request.get_url()) from exc
        if response.status_code in (403, 400):
            raise BlockedByDoctolibError(request.get_url())

        response.raise_for_status()
        time.sleep(self._cooldown_interval)
        slots = response.json()
        appointment_count = int(slots.get("total") or 0)

        motive_availability = False
        first_availability = None
        for availability in slots["availabilities"]:
            slot_list = availability.get("slots", None)
            if not slot_list:
                continue
            # Slots may be plain date strings; in that case only the first
            # entry of the list is considered.
            if isinstance(slot_list[0], str):
                if not first_availability or slot_list[0] < first_availability:
                    first_availability = slot_list[0]
                    motive_availability = True

            for slot_info in slot_list:
                if isinstance(slot_info, str):
                    continue
                sdate = slot_info.get("start_date", None)
                if not sdate:
                    continue
                if not first_availability or sdate < first_availability:
                    first_availability = sdate
                    motive_availability = True

            # A falsy vaccine entry is normalised to None.
            visite_motive_vaccine = visit_motive_ids[motive_id] or None

            for interval in INTERVAL_SPLIT_DAYS:
                chronodose = (visite_motive_vaccine in CHRONODOSES["Vaccine"]
                              and interval == CHRONODOSES["Interval"])
                # First make sure the schedule exists (count 0) ...
                appointment_schedules = build_appointment_schedules(
                    request,
                    interval,
                    append_date_days(start_date_original, 0),
                    append_date_days(start_date_original,
                                     days=interval,
                                     seconds=-1),
                    0,
                    appointment_schedules,
                    chronodose,
                )
                if append_date_days(start_date_original,
                                    0) <= append_date_days(
                                        start_date_original, interval):
                    if availability.get("date"):
                        # ... then add this day's slots when the day falls
                        # before the end of the interval.
                        if append_date_days(availability.get("date"),
                                            0) < append_date_days(
                                                start_date_original, interval):
                            appointment_schedules = build_appointment_schedules(
                                request,
                                interval,
                                append_date_days(start_date_original, 0),
                                append_date_days(start_date_original,
                                                 days=interval,
                                                 seconds=-1),
                                len(slot_list),
                                appointment_schedules,
                                chronodose,
                            )

        if motive_availability:
            request.add_vaccine_type(visit_motive_ids[motive_id])
        # Sometimes Doctolib does not allow to see slots for next weeks
        # which is a weird move, but still, we have to stop here.
        stop = not first_availability and not slots.get("next_slot", None)
        return (first_availability, appointment_count, appointment_schedules,
                stop, slots.get("next_slot"))
コード例 #6
0
ファイル: doctolib.py プロジェクト: sgraton/vitemadose
    def fetch(self, request: ScraperRequest) -> Optional[str]:
        """Return the earliest slot date found for the centre of ``request``.

        The centre's booking description is downloaded, empty appointment
        schedules are registered for every interval of
        ``INTERVAL_SPLIT_DAYS``, then ``get_appointments`` is called for
        each visit motive over up to ``DOCTOLIB_ITERATIONS`` weekly windows
        starting from now.

        Args:
            request: Scraper request carrying the centre URL and start date;
                it is updated with practitioner type, internal id,
                appointment count and schedules.

        Returns:
            The smallest slot date reported by ``get_appointments``, or
            ``None`` when online booking is not allowed, no visit motive
            exists, or no slot is found.

        Raises:
            BlockedByDoctolibError: If the booking endpoint answers 403.
        """
        centre = _parse_centre(request.get_url())

        # Doctolib fetches multiple vaccination centers sometimes
        # so if a practice id is present in query, only related agendas
        # should be selected.
        practice_id = _parse_practice_id(request.get_url())

        practice_same_adress = False

        centre_api_url = f"https://partners.doctolib.fr/booking/{centre}.json"
        response = self._client.get(centre_api_url, headers=DOCTOLIB_HEADERS)
        if response.status_code == 403:
            raise BlockedByDoctolibError(centre_api_url)

        response.raise_for_status()
        time.sleep(self._cooldown_interval)
        data = response.json()
        rdata = data.get("data", {})

        if not self.is_practice_id_valid(request, rdata):
            logger.warning(f"Invalid practice ID for this Doctolib center: {request.get_url()}")
            practice_id = None
            self.pop_practice_id(request)
        if practice_id:
            practice_id, practice_same_adress = link_practice_ids(practice_id, rdata)
        if len(rdata.get("places", [])) > 1 and practice_id is None:
            practice_id = rdata.get("places")[0].get("practice_ids", None)

        request.update_practitioner_type(parse_practitioner_type(centre, rdata))
        set_doctolib_center_internal_id(request, rdata, practice_id, practice_same_adress)
        # Check if  appointments are allowed
        if not is_allowing_online_appointments(rdata):
            request.set_appointments_only_by_phone(True)
            return None

        # visit_motive_categories
        # example: https://partners.doctolib.fr/hopital-public/tarbes/centre-de-vaccination-tarbes-ayguerote?speciality_id=5494&enable_cookies_consent=1
        visit_motive_category_id = _find_visit_motive_category_id(data)
        # visit_motive_id
        visit_motive_ids = _find_visit_motive_id(data, visit_motive_category_id=visit_motive_category_id)
        if visit_motive_ids is None:
            return None

        all_agendas = parse_agenda_ids(rdata)
        first_availability = None

        appointment_schedules = request.get_appointment_schedules()

        start_date = request.get_start_date()

        # Register one schedule with a zero count per interval.
        for interval in INTERVAL_SPLIT_DAYS:
            chronodose = interval == CHRONODOSES["Interval"]
            appointment_schedules = build_appointment_schedules(
                request,
                interval,
                append_date_days(start_date, 0),
                append_date_days(start_date, interval, 1),
                0,
                appointment_schedules,
                chronodose,
            )
        request.update_appointment_schedules(appointment_schedules)

        for visit_motive_id in visit_motive_ids:
            agenda_ids, practice_ids = _find_agenda_and_practice_ids(
                data, visit_motive_id, practice_id_filter=practice_id
            )
            # Truthiness also rejects None, which would otherwise reach
            # "-".join below and raise.
            if not agenda_ids or not practice_ids:
                continue
            agenda_ids = self.sort_agenda_ids(all_agendas, agenda_ids)

            agenda_ids_q = "-".join(agenda_ids)
            practice_ids_q = "-".join(practice_ids)

            for i in range(DOCTOLIB_ITERATIONS):
                # Window i starts i weeks from now.
                window_start = datetime.now() + timedelta(days=7 * i)
                start_date_tmp = window_start.strftime("%Y-%m-%d")
                sdate, appt, appointment_schedules, stop = self.get_appointments(
                    request,
                    start_date_tmp,
                    visit_motive_ids,
                    visit_motive_id,
                    agenda_ids_q,
                    practice_ids_q,
                    DOCTOLIB_SLOT_LIMIT,
                    start_date,
                    appointment_schedules,
                )

                if stop:
                    break
                if not sdate:
                    continue
                if not first_availability or sdate < first_availability:
                    first_availability = sdate

                request.update_appointment_count(request.appointment_count + appt)
            if appointment_schedules:
                request.update_appointment_schedules(appointment_schedules)

        return first_availability
コード例 #7
0
ファイル: doctolib.py プロジェクト: sgraton/vitemadose
    def get_appointments(
        self,
        request: ScraperRequest,
        start_date: str,
        visit_motive_ids,
        motive_id: str,
        agenda_ids_q: str,
        practice_ids_q: str,
        limit: int,
        start_date_original: str,
        appointment_schedules: list,
    ):
        """Query the availabilities endpoint for one motive and update schedules.

        Returns:
            A 4-tuple ``(first_availability, appointment_count,
            appointment_schedules, stop)``: the smallest slot date seen (or
            ``None``), the ``total`` reported by the response (0 when
            absent), the schedules returned by
            ``build_appointment_schedules``, and a flag set when neither a
            slot nor a ``next_slot`` was found.

        Raises:
            BlockedByDoctolibError: If the endpoint answers with HTTP 403.
        """
        query_url = f"https://partners.doctolib.fr/availabilities.json?start_date={start_date}&visit_motive_ids={motive_id}&agenda_ids={agenda_ids_q}&insurance_sector=public&practice_ids={practice_ids_q}&destroy_temporary=true&limit={limit}"
        response = self._client.get(query_url, headers=DOCTOLIB_HEADERS)
        if response.status_code == 403:
            raise BlockedByDoctolibError(request.get_url())

        response.raise_for_status()
        time.sleep(self._cooldown_interval)
        payload = response.json()

        reported_total = payload.get("total")
        slot_total = int(reported_total) if reported_total else 0

        earliest = None
        found_slot = False
        for day in payload["availabilities"]:
            day_slots = day.get("slots", None)
            if not day_slots:
                continue

            # Slots given as plain strings: only the first one is compared.
            head = day_slots[0]
            if isinstance(head, str) and (not earliest or head < earliest):
                earliest = head
                found_slot = True

            # Slots given as mappings: compare every ``start_date``.
            for entry in day_slots:
                if isinstance(entry, str):
                    continue
                begins = entry.get("start_date", None)
                if begins and (not earliest or begins < earliest):
                    earliest = begins
                    found_slot = True

            # A falsy vaccine entry is normalised to None.
            vaccine = visit_motive_ids[motive_id] or None

            for interval in INTERVAL_SPLIT_DAYS:
                is_chronodose = (
                    vaccine in CHRONODOSES["Vaccine"] and interval == CHRONODOSES["Interval"]
                )
                # Make sure the schedule for this interval exists (count 0).
                appointment_schedules = build_appointment_schedules(
                    request,
                    interval,
                    append_date_days(start_date_original, 0),
                    append_date_days(start_date_original, interval, 1),
                    0,
                    appointment_schedules,
                    is_chronodose,
                )
                if not (
                    append_date_days(start_date_original, 0)
                    <= append_date_days(start_date_original, interval)
                ):
                    continue
                day_date = day.get("date")
                if not day_date:
                    continue
                # Add this day's slots when it falls before the interval end.
                if append_date_days(day_date, 0) < append_date_days(start_date_original, interval):
                    appointment_schedules = build_appointment_schedules(
                        request,
                        interval,
                        append_date_days(start_date_original, 0),
                        append_date_days(start_date_original, interval, 1),
                        len(day.get("slots", [])),
                        appointment_schedules,
                        is_chronodose,
                    )

        if found_slot:
            request.add_vaccine_type(visit_motive_ids[motive_id])
        # Doctolib sometimes hides the slots of the following weeks; with no
        # slot and no ``next_slot`` there is nothing more to look for.
        stop = not earliest and not payload.get("next_slot", None)
        return earliest, slot_total, appointment_schedules, stop