def get_default_fetch_map():
    """Return the scraper dispatch table.

    Maps each platform display name to its recognized URL prefixes (from the
    platform configuration) and the function that fetches its slots.
    """
    # (display name, config key, slot-fetch function) — order preserved in the result.
    platforms = [
        ("Doctolib", "doctolib", doctolib_fetch_slots),
        ("Keldoc", "keldoc", keldoc_fetch_slots),
        ("Maiia", "maiia", maiia_fetch_slots),
        ("Mapharma", "mapharma", mapharma_fetch_slots),
        ("Ordoclic", "ordoclic", ordoclic_fetch_slots),
    ]
    return {
        display_name: {
            "urls": get_conf_platform(conf_key).get("recognized_urls", []),
            "scraper_ptr": fetch_slots,
        }
        for display_name, conf_key, fetch_slots in platforms
    }
def get_default_fetch_map():
    """Return the scraper dispatch table.

    Most platforms are matched by their recognized URL prefixes (read from the
    platform configuration); "mesoigner" and "Valwin" are keyed by an explicit
    platform name instead. Each entry carries the slot-fetch function.
    """

    def url_entry(conf_key, fetch_slots):
        # Common shape for URL-recognized platforms.
        return {
            "urls": get_conf_platform(conf_key).get("recognized_urls", []),
            "scraper_ptr": fetch_slots,
        }

    return {
        "Doctolib": url_entry("doctolib", doctolib_fetch_slots),
        "Keldoc": url_entry("keldoc", keldoc_fetch_slots),
        "Maiia": url_entry("maiia", maiia_fetch_slots),
        "Mapharma": url_entry("mapharma", mapharma_fetch_slots),
        "Ordoclic": url_entry("ordoclic", ordoclic_fetch_slots),
        "AvecMonDoc": url_entry("avecmondoc", avecmondoc_fetch_slots),
        # Matched by platform name rather than recognized URLs.
        "mesoigner": {"platform_name": "mesoigner", "scraper_ptr": mesoigner_fetch_slots},
        "Bimedoc": url_entry("bimedoc", bimedoc_fetch_slots),
        # Matched by platform name rather than recognized URLs.
        "Valwin": {"platform_name": "Valwin", "scraper_ptr": valwin_fetch_slots},
    }
import re

from scraper.pattern.scraper_result import DRUG_STORE, GENERAL_PRACTITIONER, VACCINATION_CENTER
from utils.vmd_config import get_conf_platform

DOCTOLIB_CONF = get_conf_platform("doctolib")
DOCTOLIB_FILTERS = DOCTOLIB_CONF.get("filters", {})
# Normalize the configured filter terms once at import time (lowercased, trimmed).
DOCTOLIB_APPOINTMENT_REASON = [c.lower().strip() for c in DOCTOLIB_FILTERS.get("appointment_reason", [])]
DOCTOLIB_CATEGORY = [c.lower().strip() for c in DOCTOLIB_FILTERS.get("appointment_category", [])]


def is_category_relevant(category):
    """Return True when a Doctolib appointment category matches a configured filter.

    The category is lowercased, trimmed and whitespace-collapsed before a
    substring match against every configured category filter.
    """
    if not category:
        return False
    normalized = re.sub(" +", " ", category.lower().strip())
    # Some centers label themselves plainly as "vaccination"; that is still
    # COVID-19 related even when no configured filter matches.
    if normalized == "vaccination":
        return True
    return any(allowed in normalized for allowed in DOCTOLIB_CATEGORY)
import csv
import re
from typing import Dict, List, Optional

from unidecode import unidecode

from scraper.doctolib.conf import DoctolibConf
from scraper.pattern.scraper_result import VACCINATION_CENTER
from utils.vmd_config import get_conf_platform, get_conf_inputs
from utils.vmd_utils import departementUtils, format_phone_number

DOCTOLIB_CONF = DoctolibConf(**get_conf_platform("doctolib"))
SCRAPER_CONF = DOCTOLIB_CONF.center_scraper

# Guyane uses Maiia and does not have doctolib pages
NOT_INCLUDED_DEPARTEMENTS = ["Guyane"]


def get_departements():
    """Return departement names from the configured CSV, minus excluded ones.

    Previously exclusions were applied with a list comprehension of
    ``list.remove`` calls — a side-effect-only comprehension that also raised
    ValueError when an excluded name was absent from the CSV. Filtering while
    reading avoids both problems.
    """
    with open(get_conf_inputs().get("departements"), encoding="utf8", newline="\n") as csvfile:
        reader = csv.DictReader(csvfile)
        return [
            str(row["nom_departement"])
            for row in reader
            if str(row["nom_departement"]) not in NOT_INCLUDED_DEPARTEMENTS
        ]


def doctolib_urlify(departement: str) -> str:
    """Turn a departement name into its Doctolib URL slug.

    Runs of whitespace and any non-word character become "-", the result is
    lowercased and stripped of accents.
    """
    return unidecode(re.sub(r"\s+|\W", "-", departement).lower())
import json from scraper.pattern.scraper_request import ScraperRequest from scraper.pattern.center_location import CenterLocation from scraper.pattern.center_info import CenterInfo import httpx from pathlib import Path from jsonschema import validate from jsonschema.exceptions import ValidationError from datetime import datetime from dateutil.tz import tzutc import io import scraper.mesoigner.mesoigner as mesoigner from scraper.pattern.vaccine import Vaccine from utils.vmd_config import get_conf_platform MESOIGNER_CONF = get_conf_platform("mesoigner") MESOIGNER_APIs = MESOIGNER_CONF.get("api", "") TEST_CENTRE_INFO = Path("tests", "fixtures", "mesoigner", "mesoigner_center_info.json") def test_get_appointments(): """get_appointments should return first available appointment date""" center_data = dict() center_data = json.load( io.open(TEST_CENTRE_INFO, "r", encoding="utf-8-sig")) # This center has availabilities and should return a date, non null appointment_count and vaccines request = ScraperRequest(
is_appointment_relevant, parse_practitioner_type, is_category_relevant, ) from scraper.pattern.vaccine import get_vaccine_name, get_doctolib_vaccine_name, Vaccine from scraper.pattern.scraper_request import ScraperRequest from scraper.error import Blocked403, DoublonDoctolib, RequestError from utils.vmd_config import get_conf_outputs, get_conf_platform, get_config from utils.vmd_utils import DummyQueue from cachecontrol import CacheControl from cachecontrol.caches.file_cache import FileCache # PLATFORM MUST BE LOW, PLEASE LET THE "lower()" IN CASE OF BAD INPUT FORMAT. PLATFORM = "doctolib".lower() PLATFORM_CONF = get_conf_platform(PLATFORM) PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", True) SCRAPE_ONLY_ATLAS = get_config().get("scrape_only_atlas_centers", False) NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28) PLATFORM_DAYS_PER_PAGE = PLATFORM_CONF.get("days_per_page", 7) if NUMBER_OF_SCRAPED_DAYS % PLATFORM_DAYS_PER_PAGE == 0: PLATFORM_PAGES_NUMBER = int(NUMBER_OF_SCRAPED_DAYS / PLATFORM_DAYS_PER_PAGE) else: PLATFORM_PAGES_NUMBER = (NUMBER_OF_SCRAPED_DAYS // PLATFORM_DAYS_PER_PAGE) + 1 PLATFORM_TIMEOUT = PLATFORM_CONF.get("timeout", 10) PLATFORM_REQUEST_SLEEP = PLATFORM_CONF.get("request_sleep", 0.1) timeout = httpx.Timeout(PLATFORM_TIMEOUT, connect=PLATFORM_TIMEOUT)
import json from scraper.pattern.scraper_request import ScraperRequest from scraper.pattern.center_info import CenterInfo import httpx from pathlib import Path import io import scraper.bimedoc.bimedoc as bimedoc from scraper.pattern.vaccine import Vaccine from utils.vmd_config import get_conf_platform import httpx import pytest BIMEDOC_CONF = get_conf_platform("bimedoc") BIMEDOC_APIs = BIMEDOC_CONF.get("api", "") TEST_CENTRE_INFO = Path("tests", "fixtures", "bimedoc", "bimedoc_center_info.json") def test_get_appointments(): """get_appointments should return first available appointment date""" center_data = dict() center_data = json.load(io.open(TEST_CENTRE_INFO, "r", encoding="utf-8-sig")) center_info = CenterInfo.from_csv_data(center_data) # This center has availabilities and should return a date, non null appointment_count and vaccines request = ScraperRequest( "https://server.bimedoc.com/vmd/pharmacy-with-slots/9cf46288-0080-4a8d-8856-8e9998ced9f7?start_date=2021-08-10&end_date=2021-08-17",
import json
import logging  # used by logging.getLogger below; was missing from this import block
import os  # used by os.environ below; was missing from this import block
from datetime import datetime, timedelta
from typing import Iterator, Optional, Tuple

import httpx
from dateutil.parser import isoparse, parse
from pytz import timezone

from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau
from scraper.pattern.scraper_request import ScraperRequest
from scraper.pattern.center_info import CenterInfo, CenterLocation
from scraper.pattern.vaccine import Vaccine, get_vaccine_name
from utils.vmd_config import get_conf_platform, get_config
from utils.vmd_utils import departementUtils, DummyQueue

# AvecMonDoc platform configuration.
AVECMONDOC_CONF = get_conf_platform("avecmondoc")
AVECMONDOC_ENABLED = AVECMONDOC_CONF.get("enabled", False)
AVECMONDOC_API = AVECMONDOC_CONF.get("api", {})
AVECMONDOC_SCRAPER = AVECMONDOC_CONF.get("center_scraper", {})
AVECMONDOC_FILTERS = AVECMONDOC_CONF.get("filters", {})
AVECMONDOC_VALID_REASONS = AVECMONDOC_FILTERS.get("valid_reasons", [])

# The API key is sent through the User-Agent header.
AVECMONDOC_HEADERS = {
    "User-Agent": os.environ.get("AVECMONDOC_API_KEY", ""),
}

NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)
AVECMONDOC_DAYS_PER_PAGE = AVECMONDOC_CONF.get("days_per_page", 7)

# Read the timeout from config once instead of twice.
_AVECMONDOC_TIMEOUT = AVECMONDOC_CONF.get("timeout", 25)
timeout = httpx.Timeout(_AVECMONDOC_TIMEOUT, connect=_AVECMONDOC_TIMEOUT)
DEFAULT_CLIENT = httpx.Client(headers=AVECMONDOC_HEADERS, timeout=timeout)

logger = logging.getLogger("scraper")
import json from scraper.pattern.scraper_request import ScraperRequest from scraper.pattern.center_info import CenterInfo import httpx from pathlib import Path import io import scraper.valwin.valwin as valwin from scraper.pattern.vaccine import Vaccine from utils.vmd_config import get_conf_platform import httpx import pytest VALWIN_conf = get_conf_platform("Valwin") VALWIN_APIs = VALWIN_conf.get("api", "") TEST_CENTRE_INFO = Path("tests", "fixtures", "valwin", "valwin_center_info.json") def test_get_appointments(): """get_appointments should return first available appointment date""" center_data = dict() center_data = json.load(io.open(TEST_CENTRE_INFO, "r", encoding="utf-8-sig")) center_info = CenterInfo.from_csv_data(center_data) # This center has availabilities and should return a date, non null appointment_count and vaccines request = ScraperRequest( "https://pharma-api.epharmacie.pro/global/api/meetings/v2/aptiphar18-priker-magny-hameaux/slots",
import os  # used by os.environ below; not imported in this chunk (idempotent if imported earlier)
import httpx  # used by httpx.Timeout below; not imported in this chunk (idempotent if imported earlier)
from typing import Dict, Iterator, List, Optional

import dateutil
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

from scraper.circuit_breaker import ShortCircuit
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau
from scraper.pattern.vaccine import get_vaccine_name
from scraper.pattern.scraper_request import ScraperRequest
from scraper.profiler import Profiling
from utils.vmd_config import get_conf_platform, get_config, get_conf_outputs
from scraper.error import Blocked403
from utils.vmd_utils import DummyQueue, append_date_days

# Mesoigner platform configuration.
PLATFORM = "mesoigner"
PLATFORM_CONF = get_conf_platform("mesoigner")
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)

# The API key is passed through a custom Authorization scheme.
MESOIGNER_HEADERS = {
    "Authorization": f'Mesoigner apikey="{os.environ.get("MESOIGNER_API_KEY", "")}"',
}

MESOIGNER_APIs = PLATFORM_CONF.get("api", "")
SCRAPER_CONF = PLATFORM_CONF.get("center_scraper", {})
CENTER_LIST_URL = PLATFORM_CONF.get("api", {}).get("center_list", {})
BOOSTER_VACCINES = get_config().get("vaccines_allowed_for_booster", [])

# Read the timeout from config once instead of twice.
_MESOIGNER_TIMEOUT = PLATFORM_CONF.get("timeout", 30)
timeout = httpx.Timeout(_MESOIGNER_TIMEOUT, connect=_MESOIGNER_TIMEOUT)
from datetime import datetime, timedelta from dateutil.parser import isoparse, parse as dateparse from pytz import timezone from typing import Dict, Iterator, List, Optional, Tuple, Set from scraper.pattern.vaccine import get_vaccine_name from scraper.pattern.scraper_request import ScraperRequest from scraper.pattern.scraper_result import DRUG_STORE from utils.vmd_config import get_conf_platform, get_config from utils.vmd_utils import departementUtils, DummyQueue from scraper.profiler import Profiling from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau logger = logging.getLogger("scraper") ORDOCLIC_CONF = get_conf_platform("ordoclic") ORDOCLIC_API = ORDOCLIC_CONF.get("api", {}) ORDOCLIC_ENABLED = ORDOCLIC_CONF.get("enabled", False) NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28) timeout = httpx.Timeout(ORDOCLIC_CONF.get("timeout", 25), connect=ORDOCLIC_CONF.get("timeout", 25)) DEFAULT_CLIENT = httpx.Client(timeout=timeout) insee = {} paris_tz = timezone("Europe/Paris") # Filtre pour le rang d'injection # Il faut rajouter 2 à la liste si l'on veut les 2èmes injections # get all slugs def search(client: httpx.Client = DEFAULT_CLIENT): base_url = ORDOCLIC_API.get("scraper")
import json
import logging  # used by logging.getLogger below; not imported in this chunk (idempotent if imported earlier)
import os  # used by os.environ below; not imported in this chunk (idempotent if imported earlier)

import httpx  # used by httpx.Timeout/Client below; not imported in this chunk (idempotent if imported earlier)
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

from scraper.keldoc.keldoc_filters import filter_vaccine_motives
from scraper.pattern.scraper_request import ScraperRequest
from scraper.profiler import Profiling
from scraper.circuit_breaker import ShortCircuit
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau
from utils.vmd_config import get_conf_platform, get_config, get_conf_outputs
from utils.vmd_utils import DummyQueue

# PLATFORM MUST BE LOW, PLEASE LET THE "lower()" IN CASE OF BAD INPUT FORMAT.
PLATFORM = "keldoc".lower()
PLATFORM_CONF = get_conf_platform("keldoc")
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)
PLATFORM_TIMEOUT = PLATFORM_CONF.get("timeout", 25)
SCRAPE_ONLY_ATLAS = get_config().get("scrape_only_atlas_centers", False)

timeout = httpx.Timeout(PLATFORM_TIMEOUT, connect=PLATFORM_TIMEOUT)

# NOTE(review): an earlier comment said to set KELDOC_KILL_SWITCH to True to
# bypass Keldoc scraping, but no such variable exists in this module chunk;
# the "enabled" config flag (PLATFORM_ENABLED) appears to control scraping —
# confirm before relying on it.
# The API key is sent through the User-Agent header.
KELDOC_HEADERS = {
    "User-Agent": os.environ.get("KELDOC_API_KEY", ""),
}
session = httpx.Client(timeout=timeout, headers=KELDOC_HEADERS)
logger = logging.getLogger("scraper")
import datetime
import os  # used by os.environ below; not imported in this chunk (idempotent if imported earlier)
from typing import Dict, Iterator, List, Optional

import dateutil
import httpx  # used by httpx.Timeout below; not imported in this chunk (idempotent if imported earlier)
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

# Merged the two duplicate imports from scraper.pattern.vaccine into one.
from scraper.pattern.vaccine import Vaccine, get_vaccine_name
from scraper.pattern.scraper_request import ScraperRequest
from scraper.profiler import Profiling
from utils.vmd_config import get_conf_platform, get_config, get_conf_outputs
from scraper.error import Blocked403
from utils.vmd_utils import DummyQueue, append_date_days

# Bimedoc platform configuration.
PLATFORM = "bimedoc".lower()
PLATFORM_CONF = get_conf_platform("bimedoc")
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)

# The API key is passed through the Authorization header.
BIMEDOC_HEADERS = {"Authorization": f'Partner {os.environ.get("BIMEDOC_API_KEY", "")}'}
BIMEDOC_APIs = PLATFORM_CONF.get("api", "")
SCRAPER_CONF = PLATFORM_CONF.get("center_scraper", {})
CENTER_LIST_URL = PLATFORM_CONF.get("api", {}).get("center_list", {})
NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)

# Read the timeout from config once instead of twice.
_BIMEDOC_TIMEOUT = PLATFORM_CONF.get("timeout", 30)
timeout = httpx.Timeout(_BIMEDOC_TIMEOUT, connect=_BIMEDOC_TIMEOUT)
from urllib import parse as urlparse
from urllib.parse import quote, parse_qs
from typing import List, Optional, Tuple

import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

from scraper.profiler import Profiling
from scraper.pattern.vaccine import get_vaccine_name
from scraper.pattern.scraper_request import ScraperRequest
from scraper.maiia.maiia_utils import get_paged, MAIIA_LIMIT, DEFAULT_CLIENT
from utils.vmd_config import get_conf_platform, get_config, get_conf_outputs
from utils.vmd_utils import DummyQueue
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau

# Maiia platform configuration.
PLATFORM = "maiia".lower()
PLATFORM_CONF = get_conf_platform("maiia")
PLATFORM_API = PLATFORM_CONF.get("api", {})
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)
PLATFORM_SCRAPER = PLATFORM_CONF.get("center_scraper", {})

# timeout = httpx.Timeout(PLATFORM_CONF.get("timeout", 25), connect=PLATFORM_CONF.get("timeout", 25))

# NOTE(review): `logging` and `timezone` (pytz) are not imported in this chunk;
# presumably imported earlier in the file — confirm.
logger = logging.getLogger("scraper")
paris_tz = timezone("Europe/Paris")

MAIIA_URL = PLATFORM_CONF.get("base_url")
NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)
MAIIA_DOSES = PLATFORM_SCRAPER.get("dose_types")
MAIIA_DO_NOT_SCRAP_NAME = PLATFORM_SCRAPER.get("excluded_names", [])
import time from math import floor from typing import Optional, Union, Iterable, List from urllib.parse import urlsplit, parse_qs import datetime as dt from dateutil.parser import isoparse from pytz import timezone import httpx from scraper.keldoc.keldoc_filters import parse_keldoc_availability from scraper.keldoc.keldoc_routes import API_KELDOC_CALENDAR, API_KELDOC_CENTER, API_KELDOC_CABINETS from scraper.pattern.scraper_request import ScraperRequest from scraper.pattern.center_info import get_vaccine_name, Vaccine, INTERVAL_SPLIT_DAYS, CHRONODOSES from utils.vmd_config import get_conf_platform KELDOC_CONF = get_conf_platform("keldoc") timeout = httpx.Timeout(KELDOC_CONF.get("timeout", 25), connect=KELDOC_CONF.get("timeout", 25)) KELDOC_HEADERS = { "User-Agent": os.environ.get("KELDOC_API_KEY", ""), } # 16 days is enough for now, due to recent issues with Keldoc API KELDOC_SLOT_PAGES = KELDOC_CONF.get("pagination", {}).get("pages", 2) KELDOC_DAYS_PER_PAGE = KELDOC_CONF.get("pagination", {}).get("days", 4) KELDOC_SLOT_TIMEOUT = KELDOC_CONF.get("timeout", 20) DEFAULT_CLIENT = httpx.Client(timeout=timeout, headers=KELDOC_HEADERS) logger = logging.getLogger("scraper") paris_tz = timezone("Europe/Paris") class KeldocCenter:
from pytz import timezone import requests from dateutil.parser import isoparse from urllib import parse as urlparse from urllib.parse import quote, parse_qs from typing import List, Optional, Tuple from scraper.profiler import Profiling from scraper.pattern.center_info import INTERVAL_SPLIT_DAYS, CHRONODOSES from scraper.pattern.vaccine import get_vaccine_name from scraper.pattern.scraper_request import ScraperRequest from scraper.maiia.maiia_utils import get_paged, MAIIA_LIMIT, DEFAULT_CLIENT from utils.vmd_config import get_conf_platform, get_config MAIIA_CONF = get_conf_platform("maiia") MAIIA_API = MAIIA_CONF.get("api", {}) MAIIA_ENABLED = MAIIA_CONF.get("enabled", False) MAIIA_SCRAPER = MAIIA_CONF.get("center_scraper", {}) # timeout = httpx.Timeout(MAIIA_CONF.get("timeout", 25), connect=MAIIA_CONF.get("timeout", 25)) logger = logging.getLogger("scraper") paris_tz = timezone("Europe/Paris") MAIIA_URL = MAIIA_CONF.get("base_url") MAIIA_DAY_LIMIT = MAIIA_CONF.get("calendar_limit", 50) def parse_slots(slots: list) -> Optional[dt.datetime]: if not slots:
from pathlib import Path
from typing import Optional
from urllib import parse
from urllib.parse import parse_qs

from bs4 import BeautifulSoup
from dateutil.parser import isoparse
from pytz import timezone

from scraper.pattern.scraper_request import ScraperRequest
from scraper.pattern.scraper_result import DRUG_STORE
from scraper.pattern.center_info import get_vaccine_name, Vaccine, INTERVAL_SPLIT_DAYS, CHRONODOSES
from utils.vmd_config import get_conf_platform
from utils.vmd_utils import departementUtils
from scraper.profiler import Profiling

# Mapharma platform configuration.
MAPHARMA_CONF = get_conf_platform("mapharma")
MAPHARMA_API = MAPHARMA_CONF.get("api", {})
MAPHARMA_ENABLED = MAPHARMA_CONF.get("enabled", False)
# timeout = httpx.Timeout(MAPHARMA_CONF.get("timeout", 25), connect=MAPHARMA_CONF.get("timeout", 25))

# NOTE(review): "MAPARMA" looks like a typo for "MAPHARMA"; name kept unchanged
# because other parts of the file may reference it. `os` is not imported in this
# chunk — presumably imported earlier in the file; confirm.
MAPARMA_REFERER = MAPHARMA_CONF.get("headers", {}).get("referer", {})
# The API key is sent through the User-Agent header.
MAPHARMA_HEADERS = {
    "User-Agent": os.environ.get("MAPHARMA_API_KEY", ""),
    "Referer": MAPARMA_REFERER,
}

MAPHARMA_FILTERS = MAPHARMA_CONF.get("filters", {})
MAPHARMA_CAMPAGNES_VALIDES = MAPHARMA_CONF.get("valid_campaigns", [])
MAPHARMA_CAMPAGNES_INVALIDES = MAPHARMA_CONF.get("invalid_campaigns", [])