Example #1
def get_default_fetch_map():
    return {
        "Doctolib": {
            "urls": get_conf_platform("doctolib").get("recognized_urls", []),
            "scraper_ptr": doctolib_fetch_slots,
        },
        "Keldoc": {
            "urls": get_conf_platform("keldoc").get("recognized_urls", []),
            "scraper_ptr": keldoc_fetch_slots,
        },
        "Maiia": {
            "urls": get_conf_platform("maiia").get("recognized_urls", []),
            "scraper_ptr": maiia_fetch_slots,
        },
        "Mapharma": {
            "urls": get_conf_platform("mapharma").get("recognized_urls", []),
            "scraper_ptr": mapharma_fetch_slots,
        },
        "Ordoclic": {
            "urls": get_conf_platform("ordoclic").get("recognized_urls", []),
            "scraper_ptr": ordoclic_fetch_slots,
        },
    }
Example #2
def get_default_fetch_map():
    return {
        "Doctolib": {
            "urls": get_conf_platform("doctolib").get("recognized_urls", []),
            "scraper_ptr": doctolib_fetch_slots,
        },
        "Keldoc": {
            "urls": get_conf_platform("keldoc").get("recognized_urls", []),
            "scraper_ptr": keldoc_fetch_slots,
        },
        "Maiia": {
            "urls": get_conf_platform("maiia").get("recognized_urls", []),
            "scraper_ptr": maiia_fetch_slots,
        },
        "Mapharma": {
            "urls": get_conf_platform("mapharma").get("recognized_urls", []),
            "scraper_ptr": mapharma_fetch_slots,
        },
        "Ordoclic": {
            "urls": get_conf_platform("ordoclic").get("recognized_urls", []),
            "scraper_ptr": ordoclic_fetch_slots,
        },
        "AvecMonDoc": {
            "urls": get_conf_platform("avecmondoc").get("recognized_urls", []),
            "scraper_ptr": avecmondoc_fetch_slots,
        },
        "mesoigner": {
            "platform_name": "mesoigner",
            "scraper_ptr": mesoigner_fetch_slots,
        },
        "Bimedoc": {
            "urls": get_conf_platform("bimedoc").get("recognized_urls", []),
            "scraper_ptr": bimedoc_fetch_slots,
        },
        "Valwin": {
            "platform_name": "Valwin",
            "scraper_ptr": valwin_fetch_slots,
        },
    }
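# How this map is typically consumed (a sketch; detect_platform is a
# hypothetical helper, not part of the excerpt): match a booking URL against
# each platform's recognized_urls. Entries that expose only "platform_name"
# (mesoigner, Valwin) are API-driven and cannot be matched by URL, so the
# loop simply skips them.
def detect_platform(fetch_map: dict, booking_url: str):
    for platform, entry in fetch_map.items():
        for recognized in entry.get("urls", []):
            if booking_url.startswith(recognized):
                return platform, entry["scraper_ptr"]
    return None, None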
Example #3
import re

from scraper.pattern.scraper_result import DRUG_STORE, GENERAL_PRACTITIONER, VACCINATION_CENTER
from utils.vmd_config import get_conf_platform

DOCTOLIB_CONF = get_conf_platform("doctolib")
DOCTOLIB_FILTERS = DOCTOLIB_CONF.get("filters", {})

DOCTOLIB_APPOINTMENT_REASON = [
    c.lower().strip() for c in DOCTOLIB_FILTERS.get("appointment_reason", [])
]

DOCTOLIB_CATEGORY = [c.lower().strip() for c in DOCTOLIB_FILTERS.get("appointment_category", [])]


def is_category_relevant(category):
    if not category:
        return False

    category = category.lower().strip()
    category = re.sub(" +", " ", category)
    for allowed_category in DOCTOLIB_CATEGORY:
        if allowed_category in category:
            return True
    # Some centers are oddly labeled, but a bare "vaccination" is still COVID-19 related.
    if category == "vaccination":
        return True
    return False
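# Illustrative checks of the rules above; these hold regardless of what the
# platform config puts in DOCTOLIB_CATEGORY:
assert is_category_relevant("Vaccination") is True       # bare-label special case
assert is_category_relevant("  VACCINATION  ") is True   # normalized before matching
assert is_category_relevant("") is False                 # empty input rejected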
Example #4
import re

from typing import Dict, List, Optional
from unidecode import unidecode

from scraper.doctolib.conf import DoctolibConf
from scraper.pattern.scraper_result import VACCINATION_CENTER
from utils.vmd_config import get_conf_platform, get_conf_inputs
from utils.vmd_utils import departementUtils, format_phone_number

DOCTOLIB_CONF = DoctolibConf(**get_conf_platform("doctolib"))
SCRAPER_CONF = DOCTOLIB_CONF.center_scraper


def get_departements():
    import csv

    # Guyane uses Maiia and does not have Doctolib pages
    NOT_INCLUDED_DEPARTEMENTS = ["Guyane"]
    with open(get_conf_inputs().get("departements"), encoding="utf8", newline="\n") as csvfile:
        reader = csv.DictReader(csvfile)
        departements = [str(row["nom_departement"]) for row in reader]
        # Filter instead of calling .remove() inside a throwaway list comprehension.
        return [dep for dep in departements if dep not in NOT_INCLUDED_DEPARTEMENTS]


def doctolib_urlify(departement: str) -> str:
    departement = re.sub(r"\s+|\W", "-", departement).lower()
    return unidecode(departement)
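# Quick sanity checks (assuming unidecode behaves as documented: the regex
# first replaces whitespace and punctuation with hyphens, then accents are stripped):
assert doctolib_urlify("Seine Maritime") == "seine-maritime"
assert doctolib_urlify("Côtes-d'Armor") == "cotes-d-armor"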
Example #5
import json
from scraper.pattern.scraper_request import ScraperRequest
from scraper.pattern.center_location import CenterLocation
from scraper.pattern.center_info import CenterInfo
import httpx
from pathlib import Path
from jsonschema import validate
from jsonschema.exceptions import ValidationError
from datetime import datetime
from dateutil.tz import tzutc
import io
import scraper.mesoigner.mesoigner as mesoigner
from scraper.pattern.vaccine import Vaccine
from utils.vmd_config import get_conf_platform

MESOIGNER_CONF = get_conf_platform("mesoigner")
MESOIGNER_APIs = MESOIGNER_CONF.get("api", "")

TEST_CENTRE_INFO = Path("tests", "fixtures", "mesoigner",
                        "mesoigner_center_info.json")


def test_get_appointments():
    """get_appointments should return first available appointment date"""

    with open(TEST_CENTRE_INFO, encoding="utf-8-sig") as f:
        center_data = json.load(f)

    # This center has availabilities and should return a date, a non-null
    # appointment_count, and vaccines
    request = ScraperRequest(
Example #6
    is_appointment_relevant,
    parse_practitioner_type,
    is_category_relevant,
)
from scraper.pattern.vaccine import get_vaccine_name, get_doctolib_vaccine_name, Vaccine
from scraper.pattern.scraper_request import ScraperRequest
from scraper.error import Blocked403, DoublonDoctolib, RequestError
from utils.vmd_config import get_conf_outputs, get_conf_platform, get_config
from utils.vmd_utils import DummyQueue
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

# PLATFORM MUST BE LOWERCASE; PLEASE KEEP THE "lower()" IN CASE OF BAD INPUT FORMAT.
PLATFORM = "doctolib".lower()

PLATFORM_CONF = get_conf_platform(PLATFORM)
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", True)
SCRAPE_ONLY_ATLAS = get_config().get("scrape_only_atlas_centers", False)

NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)
PLATFORM_DAYS_PER_PAGE = PLATFORM_CONF.get("days_per_page", 7)
# Ceil division: any leftover days require one extra page.
PLATFORM_PAGES_NUMBER = (NUMBER_OF_SCRAPED_DAYS + PLATFORM_DAYS_PER_PAGE - 1) // PLATFORM_DAYS_PER_PAGE

PLATFORM_TIMEOUT = PLATFORM_CONF.get("timeout", 10)
PLATFORM_REQUEST_SLEEP = PLATFORM_CONF.get("request_sleep", 0.1)
timeout = httpx.Timeout(PLATFORM_TIMEOUT, connect=PLATFORM_TIMEOUT)
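# Sanity check of the page-count arithmetic (illustrative values): 28 days at
# 7 days per page is exactly 4 pages; 30 days would round up to 5.
assert (28 + 7 - 1) // 7 == 4
assert (30 + 7 - 1) // 7 == 5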
Example #7
import json
from scraper.pattern.scraper_request import ScraperRequest
from scraper.pattern.center_info import CenterInfo
import httpx
from pathlib import Path
import io
import scraper.bimedoc.bimedoc as bimedoc
from scraper.pattern.vaccine import Vaccine
from utils.vmd_config import get_conf_platform
import pytest


BIMEDOC_CONF = get_conf_platform("bimedoc")
BIMEDOC_APIs = BIMEDOC_CONF.get("api", "")


TEST_CENTRE_INFO = Path("tests", "fixtures", "bimedoc", "bimedoc_center_info.json")


def test_get_appointments():
    """get_appointments should return first available appointment date"""

    with open(TEST_CENTRE_INFO, encoding="utf-8-sig") as f:
        center_data = json.load(f)
    center_info = CenterInfo.from_csv_data(center_data)

    # This center has availabilities and should return a date, a non-null
    # appointment_count, and vaccines
    request = ScraperRequest(
        "https://server.bimedoc.com/vmd/pharmacy-with-slots/9cf46288-0080-4a8d-8856-8e9998ced9f7?start_date=2021-08-10&end_date=2021-08-17",
Example #8
import httpx
import json
import logging
import os

from datetime import datetime, timedelta
from dateutil.parser import isoparse, parse
from pytz import timezone
from typing import Iterator, Optional, Tuple
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau
from scraper.pattern.scraper_request import ScraperRequest
from scraper.pattern.center_info import CenterInfo, CenterLocation
from scraper.pattern.vaccine import Vaccine, get_vaccine_name
from utils.vmd_config import get_conf_platform, get_config
from utils.vmd_utils import departementUtils, DummyQueue


AVECMONDOC_CONF = get_conf_platform("avecmondoc")
AVECMONDOC_ENABLED = AVECMONDOC_CONF.get("enabled", False)
AVECMONDOC_API = AVECMONDOC_CONF.get("api", {})
AVECMONDOC_SCRAPER = AVECMONDOC_CONF.get("center_scraper", {})
AVECMONDOC_FILTERS = AVECMONDOC_CONF.get("filters", {})
AVECMONDOC_VALID_REASONS = AVECMONDOC_FILTERS.get("valid_reasons", [])
AVECMONDOC_HEADERS = {
    "User-Agent": os.environ.get("AVECMONDOC_API_KEY", ""),
}

NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)
AVECMONDOC_DAYS_PER_PAGE = AVECMONDOC_CONF.get("days_per_page", 7)

timeout = httpx.Timeout(AVECMONDOC_CONF.get("timeout", 25), connect=AVECMONDOC_CONF.get("timeout", 25))
DEFAULT_CLIENT = httpx.Client(headers=AVECMONDOC_HEADERS, timeout=timeout)
logger = logging.getLogger("scraper")
Example #9
import json
from scraper.pattern.scraper_request import ScraperRequest
from scraper.pattern.center_info import CenterInfo
import httpx
from pathlib import Path
import io
import scraper.valwin.valwin as valwin
from scraper.pattern.vaccine import Vaccine
from utils.vmd_config import get_conf_platform
import pytest


VALWIN_CONF = get_conf_platform("Valwin")
VALWIN_APIs = VALWIN_CONF.get("api", "")


TEST_CENTRE_INFO = Path("tests", "fixtures", "valwin", "valwin_center_info.json")


def test_get_appointments():
    """get_appointments should return first available appointment date"""

    with open(TEST_CENTRE_INFO, encoding="utf-8-sig") as f:
        center_data = json.load(f)
    center_info = CenterInfo.from_csv_data(center_data)

    # This center has availabilities and should return a date, a non-null
    # appointment_count, and vaccines
    request = ScraperRequest(
        "https://pharma-api.epharmacie.pro/global/api/meetings/v2/aptiphar18-priker-magny-hameaux/slots",
Example #10
import os

import httpx

from scraper.circuit_breaker import ShortCircuit
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau
from scraper.pattern.vaccine import get_vaccine_name
from scraper.pattern.scraper_request import ScraperRequest
from scraper.profiler import Profiling
from utils.vmd_config import get_conf_platform, get_config, get_conf_outputs
from scraper.error import Blocked403
from utils.vmd_utils import DummyQueue, append_date_days
from typing import Dict, Iterator, List, Optional
import dateutil
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

PLATFORM = "mesoigner"

PLATFORM_CONF = get_conf_platform("mesoigner")
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)
MESOIGNER_HEADERS = {
    "Authorization": f'Mesoigner apikey="{os.environ.get("MESOIGNER_API_KEY", "")}"',
}
MESOIGNER_APIs = PLATFORM_CONF.get("api", "")

SCRAPER_CONF = PLATFORM_CONF.get("center_scraper", {})
CENTER_LIST_URL = PLATFORM_CONF.get("api", {}).get("center_list", {})

BOOSTER_VACCINES = get_config().get("vaccines_allowed_for_booster", [])

timeout = httpx.Timeout(PLATFORM_CONF.get("timeout", 30), connect=PLATFORM_CONF.get("timeout", 30))
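# The excerpt stops after the timeout; a plausible continuation, mirroring the
# avecmondoc and keldoc modules elsewhere in this file, would pair it with the
# platform headers in a shared client (the DEFAULT_CLIENT name is an assumption):
DEFAULT_CLIENT = httpx.Client(headers=MESOIGNER_HEADERS, timeout=timeout)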
Example #11
import logging

import httpx

from datetime import datetime, timedelta
from dateutil.parser import isoparse, parse as dateparse
from pytz import timezone
from typing import Dict, Iterator, List, Optional, Tuple, Set
from scraper.pattern.vaccine import get_vaccine_name
from scraper.pattern.scraper_request import ScraperRequest
from scraper.pattern.scraper_result import DRUG_STORE
from utils.vmd_config import get_conf_platform, get_config
from utils.vmd_utils import departementUtils, DummyQueue
from scraper.profiler import Profiling
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau

logger = logging.getLogger("scraper")

ORDOCLIC_CONF = get_conf_platform("ordoclic")
ORDOCLIC_API = ORDOCLIC_CONF.get("api", {})
ORDOCLIC_ENABLED = ORDOCLIC_CONF.get("enabled", False)
NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)

timeout = httpx.Timeout(ORDOCLIC_CONF.get("timeout", 25), connect=ORDOCLIC_CONF.get("timeout", 25))
DEFAULT_CLIENT = httpx.Client(timeout=timeout)
insee = {}
paris_tz = timezone("Europe/Paris")

# Filter on the injection rank
# Add 2 to the list to include second injections

# get all slugs
def search(client: httpx.Client = DEFAULT_CLIENT):
    base_url = ORDOCLIC_API.get("scraper")
Example #12
import logging
import os

import httpx

from scraper.keldoc.keldoc_filters import filter_vaccine_motives
from scraper.pattern.scraper_request import ScraperRequest
from scraper.profiler import Profiling
from utils.vmd_config import get_conf_platform, get_config, get_conf_outputs
from utils.vmd_utils import DummyQueue
from scraper.circuit_breaker import ShortCircuit
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau
import json
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

# PLATFORM MUST BE LOWERCASE; PLEASE KEEP THE "lower()" IN CASE OF BAD INPUT FORMAT.
PLATFORM = "keldoc".lower()

PLATFORM_CONF = get_conf_platform("keldoc")
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)

PLATFORM_TIMEOUT = PLATFORM_CONF.get("timeout", 25)

SCRAPE_ONLY_ATLAS = get_config().get("scrape_only_atlas_centers", False)

timeout = httpx.Timeout(PLATFORM_TIMEOUT, connect=PLATFORM_TIMEOUT)
# change KELDOC_KILL_SWITCH to True to bypass Keldoc scraping

KELDOC_HEADERS = {
    "User-Agent": os.environ.get("KELDOC_API_KEY", ""),
}
session = httpx.Client(timeout=timeout, headers=KELDOC_HEADERS)
logger = logging.getLogger("scraper")
Example #13
import os

import httpx

from scraper.pattern.vaccine import Vaccine, get_vaccine_name
from scraper.pattern.scraper_request import ScraperRequest
from scraper.profiler import Profiling
from utils.vmd_config import get_conf_platform, get_config, get_conf_outputs
from scraper.error import Blocked403
from utils.vmd_utils import DummyQueue, append_date_days
from typing import Dict, Iterator, List, Optional
import dateutil
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache
import datetime

PLATFORM = "bimedoc".lower()

PLATFORM_CONF = get_conf_platform("bimedoc")
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)

BIMEDOC_HEADERS = {"Authorization": f'Partner {os.environ.get("BIMEDOC_API_KEY", "")}'}


BIMEDOC_APIs = PLATFORM_CONF.get("api", "")

SCRAPER_CONF = PLATFORM_CONF.get("center_scraper", {})
CENTER_LIST_URL = PLATFORM_CONF.get("api", {}).get("center_list", {})

NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)


timeout = httpx.Timeout(PLATFORM_CONF.get("timeout", 30), connect=PLATFORM_CONF.get("timeout", 30))
Example #14
import logging

from pytz import timezone
from urllib import parse as urlparse
from urllib.parse import quote, parse_qs
from typing import List, Optional, Tuple
from scraper.profiler import Profiling
from scraper.pattern.vaccine import get_vaccine_name
from scraper.pattern.scraper_request import ScraperRequest
from scraper.maiia.maiia_utils import get_paged, MAIIA_LIMIT, DEFAULT_CLIENT
from utils.vmd_config import get_conf_platform, get_config, get_conf_outputs
from utils.vmd_utils import DummyQueue
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

PLATFORM = "maiia".lower()
PLATFORM_CONF = get_conf_platform("maiia")
PLATFORM_API = PLATFORM_CONF.get("api", {})
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)
PLATFORM_SCRAPER = PLATFORM_CONF.get("center_scraper", {})

# timeout = httpx.Timeout(PLATFORM_CONF.get("timeout", 25), connect=PLATFORM_CONF.get("timeout", 25))

logger = logging.getLogger("scraper")
paris_tz = timezone("Europe/Paris")

MAIIA_URL = PLATFORM_CONF.get("base_url")
NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)

MAIIA_DOSES = PLATFORM_SCRAPER.get("dose_types")

MAIIA_DO_NOT_SCRAP_NAME = PLATFORM_SCRAPER.get("excluded_names", [])
Example #15
import logging
import os
import time
from math import floor
from typing import Optional, Union, Iterable, List
from urllib.parse import urlsplit, parse_qs
import datetime as dt
from dateutil.parser import isoparse
from pytz import timezone
import httpx

from scraper.keldoc.keldoc_filters import parse_keldoc_availability
from scraper.keldoc.keldoc_routes import API_KELDOC_CALENDAR, API_KELDOC_CENTER, API_KELDOC_CABINETS
from scraper.pattern.scraper_request import ScraperRequest
from scraper.pattern.center_info import get_vaccine_name, Vaccine, INTERVAL_SPLIT_DAYS, CHRONODOSES
from utils.vmd_config import get_conf_platform

KELDOC_CONF = get_conf_platform("keldoc")
timeout = httpx.Timeout(KELDOC_CONF.get("timeout", 25),
                        connect=KELDOC_CONF.get("timeout", 25))
KELDOC_HEADERS = {
    "User-Agent": os.environ.get("KELDOC_API_KEY", ""),
}
# 16 days is enough for now, due to recent issues with Keldoc API
KELDOC_SLOT_PAGES = KELDOC_CONF.get("pagination", {}).get("pages", 2)
KELDOC_DAYS_PER_PAGE = KELDOC_CONF.get("pagination", {}).get("days", 4)
KELDOC_SLOT_TIMEOUT = KELDOC_CONF.get("timeout", 20)
DEFAULT_CLIENT = httpx.Client(timeout=timeout, headers=KELDOC_HEADERS)
logger = logging.getLogger("scraper")
paris_tz = timezone("Europe/Paris")


class KeldocCenter:
Example #16
import datetime as dt
import logging

from pytz import timezone

import requests
from dateutil.parser import isoparse
from urllib import parse as urlparse
from urllib.parse import quote, parse_qs
from typing import List, Optional, Tuple

from scraper.profiler import Profiling
from scraper.pattern.center_info import INTERVAL_SPLIT_DAYS, CHRONODOSES
from scraper.pattern.vaccine import get_vaccine_name
from scraper.pattern.scraper_request import ScraperRequest
from scraper.maiia.maiia_utils import get_paged, MAIIA_LIMIT, DEFAULT_CLIENT
from utils.vmd_config import get_conf_platform, get_config

MAIIA_CONF = get_conf_platform("maiia")
MAIIA_API = MAIIA_CONF.get("api", {})
MAIIA_ENABLED = MAIIA_CONF.get("enabled", False)
MAIIA_SCRAPER = MAIIA_CONF.get("center_scraper", {})

# timeout = httpx.Timeout(MAIIA_CONF.get("timeout", 25), connect=MAIIA_CONF.get("timeout", 25))

logger = logging.getLogger("scraper")
paris_tz = timezone("Europe/Paris")

MAIIA_URL = MAIIA_CONF.get("base_url")
MAIIA_DAY_LIMIT = MAIIA_CONF.get("calendar_limit", 50)


def parse_slots(slots: list) -> Optional[dt.datetime]:
    if not slots:
Example #17
import os

from dateutil.parser import isoparse
from pytz import timezone
from urllib.parse import parse_qs
from bs4 import BeautifulSoup
from pathlib import Path
from urllib import parse
from typing import Optional

from scraper.pattern.scraper_request import ScraperRequest
from scraper.pattern.scraper_result import DRUG_STORE
from scraper.pattern.center_info import get_vaccine_name, Vaccine, INTERVAL_SPLIT_DAYS, CHRONODOSES
from utils.vmd_config import get_conf_platform
from utils.vmd_utils import departementUtils
from scraper.profiler import Profiling

MAPHARMA_CONF = get_conf_platform("mapharma")
MAPHARMA_API = MAPHARMA_CONF.get("api", {})
MAPHARMA_ENABLED = MAPHARMA_CONF.get("enabled", False)

# timeout = httpx.Timeout(MAPHARMA_CONF.get("timeout", 25), connect=MAPHARMA_CONF.get("timeout", 25))

MAPHARMA_REFERER = MAPHARMA_CONF.get("headers", {}).get("referer", "")
MAPHARMA_HEADERS = {
    "User-Agent": os.environ.get("MAPHARMA_API_KEY", ""),
    "Referer": MAPHARMA_REFERER,
}

MAPHARMA_FILTERS = MAPHARMA_CONF.get("filters", {})
MAPHARMA_CAMPAGNES_VALIDES = MAPHARMA_CONF.get("valid_campaigns", [])
MAPHARMA_CAMPAGNES_INVALIDES = MAPHARMA_CONF.get("invalid_campaigns", [])
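# How the two campaign lists above are typically consumed (a sketch;
# is_campagne_valide is a hypothetical helper, not part of the excerpt, and it
# assumes the configured patterns are lowercase substrings): a campaign is kept
# only if it matches a valid pattern and no invalid one.
def is_campagne_valide(campagne: str) -> bool:
    campagne = campagne.lower()
    if any(pattern in campagne for pattern in MAPHARMA_CAMPAGNES_INVALIDES):
        return False
    return any(pattern in campagne for pattern in MAPHARMA_CAMPAGNES_VALIDES)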