Example #1
def centre_iterator(overwrite_centers_file=True):
    """Yield Maiia centers from the pre-scraped list published on the data-auto mirror."""
    if not MAIIA_ENABLED:
        return None
    try:
        # Build the URL of the pre-scraped centers file: data-auto base URL + scraper result path.
        center_path = MAIIA_SCRAPER.get("result_path")
        data_auto = get_config().get("data-auto", {}).get("base_url")
        url = f"{data_auto}{center_path}"
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        if overwrite_centers_file:
            with open(center_path, "w") as f:
                f.write(json.dumps(data, indent=2))
        logger.info(f"Found {len(data)} Maiia centers (external scraper).")
        for center in data:
            yield center
    except Exception as e:
        logger.warning(f"Unable to scrape Maiia centers: {e}")
Example #2
from enum import Enum
from typing import Optional

from utils.vmd_config import get_config

VACCINE_CONF = get_config().get("vaccines", {})


class Vaccine(str, Enum):
    PFIZER = "Pfizer-BioNTech"
    MODERNA = "Moderna"
    ASTRAZENECA = "AstraZeneca"
    JANSSEN = "Janssen"
    ARNM = "ARNm"


VACCINES_NAMES = {
    Vaccine.PFIZER: VACCINE_CONF.get(Vaccine.PFIZER, []),
    Vaccine.MODERNA: VACCINE_CONF.get(Vaccine.MODERNA, []),
    Vaccine.ARNM: VACCINE_CONF.get(Vaccine.ARNM, []),
    Vaccine.ASTRAZENECA: VACCINE_CONF.get(Vaccine.ASTRAZENECA, []),
    Vaccine.JANSSEN: VACCINE_CONF.get(Vaccine.JANSSEN, []),
}


def get_vaccine_name(name: Optional[str], fallback: Optional[Vaccine] = None) -> Optional[Vaccine]:
    if not name:
        return fallback
    name = name.lower().strip()
    for vaccine, vaccine_names in VACCINES_NAMES.items():
        for vaccine_name in vaccine_names:
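
The excerpt stops inside the inner loop. A plausible completion, written as a standalone sketch rather than a claim about the project's exact matching rule, and reusing the Vaccine and VACCINES_NAMES definitions above: return the first vaccine whose configured alias appears in the normalised name, otherwise the fallback.

# Hypothetical completion of get_vaccine_name (not part of the excerpt above).
def get_vaccine_name_sketch(name: Optional[str], fallback: Optional[Vaccine] = None) -> Optional[Vaccine]:
    if not name:
        return fallback
    name = name.lower().strip()
    for vaccine, vaccine_names in VACCINES_NAMES.items():
        for vaccine_name in vaccine_names:
            if vaccine_name.lower() in name:
                return vaccine
    return fallback

Provided the Pfizer-BioNTech entry of the vaccines config lists a matching alias, get_vaccine_name_sketch("Vaccination Pfizer adulte") would return Vaccine.PFIZER, and an unrecognised label falls back to the second argument.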
Example #3
    is_category_relevant,
)
from scraper.pattern.vaccine import get_vaccine_name, get_doctolib_vaccine_name, Vaccine
from scraper.pattern.scraper_request import ScraperRequest
from scraper.error import Blocked403, DoublonDoctolib, RequestError
from utils.vmd_config import get_conf_outputs, get_conf_platform, get_config
from utils.vmd_utils import DummyQueue
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

# PLATFORM must be lowercase; keep the lower() call to guard against badly formatted input.
PLATFORM = "doctolib".lower()

PLATFORM_CONF = get_conf_platform(PLATFORM)
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", True)
SCRAPE_ONLY_ATLAS = get_config().get("scrape_only_atlas_centers", False)

NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)
PLATFORM_DAYS_PER_PAGE = PLATFORM_CONF.get("days_per_page", 7)
# Number of result pages needed to cover the scraping window (ceiling division).
if NUMBER_OF_SCRAPED_DAYS % PLATFORM_DAYS_PER_PAGE == 0:
    PLATFORM_PAGES_NUMBER = NUMBER_OF_SCRAPED_DAYS // PLATFORM_DAYS_PER_PAGE
else:
    PLATFORM_PAGES_NUMBER = NUMBER_OF_SCRAPED_DAYS // PLATFORM_DAYS_PER_PAGE + 1

PLATFORM_TIMEOUT = PLATFORM_CONF.get("timeout", 10)
PLATFORM_REQUEST_SLEEP = PLATFORM_CONF.get("request_sleep", 0.1)
timeout = httpx.Timeout(PLATFORM_TIMEOUT, connect=PLATFORM_TIMEOUT)

DOCTOLIB_HEADERS = {
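
For reference, the page arithmetic above is just a ceiling division over the scraping window. A compact equivalent (a sketch, not the project's code) with two worked values:

def pages_needed(scraped_days: int, days_per_page: int) -> int:
    # Ceiling division, mirroring the if/else above.
    return -(-scraped_days // days_per_page)

assert pages_needed(28, 7) == 4  # exact division: 4 full pages
assert pages_needed(30, 7) == 5  # 4 full pages + 1 partial page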
Example #4
import json
import logging
from typing import List
from urllib.parse import urlparse, urlencode, urlunparse, parse_qs, unquote
import datetime as dt
import pytz
import requests

import time
from datetime import date, timedelta, datetime

from pathlib import Path
from unidecode import unidecode

from utils.vmd_config import get_conf_inputs, get_config

RESERVED_CENTERS = get_config().get("reserved_centers", [])


def load_insee() -> dict:
    with open(get_conf_inputs().get("postalcode_to_insee")) as json_file:
        return json.load(json_file)


def load_cedex_to_insee() -> dict:
    with open(get_conf_inputs().get("cedex_to_insee")) as json_file:
        return json.load(json_file)


logger = logging.getLogger("scraper")
insee = load_insee()
cedex_to_insee = load_cedex_to_insee()
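
A small lookup sketch on top of load_insee(); the flat postal-code-to-INSEE shape of the JSON file is an assumption made here for illustration, not something the excerpt guarantees:

# Hypothetical helper; assumes a flat {"75001": "75101", ...} style mapping.
def postal_code_to_insee(postal_code: str) -> str:
    return load_insee().get(postal_code.strip().zfill(5), "")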
Example #5
import json
import logging
from datetime import datetime

import pytz
import requests

from pathlib import Path
from stats_generation.stats_center_types import generate_stats_center_types
from stats_generation.stats_map import make_maps
from utils.vmd_config import get_conf_outstats, get_conf_outputs, get_config, get_conf_inputs
from utils.vmd_logger import enable_logger_for_production

logger = logging.getLogger("scraper")

DATA_AUTO = get_config().get("base_urls").get("gitlab_public_path")


def generate_stats_date(centres_stats):
    # Build the per-date series, seeded from the history already published on the data-auto mirror.
    stats_path = get_conf_inputs().get("from_gitlab_public").get("by_date")
    stats_data = {
        "dates": [],
        "total_centres_disponibles": [],
        "total_centres": [],
        "total_appointments": [],
    }

    try:
        history_rq = requests.get(f"{DATA_AUTO}{stats_path}")
        data = history_rq.json()
        if data:
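
The excerpt cuts off right after fetching the published history. One hardening option for that fetch, shown as a sketch rather than the project's code: bound the request time, fail fast on HTTP errors, and fall back to the empty series initialised above (the payload shape is assumed to match stats_data).

import requests

def fetch_published_history(url: str) -> dict:
    # Returns the published by-date history, or an empty dict when unavailable.
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        return {}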
Example #6
from scraper.pattern.vaccine import Vaccine, get_vaccine_name
from utils.vmd_config import get_conf_platform, get_config
from utils.vmd_utils import departementUtils, DummyQueue


AVECMONDOC_CONF = get_conf_platform("avecmondoc")
AVECMONDOC_ENABLED = AVECMONDOC_CONF.get("enabled", False)
AVECMONDOC_API = AVECMONDOC_CONF.get("api", {})
AVECMONDOC_SCRAPER = AVECMONDOC_CONF.get("center_scraper", {})
AVECMONDOC_FILTERS = AVECMONDOC_CONF.get("filters", {})
AVECMONDOC_VALID_REASONS = AVECMONDOC_FILTERS.get("valid_reasons", [])
AVECMONDOC_HEADERS = {
    # The platform API key is sent as the User-Agent header.
    "User-Agent": os.environ.get("AVECMONDOC_API_KEY", ""),
}

NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)
AVECMONDOC_DAYS_PER_PAGE = AVECMONDOC_CONF.get("days_per_page", 7)

timeout = httpx.Timeout(AVECMONDOC_CONF.get("timeout", 25), connect=AVECMONDOC_CONF.get("timeout", 25))
DEFAULT_CLIENT = httpx.Client(headers=AVECMONDOC_HEADERS, timeout=timeout)
logger = logging.getLogger("scraper")
paris_tz = timezone("Europe/Paris")


def search(client: httpx.Client = DEFAULT_CLIENT) -> Optional[list]:
    url = AVECMONDOC_API.get("search", "")
    limit = AVECMONDOC_API.get("search_page_size", 10)
    page = 1
    result = {"data": [], "hasNextPage": True}
    while result["hasNextPage"]:
        payload = {"limit": limit, "page": page}
Example #7
from cachecontrol.caches.file_cache import FileCache

PLATFORM = "mesoigner"

PLATFORM_CONF = get_conf_platform("mesoigner")
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)
MESOIGNER_HEADERS = {
    "Authorization": f'Mesoigner apikey="{os.environ.get("MESOIGNER_API_KEY", "")}"',
}
MESOIGNER_APIs = PLATFORM_CONF.get("api", "")

SCRAPER_CONF = PLATFORM_CONF.get("center_scraper", {})
CENTER_LIST_URL = PLATFORM_CONF.get("api", {}).get("center_list", {})

BOOSTER_VACCINES = get_config().get("vaccines_allowed_for_booster", [])

timeout = httpx.Timeout(PLATFORM_CONF.get("timeout", 30),
                        connect=PLATFORM_CONF.get("timeout", 30))

if os.getenv("WITH_TOR", "no") == "yes":
    session = requests.Session()
    session.proxies = {  # type: ignore
        "http": "socks5://127.0.0.1:9050",
        "https": "socks5://127.0.0.1:9050",
    }
    DEFAULT_CLIENT = session  # type: ignore
else:
    DEFAULT_CLIENT = httpx.Client(timeout=timeout)

logger = logging.getLogger("scraper")
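
Because the Tor branch yields a requests.Session and the default branch an httpx.Client, downstream code has to cope with two client types. One alternative sketch keeps a single httpx.Client in both branches; it assumes the httpx[socks] extra is installed, and the keyword depends on the httpx version (proxies= in older releases, proxy= in newer ones). This is not the project's current approach.

import os
import httpx

def build_client(timeout: httpx.Timeout) -> httpx.Client:
    # Route through the local Tor SOCKS proxy only when WITH_TOR=yes.
    if os.getenv("WITH_TOR", "no") == "yes":
        return httpx.Client(timeout=timeout, proxies="socks5://127.0.0.1:9050")
    return httpx.Client(timeout=timeout)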
Example #8
MAPHARMA_ENABLED = MAPHARMA_CONF.get("enabled", False)

# timeout = httpx.Timeout(MAPHARMA_CONF.get("timeout", 25), connect=MAPHARMA_CONF.get("timeout", 25))

MAPHARMA_REFERER = MAPHARMA_CONF.get("headers", {}).get("referer", "")
MAPHARMA_HEADERS = {"User-Agent": os.environ.get("MAPHARMA_API_KEY", ""), "Referer": MAPHARMA_REFERER}

MAPHARMA_FILTERS = MAPHARMA_CONF.get("filters", {})
MAPHARMA_CAMPAGNES_VALIDES = MAPHARMA_CONF.get("valid_campaigns", [])
MAPHARMA_CAMPAGNES_INVALIDES = MAPHARMA_CONF.get("invalid_campaigns", [])

MAPHARMA_PATHS = MAPHARMA_CONF.get("paths", {})
MAPHARMA_OPEN_DATA_FILE = Path(MAPHARMA_PATHS.get("opendata", ""))
MAPHARMA_OPEN_DATA_URL = MAPHARMA_API.get("opendata", "")
MAPHARMA_OPEN_DATA_URL_FALLBACK = MAPHARMA_API.get("opendata_fallback", "")
NUMBER_OF_SCRAPED_DAYS = get_config().get("scrape_on_n_days", 28)

BOOSTER_VACCINES = get_config().get("vaccines_allowed_for_booster", [])

DEFAULT_CLIENT = httpx.Client(headers=MAPHARMA_HEADERS)
logger = logging.getLogger("scraper")
paris_tz = timezone("Europe/Paris")

campagnes_valides = []
campagnes_inconnues = []
opendata = []


def get_possible_dose_numbers(vaccine_list: list):
    if not vaccine_list:
        return []
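
The excerpt ends right after the empty-list guard. A hypothetical completion consistent with the BOOSTER_VACCINES setting above, offered as a sketch rather than the project's exact rule: doses 1 and 2 are always possible, plus a booster dose when any listed vaccine is allowed for boosters.

# Hypothetical completion (not part of the excerpt); reuses BOOSTER_VACCINES from above.
def get_possible_dose_numbers_sketch(vaccine_list: list) -> list:
    if not vaccine_list:
        return []
    if any(vaccine in BOOSTER_VACCINES for vaccine in vaccine_list):
        return [1, 2, 3]
    return [1, 2]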
Example #9
import pytz

from utils.vmd_config import get_config
from utils.vmd_utils import departementUtils
from scraper.pattern.center_location import CenterLocation
from scraper.pattern.scraper_result import ScraperResult
from scraper.pattern.vaccine import Vaccine

from utils.vmd_utils import urlify, format_phone_number
from utils.vmd_logger import get_logger

logger = get_logger()

# Day-interval boundaries used to split appointment counts
INTERVAL_SPLIT_DAYS = get_config().get("appointment_split_days", [])

# Chronodose parameters: eligible vaccines and the time window (in days)
CHRONODOSE_CONF = get_config().get("chronodoses", {})
CHRONODOSES = {
    "Vaccine": CHRONODOSE_CONF.get("vaccine", []),
    "Interval": CHRONODOSE_CONF.get("interval", 0),
}


class CenterInfo:
    def __init__(
        self,
        departement: str,
        nom: str,
        url: str,
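
A small sketch of how the CHRONODOSES parameters above could be applied to a candidate slot; the date-difference rule is an assumption made for illustration, not the project's exact logic.

from datetime import datetime, timedelta

# Hypothetical check, reusing the CHRONODOSES dict defined above.
def is_chronodose_sketch(vaccine, slot_datetime: datetime, now: datetime) -> bool:
    # The vaccine must be whitelisted and the slot must fall within the configured window.
    return (
        vaccine in CHRONODOSES["Vaccine"]
        and now <= slot_datetime <= now + timedelta(days=CHRONODOSES["Interval"])
    )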
Example #10
import dateutil
from dateutil.tz import gettz
from datetime import datetime, timedelta
from typing import Iterator, Union
from .resource import Resource
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau
from utils.vmd_config import get_config

DEFAULT_NEXT_DAYS = get_config().get("scrape_on_n_days", 7)

DEFAULT_TAGS = {"all": [lambda creneau: True]}


class ResourceCreneauxQuotidiens(Resource):
    def __init__(self,
                 departement,
                 next_days=DEFAULT_NEXT_DAYS,
                 now=datetime.now,
                 tags=DEFAULT_TAGS):
        super().__init__()
        self.departement = departement
        self.now = now
        self.next_days = next_days
        today = now(tz=gettz("Europe/Paris"))
        self.dates = {}
        for days_from_now in range(0, next_days + 1):
            day = today + timedelta(days=days_from_now)
            date = as_date(day)
            self.dates[date] = ResourceCreneauxParDate(date=date, tags=tags)

    def on_creneau(self, creneau: Union[Creneau, PasDeCreneau]):
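
on_creneau is cut off after its signature, and as_date is defined elsewhere in the module. A hypothetical sketch of both, keyed on the ISO date string, to illustrate how a slot would be routed to its per-date bucket; the field and method names (creneau.horaire, the per-date on_creneau) are assumptions.

from datetime import datetime

# Hypothetical helpers (not the project's exact code).
def as_date_sketch(day: datetime) -> str:
    # Collapse a timezone-aware datetime to its calendar date, as an ISO string.
    return day.date().isoformat()

def on_creneau_sketch(resource: "ResourceCreneauxQuotidiens", creneau) -> None:
    # Ignore "no slot" markers and slots outside the scraped window,
    # otherwise forward the slot to the matching per-date resource.
    if isinstance(creneau, PasDeCreneau):
        return
    date = as_date_sketch(creneau.horaire)
    if date in resource.dates:
        resource.dates[date].on_creneau(creneau)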
Example #11
from scraper.circuit_breaker import ShortCircuit
from scraper.creneaux.creneau import Creneau, Lieu, Plateforme, PasDeCreneau
import json
import requests
from cachecontrol import CacheControl
from cachecontrol.caches.file_cache import FileCache

# PLATFORM must be lowercase; keep the lower() call to guard against badly formatted input.
PLATFORM = "keldoc".lower()

PLATFORM_CONF = get_conf_platform("keldoc")
PLATFORM_ENABLED = PLATFORM_CONF.get("enabled", False)

PLATFORM_TIMEOUT = PLATFORM_CONF.get("timeout", 25)

SCRAPE_ONLY_ATLAS = get_config().get("scrape_only_atlas_centers", False)

timeout = httpx.Timeout(PLATFORM_TIMEOUT, connect=PLATFORM_TIMEOUT)
# change KELDOC_KILL_SWITCH to True to bypass Keldoc scraping

KELDOC_HEADERS = {
    "User-Agent": os.environ.get("KELDOC_API_KEY", ""),
}
session = httpx.Client(timeout=timeout, headers=KELDOC_HEADERS)
logger = logging.getLogger("scraper")


# Allow 10 failed runs of keldoc_slot before giving up for the next 200 calls
#@ShortCircuit("keldoc_slot", trigger=10, release=200, time_limit=40.0)
#@Profiling.measure("keldoc_slot")
def fetch_slots(request: ScraperRequest, creneau_q=DummyQueue()):
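
The circuit-breaker and profiling decorators are commented out in this excerpt. Re-enabling the circuit breaker would look like the sketch below; the trigger/release meanings follow the comment above the decorators, while the time_limit interpretation is an assumption.

# Sketch: re-enabled circuit breaker around the slot fetcher (not the excerpt's current state).
# trigger=10  -> open the circuit after 10 failed runs
# release=200 -> skip the next 200 calls before trying again
# time_limit  -> presumably treats runs longer than 40 s as failures (assumption)
@ShortCircuit("keldoc_slot", trigger=10, release=200, time_limit=40.0)
def fetch_slots_sketch(request: ScraperRequest, creneau_q=DummyQueue()):
    ...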