Example #1
0
def get_last_scans(centres):
    """Attach the last-scan-with-availabilities timestamp to each centre.

    Fetches the public ``info_centres`` JSON (configured under the
    ``last_scans`` input key) and, for every centre without an upcoming
    appointment, copies the recorded ``last_scan_with_availabilities``.
    Centres that do have an upcoming appointment are stamped with "now"
    (Europe/Paris).

    Args:
        centres: iterable of centre objects exposing ``url``,
            ``prochain_rdv`` and ``last_scan_with_availabilities``.

    Returns:
        list: the same centre objects, mutated in place.
    """
    url = get_conf_inputs().get("last_scans")
    last_scans = {}
    # list() replaces the original element-by-element copy loop.
    liste_centres = list(centres)

    try:
        response = requests.get(url)
        response.raise_for_status()
        info_centres = response.json()
    except Exception as e:
        # Best effort: missing history just means no timestamps are backfilled.
        logger.warning(f"Impossible de récupérer le fichier info_centres: {e}")
        info_centres = {}

    for last_centres in info_centres.values():
        for centre in last_centres["centres_disponibles"] + last_centres["centres_indisponibles"]:
            if "last_scan_with_availabilities" in centre:
                last_scans[centre["url"]] = centre["last_scan_with_availabilities"]

    for centre in liste_centres:
        if centre.prochain_rdv:
            # An appointment is currently available, so "now" is by
            # definition the last scan with availabilities.
            centre.last_scan_with_availabilities = dt.datetime.now(
                tz=pytz.timezone("Europe/Paris")).isoformat()
        elif centre.url in last_scans:
            centre.last_scan_with_availabilities = last_scans[centre.url]

    return liste_centres
Example #2
0
    def to_departement_number(insee_code: str) -> str:
        """
        Return the departement number matching a commune's INSEE code.

        An INSEE code is a 5-digit code, typically different from the postal
        code, but (in general) also starting with the departement's 2 digits.

        >>> to_departement_number('59350')  # Lille
        '59'
        >>> to_departement_number('75106')  # Paris 6e arr
        '75'
        >>> to_departement_number('97701')  # Saint-Barthélémy
        '971'
        """
        code = insee_code.strip()

        # When the vaccination-centre CSV is edited with a spreadsheet such
        # as Excel, the leading zero may be dropped if the column is parsed
        # as a number (e.g. 02401 becomes 2401, but we want the full 02401).
        if len(code) == 4:
            code = code.zfill(5)

        if len(code) != 5:
            raise ValueError(f"Code INSEE non-valide : {code}")

        with open(get_conf_inputs().get(
                "insee_to_postalcode_and_dep")) as json_file:
            insee_table = json.load(json_file)

        # Guard clause instead of if/else: unknown codes are rejected first.
        if code not in insee_table:
            raise ValueError(
                f"Code INSEE absent de la base des codes INSEE : {code}")

        return insee_table[code]["departement"]
Example #3
0
def get_blocklist_urls() -> set:
    """Return the set of centre URLs that must not be displayed.

    Reads the JSON file configured under the "blocklist" input key and
    collects the "url" field of every entry in "centers_not_displayed".
    """
    path_blocklist = get_conf_inputs().get("blocklist")
    # Context manager closes the file (the original bare open() leaked the
    # handle); a set comprehension replaces set([...]).
    with open(path_blocklist) as blocklist_file:
        blocked_centers = json.load(blocklist_file)["centers_not_displayed"]
    return {center["url"] for center in blocked_centers}
def _dump_stats_by_date(stats_path, stats_data):
    """Write stats_data as JSON under data/output/<stats_path>."""
    with open(Path("data", "output", stats_path), "w") as stat_graph_file:
        json.dump(stats_data, stat_graph_file)


def generate_stats_date(centres_stats):
    """Append the current hour's aggregate stats to the by-date history file.

    Fetches the existing history from the public data repository, appends one
    data point (availability counts from centres_stats["tout_departement"])
    for the current Europe/Paris hour, and writes the result to
    data/output/<stats_path>. If the current hour is already recorded, the
    history is rewritten unchanged.

    Args:
        centres_stats: dict with a "tout_departement" entry exposing
            "disponibles", "total" and "creneaux" counts.
    """
    stats_path = get_conf_inputs().get("from_gitlab_public").get("by_date")
    stats_data = {
        "dates": [],
        "total_centres_disponibles": [],
        "total_centres": [],
        "total_appointments": [],
    }

    try:
        history_rq = requests.get(f"{DATA_AUTO}{stats_path}")
        # Without this, an HTTP error body could be parsed as history data.
        history_rq.raise_for_status()
        data = history_rq.json()
        if data:
            stats_data = data
    except Exception:
        logger.warning(
            f"Unable to fetch {DATA_AUTO}{stats_path}: generating a template file."
        )

    ctz = pytz.timezone("Europe/Paris")
    current_time = datetime.now(tz=ctz).strftime("%Y-%m-%d %H:00:00")
    if current_time in stats_data["dates"]:
        # Already recorded for this hour: rewrite the file as-is and stop.
        _dump_stats_by_date(stats_path, stats_data)
        logger.info(f"Stats file already updated: {stats_path}")
        return

    data_alldep = centres_stats["tout_departement"]
    stats_data["dates"].append(current_time)
    stats_data["total_centres_disponibles"].append(data_alldep["disponibles"])
    stats_data["total_centres"].append(data_alldep["total"])
    stats_data["total_appointments"].append(data_alldep["creneaux"])

    _dump_stats_by_date(stats_path, stats_data)
    logger.info(f"Updated stats file: {stats_path}")
Example #5
0
def parse_atlas():
    """Parse the data.gouv vaccination-centre atlas for Keldoc centres.

    Filters out professional-only centres and entries without a booking URL
    or gid, resolves Keldoc redirect URLs to the canonical booking URL, and
    keys the result by the centre's gid.

    Returns:
        dict: gid -> {"url_end": <3rd path segment of the booking URL>,
                      "id_adresse": <c_id_adr or None>}
    """
    url = get_conf_inputs().get("from_data_gouv_website").get("centers_gouv")
    data = requests.get(url).json()
    keldoc_gouv_centers = {}
    for center in data["features"]:
        properties = center["properties"]
        centre_pro = properties.get("c_reserve_professionels_sante", False)
        url = properties.get("c_rdv_site_web", None)
        id_adresse = properties.get("c_id_adr", None)
        gid = properties.get("c_gid", None)

        # Skip centres reserved for health professionals or missing data.
        if centre_pro or not url or not gid:
            continue
        if "keldoc" not in url:
            continue
        if "redirect" in url:
            # Resolve a Keldoc redirect URL into the canonical booking URL
            # using its dom/inst/user query parameters.
            parsed = parse.parse_qs(parse.urlparse(url).query,
                                    keep_blank_values=True)
            url = f'http://keldoc.com/{parsed["dom"][0]}/{parsed["inst"][0]}/{parsed["user"][0]}'

        # Third path segment; assumes at least /a/b/c in the path — same
        # assumption as the original code.
        end_url = parse.urlsplit(url).path.split("/")[3]

        keldoc_gouv_centers[gid] = {
            "url_end": end_url,
            "id_adresse": id_adresse
        }
    return keldoc_gouv_centers
def parse_atlas():
    """Parse the data.gouv vaccination-centre atlas for Doctolib centres.

    Filters out professional-only centres and entries without a booking URL
    or gid, and keys the result by the centre's gid.

    Returns:
        dict: gid -> {"url_end": <last path segment of the booking URL>,
                      "id_adresse": <c_id_adr or None>}
    """
    url = get_conf_inputs().get("from_data_gouv_website").get("centers_gouv")
    data = requests.get(url).json()
    doctolib_gouv_centers = {}
    for center in data["features"]:
        properties = center["properties"]
        centre_pro = properties.get("c_reserve_professionels_sante", False)
        url = properties.get("c_rdv_site_web", None)
        id_adresse = properties.get("c_id_adr", None)
        gid = properties.get("c_gid", None)

        # Skip centres reserved for health professionals or missing data.
        if centre_pro or not url or not gid:
            continue
        if "doctolib" not in url:
            continue

        end_url = parse.urlsplit(url).path.split("/")[-1]

        doctolib_gouv_centers[gid] = {
            "url_end": end_url,
            "id_adresse": id_adresse
        }
    return doctolib_gouv_centers
Example #7
0
def get_departements(excluded_departments: List[str] = None) -> List[str]:
    """Return departement names from the configured CSV.

    Args:
        excluded_departments: departement names to filter out. Defaults to
            no exclusions. (None replaces the original mutable-default
            ``[]`` — a classic Python trap, even if this function never
            mutated it.)

    Returns:
        list[str]: the "nom_departement" column, minus the exclusions.
    """
    excluded = set(excluded_departments or ())
    with open(get_conf_inputs()["from_main_branch"]["departements"],
              encoding="utf8",
              newline="\n") as csvfile:
        reader = csv.DictReader(csvfile)
        return [
            row["nom_departement"] for row in reader
            if row["nom_departement"] not in excluded
        ]
def _append_stat_entry(bucket, key, entry):
    """Append entry's disponible/total/creneaux counts to bucket[key],
    creating the per-key series lists on first sight."""
    series = bucket.setdefault(
        key, {"disponible": [], "total": [], "creneaux": []})
    for field in ("disponible", "total", "creneaux"):
        series[field].append(entry[field])


def generate_stats_center_types(centres_info):
    """Append the current hour's per-platform and per-centre-type stats.

    Fetches the existing history from the public data repository, appends one
    data point per platform and per centre type (computed by
    compute_plateforme_data), and writes the result to
    data/output/<stats_path>. If the current Europe/Paris hour is already
    recorded, the history is rewritten unchanged.
    """
    stats_path = get_conf_inputs().get("from_gitlab_public").get("center_types")
    stats_data = {"dates": [], "plateformes": {}, "center_types": {}}

    try:
        history_rq = requests.get(f"{DATA_AUTO}{stats_path}")
        # Without this, an HTTP error body could be parsed as history data.
        history_rq.raise_for_status()
        data = history_rq.json()
        if data:
            stats_data = data
    except Exception:
        logger.warning(f"Unable to fetch {DATA_AUTO}{stats_path}: generating a template file.")

    ctz = pytz.timezone("Europe/Paris")
    current_time = datetime.now(tz=ctz).strftime("%Y-%m-%d %H:00:00")
    if current_time in stats_data["dates"]:
        with open(f"data/output/{stats_path}", "w") as stat_graph_file:
            json.dump(stats_data, stat_graph_file)
        logger.info(f"Stats file already updated: {stats_path}")
        return

    # Older history files may predate the center_types series.
    stats_data.setdefault("center_types", {})

    stats_data["dates"].append(current_time)
    plateformes, center_types = compute_plateforme_data(centres_info)
    # The two loops below were duplicated verbatim in the original; both now
    # share _append_stat_entry.
    for plateforme, plateform_data in plateformes.items():
        _append_stat_entry(stats_data["plateformes"], plateforme, plateform_data)
    for center_type, center_type_data in center_types.items():
        _append_stat_entry(stats_data["center_types"], center_type, center_type_data)

    with open(f"data/output/{stats_path}", "w") as stat_graph_file:
        json.dump(stats_data, stat_graph_file)
    logger.info(f"Updated stats file: {stats_path}")
def get_departements():
    """Return departement names from the configured CSV, minus exclusions.

    Guyane uses Maiia and does not have doctolib pages, so it is excluded.
    """
    import csv

    NOT_INCLUDED_DEPARTEMENTS = {"Guyane"}
    with open(get_conf_inputs().get("departements"),
              encoding="utf8",
              newline="\n") as csvfile:
        reader = csv.DictReader(csvfile)
        # Filter while reading, instead of the original side-effecting list
        # comprehension around list.remove() — which also raised ValueError
        # whenever an excluded departement was absent from the CSV.
        return [
            str(row["nom_departement"]) for row in reader
            if row["nom_departement"] not in NOT_INCLUDED_DEPARTEMENTS
        ]
Example #10
0
def get_departements() -> List[str]:
    """Return Keldoc departement slugs ("<urlified-name>-<code>").

    Names listed in KELDOC_WEIRD_DEPS are replaced by their Keldoc spelling
    before urlification, and the slugs from KELDOC_MISSING_DEPS are appended.
    """
    with open(get_conf_inputs()["from_main_branch"]["departements"],
              encoding="utf8",
              newline="\n") as csvfile:
        rows = list(csv.DictReader(csvfile, delimiter=","))

        slugs = []
        for row in rows:
            name = row["nom_departement"]
            # Some departements use a different spelling on Keldoc.
            name = KELDOC_WEIRD_DEPS.get(name, name)
            slugs.append(f'{department_urlify(name)}-{row["code_departement"]}')

        return slugs + KELDOC_MISSING_DEPS
    def import_departements() -> List[str]:
        """
        Return the list of departement codes.

        >>> departements = import_departements()
        >>> len(departements)
        101
        >>> departements[:3]
        ['01', '02', '03']
        >>> departements[83]
        '83'
        >>> departements.index('2A')
        28
        >>> sorted(departements) == departements
        True
        """
        # encoding pinned to utf8 for consistency with the other CSV readers
        # in this codebase, instead of relying on the platform default.
        with open(get_conf_inputs().get("departements"),
                  encoding="utf8",
                  newline="\n") as csvfile:
            reader = csv.DictReader(csvfile)
            return [str(row["code_departement"]) for row in reader]
Example #12
0
def load_cedex_to_insee() -> dict:
    """Load the CEDEX-code to INSEE-code mapping from the configured JSON file."""
    mapping_path = get_conf_inputs().get("cedex_to_insee")
    with open(mapping_path) as json_file:
        return json.load(json_file)
Example #13
0
def load_insee() -> dict:
    """Load the postal-code to INSEE-code mapping from the configured JSON file."""
    mapping_path = get_conf_inputs().get("postalcode_to_insee")
    with open(mapping_path) as json_file:
        return json.load(json_file)
Example #14
0
from utils.vmd_config import get_conf_inputs
from utils.vmd_logger import enable_logger_for_debug
from utils.vmd_utils import get_departements_numbers

# Shared HTTP client: 30 s total timeout and 30 s connect timeout.
timeout = httpx.Timeout(30.0, connect=30.0)
DEFAULT_CLIENT = httpx.Client(timeout=timeout)
logger = logging.getLogger("scraper")

# Colour palettes (hex) — presumably the map bucket colours, lightest to
# strongest; TODO confirm against the rendering code.
PALETTE_FB = ["#ffffff", "#eaeaea", "#cecece", "#80bdf4", "#2d8dfe"]
PALETTE_FB_RDV = [
    "#eaeaea", "#F44848", "#FF9255", "#FFD84F", "#FEE487", "#7DF0AE",
    "#27DF76", "#00B94F"
]
# Stroke and font colours for the map scale/legend ("échelle").
ECHELLE_STROKE = "#797979"
ECHELLE_FONT = "#424242"
# Input locations resolved from the configuration (map template, departement
# population CSV, gouv RDV CSV, and the public last-scans JSON).
MAP_SRC_PATH = Path(get_conf_inputs().get("from_main_branch").get("map"))
CSV_POP_URL = get_conf_inputs().get("from_main_branch").get("dep_pop")
CSV_RDV_URL = get_conf_inputs().get("from_data_gouv_website").get("rdv_gouv")
JSON_INFO_CENTRES_URL = get_conf_inputs().get("from_gitlab_public").get(
    "last_scans")


def get_pop():
    """Read the departement population mapping from the CSV at CSV_POP_URL.

    Returns:
        dict: departement code ("dep" column) -> population, kept as the raw
        CSV string exactly as the original did.
    """
    with open(CSV_POP_URL, encoding="utf-8", newline="") as file:
        csvreader = csv.DictReader(file, delimiter=";")
        # Dict comprehension replaces the original append-style loop.
        return {row["dep"]: row["departmentPopulation"] for row in csvreader}

Example #15
0
from utils.vmd_config import get_conf_inputs
from utils.vmd_logger import enable_logger_for_production, enable_logger_for_debug

# Shared HTTP client: 30 s total timeout and 30 s connect timeout.
timeout = httpx.Timeout(30.0, connect=30.0)
DEFAULT_CLIENT = httpx.Client(timeout=timeout)
logger = logging.getLogger("scraper")

# Colour palettes (hex) — presumably the map bucket colours, lightest to
# strongest; TODO confirm against the rendering code.
PALETTE_FB = ["#ffffff", "#eaeaea", "#cecece", "#80bdf4", "#2d8dfe"]
PALETTE_FB_RDV = [
    "#eaeaea", "#F44848", "#FF9255", "#FFD84F", "#FEE487", "#7DF0AE",
    "#27DF76", "#00B94F"
]
# Stroke and font colours for the map scale/legend ("échelle").
ECHELLE_STROKE = "#797979"
ECHELLE_FONT = "#424242"
# Input locations resolved from the configuration (map template, departement
# population CSV, gouv RDV CSV, and the public last-scans JSON).
MAP_SRC_PATH = Path(get_conf_inputs().get("map"))
CSV_POP_URL = get_conf_inputs().get("dep_pop")
CSV_RDV_URL = get_conf_inputs().get("rdv_gouv")
JSON_INFO_CENTRES_URL = get_conf_inputs().get("last_scans")


def get_csv(url: str,
            header=True,
            delimiter=";",
            encoding="utf-8",
            client: httpx.Client = DEFAULT_CLIENT):
    """Fetch a CSV resource over HTTP.

    NOTE(review): as visible here the function only performs the GET and
    logs HTTP status errors — ``header``, ``delimiter`` and ``encoding`` are
    unused and nothing is returned, which strongly suggests the parsing part
    of the body was truncated in this excerpt; confirm against the full file.

    Args:
        url: URL of the CSV resource.
        header: unused in the visible code (presumably whether row 1 is a header).
        delimiter: unused in the visible code (presumably the CSV delimiter).
        encoding: unused in the visible code (presumably the response encoding).
        client: httpx client used for the request (shared DEFAULT_CLIENT).
    """
    try:
        r = client.get(url)
        r.raise_for_status()
    except httpx.HTTPStatusError as hex:
        # On HTTP error: log and fall through (returns None as seen here).
        logger.warning(f"{url} returned error {hex.response.status_code}")
Example #16
0
def get_departements():
    """Return every departement name listed in the configured CSV."""
    departements_path = get_conf_inputs()["departements"]
    with open(departements_path, encoding="utf8", newline="\n") as csvfile:
        rows = csv.DictReader(csvfile)
        return [str(row["nom_departement"]) for row in rows]