Esempio n. 1
0
def test_import_departements():
    """Check the size, ordering and a few known entries of the department list."""
    codes = departementUtils.import_departements()

    # Overall size of the list.
    assert len(codes) == 101

    # Spot-check entries at the start, middle and end of the list.
    assert codes[:3] == ["01", "02", "03"]
    assert codes[83] == "83"
    assert codes[-1] == "976"

    # The non-numeric code "2A" must sit at its expected position.
    assert codes.index("2A") == 28

    # The list must already be in sorted order.
    assert codes == sorted(codes)
Esempio n. 2
0
def test_export_data(tmp_path):
    """Run ``export_data`` on a small fixture and check every file it writes.

    The fixture holds five centers: one in "01", two in "59", one in "92"
    and one attached to an unknown department ("1234"). The test checks that:

    * every department without data gets an empty JSON payload,
    * departments "01", "59" and "92" list their centers under the expected
      "centres_disponibles" / "centres_indisponibles" key,
    * ``centres_open_data.json`` contains the reduced per-center records and
      does not contain the center attached to the unknown department.

    Args:
        tmp_path: pytest-provided temporary directory (``pathlib.Path``).
    """
    # NOTE(review): some "prochain_rdv" values below lack the ISO "T"
    # separator ("2021-04-11:00:00"). The expected output uses the very same
    # strings, so they are only compared verbatim here.
    centres_cherchés_dict = [
        {
            "departement": "01",
            "nom": "Bugey Sud",
            "url": "https://example.com/bugey-sud",
            "plateforme": "Doctolib",
            "prochain_rdv": "2021-04-10T00:00:00",
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 1,
            "internal_id": None,
        },
        {
            "departement": "59",
            "nom": "CH Armentières",
            "url": "https://example.com/ch-armentieres",
            "plateforme": "Keldoc",
            "prochain_rdv": "2021-04-11:00:00",
            "erreur": None,
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 1,
            "internal_id": None,
        },
        {
            "departement": "59",
            "nom": "Clinique du Cambresis",
            "url": "https://example.com/clinique-du-cambresis",
            "plateforme": "Maiia",
            "prochain_rdv": None,
            "erreur": None,
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 1,
            "internal_id": None,
        },
        {
            "departement": "92",
            "nom": "Médiathèque Jacques GAUTIER",
            "url": "https://example.com/mediatheque-jacques-gautier",
            "plateforme": "Maiia",
            "prochain_rdv": "2021-04-11:00:00",
            "erreur": None,
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 0,
            "internal_id": None,
        },
        {
            # Unknown departement (edge case) => should be skipped w/o failing
            "departement": "1234",
            "nom": "Hôpital magique",
            "url": "https://example.com/hopital-magique",
            "plateforme": "Doctolib",
            "prochain_rdv": "2021-04-12:00:00",
            "erreur": None,
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 1,
            "internal_id": None,
        },
    ]
    centres_cherchés = [
        dict_to_center_info(center) for center in centres_cherchés_dict
    ]

    # Set the count explicitly on the center objects: every center except the
    # Médiathèque is expected to expose exactly one appointment.
    for center in centres_cherchés:
        if center.nom != "Médiathèque Jacques GAUTIER":
            center.appointment_count = 1

    out_dir = tmp_path / "out"
    out_dir.mkdir()
    outpath_format = str(out_dir / "{}.json")

    fake_now = dt.datetime(2021, 4, 4)
    # NOTE(review): the return value is discarded; presumably this is called
    # for a side effect -- confirm, or drop the call.
    get_start_date()
    # Freeze "now" so that "last_updated" is deterministic in the output.
    with mock_datetime_now(fake_now):
        export_data(centres_cherchés, outpath_format=outpath_format)

    # All departements for which we don't have data should be empty.
    for departement in departementUtils.import_departements():
        if departement in ("01", "59", "92"):
            continue
        content = json.loads((out_dir / f"{departement}.json").read_text())
        assert content == {
            "version": 1,
            "centres_disponibles": [],
            "centres_indisponibles": [],
            "last_updated": "2021-04-04T00:00:00",
        }

    # Departements 01, 59 and 92 should contain expected data.

    content = json.loads((out_dir / "01.json").read_text())
    assert content == {
        "version": 1,
        "centres_disponibles": [
            {
                "departement": "01",
                "nom": "Bugey Sud",
                "url": "https://example.com/bugey-sud",
                "plateforme": "Doctolib",
                "prochain_rdv": "2021-04-10T00:00:00",
                "location": None,
                "metadata": None,
                "type": None,
                "appointment_by_phone_only": False,
                "appointment_count": 1,
                "internal_id": None,
                "vaccine_type": None,
                "erreur": None,
            },
        ],
        "centres_indisponibles": [],
        "last_updated": "2021-04-04T00:00:00",
    }

    # 59: one center with a next appointment, one without.
    content = json.loads((out_dir / "59.json").read_text())
    assert content == {
        "version": 1,
        "centres_disponibles": [
            {
                "departement": "59",
                "nom": "CH Armentières",
                "url": "https://example.com/ch-armentieres",
                "plateforme": "Keldoc",
                "prochain_rdv": "2021-04-11:00:00",
                "location": None,
                "metadata": None,
                "appointment_by_phone_only": False,
                "type": None,
                "appointment_count": 1,
                "internal_id": None,
                "vaccine_type": None,
                "erreur": None,
            },
        ],
        "centres_indisponibles": [
            {
                "departement": "59",
                "nom": "Clinique du Cambresis",
                "url": "https://example.com/clinique-du-cambresis",
                "plateforme": "Maiia",
                "prochain_rdv": None,
                "location": None,
                "metadata": None,
                "type": None,
                "appointment_count": 1,
                "internal_id": None,
                "appointment_by_phone_only": False,
                "vaccine_type": None,
                "erreur": None,
            },
        ],
        "last_updated": "2021-04-04T00:00:00",
    }

    # 92: the center has a next appointment date but a count of 0, so it is
    # expected under "centres_indisponibles".
    content = json.loads((out_dir / "92.json").read_text())
    assert content == {
        "version": 1,
        "centres_disponibles": [],
        "centres_indisponibles": [
            {
                "departement": "92",
                "nom": "Médiathèque Jacques GAUTIER",
                "url": "https://example.com/mediatheque-jacques-gautier",
                "location": None,
                "metadata": None,
                "prochain_rdv": "2021-04-11:00:00",
                "plateforme": "Maiia",
                "type": None,
                "appointment_by_phone_only": False,
                "appointment_count": 0,
                "internal_id": None,
                "vaccine_type": None,
                "erreur": None,
            },
        ],
        "last_updated": "2021-04-04T00:00:00",
    }

    # Check the export in the format published on the data.gouv.fr platform.
    content = json.loads((out_dir / "centres_open_data.json").read_text())
    assert content == [
        {
            "departement": "01",
            "nom": "Bugey Sud",
            "url": "https://example.com/bugey-sud",
            "plateforme": "Doctolib",
        },
        {
            "departement": "59",
            "nom": "CH Armentières",
            "url": "https://example.com/ch-armentieres",
            "plateforme": "Keldoc",
        },
        {
            "departement": "59",
            "nom": "Clinique du Cambresis",
            "url": "https://example.com/clinique-du-cambresis",
            "plateforme": "Maiia",
        },
        {
            "departement": "92",
            "nom": "Médiathèque Jacques GAUTIER",
            "url": "https://example.com/mediatheque-jacques-gautier",
            "plateforme": "Maiia",
        },
    ]
Esempio n. 3
0
def export_data(centres_cherchés: Iterator[CenterInfo],
                last_scrap,
                outpath_format="data/output/{}.json"):
    """Export the scraped centers to JSON files, grouped by department.

    Files written (``outpath_format`` is formatted with each name):

    * ``info_centres``: the whole per-department mapping, dumped before the
      per-department de-duplication and sorting below.
    * ``centres_open_data``: one reduced record per exported center.
    * one file per department code returned by
      ``departementUtils.import_departements()``.

    Args:
        centres_cherchés: centers to export. ``nom`` is stripped in place on
            every center that is not blocked.
        last_scrap: value stored as-is under "last_scrap" for each department.
        outpath_format: ``str.format`` template receiving the file stem.

    Returns:
        A tuple ``(compte_centres, compte_centres_avec_dispo,
        bloqués_doctolib)``: the number of non-blocked centers seen
        (including those later skipped for an unknown department or a
        duplicated internal_id), the number of exported centers with
        available appointments, and the number of unavailable centers whose
        error is a ``BlockedByDoctolibError``.
    """
    compte_centres = 0
    compte_centres_avec_dispo = 0
    bloqués_doctolib = 0
    centres_open_data = []
    # Only used for membership tests, so a set keeps the lookup O(1).
    internal_ids = set()
    paris_tz = pytz.timezone("Europe/Paris")
    par_departement = {
        code: {
            "version": 1,
            "last_updated": dt.datetime.now(tz=paris_tz).isoformat(),
            "last_scrap": last_scrap,
            "centres_disponibles": [],
            "centres_indisponibles": [],
        }
        for code in departementUtils.import_departements()
    }

    # Keys stripped from a center record for the open-data export.
    skipped_keys = [
        "prochain_rdv",
        "internal_id",
        "metadata",
        "location",
        "appointment_count",
        "appointment_schedules",
        "erreur",
        "ville",
        "type",
        "vaccine_type",
        "appointment_by_phone_only",
        "last_scan_with_availabilities",
    ]

    blocklist = get_blocklist_urls()

    def is_blocked_center(center):
        """Tell whether a center is reserved or listed in the blocklist."""
        return (is_reserved_center(center) or
                is_in_blocklist(center, blocklist))

    # The input is expected to be duplicate-free already; the internal_id
    # check below is only a safety net.
    for centre in centres_cherchés:
        if is_blocked_center(centre):
            if centre.has_available_appointments():
                logger.warning(
                    f"{centre.nom} {centre.internal_id} has available appointments but is blocked"
                )
            continue

        compte_centres += 1

        centre.nom = centre.nom.strip()
        if centre.departement not in par_departement:
            logger.warning(
                f"Center {centre.nom} ({centre.departement}) could not be attached to a valid department"
            )
            continue
        erreur = centre.erreur
        if centre.internal_id and centre.internal_id in internal_ids:  # pragma: no cover
            logger.warning(
                f"Found a duplicated internal_id: {centre.nom} ({centre.departement}) -> {centre.internal_id}"
            )
            continue
        # Falsy ids (None, "") are never looked up above, so skip storing them.
        if centre.internal_id:
            internal_ids.add(centre.internal_id)

        centres_open_data.append(copy_omit_keys(centre.default(),
                                                skipped_keys))

        departement_data = par_departement[centre.departement]
        if centre.has_available_appointments():
            compte_centres_avec_dispo += 1
            departement_data["centres_disponibles"].append(centre.default())
        else:
            departement_data["centres_indisponibles"].append(centre.default())
            if isinstance(erreur, BlockedByDoctolibError):
                departement_data["doctolib_bloqué"] = True
                bloqués_doctolib += 1

    outpath = outpath_format.format("info_centres")
    with open(outpath, "w") as info_centres:
        json.dump(par_departement, info_centres, indent=2)

    outpath = outpath_format.format("centres_open_data")
    with open(outpath, "w") as centres_file:
        json.dump(centres_open_data, centres_file, indent=2)

    for departement, disponibilités in par_departement.items():
        # Refresh the timestamp right before writing each department file.
        disponibilités["last_updated"] = dt.datetime.now(
            tz=paris_tz).isoformat()
        disponibilités["centres_disponibles"] = sorted(
            deduplicates_names(disponibilités["centres_disponibles"]),
            key=sort_center)
        disponibilités["centres_indisponibles"] = deduplicates_names(
            disponibilités["centres_indisponibles"])
        outpath = outpath_format.format(departement)
        logger.debug(f"writing result to {outpath} file")
        with open(outpath, "w") as outfile:
            json.dump(disponibilités, outfile, indent=2)

    return compte_centres, compte_centres_avec_dispo, bloqués_doctolib
Esempio n. 4
0
def export_data(centres_cherchés, outpath_format='data/output/{}.json'):
    """Dump the scraped centers into JSON files, grouped by department.

    Writes an ``info_centres`` file (the full per-department mapping), a
    ``centres_open_data`` file (reduced center records) and one file per
    department code, each path being ``outpath_format`` formatted with the
    file stem.

    Args:
        centres_cherchés: iterable of centers; ``nom`` is stripped in place.
        outpath_format: ``str.format`` template receiving the file stem.

    Returns:
        Tuple of (non-reserved centers seen, centers with availabilities,
        unavailable centers whose error is a ``BlockedByDoctolibError``).
    """
    nb_centres = 0
    nb_avec_dispo = 0
    nb_bloques = 0
    open_data = []

    # One empty payload per known department code.
    by_dept = {}
    for code in departementUtils.import_departements():
        by_dept[code] = {
            'version': 1,
            'last_updated': dt.datetime.now(
                tz=pytz.timezone('Europe/Paris')).isoformat(),
            'centres_disponibles': [],
            'centres_indisponibles': [],
        }

    # The input is expected to be duplicate-free at this point.
    for centre in centres_cherchés:
        centre.nom = centre.nom.strip()

        if is_reserved_center(centre):
            continue
        nb_centres += 1

        dept = centre.departement
        if dept not in by_dept:
            logger.warning(
                f"le centre {centre.nom} ({dept}) n'a pas pu être rattaché à un département connu"
            )
            continue

        erreur = centre.erreur
        omitted = [
            'prochain_rdv', 'internal_id', 'metadata', 'location',
            'appointment_count', 'erreur', 'ville', 'type', 'vaccine_type'
        ]
        open_data.append(copy_omit_keys(centre.default(), omitted))

        # A center is available when it has a next appointment date and a
        # non-zero appointment count.
        has_slots = (centre.prochain_rdv is not None
                     and centre.appointment_count != 0)
        if has_slots:
            nb_avec_dispo += 1
            by_dept[dept]['centres_disponibles'].append(centre.default())
            continue

        by_dept[dept]['centres_indisponibles'].append(centre.default())
        if isinstance(erreur, BlockedByDoctolibError):
            by_dept[dept]['doctolib_bloqué'] = True
            nb_bloques += 1

    # Aggregate files first, in this order: full mapping, then open data.
    for stem, payload in (("info_centres", by_dept),
                          ("centres_open_data", open_data)):
        with open(outpath_format.format(stem), "w") as handle:
            json.dump(payload, handle, indent=2)

    # Then one file per department, de-duplicated by name.
    for dept, payload in by_dept.items():
        payload['last_updated'] = dt.datetime.now(
            tz=pytz.timezone('Europe/Paris')).isoformat()
        if 'centres_disponibles' in payload:
            unique_available = deduplicates_names(
                payload['centres_disponibles'])
            payload['centres_disponibles'] = sorted(unique_available,
                                                    key=sort_center)
        payload["centres_indisponibles"] = deduplicates_names(
            payload['centres_indisponibles'])

        target = outpath_format.format(dept)
        logger.debug(f'writing result to {target} file')
        with open(target, "w") as outfile:
            outfile.write(json.dumps(payload, indent=2))

    return nb_centres, nb_avec_dispo, nb_bloques