def test_import_departements():
    """Check the size, ordering and a few known entries of the departement list."""
    codes = departementUtils.import_departements()

    # Overall size and boundaries of the list.
    assert len(codes) == 101
    assert codes[:3] == ["01", "02", "03"]
    assert codes[83] == "83"
    assert codes[-1] == "976"

    # "2A" must sit at its expected position in the list.
    assert codes.index("2A") == 28

    # The list is expected to come back already sorted.
    assert sorted(codes) == codes
def test_export_data(tmp_path):
    """Run ``export_data`` on a small fixture and check every file it writes.

    The fixture holds five centers: one in "01", two in "59", one in "92" and
    one attached to an unknown departement ("1234"). The test then checks:

    * every departement other than "01", "59" and "92" gets an empty export;
    * "01", "59" and "92" contain the expected available / unavailable centers;
    * ``centres_open_data.json`` lists the four centers of known departements
      with only their public keys.

    Args:
        tmp_path: pytest fixture giving a temporary directory for the output.
    """
    centres_cherchés_dict = [
        {
            "departement": "01",
            "nom": "Bugey Sud",
            "url": "https://example.com/bugey-sud",
            "plateforme": "Doctolib",
            "prochain_rdv": "2021-04-10T00:00:00",
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 1,
            "internal_id": None,
        },
        {
            "departement": "59",
            "nom": "CH Armentières",
            "url": "https://example.com/ch-armentieres",
            "plateforme": "Keldoc",
            "prochain_rdv": "2021-04-11:00:00",
            "erreur": None,
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 1,
            "internal_id": None,
        },
        {
            "departement": "59",
            "nom": "Clinique du Cambresis",
            "url": "https://example.com/clinique-du-cambresis",
            "plateforme": "Maiia",
            "prochain_rdv": None,
            "erreur": None,
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 1,
            "internal_id": None,
        },
        {
            "departement": "92",
            "nom": "Médiathèque Jacques GAUTIER",
            "url": "https://example.com/mediatheque-jacques-gautier",
            "plateforme": "Maiia",
            "prochain_rdv": "2021-04-11:00:00",
            "erreur": None,
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 0,
            "internal_id": None,
        },
        {
            # Unknown departement (edge case) => should be skipped w/o failing
            "departement": "1234",
            "nom": "Hôpital magique",
            "url": "https://example.com/hopital-magique",
            "plateforme": "Doctolib",
            "prochain_rdv": "2021-04-12:00:00",
            "erreur": None,
            "location": None,
            "metadata": None,
            "type": None,
            "appointment_count": 1,
            "internal_id": None,
        },
    ]
    centres_cherchés = [
        dict_to_center_info(center) for center in centres_cherchés_dict
    ]
    # Force a count of 1 on every center except the one meant to stay at 0.
    for center in centres_cherchés:
        if center.nom != "Médiathèque Jacques GAUTIER":
            center.appointment_count = 1

    out_dir = tmp_path / "out"
    out_dir.mkdir()
    outpath_format = str(out_dir / "{}.json")

    fake_now = dt.datetime(2021, 4, 4)
    # NOTE(review): the return value is discarded; presumably called for a
    # side effect before the clock is mocked -- confirm it is still needed.
    get_start_date()
    with mock_datetime_now(fake_now):
        export_data(centres_cherchés, outpath_format=outpath_format)

    # All departements for which we don't have data should be empty.
    for departement in departementUtils.import_departements():
        if departement in ("01", "59", "92"):
            continue
        content = json.loads((out_dir / f"{departement}.json").read_text())
        assert content == {
            "version": 1,
            "centres_disponibles": [],
            "centres_indisponibles": [],
            "last_updated": "2021-04-04T00:00:00",
        }

    # Departements 01, 59 and 92 should contain expected data.
    content = json.loads((out_dir / "01.json").read_text())
    assert content == {
        "version": 1,
        "centres_disponibles": [
            {
                "departement": "01",
                "nom": "Bugey Sud",
                "url": "https://example.com/bugey-sud",
                "plateforme": "Doctolib",
                "prochain_rdv": "2021-04-10T00:00:00",
                "location": None,
                "metadata": None,
                "type": None,
                "appointment_by_phone_only": False,
                "appointment_count": 1,
                "internal_id": None,
                "vaccine_type": None,
                "erreur": None,
            },
        ],
        "centres_indisponibles": [],
        "last_updated": "2021-04-04T00:00:00",
    }

    content = json.loads((out_dir / "59.json").read_text())
    assert content == {
        "version": 1,
        "centres_disponibles": [
            {
                "departement": "59",
                "nom": "CH Armentières",
                "url": "https://example.com/ch-armentieres",
                "plateforme": "Keldoc",
                "prochain_rdv": "2021-04-11:00:00",
                "location": None,
                "metadata": None,
                "appointment_by_phone_only": False,
                "type": None,
                "appointment_count": 1,
                "internal_id": None,
                "vaccine_type": None,
                "erreur": None,
            },
        ],
        "centres_indisponibles": [
            {
                "departement": "59",
                "nom": "Clinique du Cambresis",
                "url": "https://example.com/clinique-du-cambresis",
                "plateforme": "Maiia",
                "prochain_rdv": None,
                "location": None,
                "metadata": None,
                "type": None,
                "appointment_count": 1,
                "internal_id": None,
                "appointment_by_phone_only": False,
                "vaccine_type": None,
                "erreur": None,
            },
        ],
        "last_updated": "2021-04-04T00:00:00",
    }

    content = json.loads((out_dir / "92.json").read_text())
    assert content == {
        "version": 1,
        "centres_disponibles": [],
        "centres_indisponibles": [
            {
                "departement": "92",
                "nom": "Médiathèque Jacques GAUTIER",
                "url": "https://example.com/mediatheque-jacques-gautier",
                "location": None,
                "metadata": None,
                "prochain_rdv": "2021-04-11:00:00",
                "plateforme": "Maiia",
                "type": None,
                "appointment_by_phone_only": False,
                "appointment_count": 0,
                "internal_id": None,
                "vaccine_type": None,
                "erreur": None,
            },
        ],
        "last_updated": "2021-04-04T00:00:00",
    }

    # Check the export in the format published on the data.gouv.fr platform.
    content = json.loads((out_dir / "centres_open_data.json").read_text())
    assert content == [
        {
            "departement": "01",
            "nom": "Bugey Sud",
            "url": "https://example.com/bugey-sud",
            "plateforme": "Doctolib",
        },
        {
            "departement": "59",
            "nom": "CH Armentières",
            "url": "https://example.com/ch-armentieres",
            "plateforme": "Keldoc",
        },
        {
            "departement": "59",
            "nom": "Clinique du Cambresis",
            "url": "https://example.com/clinique-du-cambresis",
            "plateforme": "Maiia",
        },
        {
            "departement": "92",
            "nom": "Médiathèque Jacques GAUTIER",
            "url": "https://example.com/mediatheque-jacques-gautier",
            "plateforme": "Maiia",
        },
    ]
def export_data(centres_cherchés: Iterator[CenterInfo], last_scrap, outpath_format="data/output/{}.json"):
    """Group scraped centers by departement and write the JSON export files.

    Each output path is ``outpath_format.format(name)``. Three kinds of files
    are written:

    * ``info_centres``: all departements in a single document, dumped before
      the per-departement lists are deduplicated and sorted;
    * ``centres_open_data``: a flat list with one entry per kept center, built
      by ``copy_omit_keys`` from ``centre.default()``;
    * one file per departement code, where both lists went through
      ``deduplicates_names`` and the available centers are ordered with
      ``sort_center``.

    Args:
        centres_cherchés: Centers to export. ``nom`` is stripped in place on
            every center that is not blocked.
        last_scrap: Stored unchanged under the ``"last_scrap"`` key of every
            departement entry.
        outpath_format: ``str.format`` template with a single placeholder.

    Returns:
        A tuple ``(compte_centres, compte_centres_avec_dispo,
        bloqués_doctolib)``: the number of non-blocked centers seen (including
        those later skipped for an unknown departement or a duplicated
        ``internal_id``), the number of exported centers with available
        appointments, and the number of unavailable centers whose error is a
        ``BlockedByDoctolibError``.
    """
    compte_centres = 0
    compte_centres_avec_dispo = 0
    bloqués_doctolib = 0
    centres_open_data = []
    # A set keeps the duplicate check O(1) per center instead of O(n).
    seen_internal_ids = set()
    par_departement = {
        code: {
            "version": 1,
            "last_updated": dt.datetime.now(tz=pytz.timezone("Europe/Paris")).isoformat(),
            "last_scrap": last_scrap,
            "centres_disponibles": [],
            "centres_indisponibles": [],
        }
        for code in departementUtils.import_departements()
    }

    blocklist = get_blocklist_urls()

    def _is_blocked(center):
        # A center is left out when it is reserved or listed in the blocklist.
        return is_reserved_center(center) or is_in_blocklist(center, blocklist)

    # Keys removed from a center before it goes to the open-data export.
    skipped_keys = [
        "prochain_rdv",
        "internal_id",
        "metadata",
        "location",
        "appointment_count",
        "appointment_schedules",
        "erreur",
        "ville",
        "type",
        "vaccine_type",
        "appointment_by_phone_only",
        "last_scan_with_availabilities",
    ]

    for centre in centres_cherchés:
        if _is_blocked(centre):
            if centre.has_available_appointments():
                # Logger.warn is a deprecated alias; use warning() like the
                # other calls in this function.
                logger.warning(
                    f"{centre.nom} {centre.internal_id} has available appointments but is blocked"
                )
            continue

        compte_centres += 1
        centre.nom = centre.nom.strip()
        if centre.departement not in par_departement:
            logger.warning(
                f"Center {centre.nom} ({centre.departement}) could not be attached to a valid department"
            )
            continue
        erreur = centre.erreur

        # Only truthy ids take part in duplicate detection.
        if centre.internal_id and centre.internal_id in seen_internal_ids:  # pragma: no cover
            logger.warning(
                f"Found a duplicated internal_id: {centre.nom} ({centre.departement}) -> {centre.internal_id}"
            )
            continue
        seen_internal_ids.add(centre.internal_id)

        centres_open_data.append(copy_omit_keys(centre.default(), skipped_keys))

        if centre.has_available_appointments():
            compte_centres_avec_dispo += 1
            par_departement[centre.departement]["centres_disponibles"].append(
                centre.default())
        else:
            par_departement[centre.departement]["centres_indisponibles"].append(
                centre.default())
            if isinstance(erreur, BlockedByDoctolibError):
                par_departement[centre.departement]["doctolib_bloqué"] = True
                bloqués_doctolib += 1

    outpath = outpath_format.format("info_centres")
    with open(outpath, "w") as info_centres:
        json.dump(par_departement, info_centres, indent=2)

    outpath = outpath_format.format("centres_open_data")
    with open(outpath, "w") as centres_file:
        json.dump(centres_open_data, centres_file, indent=2)

    for departement, disponibilités in par_departement.items():
        # Refresh the timestamp just before each departement file is written.
        disponibilités["last_updated"] = dt.datetime.now(
            tz=pytz.timezone("Europe/Paris")).isoformat()
        if "centres_disponibles" in disponibilités:
            disponibilités["centres_disponibles"] = sorted(
                deduplicates_names(disponibilités["centres_disponibles"]),
                key=sort_center)
        disponibilités["centres_indisponibles"] = deduplicates_names(
            disponibilités["centres_indisponibles"])
        outpath = outpath_format.format(departement)
        logger.debug(f"writing result to {outpath} file")
        with open(outpath, "w") as outfile:
            outfile.write(json.dumps(disponibilités, indent=2))

    return compte_centres, compte_centres_avec_dispo, bloqués_doctolib
def export_data(centres_cherchés, outpath_format='data/output/{}.json'):
    """Split centers by departement and write the JSON export files.

    Writes ``info_centres`` (all departements, before deduplication and
    sorting), ``centres_open_data`` (flat list built with ``copy_omit_keys``)
    and one file per departement code. Every path is obtained with
    ``outpath_format.format(name)``.

    Args:
        centres_cherchés: Centers to export; ``nom`` is stripped in place on
            each of them.
        outpath_format: ``str.format`` template with a single placeholder.

    Returns:
        A tuple of three counters: non-reserved centers seen, centers exported
        as available, and unavailable centers whose error is a
        ``BlockedByDoctolibError``.
    """
    # NOTE(review): another ``export_data`` taking a ``last_scrap`` argument is
    # defined earlier in this file; if both live in the same module the later
    # definition wins -- confirm which one is intended.

    def _now_paris():
        # Current time in the Europe/Paris timezone, as an ISO 8601 string.
        return dt.datetime.now(tz=pytz.timezone('Europe/Paris')).isoformat()

    # Keys dropped from a center before it goes to the open-data export.
    omitted_keys = [
        'prochain_rdv', 'internal_id', 'metadata', 'location',
        'appointment_count', 'erreur', 'ville', 'type', 'vaccine_type'
    ]

    total = 0
    with_slots = 0
    doctolib_blocked = 0
    open_data = []

    by_departement = {}
    for code in departementUtils.import_departements():
        by_departement[code] = {
            'version': 1,
            'last_updated': _now_paris(),
            'centres_disponibles': [],
            'centres_indisponibles': [],
        }

    for centre in centres_cherchés:
        centre.nom = centre.nom.strip()
        if is_reserved_center(centre):
            continue
        total += 1

        code_departement = centre.departement
        bucket = by_departement.get(code_departement)
        if bucket is None:
            logger.warning(
                f"le centre {centre.nom} ({code_departement}) n'a pas pu être rattaché à un département connu"
            )
            continue

        error = centre.erreur
        open_data.append(copy_omit_keys(centre.default(), omitted_keys))

        # Available means: a next appointment exists and the count is not zero.
        if centre.prochain_rdv is not None and not centre.appointment_count == 0:
            with_slots += 1
            bucket['centres_disponibles'].append(centre.default())
            continue

        bucket['centres_indisponibles'].append(centre.default())
        if isinstance(error, BlockedByDoctolibError):
            bucket['doctolib_bloqué'] = True
            doctolib_blocked += 1

    # Global snapshot, written before per-departement deduplication/sorting.
    with open(outpath_format.format("info_centres"), "w") as info_file:
        json.dump(by_departement, info_file, indent=2)

    with open(outpath_format.format("centres_open_data"), 'w') as open_data_file:
        json.dump(open_data, open_data_file, indent=2)

    for code_departement, entry in by_departement.items():
        entry['last_updated'] = _now_paris()
        if 'centres_disponibles' in entry:
            unique_available = deduplicates_names(entry['centres_disponibles'])
            entry['centres_disponibles'] = sorted(unique_available, key=sort_center)
        entry["centres_indisponibles"] = deduplicates_names(
            entry['centres_indisponibles'])

        outpath = outpath_format.format(code_departement)
        logger.debug(f'writing result to {outpath} file')
        with open(outpath, "w") as outfile:
            outfile.write(json.dumps(entry, indent=2))

    return total, with_slots, doctolib_blocked