def sanitize_row(row):
    """Blank out count columns whose value is not an integer.

    Some published rows (2020-12-04, for instance) contain
    'Non communiqué' instead of a number; such cells are replaced by an
    empty string so they are skipped for now.

    The row is modified in place and also returned.
    """
    count_columns = (
        'Nombre de cas actuellement hospitalisés',
        'Nombre de cas actuellement en soins intensifs',
        'Nombre de nouveaux décès',
    )
    for column in count_columns:
        if not sc.represents_int(row.get(column)):
            row[column] = ''
    return row
if data: # nothing to do here continue # we should never reach here unless there is an unknown iframe raise Exception(f"issue parsing data in iframe {iframe_url}") # order dict by key to ensure the most recent entry is last ordered_rows = OrderedDict(sorted(rows.items())) for row_date, row in ordered_rows.items(): if not is_first: print('-' * 10) is_first = False dd = sc.DayData(canton='BL', url=main_url) dd.datetime = row['date'] dd.cases = sc.safeint(row.get('cases')) dd.hospitalized = sc.safeint(row.get('hospitalized')) dd.icu = sc.safeint(row.get('icu')) dd.vent = sc.safeint(row.get('vent')) dd.deaths = sc.safeint(row.get('deaths')) dd.recovered = sc.safeint(row.get('recovered')) dd.quarantined = sc.safeint(row.get('quarantined')) dd.quarantine_riskareatravel = sc.safeint( row.get('quarantine_riskareatravel')) if sc.represents_int(dd.quarantined) and sc.represents_int( dd.quarantine_riskareatravel): dd.quarantine_total = dd.quarantined + dd.quarantine_riskareatravel dd.isolated = sc.safeint(row.get('isolated')) print(dd)
r'befanden\ssich\s(\d+)\spositive\sF.lle\snoch\simmer\sin\sIsolation', content) dd.quarantined = sc.find(r'Isolation\sund\s(\d+)\sKontakte\sin\sQuarant.ne', content) dd.quarantine_riskareatravel = sc.find(r'\s(\d+)\sReisende\sin\sQuarant.ne', content) print(dd) xls_url = 'https://raw.githubusercontent.com/statistikZH/covid19_drop/master/Chiffres%20%20COVID-19%20Valais.xlsx' main_url = 'https://www.vs.ch/de/web/coronavirus' xls = sc.xlsdownload(xls_url, silent=True) rows = sc.parse_xls(xls, header_row=1) for i, row in enumerate(rows): if not isinstance(row['Date'], datetime.datetime): continue if not sc.represents_int(row['Cumul cas positifs']): continue if row['Nb nouveaux cas positifs'] is None and row[ "Nb nouvelles admissions à l'hôpital"] is None: continue dd = sc.DayData(canton='VS', url=main_url) dd.datetime = row['Date'].date().isoformat() dd.cases = row['Cumul cas positifs'] dd.hospitalized = row['Total hospitalisations COVID-19'] dd.new_hosp = row['Nb nouvelles admissions à l\'hôpital'] dd.icu = row['Patients COVID-19 aux SI total (y.c. intubés)'] dd.vent = row['Patients COVID-19 intubés'] dd.deaths = row['Cumul décès COVID-19'] if row['Nb de nouvelles sorties'] is not None: dd.recovered = sum(r['Nb de nouvelles sorties'] for r in rows[:i + 1])
# The date of the tracing figures is taken from the next "Stand ..." text
# found after the table.
isolated_date = isolated_table.find_next(string=re.compile("Stand")).string
dd_isolated.datetime = sc.find(r'Stand:?\s*(.+[0-9]{4})', isolated_date)

rows = isolated_table.find_all('tr')
header_row = rows[0]
headers = header_row.find_all('td') or header_row.find_all('th')
# Fail loudly if the table layout changes instead of scraping wrong cells.
assert len(
    headers) == 2, f"Number of header columns changed, {len(headers)} != 2"
assert headers[1].text.strip() == "Anzahl"

# Row label -> attribute of dd_isolated that receives the integer found in
# the second cell of that row.
label_to_field = {
    'Positiv Getestete im Tracing / in Isolation': 'isolated',
    'Kontaktpersonen im Tracing / in Quarantäne': 'quarantined',
}
for table_row in rows[1:]:
    cells = table_row.find_all('td')
    field = label_to_field.get(cells[0].text.strip())
    if field is None:
        # Not one of the rows we are interested in.
        continue
    value = cells[1].text.strip()
    if sc.represents_int(value):
        setattr(dd_isolated, field, int(value))

if dd_isolated:
    print(dd_isolated)
    print('-' * 10)

# historized cases
csv_url = 'https://www.sg.ch/ueber-den-kanton-st-gallen/statistik/covid-19/_jcr_content/Par/sgch_downloadlist/DownloadListPar/sgch_download.ocFile/KantonSG_C19-Faelle_download.csv'
d = sc.download(csv_url, silent=True)
# strip the "header" / description lines
'https://www.jura.ch/fr/Autorites/Coronavirus/Chiffres-H-JU/Evolution-des-cas-COVID-19-dans-le-Jura.html', silent=True) soup = BeautifulSoup(d, 'html.parser') box = soup.find('li', class_="ico-xlsx") xls_url = box.find('a').get('href') assert xls_url, "URL is empty" if not xls_url.startswith('http'): xls_url = f'https://www.jura.ch{xls_url}' xls = sc.xlsdownload(xls_url, silent=True) rows = sc.parse_xls(xls, header_row=0) is_first = True for i, row in enumerate(rows): if not isinstance(row['Date'], datetime.datetime): continue if not is_first: print('-' * 10) is_first = False dd = sc.DayData(canton='JU', url=xls_url) dd.datetime = row['Date'].date().isoformat() dd.cases = row['Cumul des cas confimés'] dd.hospitalized = row.get('Nb cas actuellement hospitalisés') dd.icu = row.get('Nb cas actuellement en SI') if sc.represents_int(row.get('Nombre de nouveaux décès')): dd.deaths = sum(r['Nombre de nouveaux décès'] for r in rows[:i + 1]) print(dd)
if dd: if not is_first: print('-' * 10) is_first = False print(dd) # get cases xls elem = driver.find_element_by_link_text('Indicateurs principaux') elem.click() case_xls_url = sgc.get_link_from_element(driver, 'download_table_indicateurs') assert case_xls_url, "Couldn't find cases XLS url" xls = sc.xlsdownload(case_xls_url, silent=True) rows = sc.parse_xls(xls, header_row=0) for row in rows: dd = sc.DayData(canton='GE', url=url) dd.datetime = row['Date'] dd.cases = row['Cumul cas COVID-19'] current_hosp = row[ 'Total hospitalisations COVID-19 actifs (en cours) canton (HUG-cliniques)'] if sc.represents_int(current_hosp) and int(current_hosp) >= 0: dd.hospitalized = current_hosp dd.icu = row['Patients COVID-19 actifs aux soins intensifs HUG'] dd.icf = row['Patients COVID-19 actifs aux soins intermédiaires HUG'] dd.deaths = row['Cumul décès COVID-19 '] if dd: if not is_first: print('-' * 10) is_first = False print(dd)
print('-' * 10) is_first = False print(dd) # cases + hospitalization rows = sc.parse_xls(xls, sheet_name='1. Covid-19-Daten', header_row=2) for row in rows: if not isinstance(row['A'], datetime.datetime): continue dd = sc.DayData(canton='AG', url=xls_url) dd.datetime = f"{row['A'].date().isoformat()} {row['A'].time().isoformat()}" dd.cases = row['Gesamtzahl'] non_icu = row['Bestätigte Fälle auf Abteilung (ohne IPS/IMC)'] icu = row['Bestätigte Fälle Intensivpflegestation (IPS)'] icf = row['Bestätigte Fälle Intermediate Care (IMC)'] if sc.represents_int(non_icu) and sc.represents_int( icu) and sc.represents_int(icf): dd.hospitalized = int(non_icu) + int(icu) + int(icf) dd.icu = icu dd.icf = icf dd.deaths = row['Gesamtzahl16'] dd.recovered = row['Gesamtzahl20'] if dd: if not is_first: print('-' * 10) is_first = False print(dd)
# The "Stand" timestamp of the contact-tracing section has been published in
# several formats; try each known pattern in turn and keep the first hit.
stand_patterns = (
    r'Contact\s+tracing\s+\(.*?Stand\:?\s+(.+?Uhr).*?\)',
    r'Contact\s+tracing.*Stand\:? (.+? Uhr).*?\)',
    r'Contact\s+tracing.*Stand ([0-9]+\.[0-9]+\.? \/ [0-9]+h)',
)
t = None
for stand_pattern in stand_patterns:
    t = sc.find(stand_pattern, d)
    if t:
        break
dd_ct.datetime = t

dd_ct.isolated = sc.find(
    r'Aktuell\s+COVID-19-Erkrankte\s+in\s+Isolation:\s+<strong>\s?(\d+)\s?</strong>',
    d)

quarantined_total = sc.find(
    r'Aktuell\s+im\s+Kanton\s+wohnhafte\s+(?:Kontaktpersonen|Personen)\s+in\s+Quarantäne:\s?<strong>\s?(\d+)\s?</strong>',
    d)
quarantined_travel = sc.find(
    r'davon\s+Anzahl\s+Personen.*die\s+aus\s+einem\s+<strong>Risikogebiet</strong>\s+in\s+die\s+Schweiz\s+eingereist\s+sind\s+und\s+aufgrund\s+dessen\s+aktuell\s+im\s+Kanton\s+in\s+Quarantäne\s+sind:\s+<strong>\s*(\d+)</strong>',
    d)
# The travel figure is required; abort if the page no longer provides it.
assert sc.represents_int(
    quarantined_travel
), f"quarantined_travel is not an integer: {quarantined_travel}"

if sc.represents_int(quarantined_total):
    dd_ct.quarantine_total = quarantined_total
    # The published total includes the risk-area travellers; the remainder
    # is reported as the regular quarantine figure.
    quarantined = int(quarantined_total) - int(quarantined_travel)
    assert quarantined >= 0, f"Quarantined is negative: {quarantined}"
    dd_ct.quarantined = quarantined
    dd_ct.quarantine_riskareatravel = quarantined_travel

if dd_ct:
    print(dd_ct)
    print('-' * 10)

# cases
if not is_first: print('-' * 10) is_first = False print('SH') sc.timestamp() print('Downloading:', xls_url) if isinstance(row['Uhrzeit'], datetime.datetime): print('Date and time:', row['Datum'].date().isoformat(), row['Uhrzeit'].time().isoformat()) elif row['Uhrzeit']: print('Date and time:', row['Datum'].strftime('%d.%m.%Y'), row['Uhrzeit']) else: print('Date and time:', row['Datum'].date().isoformat()) print('Confirmed cases:', row['Positiv']) if sc.represents_int(row['Hospitalisation isoliert\nbestätigt']) and sc.represents_int(row['Hospitalisiert_Intensiv']): print('Hospitalized:', (row.search(r'Hospitalisation isoliert\s+bestätigt.*$') + row['Hospitalisiert_Intensiv'])) print('ICU:', row['Hospitalisiert_Intensiv']) if row['Verstorben'] is not None: print('Deaths:', row['Verstorben']) isolated = row.search(r'Anzahl Personen\s+in Isolation.*') if isolated is not None: print('Isolated:', isolated) quarantined = row.search(r'Anzahl Personen\s+in Quarantäne\s+.*Kontaktpersonen.*') if quarantined is not None: print('Quarantined:', quarantined) quarantined_risk = row.search(r'Anzahl Personen\s+in Quarantäne\s+.*Rückkehr.*Risikoländer.*') if quarantined_risk is not None: print('Quarantined risk area travel:', quarantined_risk)
is_first = False print('SH') sc.timestamp() print('Downloading:', main_url) if isinstance(row['Uhrzeit'], datetime.datetime): print('Date and time:', row['Datum'].date().isoformat(), row['Uhrzeit'].time().isoformat()) elif row['Uhrzeit']: print('Date and time:', row['Datum'].strftime('%d.%m.%Y'), row['Uhrzeit']) else: print('Date and time:', row['Datum'].date().isoformat()) print('Confirmed cases:', row['Positiv']) if sc.represents_int(row.search( r'Hospitalisation isoliert\s+bestätigt.*$')) and sc.represents_int( row.search(r'Hospitalisiert.*Intensiv.*$')): print('Hospitalized:', (row.search(r'Hospitalisation isoliert\s+bestätigt.*$') + row.search(r'Hospitalisiert.*Intensiv.*$'))) print('ICU:', row.search(r'Hospitalisiert.*Intensiv.*$')) if row['Verstorben'] is not None: print('Deaths:', row['Verstorben']) isolated = row.search(r'Anzahl Personen\s+in Isolation.*') if isolated is not None: print('Isolated:', isolated) quarantined = row.search( r'Anzahl Personen\s+in Quarantäne\s+.*Kontaktpersonen.*') if quarantined is not None: print('Quarantined:', quarantined)
# Download the spreadsheet referenced by meta['url'] and print one record per
# dated row that carries at least one figure.
xls_url = f"https://sh.ch{meta['url']}"
xls = sc.xlsdownload(xls_url, silent=True)
rows = sc.parse_xls(xls, header_row=0)
is_first = True
for row in rows:
    # Skip rows without a real date in the 'Datum' column.
    if not isinstance(row['Datum'], datetime.datetime):
        continue
    # Skip rows in which none of the reported figures is filled in.
    if not (row['Positiv'] or row['Hospitalisiert_Iso'] or row['Hospitalisiert_Intensiv'] or row['Verstorben']):
        continue

    # Records are separated by a dashed line; none is printed before the
    # first record.
    if not is_first:
        print('-' * 10)
    is_first = False

    print('SH')
    sc.timestamp()
    print('Downloading:', xls_url)

    # 'Uhrzeit' may be a datetime, some other non-empty value, or empty.
    if isinstance(row['Uhrzeit'], datetime.datetime):
        print('Date and time:', row['Datum'].date().isoformat(), row['Uhrzeit'].time().isoformat())
    elif row['Uhrzeit']:
        print('Date and time:', row['Datum'].strftime('%d.%m.%Y'), row['Uhrzeit'])
    else:
        print('Date and time:', row['Datum'].date().isoformat())

    print('Confirmed cases:', row['Positiv'])
    if sc.represents_int(row['Hospitalisiert_Iso']) and sc.represents_int(row['Hospitalisiert_Intensiv']):
        print('Hospitalized:', (row['Hospitalisiert_Iso'] + row['Hospitalisiert_Intensiv']))
        print('ICU:', row['Hospitalisiert_Intensiv'])
    # A row can pass the filter above with an empty 'Verstorben' cell; only
    # report deaths when a value is present so no 'Deaths: None' line is
    # emitted.
    if row['Verstorben'] is not None:
        print('Deaths:', row['Verstorben'])