def parse_xlsx():
    """Scrape the VD canton COVID-19 statistics.

    Downloads the situation page, follows the linked XLSX, and prints one
    record per data row (cases, hospitalizations, ICU, deaths).
    """
    html_url = 'https://www.vd.ch/toutes-les-actualites/hotline-et-informations-sur-le-coronavirus/point-de-situation-statistique-dans-le-canton-de-vaud/'
    d = sc.download(html_url, silent=True)
    soup = BeautifulSoup(d, 'html.parser')
    # Raw string: "\.xlsx$" without the r-prefix is an invalid escape
    # sequence (DeprecationWarning); matches the form used elsewhere.
    xls_url = soup.find(href=re.compile(r"\.xlsx$")).get('href')
    assert xls_url, "URL is empty"
    xls = sc.xlsdownload(xls_url, silent=True)
    rows = sc.parse_xls(xls, header_row=2)
    is_first = True
    for row in rows:
        # Skip header/summary rows that carry no real date value.
        if not isinstance(row['Date'], datetime.datetime):
            continue
        # Separate consecutive records with a divider line.
        if not is_first:
            print('-' * 10)
        is_first = False
        print('VD')
        sc.timestamp()
        print('Downloading:', html_url)
        print('Date and time:', row['Date'].date().isoformat())
        print('Confirmed cases:', row['Nombre total de cas confirmés positifs'])
        print('Hospitalized:', row['Hospitalisation en cours'])
        print('ICU:', row['Dont soins intensifs'])
        print('Deaths:', row['Décès'])
def get_fr_xls():
    """Locate and download the FR canton statistics XLSX (French page).

    Returns:
        A ``(xls_url, xls)`` tuple of the resolved absolute URL and the
        downloaded workbook.
    """
    d = sc.download(
        'https://www.fr.ch/sante/covid-19/coronavirus-statistiques-evolution-de-la-situation-dans-le-canton',
        silent=True)
    soup = BeautifulSoup(d, 'html.parser')
    # Raw string: "\.xlsx$" without the r-prefix is an invalid escape
    # sequence (DeprecationWarning); the German-page variant of this
    # function already uses the raw form.
    xls_url = soup.find(href=re.compile(r"\.xlsx$")).get('href')
    assert xls_url, "URL is empty"
    # Links on fr.ch may be site-relative; make them absolute.
    if not xls_url.startswith('http'):
        xls_url = f'https://www.fr.ch{xls_url}'
    xls = sc.xlsdownload(xls_url, silent=True)
    return xls_url, xls
def get_fr_xls():
    """Fetch the FR canton statistics page (German) and grab the linked XLSX.

    Returns:
        A ``(xls_url, xls)`` tuple of the resolved absolute URL and the
        downloaded workbook.
    """
    page = sc.download(
        'https://www.fr.ch/de/gesundheit/covid-19/coronavirus-statistik-ueber-die-entwicklung-im-kanton',
        silent=True)
    # The statistics workbook is the only .xlsx link on the page.
    link = BeautifulSoup(page, 'html.parser').find(href=re.compile(r"\.xlsx$"))
    xls_url = link.get('href')
    assert xls_url, "URL is empty"
    if not xls_url.startswith('http'):
        # Site-relative link: prefix the fr.ch host.
        xls_url = f'https://www.fr.ch{xls_url}'
    return xls_url, sc.xlsdownload(xls_url, silent=True)
def get_fr_xls():
    """Find the FR statistics XLSX via its link label and download it.

    Returns:
        A ``(xls_url, xls, main_url)`` tuple: the resolved absolute URL of
        the workbook, the downloaded workbook, and the page it came from.
    """
    main_url = 'https://www.fr.ch/de/gesundheit/covid-19/coronavirus-statistik-ueber-die-entwicklung-im-kanton'
    soup = BeautifulSoup(sc.download(main_url, silent=True), 'html.parser')
    # ".ber" tolerates encoding quirks in "über" in the label text.
    label = soup.find(
        'span',
        text=re.compile(r"Statistik .ber die Entwicklungen im Kanton.*"))
    anchor = label.find_parent('a')
    xls_url = anchor.get('href')
    assert xls_url, "URL is empty"
    if not xls_url.startswith('http'):
        # Site-relative link: prefix the fr.ch host.
        xls_url = f'https://www.fr.ch{xls_url}'
    return xls_url, sc.xlsdownload(xls_url, silent=True), main_url
dd.datetime = date.replace('\n', ' ') dd.isolated = res['iso'] dd.hospitalized = res['hosp'] dd.quarantined = res['quar'] dd.quarantine_riskareatravel = res['qtravel'] print(dd) is_first = False """ try: xls_url = soup.find( 'a', string=re.compile(r'Coronaf.lle\s*im\s*Kanton\s*Schwyz'))['href'] except TypeError: print("Unable to determine xls url", file=sys.stderr) sys.exit(1) xls = sc.xlsdownload(xls_url, silent=True) rows = sc.parse_xls(xls) for row in rows: if not isinstance(row['Datum'], datetime.datetime): continue if not is_first: print('-' * 10) is_first = False # TODO: remove when source is fixed # handle wrong value on 2020-03-25, see issue #631 if row['Datum'].date().isoformat() == '2020-03-25': row['Bestätigte Fälle (kumuliert)'] = ''
#!/usr/bin/env python3
"""Print the latest GL canton COVID-19 figures from the cantonal XLS."""
import re

from bs4 import BeautifulSoup

import scrape_common as sc

print('GL')
page = sc.download(
    'https://www.gl.ch/verwaltung/finanzen-und-gesundheit/gesundheit/coronavirus.html/4817'
)
sc.timestamp()

# The XLS download link sits inside the highlighted notice box on the page.
notice_box = BeautifulSoup(page, 'html.parser').find('div', class_="box--error")
xls_url = notice_box.find('a', string=re.compile(r'.*Dokument.*')).get('href')
workbook = sc.xlsdownload(xls_url)
sc.timestamp()

# Rows are chronological; the last row holds the newest figures.
sheet = workbook.sheet_by_index(0)
newest = sheet.nrows - 1
report_date = sc.xldate_as_datetime(sheet, sheet.cell_value(newest, 0))
print('Date and time:', report_date.date().isoformat())

print('Confirmed cases:', int(sheet.cell_value(newest, 1)))

hospitalized = int(sheet.cell_value(newest, 2))
# Only report hospitalizations when the cell holds a non-zero value.
if hospitalized:
    print('Hospitalized:', hospitalized)
chrome_options.add_argument("--headless") driver = webdriver.Chrome(options=chrome_options) driver.implicitly_wait(5) url = 'https://infocovid.smc.unige.ch/' driver.get(url) elem = driver.find_element_by_link_text('Tables') elem.click() # get quarantine xls elem = driver.find_element_by_link_text('Cas et quarantaines') elem.click() quarantine_xls_url = sgc.get_link_from_element(driver, 'download_table_cas') assert quarantine_xls_url, "Couldn't find quarantine XLS url" xls = sc.xlsdownload(quarantine_xls_url, silent=True) rows = sc.parse_xls(xls, header_row=0) for row in rows: dd = sc.DayData(canton='GE', url=url) dd.datetime = row['date'] dd.isolated = row['isolement déjà en cours'] dd.quarantined = row['Quarantaines en cours suite\nà un contact étroit'] dd.quarantine_riskareatravel = row[ 'Quarantaines en cours au retour de zone à risque'] if dd: if not is_first: print('-' * 10) is_first = False print(dd)
if m: dd.hospitalized = sc.int_or_word(m[1].lower()) m = re.search(r'Gegenwärtig\s+befinden\s+sich\s+(\w+)\s+enge\s+Kontaktpersonen\s+in\s+Quarantäne.', content) if m: dd.quarantined = sc.int_or_word(m[1]) if dd: if not is_first: print('-' * 10) print(dd) is_first = False # get the data from XLS file containing full history history_url='https://www.llv.li/files/ag/aktuelle-fallzahlen.xlsx' xls = sc.xlsdownload(history_url, silent=True) rows = sc.parse_xls(xls, header_row=0) for row in rows: dd_full_list = sc.DayData(canton='FL', url=history_url) dd_full_list.datetime = row['Datenstand'] dd_full_list.cases = str(row['Anzahl pos. Fälle']).replace("'","") dd_full_list.recovered = row['genesen'] dd_full_list.hospitalized = row['hospitalisiert'] dd_full_list.deaths = row['Todesfälle'] if dd_full_list: if not is_first: print('-' * 10) is_first = False print(dd_full_list)
#!/usr/bin/env python3
"""Print the latest SZ canton COVID-19 figures from the published XLS."""
import scrape_common as sc

print('SZ')
workbook = sc.xlsdownload(
    'https://www.sz.ch/public/upload/assets/45951/COVID-19_Fallzahlen_Kanton_Schwyz.xlsx'
)
sc.timestamp()

# Rows are chronological; the last row holds the newest figures.
sheet = workbook.sheet_by_index(0)
newest = sheet.nrows - 1
report_date = sc.xldate_as_datetime(sheet, sheet.cell_value(newest, 0))
print('Date and time:', report_date.date().isoformat())

print('Confirmed cases:', int(sheet.cell_value(newest, 1)))

death_count = int(sheet.cell_value(newest, 2))
# Only report deaths/recoveries when the cell holds a non-zero value.
if death_count:
    print('Deaths:', death_count)

recovered_count = int(sheet.cell_value(newest, 3))
if recovered_count:
    print('Recovered:', recovered_count)