def collect_country():
    country_data = fetchhelper.Updater(url_countries, ext='country.json')
    country_data.check_fetch(rawfile=args.rawfile[0], compressed=True)
    if not country_data.rawdata.strip():
        if datetime.date.today().isoweekday() == 7:
            # They apparently turn the servers off on Sundays these days.
            pass
        else:
            print("Empty country.json")
        return

    jdat = json.loads(country_data.rawdata)

    parse = fetchhelper.ParseData(country_data, 'countries')
    parse.parsedtime = datatime
    with open(parse.parsedfile, 'w') as f:
        cw = csv.writer(f)
        header = ['Code', 'Country', 'Timestamp', 'Confirmed', 'Deaths']
        cw.writerow(header)
        for data in sorted(jdat['data'], key=(lambda d: d['areaCode'])):
            code = data['areaCode']
            name = data['areaName']
            confirmed = data['cumCasesByPublishDate']
            deaths = data['cumDeaths28DaysByPublishDate']
            cw.writerow([code, name, datatime, confirmed, deaths])
    parse.deploy_timestamp()
    return parse
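These collectors lean on the repo's fetchhelper module throughout. The stub below is not the real implementation; it only sketches the interface as inferred from how it is called in these scripts, for orientation:

# Hypothetical stub of the fetchhelper interface, inferred purely from usage;
# the real module also handles caching, diffing, and deployment.
class Updater:
    def __init__(self, url, ext=None): ...
    def check_fetch(self, rawfile=None, compressed=False, binary=False):
        """Fetch url (or reuse rawfile) into self.rawdata / self.rawfile."""
    def raw_changed(self):
        """Whether the fetched raw data differs from the previous run."""

class ParseData:
    def __init__(self, update, label, variant=None):
        self.parsedtime = None   # timestamp the parsed data refers to
        self.parsedfile = '...'  # path the parsed CSV is written to
    def deploy_timestamp(self):
        """Publish the parsed CSV under its parsedtime."""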
def parse_v1(parses, html):
    parse_c = fetchhelper.ParseData(update, 'confirmed')
    parse_counts(parse_c, html, "Best.*tigte F.*lle")
    parses.append(parse_c)

    # Seems to be removed for good
    #parse_r = fetchhelper.ParseData(update, 'recovered')
    #try:
    #    parse_counts(parse_r, infobox, "Genesene Personen")
    #    parses.append(parse_r)
    #except AttributeError as err:
    #    # It seems to be removed, we ignore it
    #    print(err)

    parse_d = fetchhelper.ParseData(update, 'deceased')
    parse_counts(parse_d, html, "Todesf.*lle")
    parses.append(parse_d)
def parse_v2(parses, html):
    table = fetchhelper.text_table(html.find('table'))

    ths = table[0]
    assert 'Bundesland' in ths[0]
    assert 'gesamt' in ths[-1]

    trs = table[1:]
    assert 'tigte' in trs[0][0]
    assert 'Todesf' in trs[1][0]
    assert 'Genesen' in trs[2][0]
    assert 'Hospital' in trs[3][0]
    assert 'Intensiv' in trs[4][0]
    assert 'Testungen' in trs[5][0]

    parse = [
        fetchhelper.ParseData(update, 'confirmed'),
        fetchhelper.ParseData(update, 'deaths'),
        fetchhelper.ParseData(update, 'recovered'),
        fetchhelper.ParseData(update, 'hospital'),
        fetchhelper.ParseData(update, 'intensivecare'),
        fetchhelper.ParseData(update, 'tests'),
    ]

    labels = ['confirmed', 'deceased', 'recovered',
              'hospital', 'intensivecare', 'tests']

    areas = {
        'Bgld.': 'Burgenland',
        'Kt.': 'Kärnten',
        'Ktn.': 'Kärnten',
        'NÖ': 'Niederösterreich',
        'OÖ': 'Oberösterreich',
        'Sbg.': 'Salzburg',
        'Stmk.': 'Steiermark',
        'T': 'Tirol',
        'Vbg.': 'Vorarlberg',
        'W': 'Wien',
    }

    for i, tds in enumerate(trs):
        assert len(ths) == len(tds)
        mo = re.search(r'Stand (\d\d\.\d\d\.\d\d\d\d), *(\d\d:\d\d) ?Uhr', tds[0])
        if mo is None:
            print("cannot parse date")
            sys.exit(1)
        parse = fetchhelper.ParseData(update, labels[i])
        datadate = parse.parsedtime = datetime.strptime(
            mo.group(1) + ' ' + mo.group(2),
            '%d.%m.%Y %H:%M').replace(tzinfo=datatz)
        with open(parse.parsedfile, 'w') as f:
            cw = csv.writer(f)
            cw.writerow(['Area', 'Date', 'Value'])
            for col in range(1, len(tds) - 1):
                area = areas[strip_footnote(ths[col])]
                count = cleannum(tds[col])
                cw.writerow([area, datadate.isoformat(), count])
        parse.deploy_timestamp()
        parses.append(parse)
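parse_v2 also relies on two helpers defined elsewhere in this script; their real definitions are not shown here. A minimal sketch consistent with how they are called, where the separator and footnote conventions are assumptions:

import re

# Assumed helper: parse an Austrian-formatted count such as '1.234'
# ('.' as thousands separator, possibly non-breaking spaces) into an int.
def cleannum(s):
    return int(s.strip().replace('.', '').replace('\xa0', ''))

# Assumed helper: drop a trailing footnote marker (e.g. '*' or '¹')
# from a column header such as 'Bgld.*'.
def strip_footnote(s):
    return re.sub(r'[*\u00b9\u00b2\u00b3]+$', '', s.strip())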
def collect_utla():
    utla_data = fetchhelper.Updater(url_utlas, ext='utla.json')
    utla_data.check_fetch(rawfile=args.rawfile[1], compressed=True)
    if not utla_data.rawdata.strip():
        if datetime.date.today().isoweekday() == 7:
            # Empty data on Sundays is expected (see collect_country).
            pass
        else:
            print("Empty utla.json")
        return

    jdat = json.loads(utla_data.rawdata)

    parse = fetchhelper.ParseData(utla_data, 'utla')
    parse.parsedtime = datatime
    with open(parse.parsedfile, 'w') as f:
        cw = csv.writer(f)
        header = ['Code', 'UTLA', 'Region', 'Timestamp',
                  'Confirmed', 'Deaths', 'Backdated']
        cw.writerow(header)
        for data in sorted(jdat['data'], key=(lambda d: d['areaCode'])):
            code = data['areaCode']
            name = data['areaName']
            confirmed = data['cumCasesByPublishDate']
            fallback = ''
            if confirmed is None:
                # Fall back to the specimen-date series and flag the row.
                confirmed = data['cumCasesBySpecimenDate']
                if confirmed is not None:
                    fallback += 'C'
            deaths = data['cumDeaths28DaysByPublishDate']
            if deaths is None:
                deaths = data['cumDeaths28DaysByDeathDate']
                if deaths is not None:
                    fallback += 'D'
            cw.writerow([
                code, name,
                (regions[code][1] if code[0] == 'E' else None),
                datatime, confirmed, deaths, fallback,
            ])
    parse.deploy_timestamp()
    return parse
)
update.check_fetch(rawfile=args.rawfile)
if args.only_changed:
    if not update.raw_changed():
        print("downloaded raw data unchanged")
        exit(0)

html = BeautifulSoup(update.rawdata, 'html.parser')

tab = html.find(string=re.compile('R.*gion de notification')).find_parent('table')
datestr = tab.find_previous('h4').get_text()
mo = re.search(r'(\d\d/\d\d/\d\d\d\d) à (\d\d)h', datestr)

parse = fetchhelper.ParseData(update, 'data')
parse.parsedtime = datetime.strptime(
    mo.group(1) + ' ' + mo.group(2), '%d/%m/%Y %H').replace(tzinfo=datatz)

with open(parse.parsedfile, 'w') as outf:
    cw = csv.writer(outf)
    cw.writerow(['Area', 'Region', 'Date', 'Confirmed'])
    group = 'Métropole'
    for tr in tab.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        area = tds[0].get_text()
        counttxt = tds[1].get_text()
        if '**' in counttxt:
            # Rows marked with '**' carry no usable count.
            continue
        count = cleannum(counttxt)
            cols.append(datatime.isoformat())
            if tab_combined:
                cols += [clean_num(tds[1].get_text()),
                         clean_num(tds[n_deaths].get_text())]
            else:
                cols.append(clean_num(tds[1].get_text()))
                if deaths:
                    cols.append(deaths.get(lk, 0))
            cout.writerow(cols)

    # If the current day is later than the contenttime we assume the
    # content time is a mistake and we adjust it to the current day.
    # (This problem has happened before.)
    #if parse.update.rawtime.date() > parse.parsedtime.date():
    #    if parse.parseddiff.changed and not parse.parseddiff.first:
    #        print("Adjust date", parse.parsedtime, "->", parse.update.rawtime)
    #        parse.parsedtime = parse.update.rawtime

    parse.deploy_timestamp()

rparse = fetchhelper.ParseData(update, 'regierungsbezirk')
parse_table(rparse, html, 'regierungsbezirk')

lparse = fetchhelper.ParseData(update, 'landkreis')
parse_table(lparse, html, 'landkreis')

fetchhelper.git_commit([rparse, lparse], args)
import datetime, re, csv, os
import json
import dateutil.tz

import fetchhelper

datatz = dateutil.tz.gettz('Europe/Berlin')

# Bundesländer
url_bl = 'https://services7.arcgis.com/mOBPykOjAyBO2ZKk/ArcGIS/rest/services/Coronaf%c3%a4lle_in_den_Bundesl%c3%a4ndern/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=LAN_ew_GEN%2CAktualisierung%2CFallzahl%2CDeath%2CLAN_ew_AGS&returnGeometry=false&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token='

updatebl = fetchhelper.Updater(url_bl, ext='bl.json')
updatebl.check_fetch(rawfile=args.rawfile[0])
jdat = json.loads(updatebl.rawdata)

parsebl = fetchhelper.ParseData(updatebl, 'data')
parsebl.parsedtime = None
with open(parsebl.parsedfile, 'w') as outf:
    cout = csv.writer(outf)
    cout.writerow(['Bundesland', 'AGS', 'Timestamp', 'EConfirmed', 'EDeaths'])
    for jfeat in sorted(jdat['features'],
                        key=(lambda f: f['attributes']['LAN_ew_GEN'])):
        # 'Aktualisierung' is an epoch timestamp in milliseconds.
        ts = datetime.datetime.fromtimestamp(
            jfeat['attributes']['Aktualisierung'] / 1000, tz=datatz)
        # Use the newest per-Land update time as the overall parsed time.
        if parsebl.parsedtime is None or ts > parsebl.parsedtime:
            parsebl.parsedtime = ts
        cout.writerow([
            jfeat['attributes']['LAN_ew_GEN'],
            jfeat['attributes']['LAN_ew_AGS'],
            ts.isoformat(),
            jfeat['attributes']['Fallzahl'],
            jfeat['attributes']['Death'],
        ])
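The hand-assembled ArcGIS query string above is hard to audit. For reference, an equivalent query could be built with urllib.parse.urlencode; this is a sketch of an alternative, not what the script does, and it relies on the server's defaults for the omitted parameters:

from urllib.parse import urlencode

base = ('https://services7.arcgis.com/mOBPykOjAyBO2ZKk/ArcGIS/rest/services/'
        'Coronaf%c3%a4lle_in_den_Bundesl%c3%a4ndern/FeatureServer/0/query')
params = {
    'where': '1=1',
    'outFields': 'LAN_ew_GEN,Aktualisierung,Fallzahl,Death,LAN_ew_AGS',
    'returnGeometry': 'false',
    'returnExceededLimitFeatures': 'true',
    'f': 'pjson',
}
url_bl_alt = base + '?' + urlencode(params)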
url_cases = 'https://atlas.jifo.co/api/connectors/41be7d71-7260-497f-a60b-adce5aa9445d'
url_recovered = 'https://atlas.jifo.co/api/connectors/2adaf217-e526-492a-bcad-5ed6ec6ad3ad'

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater(url_cases, ext='cases.json')
update.check_fetch(rawfile=rawfiles[0])
jdat = json.loads(update.rawdata)

header = jdat['data'][0][0]
i_kom = header.index("Ort")
i_con = header.index("Gesamtzahl seit Ausbruch")

parses = []

parse = fetchhelper.ParseData(update, 'data')
parse.parsedtime = datetime.datetime.fromtimestamp(jdat['refreshed'] / 1000, tz=datatz)
with open(parse.parsedfile, 'w') as f:
    cw = csv.writer(f)
    cw.writerow(['Kommune', 'Timestamp', 'Confirmed'])
    for jrow in jdat['data'][0][1:]:
        kom = jrow[i_kom]
        if kom in ('Zuordnung fehlt', 'Gesamt', ''):
            continue
        if kom.startswith('Stand vom'):
            continue
        # Normalize an inconsistent spelling in the source data.
        if jrow[i_kom] == 'Pfaffenhofen a.d.Glonn':
            jrow[i_kom] = 'Pfaffenhofen a.d. Glonn'
        cw.writerow([jrow[i_kom], parse.parsedtime.isoformat(), jrow[i_con]])
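The connector payload itself is not shown; the access pattern above implies a shape roughly like the following illustrative reconstruction (names and values are made up, not a captured response):

example_jdat = {
    'refreshed': 1600000000000,  # epoch milliseconds
    'data': [[
        ['Ort', 'Gesamtzahl seit Ausbruch'],  # header row of the first sheet
        ['Karlsfeld', 45],
        ['Zuordnung fehlt', 3],               # skipped by the loop above
    ]],
}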
fetchhelper.add_arguments(ap)
args = ap.parse_args()

import subprocess, csv
from datetime import datetime, timedelta
import dateutil.tz

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater(
    'https://sozialministerium.baden-wuerttemberg.de/fileadmin/redaktion/m-sm/intern/downloads/Downloads_Gesundheitsschutz/Tabelle_Coronavirus-Faelle-BW.xlsx',
    ext='xlsx')
update.check_fetch(args.rawfile, binary=True)

parse = fetchhelper.ParseData(update, 'timeline')

# Convert the workbook to CSV with the external xlsx2csv tool and
# stream its stdout through a csv.reader.
proc = subprocess.Popen(['xlsx2csv', update.rawfile],
                        stdout=subprocess.PIPE, encoding='utf-8')
cr = csv.reader(proc.stdout)
with open(parse.parsedfile, 'w') as pf:
    cpf = csv.writer(pf)
    start = False
    dates = None
    for row in cr:
        if not start:
            # Skip ahead to the header row of the data table.
            if row and row[0] == 'Stadt-/Landkreis':
                start = True
            continue
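xlsx2csv is an external command-line tool that prints the sheet as CSV on stdout. Where it is unavailable, a rough stand-in using openpyxl could look like the sketch below; this is a hypothetical substitution, not part of this repo, and its cell formatting (dates in particular) will not exactly match xlsx2csv's output:

import openpyxl  # assumption: not an actual dependency of this repo

def xlsx_rows(path):
    # Yield the rows of the first sheet as lists of strings, roughly
    # mirroring what csv.reader sees on xlsx2csv's stdout.
    wb = openpyxl.load_workbook(path, read_only=True)
    for row in wb.worksheets[0].iter_rows(values_only=True):
        yield ['' if v is None else str(v) for v in row]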
        if not row[0]:
            continue
        # There is no consistent date for these numbers, but we assume
        # they are published at the end of the day.
        timestamp = datetime.datetime.strptime(
            row[0] + ' 23:59', '%m-%d-%y %H:%M').replace(tzinfo=datatz)
        for n in [3, 4, 5, 6]:
            country = clean_label(header[n])
            if row[n] == '':
                continue
            deaths = clean_num(row[n])
            countrydata[timestamp][country].deaths = deaths

parses = []
for timestamp, tsdata in sorted(countrydata.items()):
    parse = fetchhelper.ParseData(update, 'countries')
    parse.parsedtime = timestamp
    has_deaths = any(cdata.deaths is not None for cdata in tsdata.values())
    with open(parse.parsedfile, 'w') as f:
        cw = csv.writer(f)
        header = ['Code', 'Country', 'Timestamp', 'Confirmed']
        if has_deaths:
            header.append('Deaths')
        cw.writerow(header)
        for _, cdata in sorted(tsdata.items()):
            row = [cdata.code, cdata.name,
                   cdata.timestamp.isoformat(), cdata.confirmed]
if args.rawfile is not None:
    args.rawfile = args.rawfile.split(',')
else:
    args.rawfile = [None, None]

countrydata = {}

country_data = fetchhelper.Updater(url_countries, ext='country.json')
country_data.check_fetch(rawfile=args.rawfile[1])
jdat = json.loads(country_data.rawdata)

# 'lastUpdatedAt' ends in 'Z' (UTC). fromisoformat() rejects the 'Z'
# (before Python 3.11), so strip it and attach the UTC zone explicitly;
# astimezone() on the naive value would misread it as local time.
datatime = datetime.datetime.fromisoformat(
    jdat['metadata']['lastUpdatedAt'].rstrip('Z')).replace(
        tzinfo=datetime.timezone.utc)

parses = []

parse = fetchhelper.ParseData(country_data, 'countries')
parse.parsedtime = datatime
with open(parse.parsedfile, 'w') as f:
    cw = csv.writer(f)
    header = ['Code', 'Country', 'Timestamp', 'Confirmed', 'Deaths']
    cw.writerow(header)
    for (code, data) in jdat.items():
        if code == 'metadata':
            continue
        name = data['name']['value']
        confirmed = int(data['totalCases']['value'])
        deaths = int(data['deaths']['value'])
        cw.writerow([code, name, datatime, confirmed, deaths])
parse.deploy_timestamp()
parses.append(parse)
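A quick sanity check of that timestamp handling, runnable standalone (the example string is illustrative):

import datetime

s = '2020-08-12T15:00:00Z'
naive = datetime.datetime.fromisoformat(s.rstrip('Z'))
aware = naive.replace(tzinfo=datetime.timezone.utc)
assert aware.isoformat() == '2020-08-12T15:00:00+00:00'
# naive.astimezone(datetime.timezone.utc) would instead interpret the
# naive value in the machine's local zone before converting to UTC.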
    'Västernorrland': 0,
    'Västmanland': 0,
    'Västra_Götaland': 0,
    'Örebro': 0,
    'Östergötland': 0,
}

parses = []
datatime = None
features = [f for f in jd['features']
            if f['attributes']['Statistikdatum'] is not None]
for feat in sorted(features, key=(lambda f: f['attributes']['Statistikdatum'])):
    attrs = feat['attributes']
    # 'Statistikdatum' is an epoch timestamp in milliseconds; the counts
    # are stamped as of 11:30 local time on that day.
    datatime = datetime.datetime.utcfromtimestamp(
        attrs['Statistikdatum'] / 1000).replace(hour=11, minute=30, tzinfo=datatz)
    # Accumulate the daily values into running per-area totals.
    for attr, value in attrs.items():
        if attr in areasum:
            areasum[attr] += value
    parse = fetchhelper.ParseData(update, 'data', variant=datatime.isoformat())
    parse.parsedtime = datatime
    with open(parse.parsedfile, 'w') as outf:
        cw = csv.writer(outf)
        header = ['Area', 'Timestamp', 'Confirmed']
        cw.writerow(header)
        for area, count in sorted(areasum.items()):
            cw.writerow([area.replace('_', ' '), datatime.isoformat(), count])
    parse.deploy_timestamp()
    parses.append(parse)

fetchhelper.git_commit(parses, args)
    # on 10.09.2020 there were additional empty cells
    for tr in table:
        if tr[-1] == '':
            tr.pop()

    ths = table[0]
    assert 'Bundesland' in ths[0]
    assert 'gesamt' in ths[-1]

    trs = table[1:]
    assert 'tigte' in trs[0][0]
    assert 'Todesf' in trs[1][0]
    assert 'Genesen' in trs[2][0]
    assert 'Hospital' in trs[3][0]
    assert 'Intensiv' in trs[4][0]
    assert 'Testungen' in trs[5][0]

    parse = [
        fetchhelper.ParseData(update, 'confirmed'),
        fetchhelper.ParseData(update, 'deaths'),
        fetchhelper.ParseData(update, 'recovered'),
        fetchhelper.ParseData(update, 'hospital'),
        fetchhelper.ParseData(update, 'intensivecare'),
        fetchhelper.ParseData(update, 'tests'),
    ]

    labels = ['confirmed', 'deceased', 'recovered',
              'hospital', 'intensivecare', 'tests']

    areas = {
        'Bgld.': 'Burgenland',
        'Kt.': 'Kärnten',
        'Ktn.': 'Kärnten',
        'NÖ': 'Niederösterreich',