Code example #1
def collect_country():
    country_data = fetchhelper.Updater(url_countries, ext='country.json')
    country_data.check_fetch(rawfile=args.rawfile[0], compressed=True)
    if not country_data.rawdata.strip():
        if datetime.date.today().isoweekday() == 7:
            # They nowadays turn the servers off on Sundays or sth.
            pass
        else:
            print("Empty country.json")
        return

    jdat = json.loads(country_data.rawdata)

    parse = fetchhelper.ParseData(country_data, 'countries')
    parse.parsedtime = datatime
    with open(parse.parsedfile, 'w') as f:
        cw = csv.writer(f)
        header = ['Code', 'Country', 'Timestamp', 'Confirmed', 'Deaths']
        cw.writerow(header)
        for data in sorted(jdat['data'], key=(lambda d: d['areaCode'])):
            code = data['areaCode']
            name = data['areaName']
            confirmed = data['cumCasesByPublishDate']
            deaths = data['cumDeaths28DaysByPublishDate']
            cw.writerow([code, name, datatime, confirmed, deaths])
    parse.deploy_timestamp()
    return parse
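
All of these examples depend on the project's own fetchhelper module, which is not included in the listing. The sketch below is a hypothetical minimal stand-in, inferred only from the call sites visible in these snippets (Updater, check_fetch, ParseData, deploy_timestamp, add_arguments, check_oldfetch); it is not the project's actual implementation, just enough to read the examples as self-contained programs.

# Hypothetical stand-in for the project's fetchhelper module, inferred from the
# call sites in these examples; the real module almost certainly differs.
import argparse
import urllib.request


def add_arguments(ap):
    # the examples read args.rawfile, so at minimum this option must exist
    ap.add_argument('--rawfile', default=None)


def check_oldfetch(args):
    pass  # placeholder: presumably decides whether today's data was already fetched


class Updater:
    def __init__(self, url, ext='html'):
        self.url = url
        self.rawfile = 'raw.' + ext
        self.rawdata = None
        self.contenttime = None

    def check_fetch(self, rawfile=None, binary=False, compressed=False,
                    remotetime=False, checkdh=True):
        # reuse a previously saved raw file if one is given, otherwise download;
        # compressed/remotetime/checkdh are accepted but ignored in this stub
        if rawfile is not None:
            self.rawfile = rawfile
        else:
            urllib.request.urlretrieve(self.url, self.rawfile)
        if not binary:
            with open(self.rawfile, encoding='utf-8') as f:
                self.rawdata = f.read()


class ParseData:
    def __init__(self, update, name):
        self.update = update
        self.parsedfile = 'parsed_%s.csv' % name
        self.parsedtime = None

    def deploy_timestamp(self):
        pass  # placeholder: presumably files the parsed CSV under a timestamped name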
Code example #2
def collect_utla():
    utla_data = fetchhelper.Updater(url_utlas, ext='utla.json')
    utla_data.check_fetch(rawfile=args.rawfile[1], compressed=True)
    if not utla_data.rawdata.strip():
        if datetime.date.today().isoweekday() == 7:
            pass
        else:
            print("Empty utla.json")
        return

    jdat = json.loads(utla_data.rawdata)

    parse = fetchhelper.ParseData(utla_data, 'utla')
    parse.parsedtime = datatime
    with open(parse.parsedfile, 'w') as f:
        cw = csv.writer(f)
        header = [
            'Code', 'UTLA', 'Region', 'Timestamp', 'Confirmed', 'Deaths',
            'Backdated'
        ]
        cw.writerow(header)
        for data in sorted(jdat['data'], key=(lambda d: d['areaCode'])):
            code = data['areaCode']
            name = data['areaName']
            confirmed = data['cumCasesByPublishDate']
            fallback = ''
            if confirmed is None:
                confirmed = data['cumCasesBySpecimenDate']
                if confirmed is not None:
                    fallback += 'C'
            deaths = data['cumDeaths28DaysByPublishDate']
            if deaths is None:
                deaths = data['cumDeaths28DaysByDeathDate']
                if deaths is not None:
                    fallback += 'D'
            cw.writerow([
                code, name, (regions[code][1] if code[0] == 'E' else None),
                datatime, confirmed, deaths, fallback
            ])
    parse.deploy_timestamp()
    return parse
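
The fallback handling in collect_utla() (prefer the *ByPublishDate figures, fall back to *BySpecimenDate / *ByDeathDate and note it with a 'C' or 'D' flag) can be read in isolation. The helper below is only an illustrative extraction of that logic, exercised with a hand-made record in the shape the API appears to return; it is not part of the original file.

def pick_with_fallback(data, primary, secondary, flag):
    # return (value, flag): '' if the primary field was usable, the flag letter
    # if the secondary field had to be used instead
    value = data[primary]
    if value is not None:
        return value, ''
    value = data[secondary]
    return value, (flag if value is not None else '')


record = {
    'areaCode': 'E06000001', 'areaName': 'Hartlepool',
    'cumCasesByPublishDate': None, 'cumCasesBySpecimenDate': 1234,
    'cumDeaths28DaysByPublishDate': 56, 'cumDeaths28DaysByDeathDate': 55,
}
confirmed, cflag = pick_with_fallback(record, 'cumCasesByPublishDate',
                                      'cumCasesBySpecimenDate', 'C')
deaths, dflag = pick_with_fallback(record, 'cumDeaths28DaysByPublishDate',
                                   'cumDeaths28DaysByDeathDate', 'D')
print(confirmed, deaths, cflag + dflag)   # 1234 56 C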
Code example #3
File: fetch.py Project: sth/covid-19-data
import argparse
import fetchhelper

ap = argparse.ArgumentParser()
fetchhelper.add_arguments(ap)
args = ap.parse_args()

fetchhelper.check_oldfetch(args)


import subprocess, datetime, re, csv, os, sys
from bs4 import BeautifulSoup
import dateutil.tz

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater('https://www.landkreis-muenchen.de/themen/verbraucherschutz-gesundheit/gesundheit/coronavirus/fallzahlen/')
update.check_fetch(rawfile=args.rawfile)

# accidentally duplicated <tr> and other html errors
html = BeautifulSoup(update.rawdata, 'html.parser')

parse = fetchhelper.ParseData(update, 'data')

datatime = None
timeguess = False
txt = str(html.find(text=re.compile('(?:Stand|Datenstand): ')))
for timere, timefmt in [
        (r'(?:Stand|Datenstand): (\d\d\.\d\d\.\d\d\d\d, \d\d:\d\d) ?Uhr', '%d.%m.%Y, %H:%M'),
        (r'(?:Stand|Datenstand): (\d\d\.\d\d\.\d\d\d\d, \d\d\.\d\d) ?Uhr', '%d.%m.%Y, %H.%M'),
        (r'(?:Stand|Datenstand): (\d\d\.\d\d\.\d\d\d\d, \d\d) ?Uhr', '%d.%m.%Y, %H'),
        (r'(?:Stand|Datenstand): (\d\d\.\d\d\.\d\d\d\d, \d\d) ?Uhr', '%d.%m.%Y, %H'),
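
The excerpt breaks off inside the list of (regex, format) pairs. The loop presumably tries each pattern against the "Stand:"/"Datenstand:" text and parses the first match with the paired strptime format, falling back to a guess (the timeguess flag above) if nothing matches. Below is a self-contained sketch of that logic, not the original continuation.

import datetime
import re
import dateutil.tz

datatz = dateutil.tz.gettz('Europe/Berlin')

def parse_stand(txt):
    # try the known "Stand: ..." formats in order; return (datetime or None, guessed)
    patterns = [
        (r'(?:Stand|Datenstand): (\d\d\.\d\d\.\d\d\d\d, \d\d:\d\d) ?Uhr', '%d.%m.%Y, %H:%M'),
        (r'(?:Stand|Datenstand): (\d\d\.\d\d\.\d\d\d\d, \d\d\.\d\d) ?Uhr', '%d.%m.%Y, %H.%M'),
        (r'(?:Stand|Datenstand): (\d\d\.\d\d\.\d\d\d\d, \d\d) ?Uhr', '%d.%m.%Y, %H'),
    ]
    for timere, timefmt in patterns:
        mo = re.search(timere, txt)
        if mo is not None:
            return datetime.datetime.strptime(mo.group(1), timefmt).replace(tzinfo=datatz), False
    return None, True

print(parse_stand('Stand: 05.03.2021, 14:30 Uhr'))   # (2021-03-05 14:30 CET, False)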
Code example #4
import argparse
import fetchhelper

ap = argparse.ArgumentParser()
fetchhelper.add_arguments(ap)
args = ap.parse_args()

import subprocess, csv
from datetime import datetime, timedelta
import dateutil.tz

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater(
    'https://sozialministerium.baden-wuerttemberg.de/fileadmin/redaktion/m-sm/intern/downloads/Downloads_Gesundheitsschutz/Tabelle_Coronavirus-Faelle-BW.xlsx',
    ext='xlsx')

update.check_fetch(args.rawfile, binary=True)

parse = fetchhelper.ParseData(update, 'timeline')

proc = subprocess.Popen(['xlsx2csv', update.rawfile],
                        stdout=subprocess.PIPE,
                        encoding='utf-8')
cr = csv.reader(proc.stdout)

with open(parse.parsedfile, 'w') as pf:
    cpf = csv.writer(pf)
    start = False
    dates = None
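
The excerpt stops right after setting up the xlsx2csv pipe. The pattern being used (spawn xlsx2csv, read its stdout as CSV rows, then wait for the process) can be wrapped up as in the sketch below; this only illustrates the streaming pattern, the actual column handling of the spreadsheet is not reproduced here.

import csv
import subprocess
import sys

def xlsx_rows(path):
    # yield the rows of an .xlsx file by piping it through the xlsx2csv tool
    proc = subprocess.Popen(['xlsx2csv', path], stdout=subprocess.PIPE, encoding='utf-8')
    try:
        yield from csv.reader(proc.stdout)
    finally:
        proc.stdout.close()
        if proc.wait() != 0:
            print('xlsx2csv failed for %s' % path, file=sys.stderr)

# for row in xlsx_rows('Tabelle_Coronavirus-Faelle-BW.xlsx'):
#     print(row)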
Code example #5
import argparse
import fetchhelper

ap = argparse.ArgumentParser()
fetchhelper.add_arguments(ap)
args = ap.parse_args()

import subprocess, datetime, re, csv, os, sys
from bs4 import BeautifulSoup
import dateutil.tz
from dataclasses import dataclass

datatz = dateutil.tz.gettz('Europe/London')

# Public Health England
update = fetchhelper.Updater(
    'https://fingertips.phe.org.uk/documents/Historic%20COVID-19%20Dashboard%20Data.xlsx',
    ext='xlsx')
update.check_fetch(rawfile=args.rawfile, binary=True)

regions = {
    'E06000001': ('Hartlepool', 'North East'),
    'E06000002': ('Middlesbrough', 'North East'),
    'E06000003': ('Redcar and Cleveland', 'North East'),
    'E06000004': ('Stockton-on-Tees', 'North East'),
    'E06000005': ('Darlington', 'North East'),
    'E06000006': ('Halton', 'North West'),
    'E06000007': ('Warrington', 'North West'),
    'E06000008': ('Blackburn with Darwen', 'North West'),
    'E06000009': ('Blackpool', 'North West'),
    'E06000010': ('Kingston upon Hull, City of', 'Yorkshire and The Humber'),
    'E06000011': ('East Riding of Yorkshire', 'Yorkshire and The Humber'),
Code example #6
    now = datetime.datetime.now()
    if now.time() > datetime.time(12, 0):
        # We expect data for the current day
        target = now.date()
    else:
        # We expect at least data for the previous day
        target = (now - datetime.timedelta(days=1)).date()
    match = target.isoformat()
    if glob.glob('data/*%s*.csv' % match):
        # Looks good.
        print("Data for %s already saved." % match)
        sys.exit(0)

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater('https://www.mags.nrw/coronavirus-fallzahlen-nrw')
update.check_fetch(rawfile=args.rawfile)

html = BeautifulSoup(update.rawdata, 'html.parser')


def clean_num(numstr):
    if numstr in ['', '-']:
        return 0
    return int(re.sub(r'[.:]', '', numstr).strip())
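# e.g. clean_num('1.234') -> 1234, clean_num('-') -> 0 ('.' is the German
# thousands separator; '-' and the empty string are treated as 0)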


header = html.find(text="Bestätigte Fälle (IfSG)")

parse = fetchhelper.ParseData(update, 'data')
Code example #7
ap = argparse.ArgumentParser()
fetchhelper.add_arguments(ap)
args = ap.parse_args()

fetchhelper.check_oldfetch(args)

import datetime, re, csv, json, os, sys, shutil
import dateutil.tz

url = 'https://www.landratsamt-dachau.de/bilder/zahlen.jpg'
url = 'https://atlas.jifo.co/api/connectors/c3a4b965-0e10-46db-aec6-cceffdb74857'

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater(url, ext='json')
update.check_fetch(rawfile=args.rawfile)
jdat = json.loads(update.rawdata)

header = jdat['data'][0][0]
i_kom = header.index("Gemeinde")
i_con = header.index("Fälle insgesamt")
i_rec = header.index("Genesen")

parse = fetchhelper.ParseData(update, 'data')
parse.parsedtime = datetime.datetime.fromtimestamp(jdat['refreshed'] / 1000,
                                                   tz=datatz)
with open(parse.parsedfile, 'w') as f:
    cw = csv.writer(f)
    cw.writerow(['Kommune', 'Timestamp', 'Confirmed', 'Recovered'])
    for jrow in jdat['data'][0][1:]:
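        # assumed continuation: the loop body is cut off in this excerpt; the
        # column order follows the header indices computed above
        cw.writerow([jrow[i_kom], parse.parsedtime, jrow[i_con], jrow[i_rec]])
parse.deploy_timestamp()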
Code example #8
    stripped = re_strip_landkreis.sub('', rawlabel)
    if stripped in rawlabel_landkreis:
        return rawlabel_landkreis[stripped]
    else:
        return rawlabel


def get_regierungsbezirk(rawlabel):
    landkreis = clean_landkreis(rawlabel)
    return rawlabel_regierungsbezirk[landkreis]


datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater(
    'https://www.lgl.bayern.de/gesundheit/infektionsschutz/infektionskrankheiten_a_z/coronavirus/karte_coronavirus/index.htm'
)
update.check_fetch(rawfile=args.rawfile)

# accidentally duplicated <tr> and other html errors
update.rawdata = re.sub(r'<tr>\s*<tr>', r'<tr>', update.rawdata)
update.rawdata = re.sub(r'(<th><span>[^<>]*</span>)</(td|div)>', r'\1</th>',
                        update.rawdata)
html = BeautifulSoup(update.rawdata, 'html.parser')

datenode = html.find('script', text=re.compile(r'var publikationsDatum = '))
if datenode is None:
    print("Cannot find publish date", file=sys.stderr)
    sys.exit(1)
datemo = re.search(r'"(\d\d.\d\d.\d\d\d\d)"', datenode.get_text())
if datemo is None:
Code example #9
File: fetch.py Project: sth/covid-19-data
if args.rawfile is not None:
    args.rawfile = args.rawfile.split(',')
else:
    args.rawfile = (None, None)

import datetime, re, csv, os
import json
import dateutil.tz

datatz = dateutil.tz.gettz('Europe/Berlin')

# Bundesländer
url_bl = 'https://services7.arcgis.com/mOBPykOjAyBO2ZKk/ArcGIS/rest/services/Coronaf%c3%a4lle_in_den_Bundesl%c3%a4ndern/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=LAN_ew_GEN%2CAktualisierung%2CFallzahl%2CDeath%2CLAN_ew_AGS&returnGeometry=false&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token='

updatebl = fetchhelper.Updater(url_bl, ext='bl.json')
updatebl.check_fetch(rawfile=args.rawfile[0])

jdat = json.loads(updatebl.rawdata)

parsebl = fetchhelper.ParseData(updatebl, 'data')
parsebl.parsedtime = None
with open(parsebl.parsedfile, 'w') as outf:
    cout = csv.writer(outf)
    cout.writerow(['Bundesland', 'AGS', 'Timestamp', 'EConfirmed', 'EDeaths'])
    for jfeat in sorted(jdat['features'], key=(lambda f: f['attributes']['LAN_ew_GEN'])):
        ts = datetime.datetime.fromtimestamp(jfeat['attributes']['Aktualisierung']/1000, tz=datatz)
        if parsebl.parsedtime is None or ts > parsebl.parsedtime:
            parsebl.parsedtime = ts
        cout.writerow([
            jfeat['attributes']['LAN_ew_GEN'],
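            # assumed continuation (the call is cut off here); the remaining
            # columns presumably come from the other outFields in the query URL
            jfeat['attributes']['LAN_ew_AGS'],
            ts,
            jfeat['attributes']['Fallzahl'],
            jfeat['attributes']['Death'],
        ])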
Code example #10
import argparse
import fetchhelper

ap = argparse.ArgumentParser()
fetchhelper.add_arguments(ap)
args = ap.parse_args()

fetchhelper.check_oldfetch(args)

import subprocess, datetime, re, csv, os, sys, shutil
import urllib.parse
from bs4 import BeautifulSoup
import dateutil.tz

url = 'https://www.landratsamt-dachau.de/bilder/zahlen.jpg'

datatz = dateutil.tz.gettz('Europe/Berlin')

#txt = str(html.find(text=re.compile('Landkreis-Statistik ')))
#mo = re.search(r'Landkreis-Statistik(?: nach Gemeinden)? für den (\d\d.\d\d.\d\d\d\d)', txt)
#datatime = parse.parsedtime = update.contenttime = datetime.datetime.strptime(mo.group(1) + ' 21:30', '%d.%m.%Y %H:%M').replace(tzinfo=datatz)

update = fetchhelper.Updater(url, ext='png')
update.check_fetch(rawfile=args.rawfile, binary=True, remotetime=True)
datatime = datetime.datetime.fromtimestamp(os.stat(update.rawfile).st_mtime)

if not os.path.exists('collected'):
    os.mkdir('collected')
shutil.copy(
    update.rawfile,
    'collected/gemeinden_%s.png' % datatime.isoformat(timespec='minutes'))
Code example #11
File: fetch.py Project: sth/covid-19-data
import argparse
import fetchhelper

ap = argparse.ArgumentParser()
fetchhelper.add_arguments(ap)
args = ap.parse_args()

import subprocess, datetime, re, csv, os, sys
import dateutil.tz

datatz = dateutil.tz.gettz('Europe/Stockholm')

#https://services5.arcgis.com/fsYDFeRKu1hELJJs/arcgis/rest/services/FOHM_Covid_19_FME_1/FeatureServer/1/query?f=json&where=Statistikdatum%3E%3Dtimestamp%20%272020-03-26%2023%3A00%3A00%27%20AND%20Statistikdatum%3C%3Dtimestamp%20%272020-03-27%2022%3A59%3A59%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=Statistikdatum%20desc&outSR=102100&resultOffset=0&resultRecordCount=2000&cacheHint=true

update = fetchhelper.Updater('https://services5.arcgis.com/fsYDFeRKu1hELJJs/arcgis/rest/services/FOHM_Covid_19_FME_1/FeatureServer/1/query?f=json&where=1%3d1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=Statistikdatum%20desc&outSR=102100&resultOffset=0&resultRecordCount=2000&cacheHint=true', ext='json')
update.check_fetch(rawfile=args.rawfile)

import json
with open(update.rawfile) as f:
    jd = json.load(f)

#'Statistikdatum': 1581552000000,
areasum = {
    'Blekinge': 0,
    'Dalarna': 0,
    'Gotland': 0,
    'Gävleborg': 0,
    'Halland': 0,
    'Jämtland': 0,
    'Jönköping': 0,
Code example #12
if args.rawfile:
    args.rawfile = args.rawfile.split(',', 1)
else:
    args.rawfile = (None, None)

import subprocess, datetime, re, csv, os, sys, shutil
import urllib.parse
from bs4 import BeautifulSoup
import dateutil.tz

url = 'https://www.landratsamt-dachau.de/gesundheit-veterinaerwesen-sicherheitsrecht/gesundheit/coronavirus/corona-statistiken/'

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater(url)
update.check_fetch(rawfile=args.rawfile[0])

html = BeautifulSoup(update.rawdata, 'html.parser')

parse = fetchhelper.ParseData(update, 'data')

#txt = str(html.find(text=re.compile('Landkreis-Statistik ')))
#mo = re.search(r'Landkreis-Statistik(?: nach Gemeinden)? für den (\d\d.\d\d.\d\d\d\d)', txt)
#datatime = parse.parsedtime = update.contenttime = datetime.datetime.strptime(mo.group(1) + ' 21:30', '%d.%m.%Y %H:%M').replace(tzinfo=datatz)

iframe = html.find('iframe')
furl = urllib.parse.urljoin(url, iframe['src'])

update_f = fetchhelper.Updater(furl, ext='iframe.html')
update_f.check_fetch(rawfile=args.rawfile[1], remotetime=True)
Code example #13
args = ap.parse_args()

import subprocess, datetime, re, csv, os
from datetime import datetime
from bs4 import BeautifulSoup
import dateutil.tz


def cleannum(s):
    return int(s.replace('.', '').rstrip('+*^'))


datatz = dateutil.tz.gettz('Europe/Vienna')

update = fetchhelper.Updater(
    'https://www.sozialministerium.at/Informationen-zum-Coronavirus/Neuartiges-Coronavirus-(2019-nCov).html'
)
update.check_fetch(rawfile=args.rawfile)

html = BeautifulSoup(update.rawdata, 'html.parser')


class NotFound(Exception):
    pass


def parse_counts(parse, base, lead):
    infotext = base.find("div", class_="infobox").find(string=re.compile(lead))
    if infotext is None:
        infotext = base.find("main",
                             id="content").find(string=re.compile(lead))
Code example #14
File: fetch_disabled.py Project: sth/covid-19-data
targetdate = None


@dataclasses.dataclass
class Cases:
    kreis: str
    confirmed: int = 0
    deaths: int = 0
    recovered: int = 0
    date: None = None


newest = []
for kreisid, name in sorted(kreise.items()):
    update = fetchhelper.Updater(
        f'https://www.lzg.nrw.de/covid19/daten/covid19_{kreisid}.csv',
        ext=f'{kreisid}.csv')
    k_rawfile = (None if args.rawfile is None else
                 glob.glob(f'{args.rawfile}.{kreisid}.csv')[0])
    update.check_fetch(rawfile=k_rawfile)
    with open(update.rawfile, 'r', encoding='utf-8-sig') as rf:
        cf = csv.reader(rf)
        header = next(cf)
        cols = coldefs.build(header)
        # newest line is last, so iterate through whole file
        # The data contains several "kummuliert" columns, but those sometimes seem to be rounded
        # Lets hope thats no longer the case
        cases = Cases(kreisid)
        for line in cf:
            fields = cols.get(line)
            cases.confirmed = fields.confirmed
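            # assumed continuation (the loop is cut off here): the remaining
            # Cases fields are presumably filled the same way, so the last and
            # therefore newest line wins
            cases.deaths = fields.deaths
            cases.recovered = fields.recovered
            cases.date = fields.date
        # after the loop, cases presumably ends up in the newest list above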
Code example #15
class CountryData:
    code: str
    name: str
    timestamp: datetime.datetime
    confirmed: int = None
    deaths: int = None


if args.rawfile is not None:
    args.rawfile = args.rawfile.split(',')
else:
    args.rawfile = [None, None]

countrydata = {}

country_data = fetchhelper.Updater(url_countries, ext='country.json')
country_data.check_fetch(rawfile=args.rawfile[1])
jdat = json.loads(country_data.rawdata)

# lastUpdatedAt ends in 'Z', i.e. the value is already UTC
datatime = datetime.datetime.fromisoformat(
    jdat['metadata']['lastUpdatedAt'].rstrip('Z')).replace(
        tzinfo=datetime.timezone.utc)
parses = []
parse = fetchhelper.ParseData(country_data, 'countries')
parse.parsedtime = datatime
with open(parse.parsedfile, 'w') as f:
    cw = csv.writer(f)
    header = ['Code', 'Country', 'Timestamp', 'Confirmed', 'Deaths']
    cw.writerow(header)
    for (code, data) in jdat.items():
        if code == 'metadata':
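            # assumed continuation: the metadata entry is presumably skipped and
            # every other entry becomes one CSV row in the header layout above
            continue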
Code example #16
File: fetch.py Project: sth/covid-19-data
fetchhelper.check_oldfetch(args)

if args.rawfile is None:
    rawfiles = (None, None)
else:
    rawfiles = args.rawfile.split(',')

import datetime, re, csv, json, os, sys, shutil
import dateutil.tz

url_cases = 'https://atlas.jifo.co/api/connectors/41be7d71-7260-497f-a60b-adce5aa9445d'
url_recovered = 'https://atlas.jifo.co/api/connectors/2adaf217-e526-492a-bcad-5ed6ec6ad3ad'

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater(url_cases, ext='cases.json')
update.check_fetch(rawfile=rawfiles[0])
jdat = json.loads(update.rawdata)

header = jdat['data'][0][0]
i_kom = header.index("Ort")
i_con = header.index("Gesamtzahl seit Ausbruch")

parses = []

parse = fetchhelper.ParseData(update, 'data')
parse.parsedtime = datetime.datetime.fromtimestamp(jdat['refreshed'] / 1000,
                                                   tz=datatz)
with open(parse.parsedfile, 'w') as f:
    cw = csv.writer(f)
    cw.writerow(['Kommune', 'Timestamp', 'Confirmed'])
Code example #17
File: fetch_disabled.py Project: sth/covid-19-data
fetchhelper.add_arguments(ap)
args = ap.parse_args()

import subprocess, datetime, re, csv, os
from datetime import datetime
from bs4 import BeautifulSoup
import dateutil.tz


def cleannum(s):
    return int(s.replace(' ', ''))


datatz = dateutil.tz.gettz('America/New_York')

update = fetchhelper.Updater(
    'https://covidtracking.com/api/v1/states/current.csv', ext='csv')
update.check_fetch(rawfile=args.rawfile)

parse = fetchhelper.ParseData(update, 'data')
parse.parsedtime = None
with open(update.rawfile) as inf:
    cr = csv.reader(inf)
    header = next(cr)

    selector_labels = ['state', 'lastUpdateEt', 'positive', 'death']
    selector = []
    for sl in selector_labels:
        for (i, h) in enumerate(header):
            if h == sl:
                selector.append(i)
                break
Code example #18
File: fetch.py Project: sth/covid-19-data

def cleannum(s):
    return int(s.replace('.', '').replace(',', '').rstrip('+*^◊'))


datatz = dateutil.tz.gettz('Europe/Vienna')

# page:
url = 'https://www.sozialministerium.at/Informationen-zum-Coronavirus/Neuartiges-Coronavirus-(2019-nCov).html'
# iframe:
url = 'https://info.gesundheitsministerium.gv.at/?re=tabelle'
# csv:
url = 'https://info.gesundheitsministerium.gv.at/data/timeline-faelle-bundeslaender.csv'

update = fetchhelper.Updater(url, ext='csv')
update.check_fetch(rawfile=args.rawfile, checkdh=False)

coldefs = csvtools.CSVColumns(
    timestamp=['Datum'],
    area=['Name'],
    confirmed=['BestaetigteFaelleBundeslaender'],
    deaths=['Todesfaelle'],
    recovered=['Genesen'],
    hospital=["Hospitalisierung"],
    intensive=["Intensivstation"],
    tests=["Testungen"],
)
coldefs.set_type('timestamp', datetime.fromisoformat)
coldefs.set_type('confirmed', int)
coldefs.set_type('deaths', int)
Code example #19
import argparse
import fetchhelper

ap = argparse.ArgumentParser()
fetchhelper.add_arguments(ap)
args = ap.parse_args()

import subprocess, datetime, re, csv, os, sys
from bs4 import BeautifulSoup
import dateutil.tz

datatz = dateutil.tz.gettz('Europe/Berlin')

update = fetchhelper.Updater(
    'https://www.landratsamt-dachau.de/gesundheit-veterinaerwesen-sicherheitsrecht/gesundheit/coronavirus/'
)
update.check_fetch(rawfile=args.rawfile)

# accidentally duplicated <tr> and other html errors
html = BeautifulSoup(update.rawdata, 'html.parser')

parse = fetchhelper.ParseData(update, 'data')

txt = str(html.find(text=re.compile('Landkreis-Statistik ')))
mo = re.search(
    r'Landkreis-Statistik(?: nach Gemeinden)? für den (\d\d.\d\d.\d\d\d\d)',
    txt)
datatime = parse.parsedtime = update.contenttime = datetime.datetime.strptime(
    mo.group(1) + ' 21:30', '%d.%m.%Y %H:%M').replace(tzinfo=datatz)
Code example #20
File: fetch_disabled.py Project: sth/covid-19-data
args = ap.parse_args()

import subprocess, datetime, re, csv, os
from datetime import datetime
from bs4 import BeautifulSoup
import dateutil.tz


def cleannum(s):
    return int(s.replace(' ', ''))


datatz = dateutil.tz.gettz('Europe/Paris')

update = fetchhelper.Updater(
    'https://www.santepubliquefrance.fr/maladies-et-traumatismes/maladies-et-infections-respiratoires/infection-a-coronavirus/articles/infection-au-nouveau-coronavirus-sars-cov-2-covid-19-france-et-monde'
)
update.check_fetch(rawfile=args.rawfile)
if args.only_changed:
    if not update.raw_changed():
        print("downloaded raw data unchanged")
        exit(0)

html = BeautifulSoup(update.rawdata, 'html.parser')

tab = html.find(
    string=re.compile('R.*gion de notification')).find_parent('table')

datestr = tab.find_previous('h4').get_text()
mo = re.search(r'(\d\d/\d\d/\d\d\d\d) à (\d\d)h', datestr)
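# assumed continuation (the excerpt stops here): build an aware timestamp from
# the captured date and hour; the variable name datatime is a guess
datatime = datetime.strptime('%s %s' % (mo.group(1), mo.group(2)),
                             '%d/%m/%Y %H').replace(tzinfo=datatz)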
Code example #21
class CountryData:
    code: str
    name: str
    timestamp: datetime.datetime
    confirmed: int = None
    deaths: int = None


if args.rawfile is not None:
    args.rawfile = args.rawfile.split(',')
else:
    args.rawfile = [None, None]

countrydata = {}

update_buckets = fetchhelper.Updater(url_buckets, ext='buckets.xml')
update_buckets.check_fetch(rawfile=args.rawfile[0])

xdoc = lxml.etree.fromstring(update_buckets.rawdata)
newest_name = None
newest_time = None
for blob in xdoc.xpath('//Blob'):
    name = blob.xpath('./Name')[0].text
    if not re.match(r'data_.*\.json', name):
        continue
    modified = datetime.datetime.strptime(
        blob.xpath('./Properties/Last-Modified')[0].text,
        '%a, %d %b %Y %H:%M:%S %Z')
    if newest_time is None or modified > newest_time:
        newest_name = name
        newest_time = modified