def date_from_text(date_str):
    """Convert a textual date into a ``datetime.date``.

    The text is first passed through ``parse_date``. Everything from the
    first "T" of that result onward is dropped, and what remains is read as
    dash-separated year, month and day numbers.

    Args:
        date_str: Date text in whatever form ``parse_date`` accepts.

    Returns:
        The calendar date as a ``datetime.date``.
    """
    date_part, _, _ = parse_date(date_str).partition("T")
    fields = date_part.split('-', 2)
    year, month, day_of_month = int(fields[0]), int(fields[1]), int(fields[2])
    return datetime.date(year, month, day_of_month)
'district_id': '1909', 'population': 47926, }, 'Zurzach': { 'pattern': r'^Z.+zach.*', 'district_id': '1911', 'population': 34650, }, } data_url = 'https://www.ag.ch/de/themen_1/coronavirus_2/lagebulletins/lagebulletins_1.jsp' d = sc.download(data_url, silent=True) soup = BeautifulSoup(d, 'html.parser') img_caption = soup.find(string=re.compile(r".*Inzidenz pro 100'000 Einwohner nach Bezirke.*")) img_date = sc.find(r'\(Stand:?\s+(.*\d{4})', img_caption.string) img_date = datetime.datetime.fromisoformat(parse_date(img_date).split('T', 1)[0]) img_url = img_caption.find_previous('img')['src'] img_url = 'https://www.ag.ch/media/kanton_aargau/themen_1/coronavirus_1/bilder_11/daten/Inzidenz_pro_100K_Einwohner_content_large.jpg' if not img_url.startswith('http'): img_url = f'https://www.ag.ch{img_url}' # download the image to a temporary file _, path = tempfile.mkstemp(suffix='.jpg') sc.download_file(img_url, path) # convert to binary image img = cv2.imread(path) gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) gray, img_bin = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU) gray = cv2.bitwise_not(img_bin)
v = v.strip() # Ignore k or v, if v is "None" if v == "None": print(f'WARNING: {k} is None', file=sys.stderr) warns.append(f"{k} is None") continue if k == "Downloading": url_sources.append(v) continue if k == "Scraped at": scrape_time = v continue if k == "Date and time": new_date = parse_date(v) parts = new_date.split("T", 1) day = parts[0].split('-', 2) day = datetime.date(int(day[0]), int(day[1]), int(day[2])) if parts[1] == '24:00': day = day + datetime.timedelta(days=1) new_date = f"{day.isoformat()}T00:00" now = datetime.date.today() if day > now: print(f"Parsed date/time must not be in the future: parsed: {day}: now: {now}", file=sys.stderr) errs.append(f"Date {day} in the future") # In case there are multiple "Date and time", use first one, # or the one which is more specific (includes time). if date is None or len(new_date) > len(date):
}, 'Zurzach': { 'pattern': r'^Z.+zach.*', 'district_id': '1911', 'population': 34650, }, } data_url = 'https://www.ag.ch/de/themen_1/coronavirus_2/lagebulletins/lagebulletins_1.jsp' d = sc.download(data_url, silent=True) soup = BeautifulSoup(d, 'html.parser') img_caption = soup.find( string=re.compile(r".*Inzidenz pro 100'000 Einwohner nach Bezirke.*")) img_date = sc.find(r'\(Stand:?\s+(.*\d{4})', img_caption.string) img_date = datetime.datetime.fromisoformat( parse_date(img_date).split('T', 1)[0]) img_url = img_caption.find_previous('img')['src'] img_url = 'https://www.ag.ch/media/kanton_aargau/themen_1/coronavirus_1/bilder_11/daten/Inzidenz_pro_100K_Einwohner_content_large.jpg' if not img_url.startswith('http'): img_url = f'https://www.ag.ch{img_url}' # download the image to a temporary file _, path = tempfile.mkstemp(suffix='.jpg') sc.download_file(img_url, path) # convert to binary image img = cv2.imread(path) gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) gray, img_bin = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU) gray = cv2.bitwise_not(img_bin)