def parse_date(date):
    """Parse a year-less ``%d-%b`` date string, assuming the current year.

    The current calendar year is appended to ``date`` (joined with a dash)
    and the combined string is passed to ``datetime_isoformat`` together
    with the ``%d-%b-%Y`` pattern; whatever that helper returns is returned
    unchanged.
    """
    current_year = datetime.now().year
    date_with_year = '%s-%d' % (date, current_year)
    return datetime_isoformat(date_with_year, '%d-%b-%Y')
# NOTE(review): the next line is an entire script collapsed onto a single
# physical line. Because it begins with '#', Python treats all of it as a
# comment, so none of it executes. The text also stops mid-statement at
# `data.columns = [`, so it cannot be restored to runnable code from what is
# visible here -- recover the original script before re-enabling it.
#!/usr/bin/env python import re import sys from pandas import read_csv from utils import ROOT, datetime_isoformat, read_metadata, safe_int_cast data = read_csv('https://ocgptweb.azurewebsites.net/CSVDownload') data = data.drop(columns=['CountryName', 'ConfirmedCases', 'ConfirmedDeaths']) data = data.drop( columns=[col for col in data.columns if col.endswith('_Notes')]) data = data.drop( columns=[col for col in data.columns if col.endswith('_IsGeneral')]) data['Date'] = data['Date'].apply(lambda x: datetime_isoformat(x, '%Y%m%d')) # Join with ISO data iso = read_csv(ROOT / 'input' / 'ISO-3166-2.csv')[['3166-2-Alpha-2', '3166-2-Alpha-3']] data = data.rename(columns={'CountryCode': '3166-2-Alpha-3'}).merge(iso) # Join with our metadata metadata = read_metadata()[['Key', 'CountryCode']] data = data.rename(columns={'3166-2-Alpha-2': 'CountryCode'}).merge(metadata) # Use consistent naming convention for columns data = data[[ col for col in data.columns if '_' in col or col in ('Date', 'Key', 'StringencyIndex') ]] data.columns = [col.split('_')[-1] for col in data.columns] data.columns = [
# Retrieve the CSV files from https://covid19.isciii.es
# NOTE(review): `read_argv`, `datetime_isoformat` and `dataframe_output` are
# not defined in the visible part of this file; presumably they are project
# helpers -- confirm they are imported before this script runs.
df = read_argv(encoding='ISO-8859-1').rename(columns={
    'FECHA': 'Date',
    'CCAA': 'RegionCode',
    'Fallecidos': 'Deaths',
}).dropna(subset=['Date'])

# Add the country code to all records
df['CountryCode'] = 'ES'

# Confirmed cases are split across 3 columns. Missing counts are treated as
# zero, then the columns are added up column-wise rather than with a per-row
# Python lambda.
confirmed_columns = ['CASOS', 'PCR+', 'TestAc+']
df[confirmed_columns] = df[confirmed_columns].fillna(0)
df['Confirmed'] = df[confirmed_columns].sum(axis=1)

# Convert dates to ISO format
df['Date'] = df['Date'].apply(
    lambda date: datetime_isoformat(date, '%d/%m/%Y'))

# Country-wide is the sum of all regions. `numeric_only=True` keeps text
# columns such as RegionCode out of the aggregation; without it, pandas >= 2.0
# concatenates the strings of every region into the country-level row.
region_level = df
country_level = df.groupby(
    ['Date', 'CountryCode']).sum(numeric_only=True).reset_index()

# Output the results
dataframe_output(country_level)
dataframe_output(region_level, 'ES')