#!/usr/bin/env python
'''
This script loads the latest JSON from covidtracking.com website and extracts
the confirmed cases, deaths and total tests for each state. Credit to the
covidtracking.com team for scraping the data from each state.
'''
import sys
import datetime
from utils import github_raw_dataframe, dataframe_output

# Read CSV file from covidtracking's GitHub project
df = github_raw_dataframe('COVID19Tracking/covid-tracking-data',
                          'data/states_daily_4pm_et.csv')

# Map the source columns onto our canonical column names
column_map = {
    'date': 'Date',
    'state': 'RegionCode',
    'positive': 'Confirmed',
    'death': 'Deaths',
    'total': 'Tested'
}
df = df.rename(columns=column_map)


def _to_iso(value):
    # Source dates arrive as YYYYMMDD values; normalize to ISO 8601 strings
    return datetime.datetime.strptime(str(value), '%Y%m%d').date().isoformat()


# Convert date to ISO format
df['Date'] = df['Date'].apply(_to_iso)

# Output the results
Credit to the github.com/BlankerL team for scraping the data from DXY.cn. '''
import os
import datetime
from pathlib import Path

import requests

from utils import github_raw_dataframe, dataframe_output, timezone_adjust

# Root path of the project
ROOT = Path(os.path.dirname(__file__)) / '..'

# Read DXY CSV file from website (mirrored on GitHub by BlankerL/DXY-COVID-19-Data)
df = github_raw_dataframe('BlankerL/DXY-COVID-19-Data', 'csv/DXYArea.csv')

# Adjust 7 hour difference between China's GMT+8 and GMT+1
# NOTE(review): exact semantics of timezone_adjust are defined in utils —
# presumably it shifts the timestamp by the given number of hours; verify there
df['Date'] = df['updateTime'].apply(lambda date: timezone_adjust(date, 7))

# Rename the appropriate columns to our canonical schema
df = df.rename(
    columns={
        'countryEnglishName': 'CountryName',
        'provinceEnglishName': 'RegionName',
        'province_confirmedCount': 'Confirmed',
        'province_deadCount': 'Deaths',
        'province_curedCount': 'Recovered'
    })

# Filter China data only
#!/usr/bin/env python
'''
Parse the UK time series published by the tomwhite/covid-19-uk-data project
and reshape it from one-row-per-indicator into one record per date/country.
'''
from pandas import DataFrame
from utils import github_raw_dataframe, dataframe_output

# Read data from GitHub repo
df = github_raw_dataframe('tomwhite/covid-19-uk-data',
                          'data/covid-19-indicators-uk.csv')

# Aggregate time series data into relational format: each (Date, Country)
# group holds one row per indicator, which we pivot into columns
records = []
for (date, country), group in df.groupby(['Date', 'Country']):
    indicators = {}
    for _, row in group.iterrows():
        indicators[row.loc['Indicator']] = row.loc['Value']
    records.append({'Date': date, 'Country': country, **indicators})

df = DataFrame.from_records(records).rename(columns={
    'Country': '_RegionLabel',
    'ConfirmedCases': 'Confirmed'
})

# Output the results
dataframe_output(df, 'GB')
import os
import sys
from pathlib import Path
from datetime import datetime

from numpy import unique
from pandas import DataFrame

from utils import github_raw_dataframe, dataframe_output

# Root path of the project
ROOT = Path(os.path.dirname(__file__)) / '..'

# Read CSV file from GitHub project (jgehrcke's Germany time series)
df = github_raw_dataframe('jgehrcke/covid-19-germany-gae', 'data.csv')

# Rename the appropriate columns
df = df.rename(columns={'time_iso8601': 'Date'})

# Convert dates to ISO format (drop the time-of-day component)
df['Date'] = df['Date'].apply(
    lambda date: datetime.fromisoformat(date).date().isoformat())

# Get a list of all region codes.
# Region columns look like 'DE-XX...'; characters 3-5 hold the two-letter code
regions = unique([col[3:5] for col in df.columns if col.startswith('DE-')])

# Transform the data from non-tabulated format to our record format
records = []
for idx, row in df.iterrows():
    record = {'Date': row['Date'], 'CountryCode': 'DE'}
import os
import sys
from pathlib import Path
from datetime import datetime, timedelta

import pandas

from utils import parse_level_args, github_raw_dataframe, dataframe_output, merge_previous

# Root path of the project.
# FIX: `os`, `sys` and `Path` are used below but were missing from the
# visible import list, which would raise NameError at runtime; importing
# them here is harmless even if they are also imported elsewhere.
ROOT = Path(os.path.dirname(__file__)) / '..'

# This script can parse both region-level and country-level data
is_region = parse_level_args(sys.argv[1:]).level == 'region'

# Confirmed and deaths come from different CSV files, parse them separately first
confirmed = github_raw_dataframe(
    'datadista/datasets',
    'COVID%2019/ccaa_covid19_casos_long.csv').rename(columns={
        'fecha': 'Date',
        'CCAA': '_RegionLabel',
        'total': 'Confirmed'
    })
deaths = github_raw_dataframe(
    'datadista/datasets',
    'COVID%2019/ccaa_covid19_fallecidos_long.csv').rename(columns={
        'fecha': 'Date',
        'CCAA': '_RegionLabel',
        'total': 'Deaths'
    })

# Now we can simply join them into the same table on (Date, _RegionLabel)
df = confirmed.merge(deaths)

# Parse date into a datetime object
df['Date'] = df['Date'].apply(lambda date: datetime.fromisoformat(date).date())
import os
from pathlib import Path
from datetime import datetime
from pandas import DataFrame
from utils import github_raw_dataframe, dataframe_output, merge_previous

# Root path of the project
ROOT = Path(os.path.dirname(__file__)) / '..'

# Read the state-level JSON time series from the covid-19-au project;
# transpose so each row is a date and each column a region code
df = github_raw_dataframe('covid-19-au/covid-19-au.github.io',
                          'src/data/state.json',
                          branch='prod').transpose()

# Transform the data from non-tabulated format to record format.
# Each cell holds a list of up to four values in fixed positional order:
# confirmed, deaths, recovered, tested (trailing entries may be absent).
records = []
for date_idx, row in df.iterrows():
    for region_code in df.columns:
        values = row[region_code]
        record = {
            'Date': date_idx.date().isoformat(),
            'RegionCode': region_code,
            'Confirmed': values[0],
        }
        # Optional trailing fields are only present for some dates/regions
        for field, value in zip(('Deaths', 'Recovered', 'Tested'), values[1:]):
            record[field] = value
        records.append(record)

df = DataFrame.from_records(records)

# Output the results
import os
import sys
import datetime
from pathlib import Path
import pandas as pd
from utils import github_raw_dataframe, dataframe_output, pivot_table, ROOT

# Read the Mexico time series: one row per date, one column per region, with
# confirmed counts in '<code>' and deaths in '<code>_D'
df = github_raw_dataframe('carranco-sga/Mexico-COVID-19', 'Mexico_COVID19.csv')
df = df.rename(columns={'Fecha': 'Date'}).set_index('Date')

# Death columns carry a '_D' suffix; the matching confirmed column has the
# same name without the suffix
deaths_columns = [col for col in df.columns if col.endswith('_D')]
confirmed_columns = [col[:-2] for col in deaths_columns]

# FIX: take explicit copies instead of slice views of `df` — assigning
# `.columns` on a view triggers pandas' SettingWithCopyWarning and relies on
# chained-assignment behavior that pandas does not guarantee
deaths = df[deaths_columns].copy()
confirmed = df[confirmed_columns].copy()

# Align death column names with the confirmed ones so both pivot identically
deaths.columns = confirmed.columns

# Pivot each frame into long format: (Date, RegionCode, Value)
deaths = pivot_table(
    deaths, pivot_name='RegionCode').rename(columns={'Value': 'Deaths'})
confirmed = pivot_table(
    confirmed, pivot_name='RegionCode').rename(columns={'Value': 'Confirmed'})

# Join into a single table and sort for deterministic output ordering
df = confirmed.merge(deaths).sort_values(['Date', 'RegionCode'])

# Output the results
dataframe_output(df, 'MX')
#!/usr/bin/env python
'''
Parse the Portugal time series from dssg-pt/covid19pt-data, which publishes
one confirmed/deaths column pair per region in a single wide CSV file.
'''
from datetime import datetime
import pandas
from utils import github_raw_dataframe, dataframe_output

# Read data from GitHub repo
df = github_raw_dataframe('dssg-pt/covid19pt-data', 'data.csv')

# Source dates are DD-MM-YYYY; normalize to ISO 8601
df['Date'] = df['data'].apply(
    lambda date: datetime.strptime(date, '%d-%m-%Y').date().isoformat())

# Extract regions from the data: region names are the suffix of the
# 'confirmados_*' columns, skipping short codes and non-region aggregates
regions = []
for col in df.columns:
    if not col.startswith('confirmados_'):
        continue
    suffix = col.split('_')[-1]
    if len(suffix) > 2 and suffix not in ('novos', 'estrangeiro'):
        regions.append(suffix)

# Aggregate regions into a single data frame
subsets = []
for region in regions:
    confirmed_col = 'confirmados_%s' % region
    deaths_col = 'obitos_%s' % region
    subset = df[['Date', confirmed_col, deaths_col]].copy()
    # Strip the 'ars' prefix used by the source for health administration regions
    subset['_RegionLabel'] = region.replace('ars', '')
    subsets.append(subset.rename(columns={
        confirmed_col: 'Confirmed',
        deaths_col: 'Deaths'
    }))
import os
import sys
from pathlib import Path
from datetime import datetime, timedelta

import pandas

from utils import parse_level_args, github_raw_dataframe, dataframe_output, merge_previous

# Root path of the project.
# FIX: `import os` was missing from the visible import list even though
# os.path.dirname is used here, raising NameError at runtime; importing it
# is harmless even if it is also imported elsewhere.
ROOT = Path(os.path.dirname(__file__)) / '..'

# This script can parse both region-level and country-level data
is_region = parse_level_args(sys.argv[1:]).level == 'region'

# Region-level and country-level series live in different JSON files
if is_region:
    df = github_raw_dataframe('pcm-dpc/COVID-19',
                              'dati-json/dpc-covid19-ita-regioni.json')
else:
    df = github_raw_dataframe(
        'pcm-dpc/COVID-19',
        'dati-json/dpc-covid19-ita-andamento-nazionale.json')

# Rename the appropriate columns to our canonical schema
df = df.rename(
    columns={
        'data': 'Date',
        'totale_casi': 'Confirmed',
        'deceduti': 'Deaths',
        'tamponi': 'Tested'
    })

# Use the Italian region name as the region label (region-level only)
if is_region:
    df['_RegionLabel'] = df['denominazione_regione']
import os
from pathlib import Path
from datetime import datetime
from pandas import DataFrame
from utils import github_raw_dataframe, dataframe_output

# Root path of the project
ROOT = Path(os.path.dirname(__file__)) / '..'

# Read data from GitHub repo: confirmed cases and deaths are published as
# two separate wide-format CSV files with one column per day
confirmed = github_raw_dataframe('elhenrico/covid19-Brazil-timeseries',
                                 'confirmed-cases.csv')
deaths = github_raw_dataframe('elhenrico/covid19-Brazil-timeseries',
                              'deaths.csv')
for df in (confirmed, deaths):
    df.rename(columns={'Unnamed: 1': 'RegionCode'}, inplace=True)
    df.set_index('RegionCode', inplace=True)

# Transform the data from non-tabulated format to record format
records = []
for region_code in confirmed.index.unique():
    for col in confirmed.columns[1:]:
        # Columns are day/month only; assume the current year
        # NOTE(review): this breaks for series spanning a year boundary — confirm
        date = col + '/' + str(datetime.now().year)
        date = datetime.strptime(date, '%d/%m/%Y').date().isoformat()
        records.append({
            'Date': date,
            'RegionCode': region_code,
            'Confirmed': confirmed.loc[region_code, col],
            # BUG FIX: deaths were previously read from the `confirmed` frame
            'Deaths': deaths.loc[region_code, col]
        })
import os
import sys
from pathlib import Path
from datetime import datetime
from numpy import unique
from pandas import DataFrame, isna
from utils import github_raw_dataframe, dataframe_output

# Root path of the project
ROOT = Path(os.path.dirname(__file__)) / '..'

# Read CSV file from GitHub project (official opencovid19-fr aggregate)
df = github_raw_dataframe('opencovid19-fr/data', 'dist/chiffres-cles.csv')

# Map the source columns onto our canonical column names
column_map = {
    'date': 'Date',
    'cas_confirmes': 'Confirmed',
    'deces': 'Deaths'
}
df = df.rename(columns=column_map)

# Keep only the rows published at region granularity, labeled by region name
region_mask = df['granularite'] == 'region'
df = df[region_mask]
df['_RegionLabel'] = df['maille_nom']

# There may be more than one row per region if it has multiple sources
g = df[['Date', '_RegionLabel', 'Confirmed', 'Deaths']].groupby(['Date', '_RegionLabel'])
#!/usr/bin/env python
'''
Parse the cedricguadalupe FRANCE-COVID-19 time series, where confirmed cases
and deaths are published as separate wide CSVs with one column per region.
'''
from datetime import datetime
from pandas import DataFrame
from utils import parse_level_args, github_raw_dataframe, dataframe_output, merge_previous

# Confirmed and deaths come from different CSV files, parse them separately first
confirmed = github_raw_dataframe(
    'cedricguadalupe/FRANCE-COVID-19',
    'france_coronavirus_time_series-confirmed.csv')
deaths = github_raw_dataframe('cedricguadalupe/FRANCE-COVID-19',
                              'france_coronavirus_time_series-deaths.csv')
for frame in (confirmed, deaths):
    frame.set_index('Date', inplace=True)

# Iterate through all date-region combinations, filling a frame that is
# multi-indexed on (Date, _RegionLabel)
df = DataFrame(
    columns=['Date', '_RegionLabel', 'Confirmed', 'Deaths']).set_index(
        ['Date', '_RegionLabel'])
for region in confirmed.columns.tolist():
    # 'Total' is a country-wide aggregate column, not a region
    if region == 'Total':
        continue
    for date in confirmed.index:
        df.loc[(date, region), 'Confirmed'] = confirmed.at[date, region]
    for date in deaths.index:
        df.loc[(date, region), 'Deaths'] = deaths.at[date, region]

# Dates need converted to ISO format (source uses DD/MM/YYYY)
df = df.sort_values(['Date', '_RegionLabel']).reset_index()
df['Date'] = df['Date'].apply(
    lambda date: datetime.strptime(date, '%d/%m/%Y').date().isoformat())