def handle_il(res, mapping):
    '''Build the Illinois record from the dashboard and hospital payloads.

    res[0] is the main dashboard payload and res[1] the hospital payload;
    both are indexed like nested dicts. Returns the mapped dict with the
    TIMESTAMP and DATE entries added.
    '''
    state = 'IL'
    state_name = 'Illinois'

    # main dashboard: keep the row of the per-county table whose County is
    # the state name (if several rows match, the last one wins)
    mapped = {}
    for entry in res[0]['characteristics_by_county']['values']:
        if entry['County'] == state_name:
            mapped = map_attributes(entry, mapping, state)

    dashboard_date = res[0]['LastUpdateDate']
    dashboard_day = datetime(dashboard_date['year'],
                             dashboard_date['month'],
                             dashboard_date['day'])
    mapped[Fields.TIMESTAMP.name] = dashboard_day.timestamp()

    # probable
    probable_counts = res[0].get('probable_case_counts', {})
    mapped.update(map_attributes(probable_counts, mapping, state))

    # hospital data
    mapped.update(map_attributes(res[1]['statewideValues'], mapping, state))

    # fill in the date: the hospital update day, stamped at 23:59
    hosp_date = res[1]['lastUpdatedDate']
    hosp_day = datetime(hosp_date['year'], hosp_date['month'],
                        hosp_date['day'], 23, 59)
    mapped[Fields.DATE.name] = hosp_day.strftime("%m/%d/%Y %H:%M:%S")
    return mapped
def handle_mo(res, mapping):
    '''Build the Missouri record from the testing and county tables.

    res[0] and res[1] are filtered and indexed like pandas DataFrames with
    'Measure Names' / 'Measure Values' columns.
    '''
    # testing: keep the 'All' test-date rows and total them per measure
    testing_df = res[0]
    all_dates = testing_df[testing_df['Test Date'] == 'All']
    test_totals = all_dates.groupby('Measure Names', as_index=True).sum()
    mapped = map_attributes(test_totals['Measure Values'], mapping, 'MO')

    # county table: the 'All' county rows, values cast to int
    county_df = res[1]
    all_counties = county_df[county_df['County'] == 'All']
    all_counties = all_counties.set_index('Measure Names')
    county_values = all_counties['Measure Values'].astype(int)
    mapped.update(map_attributes(county_values, mapping, 'MO'))
    return mapped
def handle_ma(res, mapping):
    '''Build the Massachusetts record from the raw-data archive and the
    weekly public health report.

    res[0] is handed to MaContextManager twice: once to get a directory of
    extracted files, once (with the weekly report title and file_type='xls')
    to get a workbook that pandas can read.

    Returns a dict of tagged values.
    '''
    tagged = {}
    files = ['DeathsReported.csv', 'Testing2.csv', 'Cases.csv']
    with MaContextManager(res[0]) as zipdir:
        for filename in files:
            with open(os.path.join(zipdir, filename), 'r') as csvfile:
                rows = list(csv.DictReader(csvfile, dialect='unix'))

            # find last non-empty row. Scanning every row (including the
            # first) and resetting per file avoids reusing a row left over
            # from the previous file.
            last_row = next(
                (row for row in reversed(rows) if row['Date']), None)
            if last_row is None:
                # nothing dated in this file: contribute nothing
                continue
            tagged.update(map_attributes(last_row, mapping, 'MA'))

        # Passing the path lets pandas open and close the workbook itself.
        hosp_df = pd.read_excel(
            os.path.join(zipdir, 'Hospitalization from Hospitals.xlsx'))
        tagged.update(
            map_attributes(hosp_df.iloc[-1].to_dict(), mapping, 'MA'))

        testing_df = pd.read_excel(
            os.path.join(zipdir, 'TestingByDate.xlsx'))
        tagged[Fields.SPECIMENS_POS.name] = testing_df.sum(
        )['All Positive Molecular Tests']

    # weekly report:
    with MaContextManager(res[0],
                          "Weekly Public Health Report - Raw Data",
                          file_type='xls') as tmpfile:
        # sheet_name=None loads every sheet into a dict keyed by sheet name
        df = pd.read_excel(tmpfile, sheet_name=None)

        # recovered
        rec = df['Quarantine and Isolation']
        released = rec[rec['Status'] == 'Total Cases Released from Isolation']
        tagged[Fields.RECOVERED.name] = released.iloc[-1]['Residents']

        antibody = df['Antibody'].sum()
        tagged[Fields.ANTIBODY_TOTAL_PEOPLE.name] = antibody['Total Tests']
        tagged[Fields.ANTIBODY_POS_PEOPLE.name] = antibody['Positive Tests']

        # hospitalizations: sum over the rows of the most recent date
        # (the column name is misspelled in the source data)
        hosp = df['RaceEthnicity']
        maxdate = hosp['Date'].max()
        tagged[Fields.HOSP.name] = hosp[hosp['Date'] == maxdate].sum(
        )['Ever Hospitaltized']
    return tagged
def handle_nd(res, mapping):
    '''Build the North Dakota record.

    res[0] is a parsed HTML document (queried with find/find_all), res[1]
    the per-county testing rows handed to csv_sum, and res[2] a table that
    is summed and grouped like a pandas DataFrame.
    '''
    page = res[0]
    tagged = {}

    # Serology testing: first table, header cells in row 0, values in row 1
    serology_rows = page.find('table').find_all('tr')
    header_cells = serology_rows[0].find_all("td")
    value_cells = serology_rows[1].find_all('td')
    for idx, cell in enumerate(header_cells):
        label = cell.get_text(strip=True)
        if label in mapping:
            tagged[mapping[label]] = atoi(
                value_cells[idx].get_text(strip=True))

    # confirmed+probable death
    deaths_heading = page.find("h2", string=re.compile("Deaths"))
    for row in deaths_heading.find_next("table").find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 2:
            continue
        label_tag = cells[0].find("strong")
        # only rows with a bold label of at least 10 characters are used
        if not label_tag or len(label_tag.get_text()) < 10:
            continue
        label = label_tag.get_text(strip=True)
        total = atoi(cells[1].get_text(strip=True))
        if len(cells) > 2:
            total += atoi(cells[2].get_text(strip=True))
        if label in mapping:
            # NOTE(review): total is already numeric here; atoi is
            # re-applied exactly as the original did
            tagged[mapping[label]] = atoi(total)

    # by county testing snapshot: for negatives
    wanted = [
        Fields.CONFIRMED.name, Fields.NEGATIVE.name,
        Fields.DEATH_CONFIRMED.name
    ]
    columns = [key for key, field in mapping.items() if field in wanted]
    county_totals = csv_sum(res[1], columns=columns)
    tagged.update(map_attributes(county_totals, mapping))

    # PCR encounters and other metrics
    pcr = res[2]
    tagged.update(map_attributes(pcr.sum(), mapping))

    # active hosp/icu should not be summed: take the last date's values
    per_date = pcr.groupby('Date').sum()
    active_latest = per_date.filter(like='Active').iloc[-1]
    tagged.update(map_attributes(active_latest, mapping))
    return tagged
def handle_ma(res, mapping):
    '''Build the Massachusetts record from the MaRawData workbooks.

    MaRawData(res[0]) is indexed by tab name and each tab is used like a
    pandas DataFrame. The weekly report is loaded the same way with an
    explicit report title.
    '''
    tagged = {}
    raw = MaRawData(res[0])

    # report the last row of each of these tabs
    last_row_tabs = ['DeathsReported (Report Date)',
                     'Testing2 (Report Date)',
                     'Cases (Report Date)',
                     'Hospitalization from Hospitals']
    for tab in last_row_tabs:
        tagged.update(map_attributes(raw[tab].iloc[-1], mapping, 'MA'))

    # positive pcr
    positives = raw['TestingByDate (Test Date)'].filter(like='All Positive')
    positive_totals = positives.sum()
    tagged[Fields.SPECIMENS_POS.name] = positive_totals[
        'All Positive Molecular Tests']

    # hospitalizations: sum over the rows of the most recent date
    # (the column name is misspelled in the source data)
    by_race = raw['RaceEthnicityLast2Weeks']
    newest = by_race['Date'].max()
    newest_totals = by_race[by_race['Date'] == newest].sum()
    tagged[Fields.HOSP.name] = newest_totals['Ever Hospitaltized']

    # weekly report:
    weekly = MaRawData(res[0], "Weekly Public Health Report - Raw Data")

    # recovered
    isolation = weekly['Quarantine and Isolation'].sort_values('Date')
    released = isolation[
        isolation['Status'] == 'Total Cases Released from Isolation']
    tagged[Fields.RECOVERED.name] = released.iloc[-1]['Residents']

    antibody_totals = weekly['Antibody'].sum()
    tagged[Fields.ANTIBODY_TOTAL_PEOPLE.name] = antibody_totals['Total Tests']
    tagged[Fields.ANTIBODY_POS_PEOPLE.name] = antibody_totals['Positive Tests']
    return tagged
def handle_fl(res, mapping):
    '''Build the Florida record.

    Need to add the non-FL residents to the totals: they separate it for
    death and hosp.

    res[0] is mapped directly, res[1] is the arcgis stats payload (also the
    source of the non-resident counts) and res[2] is an iterable of
    dict-like rows from the current-hospitalization csv.

    Raises whatever lookup error occurs when the non-resident counters are
    missing from res[1]; the failure is logged before being re-raised.
    '''
    mapped = extract_arcgis_attributes(res[1], mapping, 'FL')
    pcr = map_attributes(res[0], mapping, 'FL')
    mapped.update(pcr)

    stats = res[1]
    try:
        attributes = stats['features'][0]['attributes']
        extra_hosp = attributes['SUM_C_HospYes_NonRes']
        extra_death = attributes['SUM_C_NonResDeaths']
    except (LookupError, TypeError) as ex:
        # The exception needs a %s placeholder; without one the logging
        # module fails to format the record and the message is lost.
        logging.warning("Failed Florida extra processing: %s", ex)
        raise

    mapped[Fields.HOSP.name] += extra_hosp
    mapped[Fields.DEATH.name] += extra_death

    # Current hosp csv
    hosp = res[2]
    for r in hosp:
        if r.get('County') == 'All':
            mapped[Fields.CURR_HOSP.name] = atoi(
                r.get('COVID Hospitalizations'))
    return mapped
def handle_ok(res, mapping):
    '''Build the Oklahoma record.

    Every result except the last is mapped directly; res[1] is then
    summed over the Cases/Deaths/Recovered columns and its first row
    supplies the report date.
    '''
    mapped = {}
    for result in res[:-1]:
        mapped.update(map_attributes(result, mapping, 'OK'))

    # need to sum all values
    # NOTE(review): the loop above skips the *last* item while this reads
    # index 1 -- the two only agree when res has exactly two items
    rows = res[1]

    # sum all fields
    # TODO: functools probably has something nice
    totals = csv_sum(rows, ['Cases', 'Deaths', 'Recovered'])
    mapped.update(map_attributes(totals, mapping, 'OK'))
    mapped[Fields.DATE.name] = rows[0].get('ReportDate')
    return mapped
def handle_ma(res, mapping):
    '''Build the Massachusetts record from the csv files of the raw-data
    archive.

    res is handed to MaContextManager, which yields the directory holding
    the extracted files. The last row of each listed csv is mapped, and
    HOSP is replaced by the sum of the hospitalization column over the
    most recent date in RaceEthnicity.csv.
    '''
    tagged = {}
    files = ['DeathsReported.csv', 'Testing2.csv',
             'Hospitalization from Hospitals.csv', 'Cases.csv']
    with MaContextManager(res) as zipdir:
        for filename in files:
            with open(os.path.join(zipdir, filename), 'r') as csvfile:
                rows = list(csv.DictReader(csvfile, dialect='unix'))
            tagged.update(map_attributes(rows[-1], mapping, 'MA'))

        # source column that maps to HOSP (last match wins, "" if none)
        hosp_key = ""
        for k, v in mapping.items():
            if v == Fields.HOSP.name:
                hosp_key = k

        # Read inside a `with` so the handle is closed before the
        # extracted directory goes away.
        with open(os.path.join(zipdir, "RaceEthnicity.csv"), 'r') as hospfile:
            hosprows = list(csv.DictReader(hospfile))

        # keep only the rows sharing the date of the final row
        last_date = hosprows[-1]['Date']
        hosprows = [x for x in hosprows if x['Date'] == last_date]
        summed = csv_sum(hosprows, [hosp_key])
        tagged[Fields.HOSP.name] = summed[hosp_key]
    return tagged
def handle_tx(res, mapping):
    '''Build the Texas record.

    The last three results are special: res[-3] holds the dashboard totals,
    res[-2] the positive PCR counts and res[-1] the current ICU table
    (used like a pandas DataFrame). Everything before them is a plain
    arcgis result.
    '''
    tagged = {}
    for result in res[:-3]:
        tagged.update(
            extract_arcgis_attributes(result, mapping, debug_state='TX'))

    # dashboard totals: one count per test type
    counts_by_type = {}
    for feature in res[-3]['features']:
        attrs = feature['attributes']
        counts_by_type[attrs['TestType']] = attrs['Count_']
    tagged.update(map_attributes(counts_by_type, mapping, 'TX'))

    # positive pcr: add up every attribute of the first feature
    pcr_attrs = res[-2]['features'][0]['attributes']
    tagged[Fields.SPECIMENS_POS.name] = sum(pcr_attrs.values())

    # last item is the current ICU DataFrame: the rows whose first column
    # is 'Total', last column, last such row
    icu_df = res[-1]
    first_col = icu_df.columns[0]
    last_col = icu_df.columns[-1]
    total_rows = icu_df.loc[icu_df[first_col] == 'Total']
    tagged[Fields.CURR_ICU.name] = total_rows[last_col].iloc[-1]
    return tagged
def handle_ak(res, mapping):
    '''Map the Alaska case counts, keyed by each feature's 'Race' value.'''
    # collect values
    collected = {}
    for feature in res[0].get('features', []):
        attrs = feature['attributes']
        collected[attrs['Race']] = attrs['CountCases']
    return map_attributes(collected, mapping, 'AK')
def handle_in(res, mapping):
    '''Build the Indiana record.

    res[0] is the daily statistics payload; its 'metrics' entry holds the
    'data' and 'daily_statistics' tables. res[1] is a payload whose first
    record under result/records is mapped as well.
    '''
    daily_stats = res[0]
    mapped = map_attributes(daily_stats, mapping, 'IN')

    # Base data is in "daily statistics", hosp data in "data"
    for metric_category in ['data', 'daily_statistics']:
        df = pd.DataFrame(daily_stats['metrics'][metric_category])
        df = df[df['district_type'] == 's']  # assuming "s" stands for State
        # assume either unique or sorted by date (that's what we know)
        last_row = df.iloc[-1]
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for k, v in last_row.items():
            if k in mapping:
                mapped[mapping[k]] = v

    partial = map_attributes(res[1]['result']['records'][0], mapping, 'IN')
    mapped.update(partial)
    return mapped
def handle_va(res, mapping):
    '''Getting multiple CSV files from the state and parsing each for the
    specific data it contains.

    Res:
    0 -- cases & death, probable & confirmed
    1 -- testing info
    2 -- hospital/icu/vent
    '''
    cases, testing, hospital = res[0], res[1], res[2]
    tagged = {}

    # Cases
    # expecting 2 rows in the following format
    # Report Date,Case Status,Number of Cases,Number of Hospitalizations,Number of Deaths
    # 5/14/2020,Probable,1344,24,28
    # 5/14/2020,Confirmed,26469,3568,927
    for row in cases:
        if row['Case Status'] == 'Confirmed':
            # confirmed row: every mapped column is taken as-is
            for column, raw in row.items():
                if column in mapping:
                    tagged[mapping[column]] = atoi(raw)
        elif row['Case Status'] == 'Probable':
            tagged[Fields.PROBABLE.name] = atoi(row['Number of Cases'])
            tagged[Fields.DEATH_PROBABLE.name] = atoi(row['Number of Deaths'])

    confirmed = tagged[Fields.CONFIRMED.name]
    tagged[Fields.POSITIVE.name] = confirmed + tagged[Fields.PROBABLE.name]

    # sum everything
    pcr_total_col = 'Number of PCR Testing Encounters'
    pcr_pos_col = 'Number of Positive PCR Tests'
    all_total_col = 'Total Number of Testing Encounters'
    all_pos_col = 'Total Number of Positive Tests'
    sums = csv_sum(
        testing, [pcr_total_col, pcr_pos_col, all_total_col, all_pos_col])
    tagged[Fields.SPECIMENS.name] = sums[pcr_total_col]
    tagged[Fields.SPECIMENS_POS.name] = sums[pcr_pos_col]
    # antibody figures are computed as "all tests" minus the PCR ones
    tagged[Fields.ANTIBODY_TOTAL.name] = sums[all_total_col] - sums[pcr_total_col]
    tagged[Fields.ANTIBODY_POS.name] = sums[all_pos_col] - sums[pcr_pos_col]

    # Hospitalizations: newest row first
    def row_date(row):
        return datetime.strptime(row['Date'], "%m/%d/%Y")

    newest_first = sorted(hospital, key=row_date, reverse=True)
    tagged.update(map_attributes(newest_first[0], mapping, 'VA'))
    return tagged
def handle_me(res, mapping):
    '''Build the Maine record.

    res[0] is the tableau summary table and res[1] the hospital capacity
    table (both used like pandas DataFrames); res[-1] is a parsed HTML
    page holding the lab results table with one row each for Positive,
    Negative and Total, and antibody / antigen / PCR value columns.
    '''
    tagged = {}

    # summary csv from tableau
    df = res[0]
    df = df[df['Patient County'] == 'All'].set_index('Measure Names')
    tagged.update(map_attributes(df['Measure Values'], mapping, 'ME'))

    # hospital capacity
    df = res[1]
    tagged.update(map_attributes(df.iloc[0], mapping, 'ME'))

    # row label -> (antibody, antigen, pcr) target fields
    fields_by_row = {
        'Positive': (Fields.ANTIBODY_POS.name, Fields.ANTIGEN_POS.name,
                     Fields.SPECIMENS_POS.name),
        'Negative': (Fields.ANTIBODY_NEG.name, Fields.ANTIGEN_NEG.name,
                     Fields.SPECIMENS_NEG.name),
        'Total': (Fields.ANTIBODY_TOTAL.name, Fields.ANTIGEN_TOTAL.name,
                  Fields.SPECIMENS.name),
    }

    soup = res[-1]
    th = soup.find(
        "th", string=re.compile("Results from Labs Reporting Electronically"))
    table = th.find_parent('table')
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        # Four cells are read below (label + three values), so require
        # four; fewer would raise IndexError on tds[3].
        if len(tds) < 4:
            continue
        targets = fields_by_row.get(tds[0].get_text(strip=True))
        if targets is None:
            continue
        for target, cell in zip(targets, tds[1:4]):
            tagged[target] = atoi(cell.get_text(strip=True))
    return tagged
def handle_de(res, mapping):
    '''Map the Delaware 'people' statistics of the most recent date.

    res[0] is used like a pandas DataFrame with Year/Month/Day, Unit,
    Statistic and Value columns. A 'Date' column is added to it in place.
    '''
    frame = res[0]
    frame['Date'] = pd.to_datetime(frame[['Year', 'Month', 'Day']])

    # only statistics counted in people that the mapping knows about
    counted_in_people = frame['Unit'] == 'people'
    known_statistic = frame['Statistic'].isin(mapping.keys())
    people = frame[counted_in_people & known_statistic]

    max_date = people['Date'].max()
    latest = people[people['Date'] == max_date].set_index('Statistic')

    mapped = map_attributes(latest['Value'], mapping, 'DE')
    mapped.update({Fields.DATE.name: max_date})
    return mapped
def handle_ok(res, mapping):
    '''Sum the Oklahoma rows in res[0] and map the totals.

    The report date is read from the first row.
    '''
    # need to sum all values
    rows = res[0]

    # sum all fields
    # TODO: functools probably has something nice
    totals = csv_sum(rows, ['Cases', 'Deaths', 'Recovered'])
    mapped = map_attributes(totals, mapping, 'OK')
    mapped[Fields.DATE.name] = rows[0].get('ReportDate')
    return mapped
def handle_de(res, mapping):
    '''Map the Delaware 'people' and 'tests' statistics.

    res[0] is used like a pandas DataFrame with Year/Month/Day, Unit,
    Statistic and Value columns. A 'Date' column is added to it in place.
    '''
    frame = res[0]
    frame['Date'] = pd.to_datetime(frame[['Year', 'Month', 'Day']])
    known_statistic = frame['Statistic'].isin(mapping.keys())

    # people metrics: rows at the newest date of the whole table
    people = frame[(frame['Unit'].isin(['people'])) & known_statistic]
    max_date = frame['Date'].max()
    people_latest = people[people['Date'] == max_date].set_index('Statistic')
    mapped = map_attributes(people_latest['Value'], mapping, 'DE')
    mapped.update({Fields.DATE.name: max_date})

    # Here's the funny thing, while we need to *max* date for most metrics,
    # we need a 2-day lagged testing metric
    tests = frame[(frame['Unit'].isin(['tests'])) & known_statistic]
    distinct_dates = tests['Date'].sort_values().unique()
    tests_date = distinct_dates[-3]  # third most recent distinct date
    tests_lagged = tests[tests['Date'] == tests_date].set_index('Statistic')
    mapped.update(map_attributes(tests_lagged['Value'], mapping, 'DE'))
    return mapped
def handle_sc(res, mapping):
    '''Build the South Carolina record.

    Every result but the last is an arcgis result; the last one is the
    testing table, which is unstacked and mapped.
    '''
    tagged = {}
    for result in res[:-1]:
        tagged.update(
            extract_arcgis_attributes(result, mapping, debug_state='SC'))

    # testing: flatten the table and join each index tuple with "-"
    testing = res[-1].unstack()
    testing.index = testing.index.map("-".join)
    tagged.update(map_attributes(testing, mapping))
    return tagged
def handle_fl(res, mapping):
    '''Build the Florida record.

    res[0] is mapped directly, res[1:-1] are arcgis results and res[-1]
    is the PCR encounters payload: the values of its first feature's
    attributes are added up into PCR_TEST_ENCOUNTERS.

    When res[-1] carries no feature attributes the PCR_TEST_ENCOUNTERS
    entry is left out rather than failing the whole record.
    '''
    mapped = map_attributes(res[0], mapping, 'FL')
    for result in res[1:-1]:
        mapped.update(extract_arcgis_attributes(result, mapping, 'FL'))

    # pcr encounters
    # `or` covers both a missing key and an empty/None value, which the
    # plain .get() defaults did not (empty list -> IndexError, missing
    # attributes -> None.values()).
    features = res[-1].get('features') or [{}]
    attributes = features[0].get('attributes') or {}
    if attributes:
        mapped[Fields.PCR_TEST_ENCOUNTERS.name] = sum(attributes.values())
    return mapped
def handle_la(res, mapping):
    '''Build the Louisiana record.

    The first two results are Measure/SUM_Value feature lists; the rest
    are plain arcgis results.
    '''
    tagged = {}
    for stats in res[:2]:
        if 'features' not in stats or len(stats['features']) == 0:
            continue
        measures = {
            feature.get('attributes', {}).get('Measure'):
            feature.get('attributes', {}).get('SUM_Value')
            for feature in stats['features']
        }
        tagged.update(map_attributes(measures, mapping, 'LA'))

    # everything else
    for result in res[2:]:
        tagged.update(extract_arcgis_attributes(result, mapping, 'LA'))
    return tagged
def handle_fl(res, mapping):
    '''Build the Florida record.

    res[0] is mapped directly, res[1:-1] are arcgis results and res[-1]
    is an iterable of dict-like rows from the current-hospitalization csv.
    '''
    mapped = map_attributes(res[0], mapping, 'FL')
    for result in res[1:-1]:
        mapped.update(extract_arcgis_attributes(result, mapping, 'FL'))

    # Current hosp csv: the 'All' county row carries the statewide value
    for row in res[-1]:
        if row.get('County') != 'All':
            continue
        mapped[Fields.CURR_HOSP.name] = atoi(row.get('COVID Hospitalizations'))
    return mapped
def handle_ga(res, mapping):
    '''Build the Georgia record.

    Every result but the last is an arcgis result. The last item is handed
    to zipContextManager, which yields a directory; the final row of each
    listed csv in it is mapped.

    Raises KeyError when the arcgis results provide no CURR_HOSP or
    CURR_HOSP_PUI entry.
    '''
    tagged = {}
    for result in res[:-1]:
        tagged.update(
            extract_arcgis_attributes(result, mapping, debug_state='GA'))

    # the CURR_HOSP_PUI count is folded into the current hospitalizations
    tagged[Fields.CURR_HOSP.name] += tagged.pop('CURR_HOSP_PUI')

    # last item is zip
    files = ["total_testing.csv", "summary_totals.csv"]
    with zipContextManager(res[-1]) as zipdir:
        for filename in files:
            # Read inside a `with` so the handle is closed before the
            # extracted directory goes away.
            with open(os.path.join(zipdir, filename), 'r') as csvfile:
                rows = list(csv.DictReader(csvfile))
            tagged.update(map_attributes(rows[-1], mapping, 'GA'))
    return tagged
def handle_la(res, mapping):
    '''Build the Louisiana record.

    res[0] is a Measure/SUM_Value feature list; res[1] is a dashboard
    description whose widgets carry the hospital, ventilator and recovered
    numbers. Those three fields stay "" when no matching widget is found.
    '''
    stats = res[0]
    state_tests = 'STATE_TESTS'
    tagged = {}
    if 'features' in stats and len(stats['features']) > 0:
        measures = {}
        for feature in stats['features']:
            attrs = feature.get('attributes', {})
            measures[attrs.get('Measure')] = attrs.get('SUM_Value')
        tagged = map_attributes(measures, mapping, 'LA')
        # state tests are folded into the total
        if state_tests in tagged:
            tagged[Fields.TOTAL.name] += tagged.pop(state_tests)

    # parse the probable death and vent and hospital data
    # This is going to be fragile
    hosp_title = "Reported COVID-19 Patients in Hospitals"
    recovered_title = "Presumed Recovered"
    curr_hosp = curr_vent = recovered = ""
    for widget in res[1].get('widgets', {}):
        settings = widget.get('defaultSettings', {})
        title = settings.get('topSection', {}).get('textInfo', {}).get(
            'text', '')
        if title == hosp_title:
            # Take the hosp value from the main number, and vent number
            # from the small text
            curr_hosp = atoi(widget['datasets'][0]['data'])
            vent_subtext = widget['defaultSettings']['bottomSection'][
                'textInfo']['text']
            curr_vent = atoi(vent_subtext.split()[0])
        elif title.find(recovered_title) >= 0:
            recovered = atoi(widget['datasets'][0]['data'])

    tagged[Fields.CURR_HOSP.name] = curr_hosp
    tagged[Fields.CURR_VENT.name] = curr_vent
    tagged[Fields.RECOVERED.name] = recovered
    return tagged
def handle_nh(res, mapping):
    '''Build the New Hampshire record.

    res[0] is a parsed HTML page whose first table has a header cell and
    a value cell per row; the remaining results are summed and mapped.
    '''
    # we love soup
    summary_table = res[0].find('table')
    mapped = {}
    for row in summary_table.find_all('tr'):
        label = row.find('th').get_text(strip=True)
        # numbers here are funny, need to clean a bit: keep the first
        # whitespace-separated token of the cell
        first_token = row.find('td').get_text(strip=True).split()[0]
        if label not in mapping:
            continue
        # yay, the faster option
        digits = re.search("[0-9,]+", first_token).group(0)
        mapped[mapping[label]] = atoi(digits)

    # cases + tests
    for table in res[1:]:
        mapped.update(map_attributes(table.sum(), mapping, 'NH'))
    return mapped
def handle_ma(res, mapping):
    '''Download the Massachusetts raw-data zip and build the record.

    res[0] is a parsed HTML page; the href of the link whose text matches
    "COVID-19 Raw Data" is appended to the mass.gov host to form the
    download URL. The last row of each listed csv is mapped, and HOSP is
    replaced by the sum of the hospitalization column over the most recent
    date in RaceEthnicity.csv.
    '''
    soup = res[0]
    link = soup.find('a', string=re.compile("COVID-19 Raw Data"))
    url = "https://www.mass.gov{}".format(link['href'])
    tagged = {}

    # download zip
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(req) as response, \
            NamedTemporaryFile(delete=True) as tmpfile, \
            TemporaryDirectory() as tmpdir:
        shutil.copyfileobj(response, tmpfile)
        # make sure every byte is on disk before unpacking by name
        tmpfile.flush()
        shutil.unpack_archive(tmpfile.name, tmpdir, format="zip")

        # Now we can read the files
        files = [
            'DeathsReported.csv', 'Testing2.csv',
            'Hospitalization from Hospitals.csv', 'Cases.csv'
        ]
        for filename in files:
            with open("{}/{}".format(tmpdir, filename), 'r') as csvfile:
                rows = list(csv.DictReader(csvfile, dialect='unix'))
            tagged.update(map_attributes(rows[-1], mapping, 'MA'))

        # source column that maps to HOSP (last match wins, "" if none)
        hosp_key = ""
        for k, v in mapping.items():
            if v == Fields.HOSP.name:
                hosp_key = k

        # Read inside a `with` so the handle is closed before the
        # temporary directory is removed.
        with open(tmpdir + "/RaceEthnicity.csv", 'r') as hospfile:
            hosprows = list(csv.DictReader(hospfile))

        # keep only the rows sharing the date of the final row
        last_date = hosprows[-1]['Date']
        hosprows = [x for x in hosprows if x['Date'] == last_date]
        summed = csv_sum(hosprows, [hosp_key])
        tagged[Fields.HOSP.name] = summed[hosp_key]
    return tagged
def handle_ma(res, mapping):
    '''Returning a list of dictionaries (records).

    The csv files to read are the values of the mapping entries whose key
    ends with "_file". Every row of every file is mapped; DateofDeath.csv
    uses a mapping extended with the probable/confirmed death totals.
    '''
    # files we care about: attempting from mappings
    files = [name for key, name in mapping.items() if key.endswith("_file")]

    death_mapping = copy(mapping)
    death_mapping.update({
        "Probable Total": Fields.DEATH_PROBABLE.name,
        "Confirmed Total": Fields.DEATH_CONFIRMED.name,
    })

    tagged = []
    with MaContextManager(res) as zipdir:
        for filename in files:
            print("Filename", filename)
            with open("{}/{}".format(zipdir, filename), 'r') as csvfile:
                rows = list(csv.DictReader(csvfile, dialect='unix'))
            row_mapping = (death_mapping
                           if filename == 'DateofDeath.csv' else mapping)
            for row in rows:
                tagged.append(map_attributes(row, row_mapping, 'MA'))
    return tagged
def collect_attributes(attributes, mapping, field, value, debug_state=None):
    '''Pivot a list of features into one dict and map it.

    For each item, item['attributes'][field] becomes the key and
    item['attributes'][value] the value; the resulting dict is passed to
    map_attributes together with mapping and debug_state.
    '''
    collected = {}
    for item in attributes:
        attrs = item['attributes']
        collected[attrs[field]] = attrs[value]
    return map_attributes(collected, mapping, debug_state)
def handle_ri(res, mapping):
    '''Map the Rhode Island rows: element 0 of each row in res[0] is the
    key and element 1 the value.'''
    pairs = {}
    for row in res[0]:
        pairs[row[0]] = row[1]
    return map_attributes(pairs, mapping, 'RI')
def handle_id(res, mapping):
    '''Map the last row of every table in res[0]; later tables override
    earlier ones on shared fields.'''
    # yay, tableau
    mapped = {}
    for table in res[0]:
        last_row = table.iloc[-1]
        mapped.update(map_attributes(last_row, mapping))
    return mapped
def handle_wv(res, mapping):
    '''Map the totals obtained by calling sum() on res[0].'''
    totals = res[0].sum()
    return map_attributes(totals, mapping)