import csv
import os
import re
import shutil
import urllib.request
from datetime import datetime
from tempfile import NamedTemporaryFile, TemporaryDirectory

# Fields, atoi, csv_sum, map_attributes and MaContextManager are assumed to be
# provided by the project's shared utilities.


def handle_ma(res, mapping):
    tagged = {}
    files = ['DeathsReported.csv', 'Testing2.csv',
             'Hospitalization from Hospitals.csv', 'Cases.csv']

    with MaContextManager(res) as zipdir:
        # each CSV is a time series; the last row is the latest report
        for filename in files:
            with open(os.path.join(zipdir, filename), 'r') as csvfile:
                reader = csv.DictReader(csvfile, dialect='unix')
                rows = list(reader)
                last_row = rows[-1]
                partial = map_attributes(last_row, mapping, 'MA')
                tagged.update(partial)

        # hospitalizations: sum the latest day's rows in RaceEthnicity.csv
        hosp_key = ""
        for k, v in mapping.items():
            if v == Fields.HOSP.name:
                hosp_key = k
        with open(os.path.join(zipdir, "RaceEthnicity.csv"), 'r') as hospfile:
            hosprows = list(csv.DictReader(hospfile))
        last_row = hosprows[-1]
        hosprows = [x for x in hosprows if x['Date'] == last_row['Date']]
        summed = csv_sum(hosprows, [hosp_key])
        tagged[Fields.HOSP.name] = summed[hosp_key]

    return tagged
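
# handle_ma above relies on a MaContextManager helper that downloads the
# state's raw-data zip and unpacks it into a temporary directory.  What
# follows is a minimal sketch only, reconstructed from the inline download
# logic in the older handle_ma at the bottom of this file; the real helper
# lives in the project's shared utilities and may differ.
from contextlib import contextmanager


@contextmanager
def MaContextManager(res):  # name kept to match how handle_ma calls it
    # res[0] is the dashboard page soup; find the "COVID-19 Raw Data" link
    soup = res[0]
    link = soup.find('a', string=re.compile("COVID-19 Raw Data"))
    url = "https://www.mass.gov{}".format(link['href'])

    # download the zip to a temp file, unpack it into a temp dir and yield the
    # directory path; both temporaries are cleaned up on exit
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(req) as response, \
            NamedTemporaryFile(delete=True) as tmpfile, \
            TemporaryDirectory() as tmpdir:
        shutil.copyfileobj(response, tmpfile)
        tmpfile.flush()
        shutil.unpack_archive(tmpfile.name, tmpdir, format="zip")
        yield tmpdir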
def handle_va(res, mapping):
    '''Fetches multiple CSV files from the state and parses each one for the
    specific data it contains.
    '''
    tagged = {}

    # Res:
    # 0 -- cases & deaths, probable & confirmed
    # 1 -- testing info
    # 2 -- hospital/icu/vent
    cases = res[0]
    testing = res[1]
    hospital = res[2]

    date_format = "%m/%d/%Y"

    # Cases
    # expecting 2 rows in the following format
    # Report Date,Case Status,Number of Cases,Number of Hospitalizations,Number of Deaths
    # 5/14/2020,Probable,1344,24,28
    # 5/14/2020,Confirmed,26469,3568,927
    PROB = 'Probable'
    CONF = 'Confirmed'
    for row in cases:
        if row['Case Status'] == CONF:
            for k, v in row.items():
                if k in mapping:
                    tagged[mapping[k]] = atoi(v)
        elif row['Case Status'] == PROB:
            tagged[Fields.PROBABLE.name] = atoi(row['Number of Cases'])
            tagged[Fields.DEATH_PROBABLE.name] = atoi(row['Number of Deaths'])
    tagged[Fields.POSITIVE.name] = \
        tagged[Fields.CONFIRMED.name] + tagged[Fields.PROBABLE.name]

    # Testing: sum everything
    testing_cols = [
        'Number of PCR Testing Encounters', 'Number of Positive PCR Tests',
        'Total Number of Testing Encounters', 'Total Number of Positive Tests']
    summed_testing = csv_sum(testing, testing_cols)
    tagged[Fields.SPECIMENS.name] = summed_testing[testing_cols[0]]
    tagged[Fields.SPECIMENS_POS.name] = summed_testing[testing_cols[1]]
    tagged[Fields.ANTIBODY_TOTAL.name] = \
        summed_testing[testing_cols[2]] - summed_testing[testing_cols[0]]
    tagged[Fields.ANTIBODY_POS.name] = \
        summed_testing[testing_cols[3]] - summed_testing[testing_cols[1]]

    # Hospitalizations: take the most recent row only
    hospital = sorted(
        hospital,
        key=lambda x: datetime.strptime(x['Date'], date_format),
        reverse=True)
    mapped_hosp = map_attributes(hospital[0], mapping, 'VA')
    tagged.update(mapped_hosp)

    return tagged
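
# Several handlers (VA, ND, OK, MA) total selected columns across a list of
# row dicts via csv_sum.  A minimal sketch under that assumption; the real
# helper comes from the project's shared utilities and may handle more cases.
def csv_sum(rows, columns):
    # Sum the named columns across all rows, returning {column: total}.
    # Assumes atoi() can parse whatever the cells contain (blanks, commas,
    # plain numbers); rows missing a column count as zero.
    totals = {c: 0 for c in columns}
    for row in rows:
        for c in columns:
            totals[c] += atoi(row.get(c) or 0)
    return totals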
def handle_nd(res, mapping):
    soup = res[0]
    tagged = {}

    # Serology testing: the first table's header row gives the titles and the
    # second row the values
    table = soup.find('table')
    rows = table.find_all('tr')
    titles = rows[0]
    data = rows[1].find_all('td')
    for i, title in enumerate(titles.find_all("td")):
        title = title.get_text(strip=True)
        if title in mapping:
            value = atoi(data[i].get_text(strip=True))
            tagged[mapping[title]] = value

    # confirmed + probable deaths
    h2_death = soup.find("h2", string=re.compile("Deaths"))
    death_table = h2_death.find_next("table")
    for tr in death_table.find_all("tr"):
        cols = tr.find_all("td")
        if len(cols) < 2:
            continue
        strong = cols[0].find("strong")
        if not strong or len(strong.get_text()) < 10:
            continue
        name = strong.get_text(strip=True)
        value = atoi(cols[1].get_text(strip=True))
        if len(cols) > 2:
            value += atoi(cols[2].get_text(strip=True))
        if name in mapping:
            tagged[mapping[name]] = value

    # by-county testing snapshot: for negatives
    county_testing = res[1]
    columns = [k for k, v in mapping.items()
               if v in [Fields.CONFIRMED.name, Fields.NEGATIVE.name,
                        Fields.DEATH_CONFIRMED.name]]
    values = csv_sum(county_testing, columns=columns)
    tagged.update(map_attributes(values, mapping))

    # PCR encounters and other metrics
    pcr = res[2]
    partial = map_attributes(pcr.sum(), mapping)
    tagged.update(partial)

    # active hosp/icu should not be summed; take the latest day instead
    hosp = pcr.groupby('Date').sum().filter(like='Active').iloc[-1]
    tagged.update(map_attributes(hosp, mapping))

    return tagged
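
# atoi is used throughout to turn report values such as "1,344" into ints.
# A minimal sketch, assuming it should tolerate numbers, thousands separators
# and blank cells; the project's real helper may differ.
def atoi(value):
    # pass numbers through, strip commas/whitespace from strings,
    # and treat empty cells as 0
    if isinstance(value, (int, float)):
        return int(value)
    value = str(value).replace(',', '').strip()
    return int(value) if value else 0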
def handle_ok(res, mapping):
    # need to sum all values
    res = res[0]

    # sum all fields
    # TODO: functools probably has something nice
    cols = ['Cases', 'Deaths', 'Recovered']
    summed = csv_sum(res, cols)

    mapped = map_attributes(summed, mapping, 'OK')
    mapped[Fields.DATE.name] = res[0].get('ReportDate')
    return mapped
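
# map_attributes translates a source row's keys into Fields names using the
# per-state mapping, much as handle_va does by hand for the confirmed row.
# A minimal sketch based on how it is called in this file (the third,
# per-state argument appears to be an optional debug/context tag); the real
# helper lives in the project's shared utilities.
def map_attributes(row, mapping, debug_context=None):
    # for every source key the mapping knows about, emit the mapped Fields
    # name with the value parsed via atoi(); works for dicts and anything
    # else exposing .items(), e.g. a pandas Series
    tagged = {}
    for key, value in row.items():
        if key in mapping:
            tagged[mapping[key]] = atoi(value)
    return tagged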
def handle_ok(res, mapping):
    mapped = {}
    for result in res[:-1]:
        partial = map_attributes(result, mapping, 'OK')
        mapped.update(partial)

    # need to sum all values
    res = res[1]
    # sum all fields
    # TODO: functools probably has something nice
    cols = ['Cases', 'Deaths', 'Recovered']
    summed = csv_sum(res, cols)
    mapped.update(map_attributes(summed, mapping, 'OK'))
    mapped[Fields.DATE.name] = res[0].get('ReportDate')
    return mapped
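
# The Fields enum referenced by every handler is defined elsewhere in the
# project.  The partial sketch below lists only the members used in this
# file; the real enum has more members and possibly explicit values.
from enum import Enum, auto


class Fields(Enum):
    DATE = auto()
    POSITIVE = auto()
    NEGATIVE = auto()
    CONFIRMED = auto()
    PROBABLE = auto()
    DEATH_CONFIRMED = auto()
    DEATH_PROBABLE = auto()
    HOSP = auto()
    SPECIMENS = auto()
    SPECIMENS_POS = auto()
    ANTIBODY_TOTAL = auto()
    ANTIBODY_POS = auto()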
def handle_ma(res, mapping):
    soup = res[0]
    link = soup.find('a', string=re.compile("COVID-19 Raw Data"))
    link_part = link['href']
    url = "https://www.mass.gov{}".format(link_part)

    tagged = {}

    # download zip
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(req) as response, \
            NamedTemporaryFile(delete=True) as tmpfile, \
            TemporaryDirectory() as tmpdir:
        shutil.copyfileobj(response, tmpfile)
        tmpfile.flush()
        shutil.unpack_archive(tmpfile.name, tmpdir, format="zip")

        # Now we can read the files
        files = ['DeathsReported.csv', 'Testing2.csv',
                 'Hospitalization from Hospitals.csv', 'Cases.csv']
        for filename in files:
            with open("{}/{}".format(tmpdir, filename), 'r') as csvfile:
                reader = csv.DictReader(csvfile, dialect='unix')
                rows = list(reader)
                last_row = rows[-1]
                partial = map_attributes(last_row, mapping, 'MA')
                tagged.update(partial)

        hosp_key = ""
        for k, v in mapping.items():
            if v == Fields.HOSP.name:
                hosp_key = k
        with open(tmpdir + "/RaceEthnicity.csv", 'r') as hospfile:
            hosprows = list(csv.DictReader(hospfile))
        last_row = hosprows[-1]
        hosprows = [x for x in hosprows if x['Date'] == last_row['Date']]
        summed = csv_sum(hosprows, [hosp_key])
        tagged[Fields.HOSP.name] = summed[hosp_key]

    return tagged