def get_statewide_hospitalizations_timeseries(self, soup, update_date):
    """Replace today's statewide hospitalization timeseries records.

    Looks up the table with id ``hosptable`` in ``soup``, hands it to
    ``timeseries_table_parser`` and, when rows come back, deletes the
    ``StatewideHospitalizationsDate`` rows whose ``scrape_date`` is today
    before bulk-creating one record per parsed row.

    Args:
        soup: Parsed page object exposing ``find`` (looks like a
            BeautifulSoup document -- TODO confirm against the caller).
        update_date: Stored unchanged in each record's ``update_date``.

    Returns:
        The ``Total hospitalizations (cumulative)`` value of the last
        parsed row, or ``None`` when the table yields no rows.
    """
    print('Parsing statewide hospitalizations timeseries...')

    hosp_table = soup.find("table", {'id': 'hosptable'})
    hosp_timeseries = timeseries_table_parser(hosp_table)

    # The running total is only assigned inside the row loop. Returning
    # early on an empty table keeps stored rows untouched and avoids
    # referencing a name that was never bound.
    if not hosp_timeseries:
        return None

    today = datetime.date.today()

    # Remove old records from today so a re-run does not duplicate them.
    existing_today_records = StatewideHospitalizationsDate.objects.filter(
        scrape_date=today)
    print('Removing {} records of hospitalizations timeseries data'.format(
        existing_today_records.count()))
    existing_today_records.delete()

    # Cell contents that stand for "no value" in the admissions columns.
    placeholders = ('-', '-\xa0\xa0 ')

    total_hospitalizations = None
    hosp_objs = []
    for row in hosp_timeseries:
        raw_date = row['Date']
        if raw_date == 'Unknown/missing':
            reported_date = None
        elif raw_date == 'Admitted on or before 3/5/20':
            # The catch-all first row is filed under its cutoff date.
            reported_date = self.parse_mdh_date('3/5/20', today)
        else:
            reported_date = self.parse_mdh_date(raw_date, today)

        # A hyphen placeholder in the cell is stored as null.
        hosp_cell = row['Cases admitted to a hospital']
        if hosp_cell in placeholders:
            new_hospitalizations = None
        else:
            new_hospitalizations = parse_comma_int(hosp_cell)

        icu_cell = row['Cases admitted to an ICU']
        if icu_cell in placeholders:
            new_icu_admissions = None
        else:
            new_icu_admissions = parse_comma_int(icu_cell)

        total_hospitalizations = parse_comma_int(
            row['Total hospitalizations (cumulative)'])
        total_icu_admissions = parse_comma_int(
            row['Total ICU hospitalizations (cumulative)'])

        hosp_objs.append(StatewideHospitalizationsDate(
            reported_date=reported_date,
            new_hosp_admissions=new_hospitalizations,
            new_icu_admissions=new_icu_admissions,
            total_hospitalizations=total_hospitalizations,
            total_icu_admissions=total_icu_admissions,
            update_date=update_date,
            scrape_date=today,
        ))

    print('Adding {} records of hospitalizations timeseries data'.format(
        len(hosp_objs)))
    StatewideHospitalizationsDate.objects.bulk_create(hosp_objs)

    return total_hospitalizations
def get_age_data(self, soup):
    """Return the rows of the ``agetable`` table with numeric counts.

    Each row produced by ``timeseries_table_parser`` is updated in place:
    its ``Number of Cases`` and ``Number of Deaths`` entries are replaced
    by the result of ``parse_comma_int``. The rows are returned as a list
    in their original order.
    """
    parsed_rows = timeseries_table_parser(
        soup.find("table", {'id': 'agetable'}))

    numeric_columns = ('Number of Cases', 'Number of Deaths')

    result = []
    for row in parsed_rows:
        for column in numeric_columns:
            row[column] = parse_comma_int(row[column])
        result.append(row)
    return result
def get_statewide_cases_timeseries(self, soup, update_date):
    """Replace today's statewide cases-by-sample-date records.

    Parses the table with id ``casetable`` and, when it yields rows,
    deletes the ``StatewideCasesBySampleDate`` rows scraped today and
    bulk-creates one record per parsed row. Nothing is returned.

    Open question kept from the original author: how to deal with
    back-dated statewide totals if they use sample dates.
    """
    print('Parsing statewide cases timeseries...')

    parsed_rows = timeseries_table_parser(
        soup.find("table", {'id': 'casetable'}))

    if len(parsed_rows) == 0:
        return

    scrape_day = datetime.date.today()

    # Clear out anything already stored for today's scrape.
    stale = StatewideCasesBySampleDate.objects.filter(scrape_date=scrape_day)
    print('Removing {} records of case timeseries data'.format(stale.count()))
    stale.delete()

    records = []
    for row in parsed_rows:
        collected = row['Specimen collection date']
        if collected == 'Unknown/missing':
            sample_date = None
        else:
            sample_date = self.parse_mdh_date(collected, scrape_day)

        pcr_positive = parse_comma_int(
            row['Confirmed cases (PCR positive)'])
        antigen_positive = parse_comma_int(
            row['Probable cases (Antigen positive)'])

        # Handle old dates: without a truthy antigen figure the PCR
        # count is used as the day's total as-is.
        combined = pcr_positive
        if antigen_positive:
            combined = int(pcr_positive) + int(antigen_positive)

        records.append(StatewideCasesBySampleDate(
            sample_date=sample_date,
            new_cases=combined,
            total_cases=parse_comma_int(
                row['Total positive cases (cumulative)']),
            new_pcr_tests=pcr_positive,
            new_antigen_tests=antigen_positive,
            total_pcr_tests=parse_comma_int(
                row['Total confirmed cases (cumulative)']),
            total_antigen_tests=parse_comma_int(
                row['Total probable cases (cumulative)']),
            update_date=update_date,
            scrape_date=scrape_day,
        ))

    print('Adding {} records of case timeseries data'.format(len(records)))
    StatewideCasesBySampleDate.objects.bulk_create(records)
def get_statewide_deaths_timeseries(self, soup, update_date):
    """Replace today's statewide deaths timeseries and summarize it.

    Parses the table with id ``deathtable`` and, when it yields rows,
    deletes the ``StatewideDeathsDate`` rows whose ``scrape_date`` is
    today, bulk-creates one record per parsed row, then prints and
    returns a one-line summary built from the last row.

    Args:
        soup: Parsed page object exposing ``find`` (looks like a
            BeautifulSoup document -- TODO confirm against the caller).
        update_date: Stored unchanged in each record's ``update_date``.

    Returns:
        The summary string
        ``'*<total>* total deaths (*<change>* reported today)\\n\\n'``,
        or ``None`` when the table yields no rows.
    """
    print('Parsing statewide deaths timeseries...')

    deaths_table = soup.find("table", {'id': 'deathtable'})
    deaths_timeseries = timeseries_table_parser(deaths_table)

    # The summary needs values that are only assigned inside the row
    # loop; with no rows there is nothing to store or report.
    if not deaths_timeseries:
        return None

    today = datetime.date.today()

    # Remove old records from today so a re-run does not duplicate them.
    existing_today_records = StatewideDeathsDate.objects.filter(
        scrape_date=today)
    print('Removing {} records of deaths timeseries data'.format(
        existing_today_records.count()))
    existing_today_records.delete()

    # Cell contents that stand for "no value" in the new-deaths column.
    placeholders = ('', '-', '-\xa0\xa0 ')

    new_deaths = None
    total_deaths = None
    death_objs = []
    for row in deaths_timeseries:
        raw_date = row['Date reported']
        if raw_date == 'Unknown/missing':
            reported_date = None
        else:
            reported_date = self.parse_mdh_date(raw_date, today)

        # An empty or hyphen placeholder cell is stored as null.
        deaths_cell = row['Newly reported deaths']
        if deaths_cell in placeholders:
            new_deaths = None
        else:
            new_deaths = parse_comma_int(deaths_cell)

        total_deaths = parse_comma_int(row['Total deaths (cumulative)'])

        death_objs.append(StatewideDeathsDate(
            reported_date=reported_date,
            new_deaths=new_deaths,
            total_deaths=total_deaths,
            update_date=update_date,
            scrape_date=today,
        ))

    print('Adding {} records of deaths timeseries data'.format(
        len(death_objs)))
    StatewideDeathsDate.objects.bulk_create(death_objs)

    # Summary uses the values left over from the final row of the table.
    msg_output = '*{}* total deaths (*{}* reported today)\n\n'.format(
        f'{total_deaths:,}', self.change_sign(new_deaths))
    print(msg_output)

    return msg_output
def get_county_data(self, soup):
    """Return per-county cumulative figures from the ``maptable`` table.

    Each row's ``County`` text has its whitespace runs collapsed to
    single spaces and every ``' County'`` occurrence removed. Rows whose
    resulting name is ``Unknown/missing`` are skipped. The remaining rows
    become dicts holding the name plus four ``parse_comma_int`` results.
    """
    # (output key, source column), in the order the values are parsed.
    count_columns = (
        ('cumulative_count', 'Total cases'),
        ('cumulative_confirmed_cases', 'Total confirmed cases'),
        ('cumulative_probable_cases', 'Total probable cases'),
        ('cumulative_deaths', 'Total deaths'),
    )

    results = []
    rows = timeseries_table_parser(soup.find("table", {'id': 'maptable'}))
    for row in rows:
        collapsed = ' '.join(row['County'].split())
        name = collapsed.replace(' County', '')
        if name == 'Unknown/missing':
            continue

        entry = {'county': name}
        for out_key, column in count_columns:
            entry[out_key] = parse_comma_int(row[column])
        results.append(entry)

    return results
def get_statewide_data(self, soup):
    """Collect the latest statewide totals into a single dict.

    For each totals table (looked up by element id) the table is passed
    to ``self.totals_table_parser`` and selected labels of the result are
    converted with ``parse_comma_int`` and stored under fixed output
    keys. Afterwards every ``<ul>`` on the page is checked with
    ``self.ul_regex('Cases removed', ...)``; any result other than
    ``False`` is stored under ``removed_cases`` (the last match wins).

    Returns:
        dict mapping output keys to the parsed values.
    """
    # (table id, ((output key, label in parsed table), ...)), listed in
    # the order the tables and labels are read.
    table_fields = (
        ('hosptotal', (
            ('cumulative_hospitalized',
             'Total cases hospitalized (cumulative)'),
            ('cumulative_icu',
             'Total cases hospitalized in ICU (cumulative)'),
        )),
        ('casetotal', (
            ('cumulative_positive_tests',
             'Total positive cases (cumulative)'),
            ('cumulative_confirmed_cases',
             'Total confirmed cases (PCR positive) (cumulative)'),
            ('cumulative_probable_cases',
             'Total probable cases (Antigen positive) (cumulative)'),
        )),
        ('dailycasetotal', (
            ('cases_newly_reported',
             'Newly reported cases'),
            ('confirmed_cases_newly_reported',
             'Newly reported confirmed cases'),
            ('probable_cases_newly_reported',
             'Newly reported probable cases'),
        )),
        ('testtotal', (
            ('total_statewide_tests',
             'Total approximate completed tests (cumulative)'),
            ('cumulative_pcr_tests',
             'Total approximate number of completed PCR tests (cumulative)'),
            ('cumulative_antigen_tests',
             'Total approximate number of completed antigen tests (cumulative)'),
        )),
        ('deathtotal', (
            ('cumulative_statewide_deaths',
             'Total deaths (cumulative)'),
            ('cumulative_confirmed_statewide_deaths',
             'Deaths from confirmed cases (cumulative)'),
            ('cumulative_probable_statewide_deaths',
             'Deaths from probable cases (cumulative)'),
        )),
        ('noisototal', (
            ('cumulative_statewide_recoveries',
             'Patients no longer needing isolation (cumulative)'),
        )),
    )

    output = {}
    for table_id, fields in table_fields:
        latest = self.totals_table_parser(
            soup.find("table", {'id': table_id}))
        for out_key, label in fields:
            output[out_key] = parse_comma_int(latest[label])

    # ul_regex signals "no match" with False, so compare by identity
    # rather than truthiness.
    for ul in soup.find_all('ul'):
        removed = self.ul_regex('Cases removed', ul.text)
        if removed is not False:
            output['removed_cases'] = removed

    return output
def get_statewide_tests_timeseries(self, soup, update_date):
    """Replace today's statewide testing timeseries and summarize it.

    Parses the table with id ``labtable`` and, when it yields rows,
    deletes the ``StatewideTestsDate`` rows whose ``scrape_date`` is
    today, bulk-creates one record per parsed row, then prints and
    returns a one-line summary built from the last row.

    Args:
        soup: Parsed page object exposing ``find`` (looks like a
            BeautifulSoup document -- TODO confirm against the caller).
        update_date: Stored unchanged in each record's ``update_date``.

    Returns:
        The summary string
        ``'*<total>* total tests completed (*<change>* today)\\n\\n'``,
        or ``None`` when the table yields no rows.
    """
    print('Parsing statewide tests timeseries...')

    tests_table = soup.find("table", {'id': 'labtable'})
    tests_timeseries = timeseries_table_parser(tests_table)

    # The summary needs values that are only assigned inside the row
    # loop; with no rows there is nothing to store or report.
    if not tests_timeseries:
        return None

    today = datetime.date.today()

    # Remove old records from today so a re-run does not duplicate them.
    existing_today_records = StatewideTestsDate.objects.filter(
        scrape_date=today)
    print('Removing {} records of test timeseries data'.format(
        existing_today_records.count()))
    existing_today_records.delete()

    new_tests = None
    total_tests = None
    test_objs = []
    for row in tests_timeseries:
        raw_date = row['Date reported to MDH']
        if raw_date == 'Unknown/missing':
            reported_date = None
        else:
            reported_date = self.parse_mdh_date(raw_date, today)

        new_state_tests = parse_comma_int(
            row['Completed PCR tests reported from the MDH Public Health Lab'])
        new_external_tests = parse_comma_int(
            row['Completed PCR tests reported from external laboratories'])
        total_tests = parse_comma_int(
            row['Total approximate number of completed tests (cumulative)'])
        new_antigen_tests = parse_comma_int(
            row['Completed antigen tests reported from external laboratories'])
        total_pcr_tests = parse_comma_int(
            row['Total approximate number of completed PCR tests (cumulative)'])
        total_antigen_tests = parse_comma_int(
            row['Total approximate number of completed antigen tests (cumulative)'])

        # NOTE(review): this sum assumes parse_comma_int yields numbers
        # for both PCR columns; confirm it never returns None here.
        new_pcr_tests = new_state_tests + new_external_tests

        # Antigen figures may be absent or zero; then PCR alone is the
        # day's total.
        if new_antigen_tests:
            new_tests = new_pcr_tests + new_antigen_tests
        else:
            new_tests = new_pcr_tests

        test_objs.append(StatewideTestsDate(
            reported_date=reported_date,
            new_state_tests=new_state_tests,
            new_external_tests=new_external_tests,
            new_tests=new_tests,
            total_tests=total_tests,
            new_pcr_tests=new_pcr_tests,
            new_antigen_tests=new_antigen_tests,
            total_pcr_tests=total_pcr_tests,
            total_antigen_tests=total_antigen_tests,
            update_date=update_date,
            scrape_date=today,
        ))

    print('Adding {} records of test timeseries data'.format(len(test_objs)))
    StatewideTestsDate.objects.bulk_create(test_objs)

    # Summary uses the values left over from the final row of the table.
    msg_output = '*{}* total tests completed (*{}* today)\n\n'.format(
        f'{total_tests:,}', self.change_sign(new_tests))
    print(msg_output)

    return msg_output