コード例 #1
0
    def get_statewide_hospitalizations_timeseries(self, soup, update_date):
        """Replace today's hospitalization timeseries rows with a fresh scrape.

        Parses the ``hosptable`` table found in ``soup``, deletes any
        ``StatewideHospitalizationsDate`` rows whose ``scrape_date`` is today,
        and bulk-creates one row per parsed table row.

        Args:
            soup: Parsed page object supporting ``find("table", {...})``.
            update_date: Value stored as ``update_date`` on every created row.

        Returns:
            The cumulative hospitalization total parsed from the last table
            row, or ``None`` when the table yields no rows.
        """
        print('Parsing statewide hospitalizations timeseries...')

        hosp_table = soup.find("table", {'id': 'hosptable'})
        hosp_timeseries = timeseries_table_parser(hosp_table)

        # Bound before the branch so the final return cannot reference an
        # unassigned local when the table is empty.
        total_hospitalizations = None

        # Cell contents the source table uses in place of a number.
        placeholder_cells = ('-', '-\xa0\xa0 ')

        if len(hosp_timeseries) > 0:
            today = datetime.date.today()
            # Remove old records from today so a re-run does not duplicate them
            existing_today_records = StatewideHospitalizationsDate.objects.filter(
                scrape_date=today)
            print('Removing {} records of hospitalizations timeseries data'.
                  format(existing_today_records.count()))
            existing_today_records.delete()
            hosp_objs = []
            for c in hosp_timeseries:
                if c['Date'] == 'Unknown/missing':
                    reported_date = None
                elif c['Date'] == 'Admitted on or before 3/5/20':
                    # The catch-all first row is stored under 3/5/20 itself.
                    reported_date = self.parse_mdh_date('3/5/20', today)
                else:
                    reported_date = self.parse_mdh_date(c['Date'], today)

                # A hyphen in the cell means "no value": store null instead
                if c['Cases admitted to a hospital'] in placeholder_cells:
                    new_hospitalizations = None
                else:
                    new_hospitalizations = parse_comma_int(
                        c['Cases admitted to a hospital'])

                if c['Cases admitted to an ICU'] in placeholder_cells:
                    new_icu_admissions = None
                else:
                    new_icu_admissions = parse_comma_int(
                        c['Cases admitted to an ICU'])

                total_hospitalizations = parse_comma_int(
                    c['Total hospitalizations (cumulative)'])
                total_icu_admissions = parse_comma_int(
                    c['Total ICU hospitalizations (cumulative)'])

                std = StatewideHospitalizationsDate(
                    reported_date=reported_date,
                    new_hosp_admissions=new_hospitalizations,
                    new_icu_admissions=new_icu_admissions,
                    total_hospitalizations=total_hospitalizations,
                    total_icu_admissions=total_icu_admissions,
                    update_date=update_date,
                    scrape_date=today,
                )
                hosp_objs.append(std)

            print(
                'Adding {} records of hospitalizations timeseries data'.format(
                    len(hosp_objs)))
            StatewideHospitalizationsDate.objects.bulk_create(hosp_objs)

        # Value from the last parsed row (None if there were no rows).
        return total_hospitalizations
コード例 #2
0
    def get_age_data(self, soup):
        """Return the rows of the ``agetable`` table with numeric counts.

        Each parsed row has its 'Number of Cases' and 'Number of Deaths'
        entries replaced in place by the result of ``parse_comma_int``.

        Args:
            soup: Parsed page object supporting ``find("table", {...})``.

        Returns:
            A new list holding the (mutated) row mappings in table order.
        """
        parsed_rows = timeseries_table_parser(
            soup.find("table", {'id': 'agetable'}))

        numeric_columns = ('Number of Cases', 'Number of Deaths')

        converted = []
        for row in parsed_rows:
            for column in numeric_columns:
                row[column] = parse_comma_int(row[column])
            converted.append(row)

        return converted
コード例 #3
0
    def get_statewide_cases_timeseries(self, soup, update_date):
        """Replace today's case-by-sample-date rows with a fresh scrape.

        Parses the ``casetable`` table found in ``soup``; when it yields rows,
        deletes ``StatewideCasesBySampleDate`` rows whose ``scrape_date`` is
        today and bulk-creates one row per parsed table row. Does nothing
        when the table yields no rows.

        Open question carried over from the original author: how to deal with
        back-dated statewide totals if they use sample dates.

        Args:
            soup: Parsed page object supporting ``find("table", {...})``.
            update_date: Value stored as ``update_date`` on every created row.
        """
        print('Parsing statewide cases timeseries...')

        parsed_rows = timeseries_table_parser(
            soup.find("table", {'id': 'casetable'}))

        # Guard clause: leave existing records untouched when nothing parsed.
        if not len(parsed_rows) > 0:
            return

        today = datetime.date.today()

        # Clear out anything already stored for today's scrape.
        stale_records = StatewideCasesBySampleDate.objects.filter(
            scrape_date=today)
        print('Removing {} records of case timeseries data'.format(
            stale_records.count()))
        stale_records.delete()

        pending = []
        for row in parsed_rows:
            raw_date = row['Specimen collection date']
            sample_date = (None if raw_date == 'Unknown/missing' else
                           self.parse_mdh_date(raw_date, today))

            pcr_count = parse_comma_int(row['Confirmed cases (PCR positive)'])
            antigen_count = parse_comma_int(
                row['Probable cases (Antigen positive)'])

            # Only add the antigen figure when it is truthy; otherwise the
            # PCR figure alone is used as the daily case count.
            combined = (int(pcr_count) + int(antigen_count)
                        if antigen_count else pcr_count)

            cumulative_cases = parse_comma_int(
                row['Total positive cases (cumulative)'])
            cumulative_pcr = parse_comma_int(
                row['Total confirmed cases (cumulative)'])
            cumulative_antigen = parse_comma_int(
                row['Total probable cases (cumulative)'])

            pending.append(
                StatewideCasesBySampleDate(
                    sample_date=sample_date,
                    new_cases=combined,
                    total_cases=cumulative_cases,
                    new_pcr_tests=pcr_count,
                    new_antigen_tests=antigen_count,
                    total_pcr_tests=cumulative_pcr,
                    total_antigen_tests=cumulative_antigen,
                    update_date=update_date,
                    scrape_date=today,
                ))

        print('Adding {} records of case timeseries data'.format(
            len(pending)))
        StatewideCasesBySampleDate.objects.bulk_create(pending)
コード例 #4
0
    def get_statewide_deaths_timeseries(self, soup, update_date):
        """Replace today's deaths timeseries rows and build a summary message.

        Parses the ``deathtable`` table found in ``soup``; when it yields
        rows, deletes ``StatewideDeathsDate`` rows whose ``scrape_date`` is
        today, bulk-creates one row per parsed table row, and prints a
        summary built from the last parsed row.

        Args:
            soup: Parsed page object supporting ``find("table", {...})``.
            update_date: Value stored as ``update_date`` on every created row.

        Returns:
            The summary message string, or ``None`` when the table yields no
            rows.
        """
        print('Parsing statewide deaths timeseries...')

        parsed_rows = timeseries_table_parser(
            soup.find("table", {'id': 'deathtable'}))

        # Guard clause: nothing to store and no message to build.
        if not len(parsed_rows) > 0:
            return None

        today = datetime.date.today()

        # Clear out anything already stored for today's scrape.
        stale_records = StatewideDeathsDate.objects.filter(scrape_date=today)
        print('Removing {} records of deaths timeseries data'.format(
            stale_records.count()))
        stale_records.delete()

        # Cell contents treated as "no value" for newly reported deaths.
        blank_markers = ('', '-', '-\xa0\xa0 ')

        pending = []
        for row in parsed_rows:
            raw_date = row['Date reported']
            reported_date = (None if raw_date == 'Unknown/missing' else
                             self.parse_mdh_date(raw_date, today))

            raw_new = row['Newly reported deaths']
            new_deaths = (None if raw_new in blank_markers else
                          parse_comma_int(raw_new))

            total_deaths = parse_comma_int(row['Total deaths (cumulative)'])

            pending.append(
                StatewideDeathsDate(
                    reported_date=reported_date,
                    new_deaths=new_deaths,
                    total_deaths=total_deaths,
                    update_date=update_date,
                    scrape_date=today,
                ))

        print('Adding {} records of deaths timeseries data'.format(
            len(pending)))
        StatewideDeathsDate.objects.bulk_create(pending)

        # total_deaths / new_deaths still hold the last row's values here.
        formatted_total = f'{total_deaths:,}'
        signed_change = self.change_sign(new_deaths)
        msg_output = '*{}* total deaths (*{}* reported today)\n\n'.format(
            formatted_total, signed_change)
        print(msg_output)

        return msg_output
コード例 #5
0
    def get_county_data(self, soup):
        """Return per-county cumulative figures from the ``maptable`` table.

        County names have runs of whitespace collapsed to single spaces and
        the ' County' text removed. Rows whose resulting name is
        'Unknown/missing' are skipped.

        Args:
            soup: Parsed page object supporting ``find("table", {...})``.

        Returns:
            A list of dicts with keys ``county``, ``cumulative_count``,
            ``cumulative_confirmed_cases``, ``cumulative_probable_cases`` and
            ``cumulative_deaths``.
        """
        parsed_rows = timeseries_table_parser(
            soup.find("table", {'id': 'maptable'}))

        # (output key, source column) pairs, in output order.
        numeric_fields = (
            ('cumulative_count', 'Total cases'),
            ('cumulative_confirmed_cases', 'Total confirmed cases'),
            ('cumulative_probable_cases', 'Total probable cases'),
            ('cumulative_deaths', 'Total deaths'),
        )

        results = []
        for row in parsed_rows:
            collapsed = ' '.join(row['County'].split())
            name = collapsed.replace(' County', '')
            if name == 'Unknown/missing':
                continue

            record = {'county': name}
            for out_key, column in numeric_fields:
                record[out_key] = parse_comma_int(row[column])
            results.append(record)

        return results
コード例 #6
0
    def get_statewide_data(self, soup):
        """Collect the statewide headline totals from the page.

        For each totals table (looked up by element id) the table is passed
        through ``self.totals_table_parser`` and the listed labels are
        converted with ``parse_comma_int``. Afterwards every ``<ul>`` on the
        page is checked with ``self.ul_regex('Cases removed', ...)``; any
        result other than ``False`` is stored under ``removed_cases`` (a
        later match overwrites an earlier one).

        Args:
            soup: Parsed page object supporting ``find`` and ``find_all``.

        Returns:
            A dict mapping output field names to the parsed values.
        """
        # table id -> ((output key, label in that table), ...), processed in
        # exactly this order.
        table_specs = (
            ('hosptotal', (
                ('cumulative_hospitalized',
                 'Total cases hospitalized (cumulative)'),
                ('cumulative_icu',
                 'Total cases hospitalized in ICU (cumulative)'),
            )),
            ('casetotal', (
                ('cumulative_positive_tests',
                 'Total positive cases (cumulative)'),
                ('cumulative_confirmed_cases',
                 'Total confirmed cases (PCR positive) (cumulative)'),
                ('cumulative_probable_cases',
                 'Total probable cases (Antigen positive) (cumulative)'),
            )),
            ('dailycasetotal', (
                ('cases_newly_reported', 'Newly reported cases'),
                ('confirmed_cases_newly_reported',
                 'Newly reported confirmed cases'),
                ('probable_cases_newly_reported',
                 'Newly reported probable cases'),
            )),
            ('testtotal', (
                ('total_statewide_tests',
                 'Total approximate completed tests (cumulative)'),
                ('cumulative_pcr_tests',
                 'Total approximate number of completed PCR tests (cumulative)'),
                ('cumulative_antigen_tests',
                 'Total approximate number of completed antigen tests (cumulative)'),
            )),
            ('deathtotal', (
                ('cumulative_statewide_deaths', 'Total deaths (cumulative)'),
                ('cumulative_confirmed_statewide_deaths',
                 'Deaths from confirmed cases (cumulative)'),
                ('cumulative_probable_statewide_deaths',
                 'Deaths from probable cases (cumulative)'),
            )),
            ('noisototal', (
                ('cumulative_statewide_recoveries',
                 'Patients no longer needing isolation (cumulative)'),
            )),
        )

        output = {}
        for table_id, fields in table_specs:
            table = soup.find("table", {'id': table_id})
            latest = self.totals_table_parser(table)
            for out_key, label in fields:
                output[out_key] = parse_comma_int(latest[label])

        for ul in soup.find_all('ul'):
            removed = self.ul_regex('Cases removed', ul.text)
            # ul_regex signals "no match" with False specifically.
            if removed is False:
                continue
            output['removed_cases'] = removed

        return output
コード例 #7
0
    def get_statewide_tests_timeseries(self, soup, update_date):
        """Replace today's test timeseries rows and build a summary message.

        Parses the ``labtable`` table found in ``soup``; when it yields rows,
        deletes ``StatewideTestsDate`` rows whose ``scrape_date`` is today,
        bulk-creates one row per parsed table row, and prints a summary built
        from the last parsed row.

        Args:
            soup: Parsed page object supporting ``find("table", {...})``.
            update_date: Value stored as ``update_date`` on every created row.

        Returns:
            The summary message string, or ``None`` when the table yields no
            rows (previously this case raised ``UnboundLocalError`` because
            the message was built from variables only set inside the loop).
        """
        print('Parsing statewide tests timeseries...')

        tests_table = soup.find("table", {'id': 'labtable'})
        tests_timeseries = timeseries_table_parser(tests_table)

        # Without rows there is nothing to store and no totals to report;
        # mirror get_statewide_deaths_timeseries and return None.
        if len(tests_timeseries) == 0:
            return None

        today = datetime.date.today()
        # Remove old records from today so a re-run does not duplicate them
        existing_today_records = StatewideTestsDate.objects.filter(
            scrape_date=today)
        print('Removing {} records of test timeseries data'.format(
            existing_today_records.count()))
        existing_today_records.delete()
        test_objs = []
        for c in tests_timeseries:
            if c['Date reported to MDH'] == 'Unknown/missing':
                reported_date = None
            else:
                reported_date = self.parse_mdh_date(
                    c['Date reported to MDH'], today)

            new_state_tests = parse_comma_int(
                c['Completed PCR tests reported from the MDH Public Health Lab'])
            new_external_tests = parse_comma_int(
                c['Completed PCR tests reported from external laboratories'])

            total_tests = parse_comma_int(
                c['Total approximate number of completed tests (cumulative)'])

            new_antigen_tests = parse_comma_int(
                c['Completed antigen tests reported from external laboratories'])
            total_pcr_tests = parse_comma_int(
                c['Total approximate number of completed PCR tests (cumulative)'])
            total_antigen_tests = parse_comma_int(
                c['Total approximate number of completed antigen tests (cumulative)'])

            new_pcr_tests = new_state_tests + new_external_tests
            # Only add the antigen figure when it is truthy; otherwise the
            # PCR figure alone is the day's test count.
            if new_antigen_tests:
                new_tests = new_pcr_tests + new_antigen_tests
            else:
                new_tests = new_pcr_tests

            std = StatewideTestsDate(
                reported_date=reported_date,
                new_state_tests=new_state_tests,
                new_external_tests=new_external_tests,
                new_tests=new_tests,
                total_tests=total_tests,
                new_pcr_tests=new_pcr_tests,
                new_antigen_tests=new_antigen_tests,
                total_pcr_tests=total_pcr_tests,
                total_antigen_tests=total_antigen_tests,
                update_date=update_date,
                scrape_date=today,
            )
            test_objs.append(std)
        print('Adding {} records of test timeseries data'.format(
            len(test_objs)))
        StatewideTestsDate.objects.bulk_create(test_objs)

        # total_tests / new_tests hold the last row's values at this point.
        msg_output = '*{}* total tests completed (*{}* today)\n\n'.format(
            f'{total_tests:,}', self.change_sign(new_tests))
        print(msg_output)

        return msg_output