コード例 #1
0
    def get_statewide_hospitalizations_timeseries(self, soup, update_date):
        """Scrape the hospitalizations timeseries table and store today's rows.

        Looks up the table with id ``hosptable`` in ``soup``, deletes any
        ``StatewideHospitalizationsDate`` rows whose ``scrape_date`` is today,
        and bulk-creates one row per parsed table row.

        Args:
            soup: Parsed page object exposing a BeautifulSoup-style ``find``.
            update_date: Value stored as ``update_date`` on every new row.

        Returns:
            The cumulative hospitalizations value taken from the last table
            row, or ``None`` when the table produced no rows (previously this
            case raised ``UnboundLocalError``).
        """
        print('Parsing statewide hospitalizations timeseries...')

        hosp_table = soup.find("table", {'id': 'hosptable'})
        hosp_timeseries = timeseries_table_parser(hosp_table)

        if len(hosp_timeseries) == 0:
            # Nothing was parsed: leave existing records alone and report
            # "no total" instead of referencing an unassigned local.
            return None

        today = datetime.date.today()

        # Remove old records from today so a re-run does not duplicate rows.
        existing_today_records = StatewideHospitalizationsDate.objects.filter(
            scrape_date=today)
        print('Removing {} records of hospitalizations timeseries data'.
              format(existing_today_records.count()))
        existing_today_records.delete()

        # Cell contents that stand for "no value" in the daily columns.
        empty_cells = ('-', '-\xa0\xa0 ')

        total_hospitalizations = None
        hosp_objs = []
        for c in hosp_timeseries:
            raw_date = c['Date']
            if raw_date == 'Unknown/missing':
                reported_date = None
            elif raw_date == 'Admitted on or before 3/5/20':
                # Catch-all row for early admissions is filed under 3/5/20.
                reported_date = self.parse_mdh_date('3/5/20', today)
            else:
                reported_date = self.parse_mdh_date(raw_date, today)

            # A hyphen in the cell means no value; store it as null.
            raw_hosp = c['Cases admitted to a hospital']
            if raw_hosp in empty_cells:
                new_hospitalizations = None
            else:
                new_hospitalizations = parse_comma_int(raw_hosp)

            raw_icu = c['Cases admitted to an ICU']
            if raw_icu in empty_cells:
                new_icu_admissions = None
            else:
                new_icu_admissions = parse_comma_int(raw_icu)

            total_hospitalizations = parse_comma_int(
                c['Total hospitalizations (cumulative)'])
            total_icu_admissions = parse_comma_int(
                c['Total ICU hospitalizations (cumulative)'])

            hosp_objs.append(
                StatewideHospitalizationsDate(
                    reported_date=reported_date,
                    new_hosp_admissions=new_hospitalizations,
                    new_icu_admissions=new_icu_admissions,
                    total_hospitalizations=total_hospitalizations,
                    total_icu_admissions=total_icu_admissions,
                    update_date=update_date,
                    scrape_date=today,
                ))

        print(
            'Adding {} records of hospitalizations timeseries data'.format(
                len(hosp_objs)))
        StatewideHospitalizationsDate.objects.bulk_create(hosp_objs)

        # Value from the final row processed above.
        return total_hospitalizations
コード例 #2
0
    def get_age_data(self, soup):
        """Return the rows of the ``agetable`` table with numeric counts.

        Each parsed row has its 'Number of Cases' and 'Number of Deaths'
        entries replaced in place by the result of ``parse_comma_int``.

        Args:
            soup: Parsed page object exposing a BeautifulSoup-style ``find``.

        Returns:
            A list containing the (mutated) row dicts, in table order.
        """
        rows = timeseries_table_parser(soup.find("table", {'id': 'agetable'}))

        count_columns = ('Number of Cases', 'Number of Deaths')
        cleaned_rows = []
        for row in rows:
            # Convert the two count columns, cases first and then deaths.
            for column in count_columns:
                row[column] = parse_comma_int(row[column])
            cleaned_rows.append(row)

        return cleaned_rows
コード例 #3
0
    def get_statewide_cases_timeseries(self, soup, update_date):
        """Scrape the cases-by-specimen-date table and store today's rows.

        Looks up the table with id ``casetable`` in ``soup``. When it yields
        rows, every ``StatewideCasesBySampleDate`` record whose
        ``scrape_date`` is today is deleted and one new record per table row
        is bulk-created. Nothing is returned.

        Args:
            soup: Parsed page object exposing a BeautifulSoup-style ``find``.
            update_date: Value stored as ``update_date`` on every new row.
        """
        print('Parsing statewide cases timeseries...')

        rows = timeseries_table_parser(
            soup.find("table", {'id': 'casetable'}))
        if not len(rows) > 0:
            return

        today = datetime.date.today()

        # Clear whatever an earlier run stored for today.
        stale_records = StatewideCasesBySampleDate.objects.filter(
            scrape_date=today)
        print('Removing {} records of case timeseries data'.format(
            stale_records.count()))
        stale_records.delete()

        pending = []
        for row in rows:
            collected = row['Specimen collection date']
            sample_date = (None if collected == 'Unknown/missing' else
                           self.parse_mdh_date(collected, today))

            pcr_positive = parse_comma_int(
                row['Confirmed cases (PCR positive)'])
            antigen_positive = parse_comma_int(
                row['Probable cases (Antigen positive)'])

            # Rows without an antigen figure count PCR positives only.
            if antigen_positive:
                daily_cases = int(pcr_positive) + int(antigen_positive)
            else:
                daily_cases = pcr_positive

            cumulative_cases = parse_comma_int(
                row['Total positive cases (cumulative)'])
            cumulative_pcr = parse_comma_int(
                row['Total confirmed cases (cumulative)'])
            cumulative_antigen = parse_comma_int(
                row['Total probable cases (cumulative)'])

            pending.append(
                StatewideCasesBySampleDate(
                    sample_date=sample_date,
                    new_cases=daily_cases,
                    total_cases=cumulative_cases,
                    new_pcr_tests=pcr_positive,
                    new_antigen_tests=antigen_positive,
                    total_pcr_tests=cumulative_pcr,
                    total_antigen_tests=cumulative_antigen,
                    update_date=update_date,
                    scrape_date=today,
                ))

        print('Adding {} records of case timeseries data'.format(
            len(pending)))
        StatewideCasesBySampleDate.objects.bulk_create(pending)
コード例 #4
0
    def get_statewide_deaths_timeseries(self, soup, update_date):
        """Scrape the deaths timeseries table and store today's rows.

        Looks up the table with id ``deathtable`` in ``soup``. When it yields
        rows, every ``StatewideDeathsDate`` record whose ``scrape_date`` is
        today is deleted and one new record per table row is bulk-created.

        Args:
            soup: Parsed page object exposing a BeautifulSoup-style ``find``.
            update_date: Value stored as ``update_date`` on every new row.

        Returns:
            A summary message built from the last table row's cumulative and
            newly reported deaths, or ``None`` when the table has no rows.
        """
        print('Parsing statewide deaths timeseries...')

        rows = timeseries_table_parser(
            soup.find("table", {'id': 'deathtable'}))
        if not len(rows) > 0:
            return None

        today = datetime.date.today()

        # Clear whatever an earlier run stored for today.
        stale_records = StatewideDeathsDate.objects.filter(scrape_date=today)
        print('Removing {} records of deaths timeseries data'.format(
            stale_records.count()))
        stale_records.delete()

        # Cell contents that stand for "no newly reported deaths value".
        blank_cells = ('', '-', '-\xa0\xa0 ')

        pending = []
        for row in rows:
            raw_date = row['Date reported']
            reported_date = (None if raw_date == 'Unknown/missing' else
                             self.parse_mdh_date(raw_date, today))

            raw_new = row['Newly reported deaths']
            new_deaths = (None if raw_new in blank_cells else
                          parse_comma_int(raw_new))

            total_deaths = parse_comma_int(row['Total deaths (cumulative)'])

            pending.append(
                StatewideDeathsDate(
                    reported_date=reported_date,
                    new_deaths=new_deaths,
                    total_deaths=total_deaths,
                    update_date=update_date,
                    scrape_date=today,
                ))

        print('Adding {} records of deaths timeseries data'.format(
            len(pending)))
        StatewideDeathsDate.objects.bulk_create(pending)

        # The figures left over from the final row feed the summary message.
        msg_output = '*{}* total deaths (*{}* reported today)\n\n'.format(
            f'{total_deaths:,}', self.change_sign(new_deaths))
        print(msg_output)
        return msg_output
コード例 #5
0
    def get_recent_deaths_data(self, soup, update_date):
        """Return the newly reported deaths broken down by county and age.

        Reads the table with id ``dailydeathar`` from ``soup``. When that
        table is absent, the last cell of the ``dailydeathtotal`` table is
        consulted to tell a genuine zero-death day from a scraping problem.

        Args:
            soup: Parsed page object exposing a BeautifulSoup-style ``find``.
            update_date: Value stored under ``scrape_date`` in each row dict.

        Returns:
            * A list of dicts with keys ``scrape_date``, ``county__name``,
              ``age_group`` and ``count`` when the breakdown table exists.
            * ``0`` when the breakdown table is missing and the totals table
              reports zero deaths.
            * ``None`` when the breakdown table is missing and zero deaths
              cannot be confirmed (non-zero total, or no totals table/cells).
        """
        recent_deaths_table = soup.find("table", {'id': 'dailydeathar'})

        if not recent_deaths_table:
            # Find out if you have an error or if there really were zero deaths
            deaths_total_table = soup.find("table", {'id': 'dailydeathtotal'})
            # A missing totals table or a table without cells is treated like
            # an unconfirmed zero rather than crashing the scrape.
            if deaths_total_table is None:
                total_cells = []
            else:
                total_cells = deaths_total_table.find_all("td")

            if total_cells and int(total_cells[-1].text) == 0:
                print("Actual 0 death day, no error detected.")
                return 0

            print(
                "WARNING: Couln't find recent deaths table but can't confirm 0."
            )
            return None

        recent_deaths_ages = timeseries_table_parser(recent_deaths_table)

        cleaned_data = []
        for group in recent_deaths_ages:
            county = group['County of residence']
            # County workaround: the source abbreviates "Otter Tail".
            if county == 'Otter':
                d_county_name = 'Otter Tail'
            elif county == 'Unknown/missing':
                d_county_name = None
            else:
                # Collapse runs of whitespace (raw string avoids the invalid
                # '\s' escape in a normal string literal).
                d_county_name = re.sub(r'\s+', ' ', county)

            print(d_county_name)
            cleaned_data.append({
                'scrape_date': update_date,
                'county__name': d_county_name,
                'age_group': group['Age group'].replace(' years',
                                                        '').strip(),
                'count': int(group['Number of newly reported deaths']),
            })
        return cleaned_data
コード例 #6
0
    def get_county_data(self, soup):
        """Return cumulative case and death counts for each county row.

        Reads the table with id ``maptable`` from ``soup``. County names have
        their whitespace collapsed and the ' County' suffix removed; the
        'Unknown/missing' row is left out.

        Args:
            soup: Parsed page object exposing a BeautifulSoup-style ``find``.

        Returns:
            A list of dicts with keys ``county``, ``cumulative_count``,
            ``cumulative_confirmed_cases``, ``cumulative_probable_cases`` and
            ``cumulative_deaths``.
        """
        rows = timeseries_table_parser(soup.find("table", {'id': 'maptable'}))

        results = []
        for row in rows:
            # split()/join collapses any run of whitespace to a single space.
            normalized = ' '.join(row['County'].split())
            name = normalized.replace(' County', '')
            if name == 'Unknown/missing':
                continue

            entry = {'county': name}
            entry['cumulative_count'] = parse_comma_int(row['Total cases'])
            entry['cumulative_confirmed_cases'] = parse_comma_int(
                row['Total confirmed cases'])
            entry['cumulative_probable_cases'] = parse_comma_int(
                row['Total probable cases'])
            entry['cumulative_deaths'] = parse_comma_int(row['Total deaths'])
            results.append(entry)

        return results
コード例 #7
0
    def get_statewide_tests_timeseries(self, soup, update_date):
        """Scrape the testing timeseries table and store today's rows.

        Looks up the table with id ``labtable`` in ``soup``, deletes any
        ``StatewideTestsDate`` rows whose ``scrape_date`` is today, and
        bulk-creates one row per parsed table row.

        Args:
            soup: Parsed page object exposing a BeautifulSoup-style ``find``.
            update_date: Value stored as ``update_date`` on every new row.

        Returns:
            A summary message built from the last table row's cumulative and
            daily test counts. When the table produced no rows an empty
            string is returned (previously this case raised
            ``UnboundLocalError`` while building the message).
        """
        print('Parsing statewide tests timeseries...')

        tests_table = soup.find("table", {'id': 'labtable'})
        tests_timeseries = timeseries_table_parser(tests_table)

        if len(tests_timeseries) == 0:
            # No rows means there is no total to report; leave stored
            # records untouched and hand back an empty message.
            print('WARNING: No rows found in tests timeseries table.')
            return ''

        today = datetime.date.today()

        # Remove old records from today so a re-run does not duplicate rows.
        existing_today_records = StatewideTestsDate.objects.filter(
            scrape_date=today)
        print('Removing {} records of test timeseries data'.format(
            existing_today_records.count()))
        existing_today_records.delete()

        test_objs = []
        for c in tests_timeseries:
            raw_date = c['Date reported to MDH']
            if raw_date == 'Unknown/missing':
                reported_date = None
            else:
                reported_date = self.parse_mdh_date(raw_date, today)

            new_state_tests = parse_comma_int(
                c['Completed PCR tests reported from the MDH Public Health Lab'])
            new_external_tests = parse_comma_int(
                c['Completed PCR tests reported from external laboratories'])

            total_tests = parse_comma_int(
                c['Total approximate number of completed tests (cumulative)'])

            new_antigen_tests = parse_comma_int(
                c['Completed antigen tests reported from external laboratories'])
            total_pcr_tests = parse_comma_int(
                c['Total approximate number of completed PCR tests (cumulative)'])
            total_antigen_tests = parse_comma_int(
                c['Total approximate number of completed antigen tests (cumulative)'])

            new_pcr_tests = new_state_tests + new_external_tests
            # Rows without an antigen figure count PCR tests only.
            if new_antigen_tests:
                new_tests = new_pcr_tests + new_antigen_tests
            else:
                new_tests = new_pcr_tests

            test_objs.append(
                StatewideTestsDate(
                    reported_date=reported_date,
                    new_state_tests=new_state_tests,
                    new_external_tests=new_external_tests,
                    new_tests=new_tests,
                    total_tests=total_tests,
                    new_pcr_tests=new_pcr_tests,
                    new_antigen_tests=new_antigen_tests,
                    total_pcr_tests=total_pcr_tests,
                    total_antigen_tests=total_antigen_tests,
                    update_date=update_date,
                    scrape_date=today,
                ))

        print('Adding {} records of test timeseries data'.format(
            len(test_objs)))
        StatewideTestsDate.objects.bulk_create(test_objs)

        # total_tests / new_tests hold the values from the final row.
        msg_output = '*{}* total tests completed (*{}* today)\n\n'.format(
            f'{total_tests:,}', self.change_sign(new_tests))
        print(msg_output)

        return msg_output