예제 #1
0
def handle_pa(res, mapping):
    '''Need to sum ECMO and Vent number for a total count
    '''
    tagged = {}
    for result in res[:-1]:
        partial = extract_arcgis_attributes(result, mapping, 'PA')
        tagged.update(partial)

    # soup time: recovered
    soup = res[-1]
    try:
        table = soup.find("table")
        total_cases = None
        recover_pct = None
        for td in table.find_all("td"):
            text = td.get_text(strip=True)
            text = text.strip(u'\u200b')
            if text.startswith('Total Cases'):
                total_cases = atoi(text[len('Total Cases*'):])
            elif text.startswith('Recovered'):
                recover_pct = atoi(text[len('Recovered***'):-1])
            elif text.startswith('Total PCR Tests'):
                specimens = atoi(text[len('Total PCR Tests'):])
                tagged[Fields.SPECIMENS.name] = specimens

        if total_cases and recover_pct:
            tagged[Fields.RECOVERED.name] = math.floor(total_cases *
                                                       recover_pct / 100)
    except Exception:
        logging.warning("PA: failed to parse recovered", exc_info=True)
    return tagged
예제 #2
0
def handle_ky(res, mapping):
    tagged = {}
    for result in res[:-1]:
        partial = extract_arcgis_attributes(result, mapping, 'KY')
        tagged.update(partial)

    # soup time
    soup = res[-1]
    datacards = soup.find_all('div', 'info-card')
    for item in datacards:
        title = item.find("span", "title")
        value = item.find("span", "number")
        if not value:
            continue

        probable = item.find_all("span", "probable")
        pattern = "([a-zA-Z ]*): ?([0-9,]*)"

        # class = title, number, probable
        title = title.get_text(strip=True)
        value = value.get_text(strip=True)

        probable = " ".join(
            [p.get_text(strip=True) if p else "" for p in probable])
        if probable and probable.strip():
            probable = re.findall(pattern, probable)

        if title.lower().find("total test") >= 0:
            for (k, v) in probable:
                if k.lower().find("pcr") >= 0:
                    tagged[Fields.SPECIMENS.name] = atoi(v)
                elif k.lower().find("serology") >= 0:
                    tagged[Fields.ANTIBODY_TOTAL.name] = atoi(v)
                elif k.lower().find('antigen') >= 0:
                    tagged[Fields.ANTIGEN_TOTAL.name] = atoi(v)
        elif title.lower().find("positive") >= 0:
            tagged[Fields.POSITIVE.name] = atoi(value)
            for (k, v) in probable:
                if k.lower().find("probable") >= 0:
                    tagged[Fields.PROBABLE.name] = atoi(v)
                elif k.lower().find("confirm") >= 0:
                    tagged[Fields.CONFIRMED.name] = atoi(v)
        elif title.lower().find("death") >= 0:
            tagged[Fields.DEATH.name] = atoi(value)
            for (k, v) in probable:
                if k.lower().find("probable") >= 0:
                    tagged[Fields.DEATH_PROBABLE.name] = atoi(v)
                elif k.lower().find("confirm") >= 0:
                    tagged[Fields.DEATH_CONFIRMED.name] = atoi(v)
        elif title.lower().find("recover") >= 0:
            tagged[Fields.RECOVERED.name] = atoi(value)

    updated = soup.find(
        'p', string=re.compile('Current as of')).get_text(strip=True)
    tagged[Fields.DATE.name] = updated

    return tagged
예제 #3
0
def handle_nd(res, mapping):
    soup = res[0]
    tagged = {}

    # Serology testing
    table = soup.find('table')
    rows = table.find_all('tr')
    titles = rows[0]
    data = rows[1].find_all('td')

    for i, title in enumerate(titles.find_all("td")):
        title = title.get_text(strip=True)
        if title in mapping:
            value = atoi(data[i].get_text(strip=True))
            tagged[mapping[title]] = value

    # confirmed+probable death
    h2_death = soup.find("h2", string=re.compile("Deaths"))
    death_table = h2_death.find_next("table")

    for tr in death_table.find_all("tr"):
        cols = tr.find_all("td")
        if len(cols) < 2:
            continue
        strong = cols[0].find("strong")
        if not strong or len(strong.get_text()) < 10:
            continue
        name = strong.get_text(strip=True)
        value = atoi(cols[1].get_text(strip=True))
        if len(cols) > 2:
            value += atoi(cols[2].get_text(strip=True))
        if name in mapping:
            tagged[mapping[name]] = atoi(value)

    # by county testing snapshot: for negatives
    county_testing = res[1]
    columns = [
        k for k, v in mapping.items() if v in [
            Fields.CONFIRMED.name, Fields.NEGATIVE.name,
            Fields.DEATH_CONFIRMED.name
        ]
    ]
    values = csv_sum(county_testing, columns=columns)
    tagged.update(map_attributes(values, mapping))

    # PCR encounters and other metrics
    pcr = res[2]
    partial = map_attributes(pcr.sum(), mapping)
    tagged.update(partial)

    # active hosp/icu should not be summed
    hosp = pcr.groupby('Date').sum().filter(like='Active').iloc[-1]
    tagged.update(map_attributes(hosp, mapping))

    return tagged
예제 #4
0
def handle_vi(res, mapping):
    # 0: covid page
    # 1: DoH page

    covid_page = res[0]
    container = covid_page.find(
        'div',
        'views-element-container block block-views block-views-blockcovid-19-epi-summary-block-1'
    )

    tagged = {}

    header = container.find('div', 'view-header')
    header_text = header.get_text(strip=True)
    if header_text.startswith('Last updated'):
        tagged[Fields.DATE.name] = header_text[len('Last updated') + 1:]

    divs = container.find_all('div', 'views-field')
    for x in divs:
        name = x.find('span').get_text(strip=True)
        if not x.find('div'):
            # this is the end
            break
        value = x.find('div').get_text(strip=True)

        if name == 'Recovered':
            # need to special case it
            value = value.split("/")[0]

        if name in mapping:
            tagged[mapping[name]] = atoi(value)

    return tagged
예제 #5
0
def handle_or(res, mapping):
    mapped = {}
    for result in res[:-1]:
        partial = extract_arcgis_attributes(result, mapping, 'OR')
        mapped.update(partial)

    # The last item is the page that needs to be scraped
    page = res[-1]
    # main stats
    main_table = page.find('table')
    for row in main_table.find_all('tr'):
        tds = row.find_all('td')
        if len(tds) < 2:
            continue
        name = tds[0].get_text(strip=True)
        if tds[1].find('sup') is not None:
            value = tds[1].find('b').find(text=True, recursive=False)
        else:
            value = tds[1].get_text(strip=True)
        if name in mapping:
            try:
                mapped[mapping[name]] = atoi(value)
            except Exception:
                logging.warning("OR: failed to parse {} for {}".format(
                    value, name),
                                exc_info=True)

    return mapped
예제 #6
0
def handle_me(res, mapping):
    tagged = {}
    # summary csv from tableau
    df = res[0]
    df = df[df['Patient County'] == 'All'].set_index('Measure Names')
    partial = map_attributes(df['Measure Values'], mapping, 'ME')
    tagged.update(partial)

    # hospital capacity
    df = res[1]
    partial = map_attributes(df.iloc[0], mapping, 'ME')
    tagged.update(partial)

    soup = res[-1]
    th = soup.find(
        "th", string=re.compile("Results from Labs Reporting Electronically"))
    table = th.find_parent('table')
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) < 3:
            continue
        name = tds[0].get_text(strip=True)
        if name not in ['Positive', 'Negative', 'Total']:
            continue
        antibody_val = atoi(tds[1].get_text(strip=True))
        antigen_val = atoi(tds[2].get_text(strip=True))
        pcr_val = atoi(tds[3].get_text(strip=True))

        if name == 'Positive':
            tagged[Fields.ANTIBODY_POS.name] = antibody_val
            tagged[Fields.ANTIGEN_POS.name] = antigen_val
            tagged[Fields.SPECIMENS_POS.name] = pcr_val
        elif name == 'Negative':
            tagged[Fields.ANTIBODY_NEG.name] = antibody_val
            tagged[Fields.ANTIGEN_NEG.name] = antigen_val
            tagged[Fields.SPECIMENS_NEG.name] = pcr_val
        elif name == 'Total':
            tagged[Fields.ANTIBODY_TOTAL.name] = antibody_val
            tagged[Fields.ANTIGEN_TOTAL.name] = antigen_val
            tagged[Fields.SPECIMENS.name] = pcr_val

    return tagged
예제 #7
0
def handle_ms(res, mapping):
    soup = res[0]
    mapped = {}

    tables = soup.find_all('table')
    # expecting [<some number of tables>, cases/death table, testing]

    # skip tables until we get to the cases table
    for i, t in enumerate(tables):
        header = t.find('thead').get_text(strip=True)
        if header.lower() == 'confirmedprobabletotal':
            break
    tables = tables[i:]

    status = tables[0]
    trs = status.find('tbody').find_all('tr')
    cases = trs[0]
    cases_fields = [Fields.CONFIRMED, Fields.PROBABLE, Fields.POSITIVE]
    deaths = trs[1]
    deaths_fields = [
        Fields.DEATH_CONFIRMED, Fields.DEATH_PROBABLE, Fields.DEATH
    ]
    for tr, title, fields in [(cases, 'cases', cases_fields),
                              (deaths, 'deaths', deaths_fields)]:
        tds = tr.find_all('td')
        if tds[0].get_text(strip=True).lower() == title:
            # we're in the right place
            for i, field in enumerate(fields):
                mapped[field.name] = atoi(tds[i + 1].get_text(strip=True))

    testing = tables[1]
    for tr in testing.find_all('tr'):
        tds = tr.find_all('td')
        title = tds[0].get_text(strip=True).strip()
        if title in mapping:
            mapped[mapping[title]] = atoi(tds[1].get_text(strip=True))

    return mapped
예제 #8
0
def handle_oh(res, mapping):
    soup = res[0]
    container = soup.find('div', 'stats-cards__container')
    tagged = {}
    for div in container.find_all('div', 'stats-cards__item'):
        name = div.find('div', 'stats-cards__label')
        val = div.find('div', 'stats-cards__number')
        if name and val and name and name.get_text(strip=True) in mapping:
            val = atoi(val.get_text(strip=True))
            tagged[mapping[name.get_text(strip=True)]] = val

    # Get last updated date
    msg = container.find_next_sibling('div', 'stats-cards__update-msg')
    if msg:
        spans = msg.find_all('span')
        tagged[Fields.DATE.name] = spans[1].get_text(strip=True)
    return tagged
예제 #9
0
def handle_nj(res, mapping):
    '''Need to parse everything the same, and add past recoveries
    to the new query, because I do not know how to add a constant
    to the ArcGIS query
    '''
    mapped = {}
    for result in res[:-1]:
        partial = extract_arcgis_attributes(result, mapping, 'NJ')
        mapped.update(partial)

    # it's not a magic value, it's from an existing query, but
    # it's always the same
    mapped[Fields.RECOVERED.name] += 15642

    # Find the magic number added to probables
    widget = res[-1]['widgets'][17]
    val = atoi(widget.get('valueConversion', {}).get('offset', 0))
    mapped[Fields.PROBABLE.name] += val

    return mapped
예제 #10
0
def handle_nh(res, mapping):
    # we love soup
    t = res[0].find('table')

    mapped = {}
    for tr in t.find_all('tr'):
        th = tr.find('th').get_text(strip=True)
        td = tr.find('td').get_text(strip=True)
        # numbers here are funny, need to clean a bit
        td = td.split()[0]
        if th in mapping:
            # yay, the faster option
            mapped[mapping[th]] = atoi(re.search("[0-9,]+", td).group(0))
            continue

    # cases + tests
    for df in res[1:]:
        mapped.update(map_attributes(df.sum(), mapping, 'NH'))

    return mapped
예제 #11
0
def handle_mn(res, mapping):
    mapped = {}
    for result in res[:1]:
        partial = extract_arcgis_attributes(result, mapping, 'NJ')
        mapped.update(partial)

    # testing
    soup = res[-1]
    h2 = soup.find_all(['h2', 'h3'])
    for x in h2:
        title = x.get_text(strip=True).strip().strip(":")
        if title in ['Testing', 'Deaths', 'Hospitalizations', 'Daily Update']:
            tables = x.find_next_siblings('table', limit=2)
            for t in tables:
                for tr in t.find_all('tr'):
                    title = tr.find('th').get_text(strip=True).strip()
                    value = tr.find('td').get_text(strip=True).strip()
                    if title in mapping:
                        mapped[mapping[title]] = atoi(value)

    return mapped
예제 #12
0
def handle_al(res, mapping):
    tagged = {}
    for result in res[:-1]:
        partial = extract_arcgis_attributes(result, mapping, debug_state='AL')
        tagged.update(partial)

    widgets = res[-1].get('widgets', {})
    # 6 = hospitalizations
    # 29 = recoveries
    extras = [(widgets[6], Fields.HOSP.name),
              (widgets[29], Fields.RECOVERED.name)]

    for widget, field in extras:
        if widget.get('defaultSettings', {}) \
                    .get('description', "").find("STATEWIDE") >= 0:
            # now check that it's a numeric value
            val = widget['defaultSettings']['middleSection']['textInfo'][
                'text'].strip()
            if re.match("[1-9][0-9,]*", val) is not None:
                tagged[field] = atoi(val)

    return tagged
def handle_mn(res, mapping):
    mapped = []
    # Soup time. Yummy!
    page = res[0]

    table_ids = {'labtable': 'n/a', 'casetable': 'Specimen Collection'}

    for table_id, date_used in table_ids.items():
        table = page.find("table", id=table_id)
        if not table:
            continue

        # map table headers to fields
        headers = table.find_all('th')
        headers = [h.get_text(strip=True) for h in headers]
        headers = [mapping.get(h, '') for h in headers]

        for tr in table.find_all('tr'):
            tds = tr.find_all('td')
            if not tds:
                # expected for 1st line
                continue
            td_text = [td.get_text(strip=True) for td in tds]
            values = dict(zip(headers, td_text))
            if values[DATE].lower().find('unknown') >= 0:
                # ignore unknown date for now
                continue
            row = {}
            for k, v in values.items():
                if not k:
                    continue
                if k != DATE:
                    v = atoi(v) if v != '-' else None
                row[k] = v
            row[DATE_USED] = date_used
            mapped.append(row)

    return mapped
예제 #14
0
def handle_hi(res, mapping):
    tagged = {}
    for result in res[:-1]:
        partial = extract_arcgis_attributes(result, mapping, debug_state='HI')
        tagged.update(partial)

    probables = res[-1]
    h2 = probables.find('h3', id='probables')
    table = h2.find_next('table')
    probables_index = -1
    for i, th in enumerate(table.find('thead').find_all('th')):
        if th.get_text(strip=True).find("Total Probable Cases") >= 0:
            probables_index = i
            break

    probables_val = 0
    if probables_index >= 0:
        for tr in table.find('tbody').find_all('tr'):
            td = tr.find_all('td')[probables_index]
            probables_val += atoi(td.get_text(strip=True))

    tagged[Fields.PROBABLE.name] = probables_val

    return tagged
예제 #15
0
def handle_ut(res, mapping):
    tagged = {}
    soup_start = 1
    for result in res[:soup_start]:
        partial = extract_arcgis_attributes(result, mapping, 'UT')
        tagged.update(partial)

    stats = res[1]
    for k, v in mapping.items():
        x = stats.find(id=k)
        if x:
            value_item = x.find(class_='value')
            if not value_item:
                value_item = x.find(class_='value-output')
            if not value_item:
                continue
            value = atoi(value_item.get_text(strip=True))
            tagged[v] = value

    # inverse mapping
    revmap = {v: k for k, v in mapping.items()}
    hosp = res[2]
    tables = hosp.find_all('table')

    curr_hosp_table = tables[0]
    tds = curr_hosp_table.find_all('td',
                                   string=re.compile(
                                       revmap[Fields.CURR_HOSP.name]))
    curr_hosp = 0
    for td in tds:
        for x in td.next_siblings:
            if x.name == 'td':
                curr_hosp += atoi(x.get_text(strip=True))
    tagged[Fields.CURR_HOSP.name] = curr_hosp

    # TODO: code here can be improved, combined with top part
    td = curr_hosp_table.find('td',
                              string=re.compile(revmap[Fields.CURR_ICU.name]))
    for x in td.next_siblings:
        if x.name == 'td':
            val = atoi(x.get_text(strip=True))
            tagged[Fields.CURR_ICU.name] = val

    for t in tables[1:]:
        if t.caption.get_text(strip=True) in mapping:
            td = t.find_all('td', limit=2)[1]
            tagged[mapping[t.caption.get_text(strip=True)]] = atoi(
                td.get_text(strip=True))

    # Downloadable file
    zipurl = res[-1]
    # Sometimes there are files for multiple dates, we need the most recent
    specimens_file_prefix = 'Overview_Total Tests by'
    specimens_file_latest = specimens_file_prefix
    recovered_file = 'Overview_Cumulative COVID-19 Cases'
    recovered_file_latest = recovered_file
    people_tested_file = 'Overview_Number of People Tested by'
    people_tested_latest = people_tested_file
    test_type = ['PCR/amplification', 'Antigen by DFA/IF']
    result = ['POSITIVE', 'NEGATIVE']
    with zipContextManager(zipurl) as zipdir:
        with os.scandir(zipdir) as it:
            for entry in it:
                df = None
                fields = []
                if not entry.is_file:
                    # just in case
                    continue
                if entry.name.startswith(specimens_file_prefix):
                    if entry.name < specimens_file_latest:
                        continue
                    # specimens
                    fields = [
                        Fields.SPECIMENS_POS, Fields.SPECIMENS_NEG,
                        Fields.ANTIGEN_POS, Fields.ANTIGEN_NEG
                    ]
                    specimens_file_latest = entry.name
                elif entry.name.startswith(people_tested_file):
                    if entry.name < people_tested_latest:
                        continue
                    # people tested
                    fields = [
                        Fields.CONFIRMED, Fields.NEGATIVE,
                        Fields.ANTIGEN_POS_PEOPLE, Fields.ANTIGEN_NEG_PEOPLE,
                        Fields.TOTAL, Fields.ANTIGEN_TOTAL_PEOPLE
                    ]
                    people_tested_latest = entry.name
                elif entry.name.startswith(recovered_file):
                    if entry.name < recovered_file_latest:
                        continue
                    # recoveries
                    fields = [Fields.RECOVERED]
                    recovered_file_latest = entry.name
                if fields and entry.name.startswith(recovered_file):
                    df = pd.read_csv(os.path.join(zipdir, entry.name))
                    last = df['Estimated Recovered *'].iloc[-1]
                    if Fields.RECOVERED in fields:
                        tagged[Fields.RECOVERED.name] = last
                elif fields and not entry.name.startswith(recovered_file):
                    df = pd.read_csv(os.path.join(zipdir, entry.name))
                    summed = df.groupby(['Test Type', 'Result']).sum()
                    i = 0
                    for tt in test_type:
                        for rr in result:
                            tag = fields[i]
                            tag = tag if isinstance(tag, str) else tag.name
                            value = summed.loc[tt, rr]['Count']
                            tagged[tag] = value
                            i += 1
                    # handle totals
                    if Fields.CONFIRMED in fields:
                        tagged[Fields.TOTAL.name] = sum([
                            summed.loc[test_type[0], rr]['Count']
                            for rr in result
                        ])
    return tagged
예제 #16
0
def handle_mi(res, mapping):
    tagged = {}
    for result in res[:2]:
        partial = extract_arcgis_attributes(result, mapping, 'MI')
        tagged.update(partial)

    # Recoveries soup
    recovered_page = res[-3]
    recover_p = recovered_page.find('div', 'fullContent')
    span = recover_p.find('span').get_text(strip=True)
    tagged[Fields.RECOVERED.name] = atoi(span)

    # Hospitalization soup
    hospitalization_page = res[-2]
    tables = hospitalization_page.find_all('table')
    vent = 0
    icu = 0
    hosp = 0
    for t in tables:
        caption = t.find('caption').get_text(strip=True)
        if caption.startswith('COVID-19 Metrics'):
            for row in t.find_all('tr'):
                th = row.find('th')
                if th and th.get_text(
                        strip=True).startswith('Total Hospitalized Adult'):
                    # take last td
                    td = row.find_all('td')[-1]
                    hosp += atoi(td.get_text(strip=True))
                elif th and th.get_text(
                        strip=True).startswith('Hospitalized Peds'):
                    td = row.find_all('td')[-1]
                    hosp += atoi(td.get_text(strip=True))
                elif th and th.get_text(strip=True).startswith(
                        'Adult ICU Confirmed/Suspected'):
                    td = row.find_all('td')[-1]
                    icu += atoi(td.get_text(strip=True))
                elif th and th.get_text(
                        strip=True).startswith('Hospitalized and Ventilated'):
                    td = row.find_all('td')[-1]
                    vent += atoi(td.get_text(strip=True))

    tagged[Fields.CURR_VENT.name] = atoi(vent)
    tagged[Fields.CURR_HOSP.name] = atoi(hosp)
    tagged[Fields.CURR_ICU.name] = atoi(icu)

    # TODO: Can use the reverse mapping
    soup = res[-1]
    h = soup.find("h5", string=re.compile('[dD][aA][tT][aA]'))
    parent = h.find_parent("ul")
    links = parent.find_all("a")

    base_url = 'https://www.michigan.gov'
    cases_url = base_url + links[0]['href']
    tests_url = base_url + links[3]['href']
    results_url = base_url + links[4]['href']

    try:
        df = pd.read_excel(cases_url, engine='xlrd')
        filter_col = 'CASE_STATUS'
        summed = df.groupby(filter_col).sum()
        for m in ['Cases', 'Deaths']:
            for t in ['Confirmed', 'Probable']:
                tagged[mapping[m + t]] = summed[m][t]
    except Exception as e:
        logging.warning("Exception getting cases by status", e)

    try:
        df = pd.read_excel(tests_url, engine='xlrd')
        filter_col = 'TestType'
        summed = df.groupby(filter_col).sum()
        for m in ['Diagnostic', 'Serology']:
            tagged[mapping[m]] = summed['Count'][m]
    except Exception:
        logging.warning("[MI] failed to fetch test results")

    try:
        df = pd.read_excel(results_url, engine='xlrd')
        fields = ['Negative', 'Positive']
        summed = df[fields].sum()
        for x in fields:
            tagged[mapping[x]] = summed[x]
    except Exception:
        logging.warning("[MI] Failed to fetch test results")

    return tagged