コード例 #1
0
def get_digits(name, canvas, driver):
    """Return the leading run of digits read from a canvas element.

    Args:
        name: Label used only in the warning printed when nothing is found.
        canvas: XPath string locating the canvas element to read.
        driver: Selenium driver used to look up the element; it is also
            handed to ``getCanvas``.

    Returns:
        The digit string at the very start of the extracted text, or ``None``
        (after printing a warning) when the text does not start with a digit.
    """
    element = driver.find_element_by_xpath(canvas)
    # Commas and spaces are removed first so that a grouped number such as
    # "1,234" or "1 234" is read as one uninterrupted run of digits.
    cases = getCanvas(element, driver).replace(",", "").replace(" ", "")
    found = re.match(r"(\d+)", cases.strip().lower())
    if found is None:
        print(f"Warning: no {name} extracted; got string", cases)
        return None
    return found.group(1)
コード例 #2
0
def _la_fetch_json(url, raw_name, prefix, now):
    """Fetch ``url``, archive the decoded JSON, and return it.

    The payload is written to ``<raw_name>/<prefix>_<now>.json`` before it is
    returned, so a raw copy exists even if later validation raises.
    """
    raw = requests.get(url).json()
    with open("%s/%s_%s.json" % (raw_name, prefix, now), "w") as fp:
        json.dump(raw, fp)
    return raw


def _la_store_region_counts(raw, fulldat, key_prefix, label):
    """Copy the per-region InUse/StillAvailable sums of ``raw`` into ``fulldat``.

    Expects exactly 18 features (9 "LDH Region N" geographies x 2 groups) and
    exactly one feature per (region, Group_Num) pair; raises ``Exception``
    otherwise. ``label`` only appears in the error messages.
    """
    features = raw["features"]
    if len(features) != 18:
        raise Exception("Unexpected number of %s/regions in LA" % label)
    for region in range(1, 10):
        geography = "LDH Region %d" % region
        for gnum, cat in [(1, "InUse"), (2, "StillAvailable")]:
            dat = [
                x["attributes"] for x in features
                if x["attributes"]["Geography"] == geography
                and x["attributes"]["Group_Num"] == gnum
            ]
            if len(dat) != 1:
                raise Exception("Bad %s/region" % label)
            fulldat[key_prefix + cat + "Region" + str(region)] = dat[0]["value"]


def _la_store_race_by_region(features, fulldat, key_prefix):
    """Validate the region/race features and store each summed value.

    Expects 63 features (9 regions x 7 races); raises ``Exception`` on a
    different count or on a region/race outside the expected lists.
    """
    if len(features) != 63:
        raise Exception("Unexpected number of regions/races")
    expected_regions = [
        "Region 1", "Region 2", "Region 3", "Region 4", "Region 5", "Region 6",
        "Region 7", "Region 8", "Region 9"
    ]
    expected_races = [
        "White", "Black", "Unknown", "Asian",
        "Native Hawaiian/Other Pacific Islander",
        "American Indian/Alaskan Native", "Other"
    ]
    for feature in features:
        attrs = feature["attributes"]
        if attrs["LDH_Region"] not in expected_regions:
            raise Exception("Unexpeted region " + attrs["LDH_Region"])
        if attrs["Race"] not in expected_races:
            raise Exception("Unexpected race " + attrs["Race"])
        fulldat[key_prefix + attrs["LDH_Region"].strip() + "_race_" +
                attrs["Race"]] = attrs["value"]


def _la_canvas_text(driver, header_xpath, values_xpath):
    """Return (header_text, values_text) read from two canvases via getCanvas.

    Doubled newlines in each text are collapsed to a single newline.
    """
    header_el = driver.find_element_by_xpath(header_xpath)
    values_el = driver.find_element_by_xpath(values_xpath)
    head = getCanvas(header_el, driver).replace("\n\n", "\n")
    val = getCanvas(values_el, driver).replace("\n\n", "\n")
    return head, val


def _la_store_labelled_values(fulldat, head, val, expected, error_prefix,
                              key_prefix, strip_percent=True):
    """Pair header lines with value lines and store them under prefixed keys.

    Raises ``Exception(error_prefix + label)`` when a stripped label is not in
    ``expected``. "%" is removed from the value unless ``strip_percent`` is
    False.
    """
    for label, value in zip(head.splitlines(), val.splitlines()):
        if label.strip() not in expected:
            raise Exception(error_prefix + label)
        if strip_percent:
            value = value.replace("%", "")
        fulldat[key_prefix + label] = value


def run_LA(args):
    """Scrape the Louisiana COVID-19 figures and append them to the LA CSVs.

    Steps, in order:
      1. Query the ArcGIS feature services (age, gender, tests, beds, ICU
         beds, ventilators, race by region, race by parish); every raw JSON
         response is archived under ``../LA/raw``.
      2. Read the "probable deaths" Tableau dashboard through Selenium and
         ``getCanvas``, saving two screenshots.
      3. Append one row to ``../LA/data/data.csv`` and one row per parish to
         ``../LA/data/parish_race_data.csv`` (a header row is written only
         when the file does not exist yet), then call ``merge_parish()``.

    Args:
        args: Unused; kept so the signature matches the other runners.

    Raises:
        Exception: When a response does not have the expected number of
            entries or contains an unexpected label.
    """
    # Parameters
    raw_name = '../LA/raw'
    data_name = '../LA/data/data.csv'
    parish_race_name = '../LA/data/parish_race_data.csv'
    now = str(datetime.now())

    fulldat = {}

    # Age: one feature per (Group_Num, ValueType) pair.
    raw = _la_fetch_json(
        "https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Combined_COVID_Reporting/FeatureServer/0/query?f=json&where=Measure%3D%27Age%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&groupByFieldsForStatistics=Group_Num%2CValueType&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22Value%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&outSR=102100&resultType=standard&cacheHint=true",
        raw_name, "age", now)
    groups_death = [
        "%s_0_17", "%s_18_29", "%s_30_39", "%s_40_49", "%s_50_59", "%s_60_69",
        "%s_70_plus"
    ]
    groups_case = [
        "%s_0_4", "%s_5_17", "%s_18_29", "%s_30_39", "%s_40_49", "%s_50_59",
        "%s_60_69", "%s_70_plus"
    ]
    features = raw["features"]
    if len(features) != 15:
        raise Exception("Unexpected number of ages in LA: " +
                        str(len(features)))

    # Anything that is not a "case" entry is treated as a death entry.
    raw_cases = [
        entry["attributes"] for entry in features
        if entry["attributes"]["ValueType"] == "case"
    ]
    raw_deaths = [
        entry["attributes"] for entry in features
        if entry["attributes"]["ValueType"] != "case"
    ]
    if len(raw_cases) != 8:
        raise Exception("Unexpected number of entries for age cases: " +
                        str(len(raw_cases)))
    if len(raw_deaths) != 7:
        raise Exception("Unexpected number of entries for age deaths: " +
                        str(len(raw_deaths)))

    # NOTE(review): values are matched to age bands by position only, so this
    # relies on the service returning the groups in ascending age order --
    # confirm against Group_Num.
    for template, row in zip(groups_case, raw_cases):
        fulldat[template % "Case"] = row["value"]
    for template, row in zip(groups_death, raw_deaths):
        fulldat[template % "Deaths"] = row["value"]

    # Gender (cases only)
    raw = _la_fetch_json(
        "https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Combined_COVID_Reporting/FeatureServer/0/query?f=json&where=Measure%3D%27Gender%27%20AND%20ValueType%3D%27case%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&groupByFieldsForStatistics=Group_Num&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22Value%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&outSR=102100&resultType=standard&cacheHint=true",
        raw_name, "gender", now)
    if len(raw["features"]) != 3:
        raise Exception("Unexpected number of genders in LA")
    groups = [(1, "Case_Pct_Male"), (2, "Case_Pct_Female"),
              (3, "Case_Pct_Other")]
    for gnum, name in groups:
        dat = [
            x["attributes"] for x in raw["features"]
            if x["attributes"]["Group_Num"] == gnum
        ]
        if len(dat) != 1:
            raise Exception("Missing some gender data")
        fulldat[name] = dat[0]["value"]

    # Tests by state lab / commercial labs
    raw = _la_fetch_json(
        "https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Combined_COVID_Reporting/FeatureServer/0/query?f=json&where=Measure%3D%27State%20Tests%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22Value%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&outSR=102100&resultType=standard&cacheHint=true",
        raw_name, "statelab", now)
    fulldat["TestsByStateLab"] = int(raw["features"][0]["attributes"]["value"])

    raw = _la_fetch_json(
        "https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Combined_COVID_Reporting/FeatureServer/0/query?f=json&where=Measure%3D%27Commercial%20Tests%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22Value%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&outSR=102100&resultType=standard&cacheHint=true",
        raw_name, "commercial", now)
    fulldat["CommercialTests"] = int(raw["features"][0]["attributes"]["value"])

    fulldat["Scrape_Time"] = now

    # Beds / ICU beds / ventilators by LDH region
    raw = _la_fetch_json(
        "https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Combined_COVID_Reporting/FeatureServer/0/query?f=json&where=Measure%3D%27Beds%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&groupByFieldsForStatistics=Geography%2CGroup_Num&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22Value%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&outSR=102100&resultType=standard&cacheHint=true",
        raw_name, "bedsbyregion", now)
    _la_store_region_counts(raw, fulldat, "Beds_", "bed")

    raw = _la_fetch_json(
        "https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Combined_COVID_Reporting/FeatureServer/0/query?f=json&where=Measure%3D%27ICU%20Beds%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&groupByFieldsForStatistics=Geography%2CGroup_Num&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22Value%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&outSR=102100&resultType=standard&cacheHint=true",
        raw_name, "ICUbedsbyregion", now)
    _la_store_region_counts(raw, fulldat, "ICUBeds_", "ICU bed")

    raw = _la_fetch_json(
        "https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Combined_COVID_Reporting/FeatureServer/0/query?f=json&where=Measure%3D%27Hospital%20Vents%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&groupByFieldsForStatistics=Geography%2CGroup_Num&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22Value%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&outSR=102100&resultType=standard&cacheHint=true",
        raw_name, "ventbyregion", now)
    _la_store_region_counts(raw, fulldat, "Vent_", "ventilator")

    # Deaths by race by region
    raw = _la_fetch_json(
        'https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Case_Deaths_Race_Region_new/FeatureServer/0/query?f=json&where=1%3D1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&groupByFieldsForStatistics=LDH_Region%2CRace&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22Deaths%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&outSR=102100&resultType=standard&cacheHint=true',
        raw_name, "DeathRacebyRegion", now)
    _la_store_race_by_region(raw["features"], fulldat, "Deaths_")

    # Cases by race by region
    raw = _la_fetch_json(
        'https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Case_Deaths_Race_Region_new/FeatureServer/0/query?f=json&where=1%3D1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&groupByFieldsForStatistics=LDH_Region%2CRace&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22Cases%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&outSR=102100&resultType=standard&cacheHint=true',
        raw_name, "CaseRacebyRegion", now)
    # NOTE(review): "Casess_" looks like a typo for "Cases_", but it is part
    # of the output column names, so it is deliberately left unchanged.
    _la_store_race_by_region(raw["features"], fulldat, "Casess_")

    # Race by parish: one output row per parish
    out_parish = []
    raw = _la_fetch_json(
        'https://services5.arcgis.com/O5K6bb5dZVZcTo5M/arcgis/rest/services/Cases_and_Deaths_by_Race_by_Parish_and_Region/FeatureServer/0/query?f=json&where=1%3D1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&resultOffset=0&resultRecordCount=65&resultType=standard&cacheHint=true',
        raw_name, "RacebyParish", now)
    for row in raw["features"]:
        parish_data = row["attributes"]
        parish_dict = {"Parish": parish_data["Parish"], "Scrape Time": now}
        for key in parish_data:
            if "Deaths_" in key or "Cases_" in key:
                val = parish_data[key]
                # An empty string is recorded as a zero count.
                if val == "":
                    val = 0
                parish_dict[key + "_race"] = val
            elif "LDHH" in key:
                parish_dict[key] = parish_data[key]
        out_parish.append(parish_dict)

    # Tableau - Probable Deaths (read through Selenium)
    driver = webdriver.Chrome(
        executable_path="andrew/ChromeDriver/chromedriver.exe")
    try:
        driver.maximize_window()
        driver.get(
            "https://public.tableau.com/profile/lee.mendoza#!/vizhome/COVID19demog/DataonCOVIN-19RelatedDeathsToDate"
        )
        time.sleep(5)
        # Keep screenshots of the dashboard as the raw record of this scrape.
        driver.save_screenshot(raw_name + "/probable_deaths_pt1_" + now +
                               ".png")
        driver.execute_script("window.scrollTo(0, 400)")
        driver.save_screenshot(raw_name + "/probable_deaths_pt2_" + now +
                               ".png")

        frame = driver.find_element_by_xpath(
            '//*[@id="ng-app"]/body/div[1]/div[2]/section/div/div[2]/section[2]/figure/js-api-viz/div/iframe'
        )
        driver.switch_to.frame(frame)

        # Probable Deaths by Race
        head, val = _la_canvas_text(
            driver,
            '//*[@id="tabZoneId3"]/div/div/div/div[1]/div[5]/div[1]/canvas',
            '//*[@id="view13678703414402932068_2418008377866606056"]/div[1]/div[2]/canvas[1]'
        )
        expected_race = [
            "American Indian/Alaska N..", "Asian", "Black",
            "Native Hawaiian/Pl", "Other", "Unknown", "White"
        ]
        # Drop blank header lines. The original test used `or`, which is
        # always true and therefore never filtered anything.
        extracted_races = [
            line for line in head.splitlines() if line not in ("", "\n")
        ]
        for race, pct in zip(extracted_races, val.splitlines()):
            percent = pct.replace("%", "")
            # Both spellings ("PI" with a capital i, "Pl" with a lowercase L)
            # are stored under the single "Pl" key.
            if race.strip() == "Native Hawaiian/PI":
                race = "Native Hawaiian/Pl"
            if race.strip() not in expected_race:
                raise Exception("Unexpected race in Probable Deaths " + race)
            fulldat["% Probable Deaths by Race: " + race] = percent

        # Probable Deaths by Ethnicity
        head, val = _la_canvas_text(
            driver,
            '//*[@id="tabZoneId10"]/div/div/div/div[1]/div[5]/div[1]/canvas',
            '//*[@id="view13678703414402932068_2377024103324179123"]/div[1]/div[2]/canvas[1]'
        )
        _la_store_labelled_values(
            fulldat, head, val,
            ["Hispanic/Latino", "Non-Hispanic/Latino", "Unknown"],
            "Unexpected Ethnicity ", "% Probable Deaths by Ethnicity: ")

        # Probable Deaths by Gender
        head, val = _la_canvas_text(
            driver,
            '//*[@id="tabZoneId18"]/div/div/div/div[1]/div[5]/div[1]/canvas',
            '//*[@id="view13678703414402932068_1339666610323305087"]/div[1]/div[2]/canvas[1]'
        )
        _la_store_labelled_values(
            fulldat, head, val, ["Female", "Male", "Unknown/Other"],
            "Unexpected Gender ", "% Probable Deaths by Gender: ")

        # Probable Deaths by Underlying Conditions
        head, val = _la_canvas_text(
            driver,
            '//*[@id="tabZoneId5"]/div/div/div/div[1]/div[5]/div[1]/canvas',
            '//*[@id="view13678703414402932068_5659047270258252395"]/div[1]/div[2]/canvas[1]'
        )
        _la_store_labelled_values(
            fulldat, head, val, [
                "Asthma", "Cancer", "Cardiac Disease",
                "Chronic Kidney Disease", "Congestive Heart Failure",
                "Diabetes", "Hypertension", "Neurological", "Obesity",
                "Pulmonary", "None"
            ], "Unexpected Underlying Condition ",
            "% Probable Deaths by Underlying Condition: ")

        # Probable Deaths - Avg and Median Ages. These values are stored as
        # read, with no "%" stripping. The error now names the offending
        # metric; the original referenced `con` from the previous loop.
        head, val = _la_canvas_text(
            driver,
            '//*[@id="tabZoneId21"]/div/div/div/div[1]/div[5]/div[1]/canvas',
            '//*[@id="view13678703414402932068_10330976522668559202"]/div[1]/div[2]/canvas[1]'
        )
        _la_store_labelled_values(
            fulldat, head, val, ["Average", "Median"],
            "Unexpected Age Metric ", "Probable Deaths Age: ",
            strip_percent=False)
    finally:
        # Always end the browser session, even when a lookup or check fails.
        driver.quit()

    # Output. Columns are sorted so every run writes them in the same order.
    fields = sorted(fulldat)
    exists = os.path.exists(data_name)
    # newline="" is what the csv module requires of files it writes to.
    with open(data_name, "a", newline="") as fp:
        writer = csv.writer(fp)
        if not exists:
            writer.writerow(fields)
        writer.writerow([fulldat[x] for x in fields])

    # Output - Parish. Existence is re-checked for every row, so only the
    # first row written to a brand-new file is preceded by a header.
    for parish in out_parish:
        fields = sorted(parish)
        exists = os.path.exists(parish_race_name)
        with open(parish_race_name, "a", newline="") as fp:
            writer = csv.writer(fp)
            if not exists:
                writer.writerow(fields)
            writer.writerow([parish[x] for x in fields])

    # Merge Parish Race data
    merge_parish()
コード例 #3
0
def run_ID(args):
    """Scrape the Idaho COVID-19 Tableau dashboard and append a row to its CSV.

    Steps, in order:
      1. Open the dashboard in Chrome and read headline figures from canvas
         elements via ``getCanvas`` (OCR), matching each text with a regex.
      2. Read the gender and age bar charts via ``getGraph``.
      3. Ask the operator to type in the race/ethnicity percentages for cases
         and deaths (these are not extracted automatically).
      4. Append one row to ``../ID/data/data.csv`` (a header row is written
         only when the file does not exist yet) and save the dashboard's
         static image under ``../ID/raw``.

    Args:
        args: Unused; kept so the signature matches the other runners.

    Raises:
        Exception: When a figure cannot be extracted, the gender/age groups
            are not the expected ones, or the operator rejects the layout.
        ValueError: When a fallback death figure or a typed percentage is not
            numeric.
    """
    # Parameters
    raw_name = '../ID/raw'
    data_name = '../ID/data/data.csv'
    now = str(datetime.now())

    out = {}

    # Each entry: (output field, regex applied to the lower-cased OCR text,
    # canvas XPath, flag passed to getCanvas as its third argument --
    # presumably inverts black/white before OCR; confirm in getCanvas).
    texts = [
        ("TotalCases", r"(\d+)\s+\(\d+\s+new\)\s+statewide\s+cases",
         '//*[@id="view11831741491762752444_11141899506553115835"]/div[1]/div[2]/canvas[1]',
         False),
        ("TotalHospitalizations", r"(\d+)\s+cases hospitalized",
         '//*[@id="view11831741491762752444_14784563920108749745"]/div[1]/div[2]/canvas[1]',
         True),
        ("ICUAdmissions", r"(\d+)\s+cases admitted to icu",
         '//*[@id="view11831741491762752444_8851338240052320464"]/div[1]/div[2]/canvas[1]',
         True),
        ("CasesAmongHCW", r"(\d+)\s+cases among health care workers",
         '//*[@id="view11831741491762752444_378066509776727316"]/div[1]/div[2]/canvas[1]',
         False),
        ("CasesRecovered", r"(\d+)\s+cases estimated recovered",
         '//*[@id="view11831741491762752444_15348675858672874598"]/div[1]/div[2]/canvas[1]',
         True),
        # TotalDeaths must stay last: reaching it switches to the Deaths tab.
        ("TotalDeaths",
         r"total deaths:+\s*(\d+)\s*\(\s*\d+\s*confirmed\s+\d+\s+probable\)\s+rate per 100000 population:+\s+(\S+)\s*",
         '//*[@id="view13810090252421852225_17430862024409208946"]/div[1]/div[2]/canvas[1]',
         False),
    ]

    driver = webdriver.Chrome(
        executable_path="andrew/ChromeDriver/chromedriver.exe")
    try:
        driver.maximize_window()
        driver.get(
            "https://public.tableau.com/profile/idaho.division.of.public.health#!/vizhome/DPHIdahoCOVID-19Dashboard_V2/Story1"
        )
        time.sleep(10)  # More robust to wait for elements to appear...
        driver.switch_to.frame(
            driver.find_element_by_xpath(
                '//*[@id="ng-app"]/body/div[1]/div[2]/section/div/div[2]/section[2]/figure/js-api-viz/div/iframe'
            ))

        # Click Demographics tab
        driver.find_element_by_xpath(
            '//*[@id="tabZoneId4"]/div/div/div/span[2]/div/span/span/span[2]'
        ).click()
        time.sleep(10)

        for field, regex, xpath, flipBW in texts:
            if field == "TotalDeaths":
                # Click Deaths Tab
                driver.find_element_by_xpath(
                    '//*[@id="tabZoneId4"]/div/div/div/span[2]/div/span/span/span[7]'
                ).click()
                time.sleep(10)
            text = getCanvas(driver.find_element_by_xpath(xpath), driver,
                             flipBW).replace(",", "")
            match = re.search(regex, text.strip().lower())
            if match:
                # NOTE(review): the TotalDeaths regex also captures the death
                # rate as group 2, but only group 1 is stored on this path.
                out[field] = match.group(1).strip()
            elif field == "TotalDeaths":
                # Fallback: parse the whitespace-split tokens by position.
                # The original had leftover debug prints and an exit() here,
                # which made this branch unreachable and ended the process
                # with a success status on a parse failure.
                list_deaths = text.split()
                if len(list_deaths) != 8:
                    raise Exception("Check Total Deaths")
                try:
                    total_deaths = int(list_deaths[2])
                except ValueError:
                    print("Total Deaths not Int - Check!")
                    raise
                try:
                    death_rate_100k = float(list_deaths[7])
                except ValueError:
                    print("Death Rate not Number - Check!")
                    raise
                out["TotalDeaths"] = total_deaths
                out["Death_Rate_Per_100000"] = death_rate_100k
            else:
                out[field] = None
                raise Exception("Warning: No " + field +
                                " extracted for Idaho; got string " + text)

        # Click Demographics tab
        driver.find_element_by_xpath(
            '//*[@id="tabZoneId4"]/div/div/div/span[2]/div/span/span/span[2]'
        ).click()
        time.sleep(10)

        # Grab graphs for demographics; the RGBA tuple is passed to getGraph
        # (presumably the bar colour to measure -- confirm in getGraph).
        genders = getGraph(
            driver.find_element_by_xpath(
                '//*[@id="view11831741491762752444_4953159310065112757"]/div[1]/div[2]/canvas[1]'
            ), (78, 121, 167, 255), driver)
        genderLabels = [
            x.title() for x in getCanvas(
                driver.find_element_by_xpath(
                    '//*[@id="tabZoneId65"]/div/div/div/div[1]/div[5]/div[1]/canvas'
                ), driver).strip().split()
        ]
        if len(genders) != 2 or len(genderLabels) != 2 or " ".join(
                sorted(genderLabels)) != "Female Male":
            raise Exception("Wrong gender vals for ID")
        for gender, val in zip(genderLabels, genders):
            out["Pct_Gender_" + gender] = round(val, 1)

        ages = getGraph(
            driver.find_element_by_xpath(
                '//*[@id="view11831741491762752444_7110063204799374782"]/div[1]/div[2]/canvas[1]'
            ), (78, 121, 167, 255), driver)
        ageLabels = getCanvas(
            driver.find_element_by_xpath(
                '//*[@id="tabZoneId77"]/div/div/div/div[1]/div[5]/div[1]/canvas'
            ), driver).strip().split()
        ageExpect = [
            "<18", "18-29", "30-39", "40-49", "50-59", "60-69", "70-79",
            "80-89", "90-99", "100+"
        ]
        if len(ages) != 10 or sorted(ageLabels) != sorted(ageExpect):
            raise Exception("Wrong age groups for ID")
        for age, val in zip(ageLabels, ages):
            # e.g. "18-29" -> "18_29", "<18" -> "0_17", "100+" -> "100_plus"
            out["Pct_Age_" + age.replace("-", "_").replace(
                "<18", "0_17").replace("+", "_plus")] = round(val, 1)
    finally:
        # Always end the browser session, even when a step above raised.
        driver.quit()

    # Race and ethnicity percentages are entered by hand.
    print(
        "Please load https://public.tableau.com/profile/idaho.division.of.public.health#!/vizhome/DPHIdahoCOVID-19Dashboard_V2/Story1 and click on the COVID-19 Demographics Tab"
    )

    # Cases
    goodRace = input(
        "Are there exactly 7 races in the dashboard: White, Asian, Black, Other Race, Multiple Race, American Indian, Native Hawaiian? Are there exactly two ethnicities: Non-hispanic and hispanic? (Y/N) "
    ).lower()
    if goodRace not in ["y", "n"]:
        raise Exception("Invalid input")
    if goodRace == "n":
        raise Exception("Invalid races and/or ethnicities")
    case_prompts = [
        ("Case_Pct_Race_White", "Case Pct White? "),
        ("Case_Pct_Race_Other", "Case Pct Other? "),
        ("Case_Pct_Race_NativeHawaiian", "Case Pct Native Hawaiian? "),
        ("Case_Pct_Race_MultipleRaces", "Case Pct Multiple Races? "),
        ("Case_Pct_Race_Black", "Case Pct Black? "),
        ("Case_Pct_Race_Asian", "Case Pct Asian? "),
        ("Case_Pct_Race_AmericanIndian", "Case Pct American Indian? "),
        ("Case_Pct_Ethn_NonHispanic", "Case Pct Non-Hispanic? "),
        ("Case_Pct_Ethn_Hispanic", "Case Pct Hispanic? "),
    ]
    for key, prompt in case_prompts:
        out[key] = float(input(prompt))

    # Death
    print("Now click on the COVID-19 Related Deaths Demographics Tab")
    goodRace = input(
        "Are there exactly 6 races in the dashboard: White, Asian, Black, American Indian, Native Hawaiian and Other? Are there exactly 2 ethnicities: Non-hispanic and hispanic? (Y/N) "
    ).lower()
    if goodRace not in ["y", "n"]:
        raise Exception("Invalid input")
    if goodRace == "n":
        raise Exception("Invalid races and/or ethnicities")
    death_prompts = [
        ("Death_Pct_Race_White", "Death Pct White? "),
        ("Death_Pct_Race_Other", "Death Pct Other? "),
        ("Death_Pct_Race_NativeHawaiian", "Death Pct Native Hawaiian? "),
        ("Death_Pct_Race_Black", "Death Pct Black? "),
        ("Death_Pct_Race_Asian", "Death Pct Asian? "),
        ("Death_Pct_Race_AmericanIndian", "Death Pct American Indian? "),
        ("Death_Pct_Ethn_NonHispanic", "Death Pct Non-Hispanic? "),
        ("Death_Pct_Ethn_Hispanic", "Death Pct Hispanic? "),
    ]
    for key, prompt in death_prompts:
        out[key] = float(input(prompt))

    out["Scrape_Time"] = now
    # Columns are sorted so every run writes them in the same order.
    fields = sorted(out)
    exists = os.path.exists(data_name)
    # newline="" is what the csv module requires of files it writes to.
    with open(data_name, "a", newline="") as fp:
        writer = csv.writer(fp)
        if not exists:
            writer.writerow(fields)
        writer.writerow([out[x] for x in fields])

    # Let's make a best effort to get the raw data...
    img = requests.get(
        "https://public.tableau.com/static/images/DP/DPHIdahoCOVID-19Dashboard_V2/Story1/1.png"
    )
    with open("%s/%s.png" % (raw_name, now), "wb") as fp:
        fp.write(img.content)
コード例 #4
0
def run_CA(args):
    """Scrape California COVID-19 figures and append them to the CA CSV files.

    With the hard-coded ``new = True`` this function only delegates to
    ``run_new_CA()``.  The rest is the legacy Tableau scraper, kept so it can
    be re-enabled by flipping the flag: it reads the public dashboard
    (totals, age, sex, race) and the hospital dashboard, appends one row to
    each of the three CSV files and saves a PNG snapshot of each dashboard
    under ``raw_name``.

    Args:
        args: Not used by this function.

    Raises:
        KeyError: Legacy path only; the age-group text matched neither
            expected layout.
        ValueError: Legacy path only; the race panel did not yield 16
            entries, or an "X of Y" hospital figure could not be parsed.
    """
    # Parameters
    raw_name = '../CA/raw'
    data_name = '../CA/data/data.csv'
    race_data_name = '../CA/data/race_data.csv'
    hospital_data_name = '../CA/data/hospital_data.csv'
    now = str(datetime.now())
    new = True

    if new:
        run_new_CA()
        return

    chromedriver = "andrew/ChromeDriver/chromedriver.exe"

    # Public dashboard.  The browser is closed even when a scrape step
    # raises, so a failed run does not leave a Chrome window behind.
    driver = webdriver.Chrome(executable_path=chromedriver)
    try:
        out, out_race = _ca_scrape_public_dashboard(driver)
    finally:
        driver.close()

    out["Scrape_Time"] = now
    _ca_append_row(data_name, out)

    # Let's make a best effort to get the raw data...
    img = requests.get("https://public.tableau.com/static/images/CO/COVID-19PublicDashboard/Covid-19Public/1_rss.png")

    out_race["Scrape_Time"] = now
    _ca_append_row(race_data_name, out_race)

    with open("%s/%s.png" % (raw_name, now), "wb") as fp:
        fp.write(img.content)

    # California hospital situation
    driver = webdriver.Chrome(executable_path=chromedriver)
    try:
        hospital = _ca_scrape_hospital_dashboard(driver)
    finally:
        driver.close()

    hospital["Scrape_Time"] = now
    _ca_append_row(hospital_data_name, hospital)
    img = requests.get("https://public.tableau.com/static/images/CO/COVID-19PublicDashboard/Covid-19Hospitals/1_rss.png")
    with open("%s/%s_hospital.png" % (raw_name, now), "wb") as fp:
        fp.write(img.content)


def _ca_append_row(path, row):
    """Append the dict ``row`` to the CSV at ``path``; add a header if new.

    Columns are the sorted keys of ``row``; the header line is written only
    when the file did not exist before this call.
    """
    fields = sorted(row)
    exists = os.path.exists(path)
    # newline="" lets the csv module control line endings itself, which
    # avoids doubled "\r" characters on Windows.
    with open(path, "a", newline="") as fp:
        writer = csv.writer(fp)
        if not exists:
            writer.writerow(fields)
        writer.writerow([row[x] for x in fields])


def _ca_parse_x_of_y(text, what):
    """Return ``(x, y)`` as ints from the first "X of Y" found in ``text``.

    Thousands separators (commas) are dropped.  ``what`` is only used in the
    error message.  Raises ValueError when no "X of Y" pattern is present.
    """
    match = re.search(r"([\d,,]+) of ([\d,,]+)", text)
    if match is None:
        raise ValueError(f"Could not parse {what} for CA; got string {text!r}")
    first, second = (int(g.replace(",", "")) for g in match.groups())
    return first, second


def _ca_scrape_public_dashboard(driver):
    """Read totals, age, sex and race data from the CA public dashboard.

    Returns a tuple ``(out, out_race)`` of dicts: ``out`` holds the totals,
    the ``Age_*`` percentages and the sex percentages; ``out_race`` maps each
    race label to its percentage.  The caller owns ``driver`` and closes it.
    """
    driver.maximize_window()
    driver.get("https://public.tableau.com/views/COVID-19PublicDashboard/Covid-19Public?:embed=y&:display_count=no&:showVizHome=no")
    time.sleep(10)  # More robust to wait for elements to appear...

    out = {}

    out["TotalCases"] = get_digits("TotalCases",
            '//*[@id="view8860806102834544352_2954032034214900649"]/div[1]/div[2]/canvas[1]', driver)
    out["TotalFatalities"] = get_digits("TotalFatalities",
            '//*[@id="view8860806102834544352_10936283936734129650"]/div[1]/div[2]/canvas[1]', driver)
    out["TotalTested"] = get_digits("TotalTested",
            '//*[@id="view8860806102834544352_12188172174700680575"]/div[1]/div[2]/canvas[1]', driver)

    # Age: category labels and percentages live on two different canvases;
    # their text is joined into one string and parsed with a single regex.
    age_groups = getCanvas(driver.find_element_by_xpath(
        '//*[@id="tabZoneId257"]/div/div/div/div/div[5]/div[1]/canvas'), driver).replace("\n", " ")
    age_text = age_groups.replace(".", "-")
    try:
        age_perc = getCanvas(driver.find_element_by_xpath('//*[@id="view8860806102834544352_11651535759491462876"]/div[1]/div[2]/canvas[1]'), driver).replace("\n", " ")
        age_text = f"{age_text} {age_perc}"
    except Exception as err:
        # Deliberately best effort: carry on with the labels alone and let
        # the parsing below decide whether the text is usable.
        print("Warning: could not read CA age percentages:", err)

    # Preferred layout: five labels followed by exactly five percentages.
    five_cats = False
    match = re.search(
            r"([\d,-]+)[ ]+([\d,-]+)[ ]+([\d,-]+)[ ]+([\d,+]+)[ ]+[‘]*([A-Z][a-z]+)[ ]+([\d]+)%[ ]+([\d]+)%[ ]+([\d]+)%[ ]+([\d]+)%[ ]+([\d]+)%", age_text)
    if match is None:
        # Fallback layout: five labels followed by any number of percentages.
        match = re.search(r"([\d,-]+)[ ]+([\d,-]+)[ ]+([\d,-]+)[ ]+([\d,-]+)[ ]+([A-Z][a-z]+)[ ]+([\d]+%[ ]*)+", age_text)
        five_cats = match is not None
    if match is None:
        raise KeyError("Failed at finding age groups")

    groups = match.groups()
    if not five_cats:
        # First half of the groups are labels, second half their percentages.
        half = len(groups) // 2
        age_groups = [f"Age_{i}" for i in groups[:half]]
        age_percentages = [int(i) for i in groups[half:]]
    else:
        age_groups = [f"Age_{i}" for i in groups[:5]]
        age_percentages = [int(i.replace("%", "")) for i in re.findall(r"[\d]+%", age_text)]
        if len(age_percentages) == 3:
            # Only three percentages were read: pad the first and last
            # category with NaN.
            age_percentages = [np.nan] + age_percentages + [np.nan]
    out.update(zip(age_groups, age_percentages))

    # Sex
    sex = getCanvas(driver.find_element_by_xpath('//*[@id="tabZoneId247"]/div/div/div/div/div[5]/div[1]/canvas'), driver)
    sex = sex.replace("‘", "").replace("\n\n", " ")
    sex_perc = getCanvas(driver.find_element_by_xpath('//*[@id="view8860806102834544352_15384157321978781716"]/div[1]/div[2]/canvas[1]'), driver).replace("\n\n", " ")
    sex = f"{sex}\n\n{sex_perc}"
    # The label part keeps its real newline characters; only the part with
    # regex escapes is a raw string.
    match = re.search("Female\nMale Unknown\n\n" r"([\d]+)% ([\d]+)% ([\d]+)%", sex)
    if match is not None:
        female, male, unknown = (int(g) for g in match.groups())
    else:
        female = male = unknown = np.nan
    out["male_pos"] = male
    out["female_pos"] = female
    out["sex_unknown_pos"] = unknown

    # Race/ethnicity: 8 labels from one canvas and 8 percentages from another.
    race = getCanvas(driver.find_element_by_xpath('//*[@id="tabZoneId246"]/div/div/div/div[1]/div[5]/div[1]/canvas'), driver)
    race_perc = getCanvas(driver.find_element_by_xpath('//*[@id="view8860806102834544352_2377024103324179123"]/div[1]/div[2]/canvas[1]'), driver)
    race = [x for x in race.split("\n") + race_perc.split("\n") if x]

    if len(race) != 16:
        raise ValueError("incorrect number of races")

    def pct_or_nan(text):
        """Return ``text`` ("12%") as an int, or NaN when it is not numeric."""
        try:
            return int(text.replace("%", ""))
        except ValueError:
            return np.nan

    out_race = {
        label.replace('‘', "").replace(".", ""): pct_or_nan(value)
        for label, value in zip(race[:8], race[8:])
    }
    return out, out_race


def _ca_scrape_hospital_dashboard(driver):
    """Read patient counts and response figures from the CA hospital dashboard.

    Returns a dict with the patient/ICU counts and the facility and bed
    response figures.  The caller owns ``driver`` and closes it.  Raises
    ValueError when an "X of Y" figure cannot be parsed.
    """
    driver.get("https://public.tableau.com/views/COVID-19PublicDashboard/Covid-19Hospitals?%3Aembed=y&%3Adisplay_count=no&%3AshowVizHome=no")
    time.sleep(10)  # More robust to wait for elements to appear...
    driver.implicitly_wait(5)

    out = {}
    out["posPatients"] = get_digits("PosPatients",
            '//*[@id="view11327846829742299964_9307352602670595869"]/div[1]/div[2]/canvas[1]', driver)
    out["posICU"] = get_digits("PosICU",
            '//*[@id="view11327846829742299964_7957542083138737667"]/div[1]/div[2]/canvas[1]', driver)
    out["suspectedICU"] = get_digits("suspectedICU",
            '//*[@id="view11327846829742299964_12557426117314542746"]/div[1]/div[2]/canvas[1]', driver)
    out["suspectedPatients"] = get_digits("suspectedPatients",
            '//*[@id="view11327846829742299964_7498269305876793953"]/div[1]/div[2]/canvas[1]', driver)
    responding_fasilities = getCanvas(driver.find_element_by_xpath(
        '//*[@id="view11327846829742299964_17652355579425549403"]/div[1]/div[2]/canvas[1]'), driver)
    responding_beds = getCanvas(driver.find_element_by_xpath(
        '//*[@id="view11327846829742299964_12615353459920747640"]/div[1]/div[2]/canvas[1]'), driver)

    # NOTE(review): both "*_yes_num" fields store the second number of
    # "X of Y" (presumably the total, not the responding count).  Kept as-is
    # so new rows stay comparable with rows already in the CSV -- confirm
    # whether the first number was intended.
    first, second = _ca_parse_x_of_y(responding_fasilities, "responding facilities")
    out["responding_facilities_yes_percent"] = first / second
    out["responding_fasilities_yes_num"] = second

    first, second = _ca_parse_x_of_y(responding_beds, "responding beds")
    out["responding_beds_no_num"] = second - first
    out["responding_beds_yes_num"] = second
    return out
コード例 #5
0
def run_WY(args):
    """Scrape Wyoming COVID-19 figures and append one row to the WY CSV.

    Opens the two Tableau dashboards in Chrome, collects the values returned
    by ``_wy_scrape_dashboards``, appends them (plus ``Scrape_Time``) to
    ``data_name`` and finally saves PNG snapshots of both dashboards under
    ``raw_name``.

    Args:
        args: Not used by this function.

    Raises:
        Exception: A dashboard element did not have the expected layout
            (iframe count, case text, labels, bar counts, colours).
    """
    # Parameters
    raw_name = '../WY/raw'
    data_name = '../WY/data/data.csv'
    now = str(datetime.now())

    # driver = webdriver.Safari()
    driver = webdriver.Chrome(
        executable_path="andrew/ChromeDriver/chromedriver.exe")
    # Close the browser even when a layout check raises, so a failed run
    # does not leave a Chrome window behind.
    try:
        out = _wy_scrape_dashboards(driver)
    finally:
        driver.close()

    out["Scrape_Time"] = now
    fields = sorted(out)
    exists = os.path.exists(data_name)
    # newline="" lets the csv module control line endings itself, which
    # avoids doubled "\r" characters on Windows.
    with open(data_name, "a", newline="") as fp:
        writer = csv.writer(fp)
        if not exists:
            writer.writerow(fields)
        writer.writerow([out[x] for x in fields])

    # Let's make a best effort to get the raw data...
    img = requests.get(
        "https://public.tableau.com/static/images/Ep/EpiCOVIDtest/Dashboard/1.png"
    )
    with open("%s/%s.png" % (raw_name, now), "wb") as fp:
        fp.write(img.content)

    img = requests.get(
        "https://public.tableau.com/static/images/Wy/WyomingCOVID-19TestingDataDashboard/Dashboard1/1.png"
    )
    with open("%s/testing_%s.png" % (raw_name, now), "wb") as fp:
        fp.write(img.content)


def _wy_scrape_dashboards(driver):
    """Collect every Wyoming value from the epi and testing dashboards.

    Returns a dict of the scraped fields.  The caller owns ``driver`` and is
    responsible for closing it.  Raises Exception whenever a panel does not
    match the layout this scraper expects.
    """
    bar_color = (78, 121, 167, 255)  # colour passed to getGraph for epi bars

    driver.maximize_window()
    driver.get(
        "https://public.tableau.com/profile/melissa.taylor#!/vizhome/EpiCOVIDtest/Dashboard"
    )
    time.sleep(10)  # More robust to wait for elements to appear...

    frames = driver.find_elements_by_tag_name('iframe')
    if len(frames) != 1:
        raise Exception("Could not find iframe")
    driver.switch_to.frame(frames[0])

    out = {}

    # Lab-confirmed cases and recoveries.
    cases = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_5172391045180469540"]/div[1]/div[2]/canvas[1]'
        ), driver).replace(",", "").replace("/", "")
    reCases = re.compile(
        r"(\d+)\s+lab\s+confirmed\s+cases\s+(\d+)\s+recovered")
    match = reCases.match(cases.strip().lower())
    if match:
        out["TotalConfirmedCases"] = match.group(1)
        out["RecoveredConfirmedCases"] = match.group(2)
    else:
        raise Exception(
            "Warning: no total cases extracted for Wyoming; got string" +
            cases)

    # Probable cases and recoveries.
    pcases = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_2191712128240212356"]/div[1]/div[2]/canvas[1]'
        ), driver).replace(",", "").replace("/", "")
    rePCases = re.compile(r"(\d+)\s+probable\s+cases\s+(\d+)\s+recovered")
    match = rePCases.match(pcases.strip().lower())
    if match:
        out["TotalProbableCases"] = match.group(1)
        out["RecoveredProbableCases"] = match.group(2)
    else:
        # Report the probable-cases text that actually failed to parse.
        raise Exception(
            "Warning: no probable cases extracted for Wyoming; got string" +
            pcases)

    # Deaths are optional: a miss is logged and stored as None.
    deaths = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_11972683903544902318"]/div[1]/div[2]/canvas[1]'
        ), driver).replace(",", "").replace("/", "")
    reDeath = re.compile(r"(\d+)\s+death")
    match = reDeath.match(deaths.strip().lower())
    if match:
        out["Deaths"] = match.group(1)
    else:
        out["Deaths"] = None
        print("Warning: no death count for Wyoming; got string", deaths)

    # Age distribution: eight values in a fixed bucket order.
    ages = getGraph(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_719033729591027206"]/div[1]/div[2]/canvas[1]'
        ), bar_color, driver)
    if len(ages) != 8:
        raise Exception("Wrong age count for WY")
    for age, val in zip([
            "0_17", "18_29", "30_39", "40_49", "50_59", "60_69", "70_79",
            "80_plus"
    ], ages):
        out["Pct_Age_" + age] = round(val, 1)

    # Gender: values are paired with the labels read from the axis canvas.
    genders = getGraph(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_14275175901841894353"]/div[1]/div[2]/canvas[1]'
        ), bar_color, driver)
    genderLabels = [
        x.title() for x in getCanvas(
            driver.find_element_by_xpath(
                '//*[@id="tabZoneId32"]/div/div/div/div[1]/div[5]/div[1]/canvas'
            ), driver).strip().split()
    ]
    if len(genders) != 4 or len(genderLabels) != 4 or " ".join(
            sorted(genderLabels)) != "Female Male Other Unknown":
        raise Exception("Wrong gender vals for WY")
    for gender, val in zip(genderLabels, genders):
        out["Pct_Gender_" + gender] = round(val, 1)

    # Symptoms
    symptoms = getGraph(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_13010788587209822541"]/div[1]/div[2]/canvas[1]'
        ), bar_color, driver)
    symptomLabels = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="tabZoneId49"]/div/div/div/div[1]/div[5]/div[1]/canvas'),
        driver).strip()
    symptomLabels = [
        x.strip().title().replace(" ", "") for x in symptomLabels.split("\n")
        if x != ""
    ]
    if len(symptoms) != 14 or len(symptomLabels) != 14 or " ".join(
            sorted(symptomLabels)
    ) != 'AbdominalPain Chills Cough Diarrhea Fatigue Fever Headache LossOfSmell/Taste MuscleAches NauseaOrVomiting None RunnyNose ShortnessOfBreath SoreThroat':
        print(sorted(symptomLabels))
        raise Exception("Unexpected symptoms in WY")
    for symptom, val in zip(symptomLabels, symptoms):
        out["Pct_Symptom_" + symptom] = round(val, 1)

    # Exposure source ("CommunitySpread" is stored as "CommunityAcquired").
    exposures = getGraph(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_11422738650703355835"]/div[1]/div[2]/canvas[1]'
        ), bar_color, driver)
    exposureLabels = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="tabZoneId52"]/div/div/div/div/div[5]/div[1]/canvas'),
        driver).strip()
    exposureLabels = [
        x.strip().title().replace(" ", "").replace("CommunitySpread",
                                                   "CommunityAcquired")
        for x in exposureLabels.split("\n") if x != ""
    ]
    expected = 'CommunalLiving CommunityAcquired ContactWithAKnownCase DomesticTravel InternationalTravel Other PendingInvestigation Unknown'
    if len(exposures) != 8 or len(exposureLabels) != 8 or " ".join(
            sorted(exposureLabels)) != expected:
        print(" ".join(sorted(exposureLabels)))
        print(expected)
        raise Exception("Unexpected exposures in WY")
    for exposure, val in zip(exposureLabels, exposures):
        out["Pct_Exposure_" + exposure] = round(val, 1)

    # Underlying conditions
    underlying = getGraph(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_1672645675164053982"]/div[1]/div[2]/canvas[1]'
        ), bar_color, driver)
    underlyingLabels = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="tabZoneId53"]/div/div/div/div[1]/div[5]/div[1]/canvas'),
        driver).strip()
    underlyingLabels = [
        x.strip().title().replace(" ", "")
        for x in underlyingLabels.split("\n") if x != ""
    ]
    if len(underlyingLabels) != 3 or len(underlying) != 3 or " ".join(
            sorted(underlyingLabels)) != "No Unknown Yes":
        raise Exception("Unexpected underlying conditions in WY")
    for ul, val in zip(underlyingLabels, underlying):
        out["Pct_UnderlyingCond_" + ul] = round(val, 1)

    # Hospitalization.  "NoHospitalization" must be rewritten before
    # "Hospitalization", otherwise it would turn into "NoYes".
    hosp = getGraph(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_16872468006943659536"]/div[1]/div[2]/canvas[1]'
        ), bar_color, driver)
    hospLabels = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="tabZoneId54"]/div/div/div/div[1]/div[5]/div[1]/canvas'),
        driver).strip()
    hospLabels = [
        x.strip().title().replace(" ", "").replace(
            "NoHospitalization", "No").replace("Hospitalization", "Yes")
        for x in hospLabels.split("\n") if x != ""
    ]
    if len(hospLabels) != 3 or len(hosp) != 3 or " ".join(
            sorted(hospLabels)) != "No Unknown Yes":
        raise Exception("Unexpected hospitalization data in WY")
    for hh, val in zip(hospLabels, hosp):
        out["Pct_Hospitalized_" + hh] = round(val, 1)

    # Race: any label mentioning "Hawaii" is stored as "PacificIslander".
    race = getGraph(
        driver.find_element_by_xpath(
            '//*[@id="view3855800012607193825_4426486129312330342"]/div[1]/div[2]/canvas[1]'
        ), bar_color, driver)
    raceLabels = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="tabZoneId60"]/div/div/div/div[1]/div[5]/div[1]/canvas'),
        driver).strip()
    raceLabels = [
        x.strip().title().replace(" ", "") for x in raceLabels.split("\n")
        if x != ""
    ]
    raceLabels = [
        "PacificIslander" if "Hawaii" in label else label
        for label in raceLabels
    ]
    if len(race) != len(raceLabels) or sorted(raceLabels) != [
            'AmericanIndian', 'Asian', 'Black', 'Hispanic', 'Other',
            'PacificIslander', 'Unknown', 'White'
    ]:
        raise Exception("Unexpected race data in WY")
    for rr, val in zip(raceLabels, race):
        out["Pct_Race_" + rr] = round(val, 1)

    # Second page: testing dashboard.
    driver.get(
        "https://public.tableau.com/profile/melissa.taylor#!/vizhome/shared/8BBTPD39D"
    )
    # driver.get("https://public.tableau.com/profile/melissa.taylor#!/vizhome/WyomingCOVID-19TestingDataDashboard/Dashboard1")
    # https://health.wyo.gov/publichealth/infectious-disease-epidemiology-unit/disease/novel-coronavirus/covid-19-testing-data/
    time.sleep(10)  # More robust to wait for elements to appear...
    frames = driver.find_elements_by_tag_name('iframe')
    if len(frames) != 1:
        raise Exception("Could not find iframe on second page")
    driver.switch_to.frame(frames[0])

    testing = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="view4597669659173455094_6899958757650081769"]/div[1]/div[2]/canvas[1]'
        ), driver).replace(",", "")
    testingLabels = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="tabZoneId10"]/div/div/div/div[1]/div[5]/div[1]/canvas'),
        driver)
    testingLabels = [
        x.strip().title().replace(" ", "").replace("_", "-")
        for x in testingLabels.split("\n") if x != ""
    ]
    if sorted(testingLabels) != ['GrandTotal', 'Non-Wphl', 'Wphl']:
        print(testingLabels)
        raise Exception("Unexpected testing lab labels in WY")
    reTesting = re.compile(r"(\d+)\s+(\d+)\s+(\d+)")
    match = reTesting.match(testing.strip())
    if match:
        # The three numbers are taken in the same order as the labels, so a
        # label's position selects the matching regex group (1-based).
        out["WPHLTotalTest"] = match.group(testingLabels.index("Wphl") + 1)
        out["CommercialLabTotalTest"] = match.group(
            testingLabels.index("Non-Wphl") + 1)
    else:
        print(testing.strip())
        print(
            "Warning: unexpected testing lab results in WY; skipping extraction"
        )
        out["WPHLTotalTest"] = None
        out["CommercialLabTotalTest"] = None

    # Find width of the testing positive region within the whole image
    testPos = driver.find_element_by_xpath(
        '//*[@id="view4597669659173455094_10103530389136289716"]/div[1]/div[2]/canvas[1]'
    )
    b64 = driver.execute_script(
        "return arguments[0].toDataURL('image/png').substring(21);", testPos)
    img = Image.open(BytesIO(base64.b64decode(b64)))
    pix = img.load()
    cols, rows = img.size  # indexing is backward...
    positive_color = (252, 141, 98, 255)
    # Right-most column that contains at least one pixel of the target colour.
    maxCol = None
    for c in range(cols):
        if any(pix[c, r] == positive_color for r in range(rows)):
            maxCol = c
    if maxCol is None:
        print(getColors(testPos, driver))
        raise Exception("Could not find testing positive color in WY")
    out["TestPositivePercentage"] = (maxCol + 1) / cols * 100

    # Tests by age group; labels are translated through ageMap.
    testByAge = getGraph(
        driver.find_element_by_xpath(
            '//*[@id="view4597669659173455094_719033729591027206"]/div[1]/div[2]/canvas[1]'
        ), (191, 198, 212, 255), driver)
    ageLabels = getCanvas(
        driver.find_element_by_xpath(
            '//*[@id="tabZoneId12"]/div/div/div/div[1]/div[5]/div[1]/canvas'),
        driver)
    ageLabels = [
        x.strip().title().replace(" ", "") for x in ageLabels.split("\n")
        if x != ""
    ]
    ageMap = {
        '<18Years': "0_18",
        '19-29Years': "19_29",
        '30-39Years': "30_39",
        '40-49Years': "40_49",
        '50-59Years': "50_59",
        '60-69Years': "60_69",
        '70-79Years': "70_79",
        '80+Years': "80_plus"
    }
    if len(ageLabels) != 8 or len(testByAge) != 8 or not all(
            x in ageMap for x in ageLabels):
        raise Exception("Unexpected test age layout")
    for dat, lab in zip(testByAge, ageLabels):
        out["Test_Pct_Age_" + ageMap[lab]] = dat

    return out