Example #1
def scraper():
    # make an HTTP web request to get the source information
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        table = bs4.BeautifulSoup(response.text, features="html.parser").select('#msdhTotalCovid-19Cases tbody tr')
        
        counties = []

        for item in table:
            
            row = item.find_all('td')

            county_name = row[0].text
            confirmed = int(row[1].text.replace(',', '').replace('*', ''))
            deaths = int(row[2].text.replace(',', '').replace('*', ''))
            
            county = county_report.CountyReport(STATE, county_name, confirmed, deaths, -1, -1, datetime.datetime.now())
            counties.append(county)

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties, datetime.datetime.now())
        
        # return the state-level report
        return stateReport
        
    else:
        # Fail
        print(' ', STATE_ABBR, ': ERROR : Web download failed - HTTP status code ', response.status_code)
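Each snippet below is a scraper() function from a standalone per-state module. They all rely on module-level imports, a data-source URL, and state constants defined outside the excerpt, plus shared county_report and state_report classes. A minimal sketch of that assumed surrounding context follows; the names URL, STATE, and STATE_ABBR are taken from the snippets, but the values shown are placeholders, not the actual data sources.

# Hypothetical module-level context assumed by the scraper() snippets;
# the real constants and imports live in each state's own module.
import datetime

import bs4
import requests

import county_report
import state_report

STATE = 'Mississippi'   # placeholder: each module names its own state
STATE_ABBR = 'MS'       # placeholder abbreviation
URL = 'https://example.invalid/covid-county-data'   # placeholder source URL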
Example #2
def scraper():

    payload = ''

    filepath = pathlib.Path.cwd().joinpath('config', 'nv_post_body.json')
    with open(filepath, 'r') as file:
        payload = file.read().replace('\n', '')

    # make an HTTP web request to get the data
    response = requests.post(URL, data=payload)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        jsonPayload = json.loads(response.text)
        # drill down through the nested response JSON to reach the per-county rows
        features = jsonPayload['results'][0]['result']['data']['dsr']['DS'][0][
            'PH'][0]['DM0']

        counties = []

        for feature in features:

            if 'S' in feature:
                continue

            county_object = feature['C']
            has_R = 'R' in feature

            deaths = 0

            cases_index = 3
            if has_R:
                cases_index = 2
            else:
                deaths = int(county_object[1])

            county_name = county_object[0]
            confirmed = int(county_object[cases_index])

            county = county_report.CountyReport(STATE, county_name, confirmed,
                                                deaths, -1, -1,
                                                datetime.datetime.now())
            counties.append(county)

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : Web download failed - HTTP status code ',
              response.status_code)
Example #3
def scraper():
    # make an HTTP web request to get the MI XLSX file
    response = requests.get(URL)

    counties = []

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        temppath = 'temp'
        if not os.path.exists(temppath):
            os.makedirs(temppath)

        tempfilename = datetime.datetime.now().strftime(
            "%Y-%m-%d_%H%M%S") + '_temp_' + STATE_ABBR + '.xlsx'
        tempfilepath = pathlib.Path.cwd().joinpath('temp', tempfilename)

        with open(tempfilepath, "wb") as file:
            file.write(response.content)

        wb = openpyxl.load_workbook(filename=tempfilepath)

        sheet = wb.worksheets[0]
        max_rows = sheet.max_row
        max_cols = sheet.max_column

        for i in range(4, max_rows):
            rowCount = str(i)

            county_name = sheet['A' + rowCount].value

            if county_name in ('Unknown', 'Total') or not county_name:
                break

            confirmed = sheet.cell(row=i, column=max_cols).value

            county = county_report.CountyReport(STATE, county_name,
                                                int(confirmed), -1, -1, -1,
                                                datetime.datetime.now())
            counties.append(county)

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : Web download failed - HTTP status code ',
              response.status_code)
Example #4
def scraper():
    # make an HTTP web request to get the CA CSV file
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        csvData = response.text

        # read the in-memory string using the 'csv' module so we can iterate over each row
        csvReader = csv.reader(csvData.splitlines(), delimiter=',', quotechar='"')
        
        # create a list that will contain our county data
        counties = []

        # iterate over every row in the CSV
        for row in csvReader:
            # skip the header row
            if row[0] == 'county':
                continue    
            
            county_name = row[0]
            confirmedStr = row[1]
            confirmed = 0
            if '.' in confirmedStr:
                confirmed = int(float(confirmedStr))
            elif len(confirmedStr) > 0:
                confirmed = int(confirmedStr)

            deathsStr = row[2]
            deaths = 0
            if '.' in deathsStr:
                deaths = int(float(deathsStr))
            elif len(deathsStr) > 0:
                deaths = int(deathsStr)

            county = findCounty(county_name, counties)

            if county is None:
                county = county_report.CountyReport(STATE, county_name, confirmed, deaths, -1, -1, datetime.datetime.now())
                counties.append(county) # append the countyReport to our list of counties
            else:
                county.confirmed = confirmed
                county.deaths = deaths
                
        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties, datetime.datetime.now())
        
        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR, ': ERROR : Download failed - HTTP status code ', response.status_code)
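Several of the examples (this one and Examples #5, #9, #11, and #12) call a findCounty helper that is not shown in the excerpt. Assuming it simply scans the list built so far for a CountyReport with a matching name, a minimal sketch could look like the following; the county_name attribute is an assumption about how CountyReport stores the name.

# Hypothetical helper assumed by the snippets that call findCounty(name, counties):
# return the already-created CountyReport whose name matches, or None if absent.
def findCounty(county_name, counties):
    for county in counties:
        if county.county_name == county_name:
            return county
    return None

Examples #18 and #21 pass a dictionary rather than a list, so their lookup helper presumably does a key lookup instead of a linear scan.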
Example #5
def scraper():
    # make an HTTP web request to get the data
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        jsonPayload = json.loads(response.text)
        features = jsonPayload['features']
        
        counties = []
        
        for feature in features:
            attribute = feature['attributes']
            
            county_name = attribute['NAME']

            if not county_name or county_name == 'WI':
                continue

            confirmed = int(attribute['POSITIVE'])

            hospitalizations = 0
            if attribute['HOSP_YES'] is not None:
                hospitalizations = int(attribute['HOSP_YES'])

            deaths = 0
            if attribute['DEATHS'] is not None:
                deaths = int(attribute['DEATHS'])
            
            county = county_report.CountyReport(STATE, county_name, confirmed, deaths, hospitalizations, -1, datetime.datetime.now())

            existing_county = findCounty(county_name, counties)
            if existing_county is None:
                counties.append(county)
            elif existing_county.confirmed < county.confirmed or existing_county.deaths < county.deaths or existing_county.hospitalizations < county.hospitalizations:
                existing_county.confirmed = county.confirmed
                existing_county.deaths = county.deaths
                existing_county.hospitalizations = county.hospitalizations
            
            
        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties, datetime.datetime.now())
        
        # return the state-level report
        return stateReport
        

    else:
        # Fail
        print(' ', STATE_ABBR, ': ERROR : Web download failed - HTTP status code ', response.status_code)
Example #6
def scraper():
    # make an HTTP web request to get the FL Json file
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        jsonPayload = json.loads(response.text)
        items = jsonPayload['features']

        counties = []

        for item in items:
            attributes = item['properties']
            county_name = attributes['County_1']

            if county_name == 'State':  # this is FL's total, so skip
                continue

            confirmedStr = attributes['CasesAll']
            confirmed = int(confirmedStr)

            deathsStr = attributes['Deaths']
            deaths = int(deathsStr)

            hospitalizationsResStr = attributes[
                'C_HospYes_Res']  # hospitalizations - Florida residents
            hospitalizationsRes = int(hospitalizationsResStr)

            hospitalizationsNonResStr = attributes[
                'C_HospYes_NonRes']  # hospitalizations - non-Florida residents
            hospitalizationsNonRes = int(hospitalizationsNonResStr)

            county = county_report.CountyReport(
                STATE, county_name, confirmed, deaths,
                hospitalizationsRes + hospitalizationsNonRes, -1,
                datetime.datetime.now())
            counties.append(county)

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : Web download failed - HTTP status code ',
              response.status_code)
Example #7
def scraper():
    # make an HTTP web request to get the GA ZIP file
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': ZIP download succeeded')

        # read ZIP into memory
        z = zipfile.ZipFile(io.BytesIO(response.content))

        # extract the CSV file from the ZIP file into an in-memory byte array
        csvDataBytes = z.read('countycases.csv')

        # convert the byte array into a string so we can read it as a CSV file
        csvData = csvDataBytes.decode(encoding='UTF-8')

        # read the in-memory string using the 'csv' module so we can iterate over each row
        csvReader = csv.reader(csvData.splitlines(),
                               delimiter=',',
                               quotechar='"')

        # create a list that will contain our county data
        counties = []

        # iterate over every row in the CSV
        for row in csvReader:
            # skip the header row
            if row[0] == 'county_resident':
                continue

            # take the row we're iterating over and build a countyReport object out of it - this has the confirmed cases, deaths, etc that we're interested in
            county = county_report.CountyReport(STATE, row[0], int(row[1]),
                                                int(row[2]), int(row[3]),
                                                float(row[4]),
                                                datetime.datetime.now())
            counties.append(
                county)  # append the countyReport to our list of counties

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : ZIP download failed - HTTP status code ',
              response.status_code)
Example #8
def scraper():
    counties = []

    # You will need a WebDriver for Edge. See https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/

    browser = webdriver.Edge("msedgedriver.exe")
    browser.get(URL)

    counties_link = WebDriverWait(browser, 20).until(
        EC.presence_of_element_located(
            (By.XPATH, '//*[@id="togConfirmedCasesDeathsTbl"]')))
    counties_link.click()

    counties_table = WebDriverWait(browser, 20).until(
        EC.presence_of_element_located((
            By.XPATH,
            '/html/body/form/div[3]/div/div/div[4]/div/div/div/div[2]/main/div/div[6]/div/div/div[3]/div/div/div/div/div/div/div/div[2]/div/div/table'
        )))

    time.sleep(2)

    htmlRows = counties_table.find_elements_by_xpath(".//tbody/tr")
    rows = get_row_data(htmlRows)

    for row in rows:
        county_name = row[0]

        if county_name == 'Total' or county_name == 'Unassigned' or len(
                row) < 4:
            continue

        confirmed = int(row[1].replace(',', ''))
        hospitalizations = int(row[2].replace(',', ''))
        deaths = int(row[3].replace(',', ''))
        county = county_report.CountyReport(STATE, county_name, confirmed,
                                            deaths, hospitalizations, -1,
                                            datetime.datetime.now())
        counties.append(county)

    browser.quit()

    # print the number of counties we processed
    print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

    # build the state-level report object that will include all of the counties
    stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                           datetime.datetime.now())

    # return the state-level report
    return stateReport
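The Selenium-based scrapers (this one and Examples #15, #19, and #23) call a get_row_data helper that is not shown. Assuming it converts each table-row WebElement into a list of cell strings, a sketch using the same Selenium 3 element API as the examples might be:

# Hypothetical helper assumed by the Selenium snippets: turn each table row
# element into a plain list of its cell texts.
def get_row_data(htmlRows):
    rows = []
    for htmlRow in htmlRows:
        cells = htmlRow.find_elements_by_xpath('.//th | .//td')
        rows.append([cell.text.strip() for cell in cells])
    return rows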
Example #9
def scraper():
    # make an HTTP web request to get the AK Json
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        jsonPayload = json.loads(response.text)
        features = jsonPayload['features']

        counties = []

        for feature in features:
            attribute = feature['attributes']

            county_name = attribute['Borough_Census_Area']
            confirmed = int(attribute['All_Cases'])
            hospitalizations = int(attribute['Hospitalizations'])
            deaths = int(attribute['Deaths'])

            county = findCounty(county_name, counties)

            if county is None:
                county = county_report.CountyReport(STATE, county_name,
                                                    confirmed, deaths, -1, -1,
                                                    datetime.datetime.now())
                counties.append(county)
            else:
                county.confirmed += confirmed
                county.hospitalizations += hospitalizations
                county.deaths += deaths

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : Web download failed - HTTP status code ',
              response.status_code)
Example #10
def scraper():
    # make an HTTP web request to get the MI XLSX file
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        data = io.BytesIO(response.content)

        wb = openpyxl.load_workbook(filename=data,
                                    read_only=True,
                                    data_only=True)

        sheet = wb.worksheets[0]

        counties = []

        for i in range(2, 169):
            rowCount = str(i)
            status = sheet['B' + rowCount].value
            if status == 'Confirmed':
                county_name = sheet['A' + rowCount].value
                confirmed = sheet['C' + rowCount].value
                deaths = sheet['D' + rowCount].value

                county = county_report.CountyReport(STATE, county_name,
                                                    int(confirmed),
                                                    int(deaths), -1, -1,
                                                    datetime.datetime.now())
                counties.append(
                    county)  # append the countyReport to our list of counties

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : Web download failed - HTTP status code ',
              response.status_code)
Example #11
def scraper():
    # make an HTTP web request to get the CO Json
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        jsonPayload = json.loads(response.text)
        features = jsonPayload['features']
        
        counties = []
        
        for feature in features:
            attribute = feature['attributes']
            
            county_name = attribute['LABEL']

            county = findCounty(county_name, counties)

            if county is None:
                county = county_report.CountyReport(STATE, county_name, 0, 0, -1, -1, datetime.datetime.now())
                counties.append(county)

            metric = attribute['Metric']
            
            if metric == 'Cases':
                confirmed = int(attribute['Value'])
                county.confirmed = confirmed
            
            if metric == 'Deaths':
                deaths = int(attribute['Value'])
                county.deaths = deaths

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties, datetime.datetime.now())
        
        # return the state-level report
        return stateReport
        

    else:
        # Fail
        print(' ', STATE_ABBR, ': ERROR : Web download failed - HTTP status code ', response.status_code)
Example #12
def scraper():
    # make an HTTP web request to get the CT Json file
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        jsonPayload = json.loads(response.text)
        
        counties = []
        
        for item in jsonPayload:
            county_name = item['county']
            
            confirmed = 0
            if 'confirmedcases' in item:
                confirmed = int(item['confirmedcases'])

            hospitalizations = 0
            if 'hospitalization' in item:
                hospitalizations = int(item['hospitalization'])

            deaths = 0
            if 'confirmeddeaths' in item:
                deaths = int(item['confirmeddeaths'])

            county = findCounty(county_name, counties)

            if county is None:
                county = county_report.CountyReport(STATE, county_name, confirmed, deaths, -1, -1, datetime.datetime.now())
                counties.append(county)

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties, datetime.datetime.now())
        
        # return the state-level report
        return stateReport
        

    else:
        # Fail
        print(' ', STATE_ABBR, ': ERROR : Web download failed - HTTP status code ', response.status_code)
Example #13
def scraper():
    # make an HTTP web request to get the data
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        csvData = response.text

        # read the in-memory string using the 'csv' module so we can iterate over each row
        csvReader = csv.reader(csvData.splitlines(), delimiter=',', quotechar='"')
        
        # create a list that will contain our county data
        counties = []

        # iterate over every row in the CSV
        for row in csvReader:
            # skip the header row
            if row[0] == 'BOROUGH_GROUP':
                continue
            
            county_name = row[0]
            confirmed = int(row[4])
            deaths = int(row[6])
            hospitalizations = int(row[5])

            county = county_report.CountyReport(STATE, county_name, confirmed, deaths, hospitalizations, -1, datetime.datetime.now())
            counties.append(county)
                
        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties, datetime.datetime.now())
        
        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR, ': ERROR : Download failed - HTTP status code ', response.status_code)
Example #14
def scraper():
    # make an HTTP web request to get the data
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        jsonPayload = json.loads(response.text)
        features = jsonPayload['features']

        counties = []

        for feature in features:
            attribute = feature['attributes']

            county_name = attribute['NAME']
            confirmed = int(attribute['CASES'])
            deaths = int(attribute['DEATHS'])

            county = county_report.CountyReport(STATE, county_name, confirmed,
                                                deaths, -1, -1,
                                                datetime.datetime.now())
            counties.append(county)

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : Web download failed - HTTP status code ',
              response.status_code)
Example #15
def scraper():
    counties = []

    # You will need a WebDriver for Edge. See https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/

    browser = webdriver.Edge("msedgedriver.exe")
    browser.get(URL)

    try:
        counties_link = browser.find_element_by_id('open-counties-table-modal')
        counties_link.click()

        rootCountyDiv = browser.find_elements_by_class_name('counties-table')
        htmlRows = rootCountyDiv[0].find_elements_by_xpath(".//tbody/tr")

        rows = get_row_data(htmlRows)

        for row in rows:
            county_name = row[0]
            confirmed = int(row[1])
            deaths = int(row[2])
            county = county_report.CountyReport(STATE, county_name, confirmed,
                                                deaths, -1, -1,
                                                datetime.datetime.now())
            counties.append(county)

    except Exception:
        print("Unexpected error:", sys.exc_info()[0])

    browser.quit()

    # print the number of counties we processed
    print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

    # build the state-level report object that will include all of the counties
    stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                           datetime.datetime.now())

    # return the state-level report
    return stateReport
Example #16
def scraper():
    # make an HTTP web request to get the AR data
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        table = bs4.BeautifulSoup(response.text,
                                  features="html.parser").select('table tr')

        counties = []

        for i in range(1, 75):
            row = table[i].find_all('td')
            county_name = row[0].find('p').getText()
            confirmed = int(row[1].find('p').getText())
            deaths = int(row[3].find('p').getText())

            county = county_report.CountyReport(STATE, county_name, confirmed,
                                                deaths, -1, -1,
                                                datetime.datetime.now())
            counties.append(county)

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : Web download failed - HTTP status code ',
              response.status_code)
Example #17
def scraper():
    counties = []

    # You will need a WebDriver for Edge. See https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/

    try:

        browser = webdriver.Edge("msedgedriver.exe")
        browser.get(URL)

        file_path = pathlib.Path.home().joinpath('Downloads', FILE_NAME)

        if os.path.isfile(file_path):
            print(
                "  FAILED on ", STATE, " : Please delete ", file_path,
                " and start the process over. This file must not exist prior to running the scrape operation."
            )

        download_link = WebDriverWait(browser, 30).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 '/html/body/div[2]/div[2]/div/div[1]/div/div[1]/a[2]')))
        download_link.click()

        time.sleep(4)

        wb = openpyxl.load_workbook(filename=file_path)

        sheet = wb.worksheets[0]

        counties = []

        max_rows = sheet.max_row

        for i in range(2, max_rows):
            rowCount = str(i)
            #     print(rowCount)
            county_name = sheet['A' + rowCount].value

            if county_name is None or len(county_name) == 0:
                continue

            confirmed = sheet['B' + rowCount].value
            deaths = sheet['D' + rowCount].value

            county = county_report.CountyReport(STATE, county_name,
                                                int(confirmed),
                                                int(deaths), -1, -1,
                                                datetime.datetime.now())
            counties.append(
                county)  # append the countyReport to our list of counties

        wb.close()

    except Exception:
        print("Unexpected error:", sys.exc_info()[0])

    browser.quit()

    os.remove(file_path)

    # print the number of counties we processed
    print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

    # build the state-level report object that will include all of the counties
    stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                           datetime.datetime.now())

    # return the state-level report
    return stateReport
Example #18
def scraper():
    # make an HTTP web request to get the MI XLSX file
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        temppath = 'temp' 
        if not os.path.exists(temppath):
            os.makedirs(temppath)
        
        tempfilename = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S") + '_temp_' + STATE_ABBR + '.xlsx'
        tempfilepath = pathlib.Path.cwd().joinpath('temp', tempfilename)

        with open(tempfilepath, "wb") as file:
            file.write(response.content)

        wb = openpyxl.load_workbook(filename=tempfilepath)
        
        sheet = wb.worksheets[0]
        max_rows = sheet.max_row

        counties = []
        countyDictionary = {}

        i = max_rows

        while i > 2:
            rowCount = str(i)
            county_name = sheet['B' + rowCount].value

            county = findCounty(county_name, countyDictionary)

            if county is None:
                
                confirmed = int(sheet['E' + rowCount].value)
                deaths = int(sheet['P' + rowCount].value)
            
                county = county_report.CountyReport(STATE, county_name, confirmed, deaths, -1, -1, datetime.datetime.now())
                counties.append(county) # append the countyReport to our list of counties
                countyDictionary[county_name] = county

            i = i - 1

        # since the above algorithm outputs the counties in reverse-ABC order, let's reverse that so they're in ABC order...
        counties = list(reversed(counties))

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties, datetime.datetime.now())
        
        # return the state-level report
        return stateReport
        

    else:
        # Fail
        print(' ', STATE_ABBR, ': ERROR : Web download failed - HTTP status code ', response.status_code)
Example #19
def scraper():
    counties = []

    # You will need a WebDriver for Edge. See https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/

    try:

        browser = webdriver.Edge("msedgedriver.exe")
        browser.get(URL)

        time.sleep(4)

        county_link = WebDriverWait(browser, 30).until(
            EC.presence_of_element_located((
                By.XPATH,
                '/html/body/div[1]/div[3]/div/article/div/div/div/ul[1]/li[1]/a'
            )))
        county_link.click()

        time.sleep(4)

        all_link = WebDriverWait(browser, 30).until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="pagin"]/li[12]/a')))
        all_link.click()

        time.sleep(2)

        county_table = WebDriverWait(browser, 30).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 '/html/body/div[1]/div[3]/div/article/div/div/div/table/tbody'
                 )))

        time.sleep(2)
        # print(county_table)

        htmlRows = county_table.find_elements_by_xpath(".//tr")

        # print(htmlRows)
        rows = get_row_data(htmlRows)
        # print(rows)

        for row in rows:
            # print(row)
            county_name = row[0]

            if county_name == 'Illinois':
                continue

            confirmed = int(row[2])
            deaths = int(row[3])
            county = county_report.CountyReport(STATE, county_name, confirmed,
                                                deaths, -1, -1,
                                                datetime.datetime.now())
            counties.append(county)

    except Exception:
        print("Unexpected error:", sys.exc_info()[0])

    browser.quit()

    # print the number of counties we processed
    print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

    # build the state-level report object that will include all of the counties
    stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                           datetime.datetime.now())

    # return the state-level report
    return stateReport
Example #20
def scraper():
    # make an HTTP web request to get the source information
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        soup = bs4.BeautifulSoup(response.text, features="html.parser")

        table = soup.find_all("table", attrs={"summary": "Cases by County"})

        counties = []

        for item in table[0].find_all('tr'):

            row = item.find_all('td')

            if len(row) == 0:
                continue

            county_name = row[0].text

            if county_name == 'Total':
                continue

            casesStr = row[1].text
            deathsStr = row[2].text

            if casesStr == '' or casesStr == '\xa0' or casesStr == '\xa0\n\t':
                casesStr = '0'

            if deathsStr == '' or deathsStr == '\xa0' or deathsStr == '\xa0\n\t':
                deathsStr = '0'

            confirmed = int(casesStr)
            deaths = int(deathsStr)

            county = county_report.CountyReport(STATE, county_name, confirmed,
                                                deaths, -1, -1,
                                                datetime.datetime.now())
            counties.append(county)

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : Web download failed - HTTP status code ',
              response.status_code)
Example #21
def scraper():
    # make an HTTP web request to get the file
    response = requests.get(URL)

    if response.status_code == requests.codes.ok:
        # Success - print to the console that the HTTP request succeeded
        print(' ', STATE_ABBR, ': Download succeeded')

        # Writing the XLSX to disk makes the loop below orders of magnitude faster
        # versus keeping the XLSX doc in-memory, so we create a temp folder and download
        # the file there.
        temppath = 'temp'
        if not os.path.exists(temppath):
            os.makedirs(temppath)

        tempfilename = datetime.datetime.now().strftime(
            "%Y-%m-%d_%H%M%S") + '_temp_' + STATE_ABBR + '.xlsx'
        tempfilepath = pathlib.Path.cwd().joinpath('temp', tempfilename)

        with open(tempfilepath, "wb") as file:
            file.write(response.content)

        wb = openpyxl.load_workbook(filename=tempfilepath)

        sheet = wb.worksheets[0]

        parishes = []
        parishesDictionary = {}

        max_rows = sheet.max_row

        for i in range(2, max_rows):
            rowCount = str(i)
            # print(rowCount)
            parish_name = sheet['B' + rowCount].value
            confirmed = sheet['F' + rowCount].value

            parish = findParish(parish_name, parishesDictionary)

            if parish is None:
                parish = county_report.CountyReport(STATE, parish_name,
                                                    int(confirmed), -1,
                                                    -1, -1,
                                                    datetime.datetime.now())
                parishes.append(parish)
                parishesDictionary[parish_name] = parish
            else:
                parish.confirmed += int(confirmed)

        # print the number of counties we processed
        print(' ', STATE_ABBR, ':', len(parishes), ' counties processed OK')

        # build the state-level report object that will include all of the counties
        stateReport = state_report.StateReport(STATE, STATE_ABBR, parishes,
                                               datetime.datetime.now())

        # return the state-level report
        return stateReport

    else:
        # Fail
        print(' ', STATE_ABBR,
              ': ERROR : Web download failed - HTTP status code ',
              response.status_code)
Example #22
def scraper():
    counties = []

    # You will need a WebDriver for Edge. See https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/

    browser = webdriver.Edge("msedgedriver.exe")
    browser.get(URL)

    file_path = pathlib.Path.home().joinpath(
        'Downloads', 'Testing and Outcomes by County.csv')

    if os.path.isfile(file_path):
        print(
            "  FAILED on ", STATE, " : Please delete ", file_path,
            " and start the process over. This file must not exist prior to running the scrape operation."
        )

    download_link = browser.find_element_by_xpath(
        '/html/body/div[2]/div[3]/div[2]/div[1]/div[2]/div[5]')
    download_link.click()

    crosstab_link = WebDriverWait(browser, 30).until(
        EC.presence_of_element_located(
            (By.XPATH,
             '/html/body/div[6]/div/div/div/div/div[2]/div/button[3]')))
    crosstab_link.click()

    counties_link = WebDriverWait(browser, 30).until(
        EC.presence_of_element_located((
            By.XPATH,
            '/html/body/div[7]/div/div/div/div/div[2]/div/div[1]/div[2]/div/div/div[2]/div/div/div'
        )))
    counties_link.click()

    csv_link = WebDriverWait(browser, 30).until(
        EC.presence_of_element_located((
            By.XPATH,
            '/html/body/div[7]/div/div/div/div/div[2]/div/div[2]/div[2]/div/label[2]'
        )))
    csv_link.click()

    download_button = WebDriverWait(browser, 30).until(
        EC.presence_of_element_located(
            (By.XPATH,
             '/html/body/div[7]/div/div/div/div/div[2]/div/div[3]/button')))
    download_button.click()
    time.sleep(2)

    # the downloaded crosstab file is UTF-16LE encoded and tab-delimited
    with open(file_path, 'rt', encoding='utf-16-le') as file_contents:
        data = file_contents.read()

    csv_reader = csv.reader(data.splitlines(),
                            delimiter='\t',
                            quotechar='"')

    for row in csv_reader:
        county_name = row[0]
        if county_name == 'County' or county_name == 'All' or row[1] == 'Cases per 100,000':
            continue

        confirmed = row[2].replace(',', '')
        if len(confirmed) == 0:
            confirmed = '0'

        deaths = row[3].replace(',', '')
        if len(deaths) == 0:
            deaths = '0'

        county = county_report.CountyReport(STATE, county_name,
                                            int(confirmed), int(deaths),
                                            -1, -1,
                                            datetime.datetime.now())
        counties.append(county)

    browser.quit()

    os.remove(file_path)

    # print the number of counties we processed
    print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

    # build the state-level report object that will include all of the counties
    stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                           datetime.datetime.now())

    # return the state-level report
    return stateReport
Example #23
def scraper():
    counties = []

    # You will need a WebDriver for Edge. See https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/

    try:

        browser = webdriver.Edge("msedgedriver.exe")
        browser.get(URL)

        time.sleep(1)

        show_table_link = WebDriverWait(browser, 30).until(
            EC.presence_of_element_located((
                By.XPATH,
                '/html/body/div[1]/div[1]/div[3]/div[3]/div/div/div[3]/div[2]/div/div[1]/div[2]/div[1]/div[1]/div/div[2]/span'
            )))
        show_table_link.click()

        time.sleep(1)

        county_div = WebDriverWait(browser, 30).until(
            EC.presence_of_element_located((
                By.XPATH,
                '/html/body/div[1]/div[1]/div[3]/div[5]/div[4]/div[1]/div/div/div/div[1]/div/div/div[2]/div/div[2]/div'
            )))

        county_div_rows = county_div.find_elements_by_xpath(
            './/div[@role="row"]')

        # SC puts its county level data into lots of <div> elements, with one <div> per county. Each <div> has its own single-row <table> that contains the county data. Thus, we
        # have some extra stuff to do to make this work right.
        for div_row in county_div_rows:
            county_table = div_row.find_element_by_xpath('.//table')
            htmlRows = county_table.find_elements_by_xpath(".//tr")
            rows = get_row_data(htmlRows)

            for row in rows:
                county_name = row[0]

                if county_name == 'Unknown':
                    continue

                confirmed = int(row[3].replace(',', ''))
                deaths = int(row[4].replace(',', ''))
                county = county_report.CountyReport(STATE, county_name,
                                                    confirmed, deaths, -1, -1,
                                                    datetime.datetime.now())
                counties.append(county)

    except Exception:
        print("Unexpected error:", sys.exc_info()[0])

    browser.quit()

    # print the number of counties we processed
    print(' ', STATE_ABBR, ':', len(counties), ' counties processed OK')

    # build the state-level report object that will include all of the counties
    stateReport = state_report.StateReport(STATE, STATE_ABBR, counties,
                                           datetime.datetime.now())

    # return the state-level report
    return stateReport