def _scrape(self, **kwargs):
    """Build one series from the Tableau cases and deaths dashboards.

    Loads ``self.CASES_URL`` and then ``self.DEATHS_URL`` in a webdriver
    session, captures the Tableau request issued by each page, and reads
    the totals and per-race rows out of the parsed responses.

    Args:
        **kwargs: Accepted but not referenced by this method.

    Returns:
        A one-element list containing the value of ``self._make_series``.
    """
    runner = WebdriverRunner()
    steps = (
        WebdriverSteps()
        .go_to_url(self.CASES_URL)
        .wait_for_number_of_elements((By.XPATH, '//canvas'), 58)
        .find_request('cases', find_by=tableau.find_tableau_request)
        .clear_request_history()
        .go_to_url(self.DEATHS_URL)
        .wait_for_number_of_elements((By.XPATH, '//canvas'), 29)
        .find_request('deaths', find_by=tableau.find_tableau_request)
    )
    results = runner.run(steps)

    # --- Cases dashboard -------------------------------------------------
    cases_parser = tableau.TableauParser(request=results.requests['cases'])
    updated_text = pydash.head(
        cases_parser.extract_data_from_key('cases')['ATTR(Date Updated)'])
    date = datetime.strptime(updated_text, '%A, %B %d, %Y').date()

    lab_confirmed = pydash.head(
        cases_parser.extract_data_from_key('cases')
        ['SUM(# Lab Confirmed Cases)'])
    probable = pydash.head(
        cases_parser.extract_data_from_key('probable cases')
        ['SUM(# probable)'])
    # The reported total is confirmed plus probable cases.
    cases = lab_confirmed + probable

    cases_by_race = pd.DataFrame.from_dict(
        cases_parser.extract_data_from_key('raceth'))
    cases_by_race = cases_by_race.set_index('sub-category')
    aa_cases = cases_by_race.loc['Black']['SUM(count)']
    known_race_cases = cases - cases_by_race.loc['unknown']['SUM(count)']

    # --- Deaths dashboard ------------------------------------------------
    deaths_parser = tableau.TableauParser(
        request=results.requests['deaths'])
    deaths = pydash.head(
        deaths_parser.extract_data_from_key('death (2)')
        ['SUM(# lab confirmed deaths)'])

    death_shares = pd.DataFrame.from_dict(
        deaths_parser.extract_data_from_key('raceth (death)'))
    death_shares = death_shares.set_index('sub-category')
    # The sheet value is multiplied by the death total and rounded to get
    # a per-race count (assumes 'SUM(% of deaths)' is a fraction of the
    # total -- TODO confirm against the dashboard).
    estimated_counts = [
        round(share * deaths)
        for share in death_shares['SUM(% of deaths)'].values
    ]
    deaths_by_race = death_shares.assign(Count=estimated_counts)
    aa_deaths = deaths_by_race.loc['Black']['Count']
    known_race_deaths = deaths - deaths_by_race.loc['unknown']['Count']

    series_fields = dict(
        date=date,
        cases=cases,
        deaths=deaths,
        aa_cases=aa_cases,
        aa_deaths=aa_deaths,
        pct_aa_cases=misc.to_percentage(aa_cases, known_race_cases),
        pct_aa_deaths=misc.to_percentage(aa_deaths, known_race_deaths),
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=False,
        known_race_cases=known_race_cases,
        known_race_deaths=known_race_deaths,
    )
    return [self._make_series(**series_fields)]
def _scrape(self, **kwargs):
    """Build one series from the Tableau cases and deaths dashboards.

    Both totals and the report date are read from the response captured
    on ``self.CASES_URL``; the response captured on ``self.DEATHS_URL``
    only supplies the per-race death rows.

    Args:
        **kwargs: Accepted but not referenced by this method.

    Returns:
        A one-element list containing the value of ``self._make_series``.
    """
    runner = WebdriverRunner()
    steps = (
        WebdriverSteps()
        .go_to_url(self.CASES_URL)
        .wait_for_number_of_elements((By.XPATH, '//canvas'), 38)
        .find_request('cases', find_by=tableau.find_tableau_request)
        .clear_request_history()
        .go_to_url(self.DEATHS_URL)
        .wait_for_number_of_elements((By.XPATH, '//canvas'), 20)
        .find_request('deaths', find_by=tableau.find_tableau_request)
    )
    results = runner.run(steps)

    # --- Cases response: date, both totals, race breakdown ---------------
    cases_parser = tableau.TableauParser(request=results.requests['cases'])
    footer_date = pydash.head(
        cases_parser.extract_data_from_key('Footer')['AGG(Today)'])
    date = datetime.strptime(footer_date, '%m-%d-%y').date()

    cases = pydash.head(
        cases_parser.extract_data_from_key('Total Cases')
        ['AGG(Total Cases)'])
    deaths = pydash.head(
        cases_parser.extract_data_from_key('Total Deaths')
        ['SUM(Count Of Deaths)'])

    # NOTE: the sheet key really does end with a space.
    case_shares = pd.DataFrame.from_dict(
        cases_parser.extract_data_from_key('Race Breakdown '))
    case_shares = case_shares.set_index('Race')
    # Each sheet value is scaled by the case total and rounded (assumes
    # the column holds a fraction of the total -- TODO confirm).
    case_counts = [
        round(share * cases)
        for share in case_shares['CNTD(Caseid 1)'].values
    ]
    cases_by_race = case_shares.assign(Count=case_counts)
    aa_cases = cases_by_race.loc['Black']['Count']
    known_race_cases = cases - cases_by_race.loc['Unknown']['Count']

    # --- Deaths response: race breakdown only ----------------------------
    deaths_parser = tableau.TableauParser(
        request=results.requests['deaths'])
    death_shares = pd.DataFrame.from_dict(
        deaths_parser.extract_data_from_key('Bar | Race'))
    death_shares = death_shares.set_index('Race')
    death_counts = [
        round(share * deaths)
        for share in death_shares['SUM(Death Count)'].values
    ]
    deaths_by_race = death_shares.assign(Count=death_counts)
    aa_deaths = deaths_by_race.loc['Black']['Count']
    known_race_deaths = deaths - deaths_by_race.loc['Unknown']['Count']

    series_fields = dict(
        date=date,
        cases=cases,
        deaths=deaths,
        aa_cases=aa_cases,
        aa_deaths=aa_deaths,
        pct_aa_cases=misc.to_percentage(aa_cases, known_race_cases),
        pct_aa_deaths=misc.to_percentage(aa_deaths, known_race_deaths),
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=True,
        known_race_cases=known_race_cases,
        known_race_deaths=known_race_deaths,
    )
    return [self._make_series(**series_fields)]
def _scrape(self, **kwargs):
    """Build one series from the home page and two Tableau demographic views.

    The report date is taken from a ``<strong>`` element on
    ``self.HOME_PAGE_URL`` whose text matches ``current``; counts come from
    the Tableau requests captured on ``self.DEMOGRAPHIC_CASES_URL`` and
    ``self.DEMOGRAPHIC_DEATHS_URL``.

    Args:
        **kwargs: Accepted but not referenced by this method.

    Returns:
        A one-element list containing the value of ``self._make_series``.

    Raises:
        AssertionError: If the date element is missing or contains no
            ``m/d/yyyy`` date.
    """
    runner = WebdriverRunner()
    steps = (
        WebdriverSteps()
        .go_to_url(self.HOME_PAGE_URL)
        .get_page_source()
        .go_to_url(self.DEMOGRAPHIC_CASES_URL)
        .find_request(key='cases', find_by=tableau.find_tableau_request)
        .clear_request_history()
        .go_to_url(self.DEMOGRAPHIC_DEATHS_URL)
        .find_request('deaths', find_by=tableau.find_tableau_request)
    )
    results = runner.run(steps)

    # --- Report date from the home page ----------------------------------
    date_str_element = results.page_source.find(
        'strong', string=re.compile('current'))
    assert date_str_element, 'No date element found'
    element_text = date_str_element.get_text()
    matches = re.compile(r'(\d{1,2}\/\d{1,2}\/\d{4})').search(element_text)
    assert matches, 'Date not found.'
    date = datetime.strptime(matches.group(), '%m/%d/%Y').date()

    # --- Cases -----------------------------------------------------------
    cases_parser = tableau.TableauParser(request=results.requests['cases'])
    cases_by_race = cases_parser.get_dataframe_from_key('CaseRace')
    cases_by_race = cases_by_race.set_index('Measure Status')
    aa_case_row = cases_by_race.loc['Black or African American']
    # The overall total is read from the same row as the race value
    # (presumably the total is repeated on every row -- TODO confirm).
    cases = aa_case_row['AGG(Calculation1)']
    aa_cases = round(aa_case_row['SUM(Count)'] * cases)

    # --- Deaths ----------------------------------------------------------
    deaths_parser = tableau.TableauParser(
        request=results.requests['deaths'])
    total_deaths_frame = deaths_parser.get_dataframe_from_key(
        'Total Deaths (2)')
    deaths = total_deaths_frame['SUM(Deaths)'].sum()
    deaths_by_race = deaths_parser.get_dataframe_from_key('Race')
    deaths_by_race = deaths_by_race.set_index('Measure Status11')
    aa_deaths = round(
        deaths_by_race.loc['Black or African American']['SUM(Deaths)']
        * deaths)

    series_fields = dict(
        date=date,
        cases=cases,
        deaths=deaths,
        aa_cases=aa_cases,
        aa_deaths=aa_deaths,
        pct_aa_cases=to_percentage(aa_cases, cases),
        pct_aa_deaths=to_percentage(aa_deaths, deaths),
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=True,
    )
    return [self._make_series(**series_fields)]
def _scrape(self, **kwargs):
    """Build one series from the Tableau cases and deaths dashboards.

    Visits ``self.CASES_URL`` and ``self.DEATHS_URL``, captures the Tableau
    request made by each, and reads totals plus the ``Black`` and
    ``Unknown`` rows of the race tables.

    Args:
        **kwargs: Accepted but not referenced by this method.

    Returns:
        A one-element list containing the value of ``self._make_series``.
    """
    runner = WebdriverRunner()
    steps = (
        WebdriverSteps()
        .go_to_url(self.CASES_URL)
        .find_request('cases', find_by=tableau.find_tableau_request)
        .clear_request_history()
        .go_to_url(self.DEATHS_URL)
        .find_request('deaths', find_by=tableau.find_tableau_request)
    )
    results = runner.run(steps)

    # --- Cases dashboard -------------------------------------------------
    cases_parser = tableau.TableauParser(request=results.requests['cases'])
    updated_text = pydash.head(
        cases_parser.extract_data_from_key('cases')['ATTR(dateupdated)'])
    date = datetime.strptime(updated_text, '%m/%d/%Y').date()
    cases = pydash.head(
        cases_parser.extract_data_from_key('cases')
        ['SUM(Laboratory Confirmed Cases)'])

    cases_by_race = pd.DataFrame.from_dict(
        cases_parser.extract_data_from_key('raceth'))
    cases_by_race = cases_by_race.set_index('subcategory')
    aa_cases = cases_by_race.loc['Black']['SUM(count)']
    known_race_cases = cases - cases_by_race.loc['Unknown']['SUM(count)']

    # --- Deaths dashboard ------------------------------------------------
    deaths_parser = tableau.TableauParser(
        request=results.requests['deaths'])
    deaths = pydash.head(
        deaths_parser.extract_data_from_key('death (2)')['SUM(Deaths)'])

    deaths_by_race = pd.DataFrame.from_dict(
        deaths_parser.extract_data_from_key('raceth (death)'))
    deaths_by_race = deaths_by_race.set_index('subcategory')
    aa_deaths = deaths_by_race.loc['Black']['SUM(count)']
    known_race_deaths = (
        deaths - deaths_by_race.loc['Unknown']['SUM(count)'])

    series_fields = dict(
        date=date,
        cases=cases,
        deaths=deaths,
        aa_cases=aa_cases,
        aa_deaths=aa_deaths,
        pct_aa_cases=misc.to_percentage(aa_cases, known_race_cases),
        pct_aa_deaths=misc.to_percentage(aa_deaths, known_race_deaths),
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=False,
        known_race_cases=known_race_cases,
        known_race_deaths=known_race_deaths,
    )
    return [self._make_series(**series_fields)]
def _scrape(self, **kwargs):
    """Build one series from three Tableau views and one CSV file.

    Tableau requests are captured on ``self.SUMMARY_URL``,
    ``self.DEATHS_URL`` and ``self.NYS_RACE_DEATHS_URL``; a CSV is read
    from ``self.NYC_RACE_DEATHS_URL``. Race-specific death figures are the
    sum of the Tableau table and the CSV. No race-specific case figures
    are produced.

    Args:
        **kwargs: Accepted but not referenced by this method.

    Returns:
        A one-element list containing the value of ``self._make_series``.
    """
    runner = WebdriverRunner()
    steps = (
        WebdriverSteps()
        .go_to_url(self.SUMMARY_URL)
        .find_request('summary', find_by=tableau.find_tableau_request)
        .clear_request_history()
        .go_to_url(self.DEATHS_URL)
        .find_request('deaths', find_by=tableau.find_tableau_request)
        .clear_request_history()
        .go_to_url(self.NYS_RACE_DEATHS_URL)
        .find_request('race_deaths', find_by=tableau.find_tableau_request)
    )
    results = runner.run(steps)

    # --- Summary view: report date and total cases -----------------------
    summary_parser = tableau.TableauParser(
        request=results.requests['summary'])
    date_sheet = summary_parser.extract_data_from_key(
        'DASHBOARD 1 DATE (2)')
    date_text = pydash.get(
        date_sheet, 'MAX(Last_Reported_Test_Formatted).0')
    date = datetime.strptime(date_text, '%m/%d/%Y').date()

    case_table = self.get_cases_df(
        summary_parser.extract_data_from_key('TABLE VIEW (LARGE)'))
    cases = case_table.loc['%all%']['Measure Values']

    # --- Deaths view: total deaths ---------------------------------------
    deaths_parser = tableau.TableauParser(
        request=results.requests['deaths'])
    # The sheet key is misspelled at the source; it must stay as is.
    death_table = self.get_deaths_df(
        deaths_parser.extract_data_from_key('Fatalaties by County'))
    deaths = death_table.loc['%all%']['Measure Values']

    # --- Race-specific deaths: Tableau table plus CSV --------------------
    race_parser = tableau.TableauParser(
        request=results.requests['race_deaths'])
    nys_by_race = self.get_nys_race_deaths_df(
        race_parser.extract_data_from_key('Race/Ethnicity Table (2)'))
    nyc_by_race = pd.read_csv(self.NYC_RACE_DEATHS_URL)
    nyc_by_race = nyc_by_race.set_index('RACE_GROUP')

    nys_aa = nys_by_race.loc['Black']['Measure Values']
    nyc_aa = nyc_by_race.loc['Black/African-American']['DEATH_COUNT']
    aa_deaths = (nys_aa + nyc_aa)

    nys_total = nys_by_race.loc['Total']['Measure Values']
    known_race_deaths = nys_total + nyc_by_race['DEATH_COUNT'].sum()

    series_fields = dict(
        date=date,
        cases=cases,
        deaths=deaths,
        aa_deaths=aa_deaths,
        pct_aa_deaths=misc.to_percentage(aa_deaths, known_race_deaths),
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=False,
        known_race_deaths=known_race_deaths,
    )
    return [self._make_series(**series_fields)]
def _scrape(self, **kwargs):
    """Build one series from a single Tableau summary dashboard.

    Loads ``self.URL``, captures the Tableau request, and reads the report
    date plus the race rows of the hospitalization-status and
    survival-outcome sheets.

    Args:
        **kwargs: Accepted but not referenced by this method.

    Returns:
        A one-element list containing the value of ``self._make_series``.

    Raises:
        ValueError: If the 'Date Stamp' text contains no ``m/d/yyyy`` date.
    """
    runner = WebdriverRunner()
    results = runner.run(
        WebdriverSteps()
        .go_to_url(self.URL)
        .wait_for_number_of_elements((By.XPATH, '//canvas'), 12)
        .find_request('summary', find_by=tableau.find_tableau_request))

    parser = tableau.TableauParser(request=results.requests['summary'])

    date_str = parser.get_dataframe_from_key('Date Stamp').loc[0]['Date Stamp']
    match = re.search(r'\d{1,2}\/\d{1,2}\/\d{4}', date_str)
    if match is None:
        # re.search returns None on no match; fail with a message that
        # names the problem instead of an AttributeError on ``.group``.
        raise ValueError(
            'No m/d/yyyy date found in Date Stamp text: {!r}'.format(
                date_str))
    date = datetime.strptime(match.group(), '%m/%d/%Y').date()

    def _race_rows(sheet_key, status):
        """Return the 'Race' rows of a sheet for one status, counts as ints."""
        frame = parser.get_dataframe_from_key(sheet_key)
        frame = (
            frame
            .set_index(['Status Value', 'Demographic', 'Categories'])
            .sort_index()
            .loc[(status, 'Race')]
            # Explicit copy so the column assignment below never targets a
            # view of the intermediate frame (avoids chained-assignment
            # warnings).
            .copy()
        )
        frame['SUM(Count)'] = frame['SUM(Count)'].apply(raw_string_to_int)
        return frame

    cases_df = _race_rows(
        'Demographic Data - Hospitalizaton Status', '%all%')
    cases = cases_df.loc['All', 'SUM(Count)']
    aa_cases = cases_df.loc['Black', 'SUM(Count)']
    known_race_cases = cases - cases_df.loc['Refused/Unknown', 'SUM(Count)']

    deaths_df = _race_rows('Demographic Data - Survival Outcomes', 'Died')
    deaths = deaths_df.loc['All', 'SUM(Count)']
    aa_deaths = deaths_df.loc['Black', 'SUM(Count)']
    known_race_deaths = (
        deaths - deaths_df.loc['Refused/Unknown', 'SUM(Count)'])

    pct_aa_cases = to_percentage(aa_cases, known_race_cases)
    pct_aa_deaths = to_percentage(aa_deaths, known_race_deaths)

    return [self._make_series(
        date=date,
        cases=cases,
        deaths=deaths,
        aa_cases=aa_cases,
        aa_deaths=aa_deaths,
        pct_aa_cases=pct_aa_cases,
        pct_aa_deaths=pct_aa_deaths,
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=True,
        known_race_cases=known_race_cases,
        known_race_deaths=known_race_deaths
    )]
def _scrape(self, **kwargs):
    """Build one series from an ArcGIS summary query and a Tableau race view.

    Totals come from ``arcgis.query_geoservice`` called with
    ``self.SUMMARY_QUERY``; race-specific case counts come from the
    Tableau request captured on ``self.RACE_URL``; the date is derived by
    ``self.get_date`` from the page source of ``self.MAIN_PAGE_URL``.
    No race-specific death figures are produced.

    Args:
        **kwargs: Accepted but not referenced by this method.

    Returns:
        A one-element list containing the value of ``self._make_series``.
    """
    # --- Totals from the geoservice --------------------------------------
    _unused, totals = arcgis.query_geoservice(**self.SUMMARY_QUERY)
    cases = totals.loc[0, 'Cases']
    deaths = totals.loc[0, 'Deaths']

    # --- Browser session: race view, then main page ----------------------
    runner = WebdriverRunner()
    steps = (
        WebdriverSteps()
        .go_to_url(self.RACE_URL)
        .wait_for_number_of_elements((By.XPATH, '//canvas'), 14)
        .find_request('race_cases', find_by=tableau.find_tableau_request)
        .go_to_url(self.MAIN_PAGE_URL)
        .get_page_source()
    )
    results = runner.run(steps)

    soup = results.page_source
    date = self.get_date(soup)

    # --- Race-specific cases ---------------------------------------------
    parser = tableau.TableauParser(request=results.requests['race_cases'])
    census = parser.get_dataframe_from_key('Census')
    # Keep only the rows tagged with the 'Case %' measure.
    is_case_row = census['Measure Names'] == 'Case %'
    cases_by_race = census.loc[is_case_row].set_index('Race')
    aa_cases = cases_by_race.loc['Black', 'SUM(Case Count)']
    known_race_cases = cases_by_race['SUM(Case Count)'].sum()

    series_fields = dict(
        date=date,
        cases=cases,
        deaths=deaths,
        aa_cases=aa_cases,
        pct_aa_cases=misc.to_percentage(aa_cases, known_race_cases),
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=True,
        known_race_cases=known_race_cases,
    )
    return [self._make_series(**series_fields)]
def _scrape(self, **kwargs):
    """Build one series from a home page and a two-step Tableau story.

    A first webdriver run fetches ``self.HOME_URL`` so that
    ``self.parse_date`` can derive the date. A second run loads
    ``self.URL``, captures the Tableau request for cases, clicks an element
    located by XPath, and captures the follow-up request whose path
    contains ``set-active-story-point`` for deaths.

    Args:
        **kwargs: Accepted but not referenced by this method.

    Returns:
        A one-element list containing the value of ``self._make_series``.
    """
    runner = WebdriverRunner()

    # --- First run: report date ------------------------------------------
    home_steps = WebdriverSteps().go_to_url(self.HOME_URL).get_page_source()
    results = runner.run(home_steps)
    date = self.parse_date(results.page_source)

    # --- Second run: cases request, click, deaths request ----------------
    def is_story_point_request(request):
        return 'set-active-story-point' in request.path

    story_steps = (
        WebdriverSteps()
        .go_to_url(self.URL)
        .wait_for_number_of_elements((By.XPATH, '//canvas'), 32)
        .find_request('cases', tableau.find_tableau_request)
        .clear_request_history()
        .find_element_by_xpath(
            '//*[@id="tabZoneId4"]/div/div/div/span[2]/div/span/span/span[2]/div[2]/div'
        )
        .click_on_last_element_found()
        .wait_for_number_of_elements(
            (By.XPATH, "//span[contains(text(), 'Deaths')]"), 6)
        .find_request('deaths', find_by=is_story_point_request)
    )
    results = runner.run(story_steps)

    # --- Cases -----------------------------------------------------------
    cases_parser = tableau.TableauParser(request=results.requests['cases'])
    cases = pydash.head(
        cases_parser.extract_data_from_key('Cases')
        ['SUM(Number of Records)'])
    case_shares = pd.DataFrame.from_dict(
        cases_parser.extract_data_from_key('Race_Cases'))
    case_shares = case_shares.set_index('Assigned Race')
    # Each sheet value is scaled by the total and rounded (assumes the
    # column holds a fraction of the total -- TODO confirm).
    case_counts = [
        round(share * cases)
        for share in case_shares['SUM(Number of Records)'].values
    ]
    cases_by_race = case_shares.assign(Count=case_counts)
    aa_cases = cases_by_race.loc['Black']['Count']
    known_race_cases = cases - cases_by_race.loc['Unknown']['Count']

    # --- Deaths ----------------------------------------------------------
    deaths_parser = tableau.TableauParser(
        request=results.requests['deaths'])
    deaths = pydash.head(
        deaths_parser.extract_data_from_key('NumberDeaths')
        ['SUM(Number of Records)'])
    death_shares = pd.DataFrame.from_dict(
        deaths_parser.extract_data_from_key('Race_Deaths'))
    death_shares = death_shares.set_index('Assigned Race')
    death_counts = [
        round(share * deaths)
        for share in death_shares['SUM(Number of Records)'].values
    ]
    deaths_by_race = death_shares.assign(Count=death_counts)
    aa_deaths = deaths_by_race.loc['Black']['Count']
    known_race_deaths = deaths - deaths_by_race.loc['Unknown']['Count']

    series_fields = dict(
        date=date,
        cases=cases,
        deaths=deaths,
        aa_cases=aa_cases,
        aa_deaths=aa_deaths,
        pct_aa_cases=misc.to_percentage(aa_cases, known_race_cases),
        pct_aa_deaths=misc.to_percentage(aa_deaths, known_race_deaths),
        pct_includes_unknown_race=False,
        pct_includes_hispanic_black=True,
        known_race_cases=known_race_cases,
        known_race_deaths=known_race_deaths,
    )
    return [self._make_series(**series_fields)]