예제 #1
0
    def _scrape(self, **kwargs):
        """Build one data series from the cases and deaths dashboards.

        Drives a webdriver through ``self.CASES_URL`` and then
        ``self.DEATHS_URL``, capturing one Tableau request from each page
        under the keys ``'cases'`` and ``'deaths'``. Totals and the
        per-race breakdowns are then read out of the parsed requests.

        Args:
            **kwargs: Accepted but not used by this method.

        Returns:
            A single-element list holding the value returned by
            ``self._make_series``.
        """
        runner = WebdriverRunner()
        canvas_locator = (By.XPATH, '//canvas')
        steps = (
            WebdriverSteps()
            .go_to_url(self.CASES_URL)
            .wait_for_number_of_elements(canvas_locator, 58)
            .find_request('cases', find_by=tableau.find_tableau_request)
            .clear_request_history()
            .go_to_url(self.DEATHS_URL)
            .wait_for_number_of_elements(canvas_locator, 29)
            .find_request('deaths', find_by=tableau.find_tableau_request)
        )
        results = runner.run(steps)

        # --- cases dashboard ---
        cases_parser = tableau.TableauParser(
            request=results.requests['cases'])

        updated_column = cases_parser.extract_data_from_key('cases')[
            'ATTR(Date Updated)']
        raw_date_str = pydash.head(updated_column)
        date = datetime.strptime(raw_date_str, '%A, %B %d, %Y').date()

        confirmed_column = cases_parser.extract_data_from_key('cases')[
            'SUM(# Lab Confirmed Cases)']
        confirmed_cases = pydash.head(confirmed_column)
        probable_column = cases_parser.extract_data_from_key(
            'probable cases')['SUM(# probable)']
        probable_cases = pydash.head(probable_column)
        # The reported total is lab-confirmed plus probable cases.
        cases = confirmed_cases + probable_cases

        race_cases = pd.DataFrame.from_dict(
            cases_parser.extract_data_from_key('raceth'))
        race_cases = race_cases.set_index('sub-category')
        aa_cases = race_cases.loc['Black']['SUM(count)']
        unknown_cases = race_cases.loc['unknown']['SUM(count)']
        known_race_cases = cases - unknown_cases

        # --- deaths dashboard ---
        deaths_parser = tableau.TableauParser(
            request=results.requests['deaths'])
        deaths_column = deaths_parser.extract_data_from_key('death (2)')[
            'SUM(# lab confirmed deaths)']
        deaths = pydash.head(deaths_column)

        race_deaths = pd.DataFrame.from_dict(
            deaths_parser.extract_data_from_key('raceth (death)'))
        race_deaths = race_deaths.set_index('sub-category')
        # Each 'SUM(% of deaths)' value is multiplied by the death total and
        # rounded to get a count (assumes the values are fractions rather
        # than 0-100 percentages -- TODO confirm against the dashboard).
        death_counts = [
            round(share * deaths)
            for share in race_deaths['SUM(% of deaths)'].values
        ]
        race_deaths = race_deaths.assign(Count=death_counts)
        aa_deaths = race_deaths.loc['Black']['Count']
        unknown_deaths = race_deaths.loc['unknown']['Count']
        known_race_deaths = deaths - unknown_deaths

        pct_aa_cases = misc.to_percentage(aa_cases, known_race_cases)
        pct_aa_deaths = misc.to_percentage(aa_deaths, known_race_deaths)

        series = self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=False,
            known_race_cases=known_race_cases,
            known_race_deaths=known_race_deaths,
        )
        return [series]
    def _scrape(self, **kwargs):
        """Build one data series from the cases and deaths dashboards.

        Loads ``self.CASES_URL`` and ``self.DEATHS_URL`` with a webdriver,
        capturing one Tableau request per page (keys ``'cases'`` and
        ``'deaths'``). The date and both totals come from the cases
        request; the per-race values are multiplied by the matching total
        and rounded to obtain counts.

        Args:
            **kwargs: Accepted but not used by this method.

        Returns:
            A single-element list holding the value returned by
            ``self._make_series``.
        """
        runner = WebdriverRunner()
        canvas_locator = (By.XPATH, '//canvas')
        steps = (
            WebdriverSteps()
            .go_to_url(self.CASES_URL)
            .wait_for_number_of_elements(canvas_locator, 38)
            .find_request('cases', find_by=tableau.find_tableau_request)
            .clear_request_history()
            .go_to_url(self.DEATHS_URL)
            .wait_for_number_of_elements(canvas_locator, 20)
            .find_request('deaths', find_by=tableau.find_tableau_request)
        )
        results = runner.run(steps)

        # --- cases request: date, both totals, race breakdown of cases ---
        cases_parser = tableau.TableauParser(
            request=results.requests['cases'])

        footer_dates = cases_parser.extract_data_from_key('Footer')[
            'AGG(Today)']
        date_str = pydash.head(footer_dates)
        date = datetime.strptime(date_str, '%m-%d-%y').date()

        total_cases_column = cases_parser.extract_data_from_key(
            'Total Cases')['AGG(Total Cases)']
        cases = pydash.head(total_cases_column)
        # Note: the death total is read from the *cases* request.
        total_deaths_column = cases_parser.extract_data_from_key(
            'Total  Deaths')['SUM(Count Of Deaths)']
        deaths = pydash.head(total_deaths_column)

        race_case_shares = pd.DataFrame.from_dict(
            cases_parser.extract_data_from_key('Race Breakdown '))
        race_case_shares = race_case_shares.set_index('Race')
        # Values are scaled by the case total; this assumes they are
        # fractions of the total -- TODO confirm against the dashboard.
        case_counts = [
            round(share * cases)
            for share in race_case_shares['CNTD(Caseid 1)'].values
        ]
        race_cases = race_case_shares.assign(Count=case_counts)
        aa_cases = race_cases.loc['Black']['Count']
        unknown_cases = race_cases.loc['Unknown']['Count']
        known_race_cases = cases - unknown_cases

        # --- deaths request: race breakdown of deaths ---
        deaths_parser = tableau.TableauParser(
            request=results.requests['deaths'])
        race_death_shares = pd.DataFrame.from_dict(
            deaths_parser.extract_data_from_key('Bar | Race'))
        race_death_shares = race_death_shares.set_index('Race')
        death_counts = [
            round(share * deaths)
            for share in race_death_shares['SUM(Death Count)'].values
        ]
        race_deaths = race_death_shares.assign(Count=death_counts)
        aa_deaths = race_deaths.loc['Black']['Count']
        unknown_deaths = race_deaths.loc['Unknown']['Count']
        known_race_deaths = deaths - unknown_deaths

        pct_aa_cases = misc.to_percentage(aa_cases, known_race_cases)
        pct_aa_deaths = misc.to_percentage(aa_deaths, known_race_deaths)

        series = self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_race_cases,
            known_race_deaths=known_race_deaths,
        )
        return [series]
예제 #3
0
    def _scrape(self, **kwargs):
        """Scrape the home page date and the demographic dashboards.

        Fetches the page source of ``self.HOME_PAGE_URL`` for the report
        date, then captures one Tableau request each from
        ``self.DEMOGRAPHIC_CASES_URL`` (key ``'cases'``) and
        ``self.DEMOGRAPHIC_DEATHS_URL`` (key ``'deaths'``).

        Args:
            **kwargs: Accepted but not used by this method.

        Returns:
            A single-element list holding the value returned by
            ``self._make_series``.

        Raises:
            AssertionError: If no ``<strong>`` element matching
                ``'current'`` is found on the home page, or if that
                element's text contains no ``m/d/yyyy`` date.
        """
        runner = WebdriverRunner()
        results = runner.run(
            WebdriverSteps()
            .go_to_url(self.HOME_PAGE_URL)
            .get_page_source()
            .go_to_url(self.DEMOGRAPHIC_CASES_URL)
            .find_request(key='cases', find_by=tableau.find_tableau_request)
            .clear_request_history()
            .go_to_url(self.DEMOGRAPHIC_DEATHS_URL)
            .find_request('deaths', find_by=tableau.find_tableau_request))

        # These checks validate scraped (external) data, so they are raised
        # explicitly instead of via ``assert``: assert statements are
        # removed under ``python -O``, which would turn a missing element
        # into an opaque AttributeError on None. AssertionError is kept so
        # existing callers see the same exception type and message.
        date_str_element = results.page_source.find(
            'strong', string=re.compile('current'))
        if not date_str_element:
            raise AssertionError('No date element found')
        date_str = date_str_element.get_text()
        pattern = re.compile(r'(\d{1,2}\/\d{1,2}\/\d{4})')
        matches = pattern.search(date_str)
        if not matches:
            raise AssertionError('Date not found.')
        date = datetime.strptime(matches.group(), '%m/%d/%Y').date()

        parser = tableau.TableauParser(request=results.requests['cases'])
        cases_df = parser.get_dataframe_from_key('CaseRace').set_index(
            'Measure Status')
        # NOTE(review): the case total is read from the
        # 'AGG(Calculation1)' column of the 'Black or African American'
        # row, and that row's 'SUM(Count)' is then scaled by it --
        # presumably 'SUM(Count)' is a share of the total; confirm
        # against the dashboard.
        cases = cases_df.loc['Black or African American']['AGG(Calculation1)']
        aa_cases = round(
            cases_df.loc['Black or African American']['SUM(Count)'] * cases)

        parser = tableau.TableauParser(request=results.requests['deaths'])
        deaths = parser.get_dataframe_from_key(
            'Total Deaths (2)')['SUM(Deaths)'].sum()
        deaths_df = parser.get_dataframe_from_key('Race').set_index(
            'Measure Status11')
        # Same scaling as for cases: row value times the death total.
        aa_deaths = round(
            deaths_df.loc['Black or African American']['SUM(Deaths)'] * deaths)

        pct_aa_cases = to_percentage(aa_cases, cases)
        pct_aa_deaths = to_percentage(aa_deaths, deaths)

        return [
            self._make_series(
                date=date,
                cases=cases,
                deaths=deaths,
                aa_cases=aa_cases,
                aa_deaths=aa_deaths,
                pct_aa_cases=pct_aa_cases,
                pct_aa_deaths=pct_aa_deaths,
                pct_includes_unknown_race=False,
                pct_includes_hispanic_black=True,
            )
        ]
    def _scrape(self, **kwargs):
        """Build one data series from the cases and deaths dashboards.

        Visits ``self.CASES_URL`` and ``self.DEATHS_URL`` with a webdriver,
        capturing one Tableau request per page (keys ``'cases'`` and
        ``'deaths'``), then reads the date, the totals and the per-race
        counts from the parsed requests.

        Args:
            **kwargs: Accepted but not used by this method.

        Returns:
            A single-element list holding the value returned by
            ``self._make_series``.
        """
        runner = WebdriverRunner()
        steps = (
            WebdriverSteps()
            .go_to_url(self.CASES_URL)
            .find_request('cases', find_by=tableau.find_tableau_request)
            .clear_request_history()
            .go_to_url(self.DEATHS_URL)
            .find_request('deaths', find_by=tableau.find_tableau_request)
        )
        results = runner.run(steps)

        # --- cases request ---
        cases_parser = tableau.TableauParser(
            request=results.requests['cases'])
        updated_column = cases_parser.extract_data_from_key('cases')[
            'ATTR(dateupdated)']
        raw_date_str = pydash.head(updated_column)
        date = datetime.strptime(raw_date_str, '%m/%d/%Y').date()

        confirmed_column = cases_parser.extract_data_from_key('cases')[
            'SUM(Laboratory Confirmed Cases)']
        cases = pydash.head(confirmed_column)

        race_cases = pd.DataFrame.from_dict(
            cases_parser.extract_data_from_key('raceth'))
        race_cases = race_cases.set_index('subcategory')
        aa_cases = race_cases.loc['Black']['SUM(count)']
        unknown_cases = race_cases.loc['Unknown']['SUM(count)']
        known_race_cases = cases - unknown_cases

        # --- deaths request ---
        deaths_parser = tableau.TableauParser(
            request=results.requests['deaths'])
        deaths_column = deaths_parser.extract_data_from_key('death (2)')[
            'SUM(Deaths)']
        deaths = pydash.head(deaths_column)

        race_deaths = pd.DataFrame.from_dict(
            deaths_parser.extract_data_from_key('raceth (death)'))
        race_deaths = race_deaths.set_index('subcategory')
        aa_deaths = race_deaths.loc['Black']['SUM(count)']
        unknown_deaths = race_deaths.loc['Unknown']['SUM(count)']
        known_race_deaths = deaths - unknown_deaths

        pct_aa_cases = misc.to_percentage(aa_cases, known_race_cases)
        pct_aa_deaths = misc.to_percentage(aa_deaths, known_race_deaths)

        series = self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=False,
            known_race_cases=known_race_cases,
            known_race_deaths=known_race_deaths,
        )
        return [series]
    def _scrape(self, **kwargs):
        """Build one data series from three Tableau views plus a CSV.

        Captures one Tableau request from each of ``self.SUMMARY_URL``
        (key ``'summary'``), ``self.DEATHS_URL`` (key ``'deaths'``) and
        ``self.NYS_RACE_DEATHS_URL`` (key ``'race_deaths'``), and reads a
        CSV from ``self.NYC_RACE_DEATHS_URL``. Race-specific death figures
        are the sum of the Tableau table and the CSV values.

        Args:
            **kwargs: Accepted but not used by this method.

        Returns:
            A single-element list holding the value returned by
            ``self._make_series``. No race-specific case figures are
            passed.
        """
        runner = WebdriverRunner()
        steps = (
            WebdriverSteps()
            .go_to_url(self.SUMMARY_URL)
            .find_request('summary', find_by=tableau.find_tableau_request)
            .clear_request_history()
            .go_to_url(self.DEATHS_URL)
            .find_request('deaths', find_by=tableau.find_tableau_request)
            .clear_request_history()
            .go_to_url(self.NYS_RACE_DEATHS_URL)
            .find_request('race_deaths',
                          find_by=tableau.find_tableau_request)
        )
        results = runner.run(steps)

        # --- summary request: report date and total cases ---
        summary_parser = tableau.TableauParser(
            request=results.requests['summary'])
        date_info = summary_parser.extract_data_from_key(
            'DASHBOARD 1 DATE (2)')
        date_str = pydash.get(
            date_info, 'MAX(Last_Reported_Test_Formatted).0')
        date = datetime.strptime(date_str, '%m/%d/%Y').date()

        cases_table = summary_parser.extract_data_from_key(
            'TABLE VIEW (LARGE)')
        cases_df = self.get_cases_df(cases_table)
        cases = cases_df.loc['%all%']['Measure Values']

        # --- deaths request: total deaths ---
        deaths_parser = tableau.TableauParser(
            request=results.requests['deaths'])
        deaths_table = deaths_parser.extract_data_from_key(
            'Fatalaties by County')
        deaths_df = self.get_deaths_df(deaths_table)
        deaths = deaths_df.loc['%all%']['Measure Values']

        # --- race breakdown of deaths: Tableau table plus CSV ---
        race_parser = tableau.TableauParser(
            request=results.requests['race_deaths'])
        race_table = race_parser.extract_data_from_key(
            'Race/Ethnicity Table (2)')
        nys_race_deaths_df = self.get_nys_race_deaths_df(race_table)
        nyc_race_deaths_df = pd.read_csv(self.NYC_RACE_DEATHS_URL)
        nyc_race_deaths_df = nyc_race_deaths_df.set_index('RACE_GROUP')

        nys_aa_deaths = nys_race_deaths_df.loc['Black']['Measure Values']
        nyc_aa_deaths = nyc_race_deaths_df.loc['Black/African-American'][
            'DEATH_COUNT']
        aa_deaths = nys_aa_deaths + nyc_aa_deaths

        nys_total = nys_race_deaths_df.loc['Total']['Measure Values']
        nyc_total = nyc_race_deaths_df['DEATH_COUNT'].sum()
        known_race_deaths = nys_total + nyc_total

        pct_aa_deaths = misc.to_percentage(aa_deaths, known_race_deaths)

        series = self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_deaths=aa_deaths,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=False,
            known_race_deaths=known_race_deaths,
        )
        return [series]
    def _scrape(self, **kwargs):
        """Build one data series from a single Tableau summary request.

        Loads ``self.URL`` with a webdriver, captures one Tableau request
        (key ``'summary'``), and reads the report date plus the race
        breakdown of cases and deaths from it.

        Args:
            **kwargs: Accepted but not used by this method.

        Returns:
            A single-element list holding the value returned by
            ``self._make_series``.

        Raises:
            ValueError: If the 'Date Stamp' text contains no ``m/d/yyyy``
                date.
        """
        runner = WebdriverRunner()
        results = runner.run(
            WebdriverSteps()
            .go_to_url(self.URL)
            .wait_for_number_of_elements((By.XPATH, '//canvas'), 12)
            .find_request('summary', find_by=tableau.find_tableau_request))

        parser = tableau.TableauParser(request=results.requests['summary'])
        date_str = parser.get_dataframe_from_key(
            'Date Stamp').loc[0]['Date Stamp']
        match = re.search(r'\d{1,2}\/\d{1,2}\/\d{4}', date_str)
        # re.search returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on ``None.group()``.
        if match is None:
            raise ValueError(
                'No m/d/yyyy date found in date stamp: {!r}'.format(date_str))
        date = datetime.strptime(match.group(), '%m/%d/%Y').date()

        index_columns = ['Status Value', 'Demographic', 'Categories']

        cases_df = parser.get_dataframe_from_key(
            'Demographic Data - Hospitalizaton Status')
        # Take an explicit copy of the slice so the column assignment
        # below writes to an independent frame rather than a possible
        # view of the indexed frame (avoids SettingWithCopyWarning).
        cases_df = cases_df.set_index(index_columns).sort_index().loc[
            ('%all%', 'Race')].copy()
        cases_df['SUM(Count)'] = cases_df['SUM(Count)'].apply(
            raw_string_to_int)
        cases = cases_df.loc['All', 'SUM(Count)']
        aa_cases = cases_df.loc['Black', 'SUM(Count)']
        known_race_cases = (
            cases - cases_df.loc['Refused/Unknown', 'SUM(Count)'])

        deaths_df = parser.get_dataframe_from_key(
            'Demographic Data - Survival Outcomes')
        deaths_df = deaths_df.set_index(index_columns).sort_index().loc[
            ('Died', 'Race')].copy()
        deaths_df['SUM(Count)'] = deaths_df['SUM(Count)'].apply(
            raw_string_to_int)

        deaths = deaths_df.loc['All', 'SUM(Count)']
        aa_deaths = deaths_df.loc['Black', 'SUM(Count)']
        known_race_deaths = (
            deaths - deaths_df.loc['Refused/Unknown', 'SUM(Count)'])

        pct_aa_cases = to_percentage(aa_cases, known_race_cases)
        pct_aa_deaths = to_percentage(aa_deaths, known_race_deaths)

        return [self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_race_cases,
            known_race_deaths=known_race_deaths
        )]
    def _scrape(self, **kwargs):
        """Build one data series from a geoservice query and a dashboard.

        Total cases and deaths come from row 0 of the frame returned by
        ``arcgis.query_geoservice(**self.SUMMARY_QUERY)``. The race
        breakdown of cases comes from a Tableau request captured on
        ``self.RACE_URL`` (key ``'race_cases'``), and the date is derived
        by ``self.get_date`` from the page source of
        ``self.MAIN_PAGE_URL``.

        Args:
            **kwargs: Accepted but not used by this method.

        Returns:
            A single-element list holding the value returned by
            ``self._make_series``. No race-specific death figures are
            passed.
        """
        _, summary_df = arcgis.query_geoservice(**self.SUMMARY_QUERY)
        cases = summary_df.loc[0, 'Cases']
        deaths = summary_df.loc[0, 'Deaths']

        runner = WebdriverRunner()
        steps = (
            WebdriverSteps()
            .go_to_url(self.RACE_URL)
            .wait_for_number_of_elements((By.XPATH, '//canvas'), 14)
            .find_request('race_cases',
                          find_by=tableau.find_tableau_request)
            .go_to_url(self.MAIN_PAGE_URL)
            .get_page_source()
        )
        results = runner.run(steps)
        date = self.get_date(results.page_source)

        race_parser = tableau.TableauParser(
            request=results.requests['race_cases'])
        census_df = race_parser.get_dataframe_from_key('Census')
        # Keep only the rows whose measure name is 'Case %'.
        is_case_pct = census_df['Measure Names'] == 'Case %'
        race_cases = census_df[is_case_pct].set_index('Race')
        aa_cases = race_cases.loc['Black', 'SUM(Case Count)']
        known_race_cases = race_cases['SUM(Case Count)'].sum()

        pct_aa_cases = misc.to_percentage(aa_cases, known_race_cases)

        series = self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            pct_aa_cases=pct_aa_cases,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_race_cases,
        )
        return [series]
    def _scrape(self, **kwargs):
        """Build one data series from the home page and a Tableau story.

        Runs the webdriver twice: first to fetch the page source of
        ``self.HOME_URL`` (passed to ``self.parse_date``), then on
        ``self.URL`` to capture the ``'cases'`` Tableau request, click an
        element located by XPath, and capture the ``'deaths'`` request
        whose path contains ``'set-active-story-point'``.

        Args:
            **kwargs: Accepted but not used by this method.

        Returns:
            A single-element list holding the value returned by
            ``self._make_series``.
        """
        runner = WebdriverRunner()

        home_steps = WebdriverSteps().go_to_url(self.HOME_URL)
        home_steps = home_steps.get_page_source()
        home_results = runner.run(home_steps)
        date = self.parse_date(home_results.page_source)

        dashboard_steps = (
            WebdriverSteps()
            .go_to_url(self.URL)
            .wait_for_number_of_elements((By.XPATH, '//canvas'), 32)
            .find_request('cases', tableau.find_tableau_request)
            .clear_request_history()
            .find_element_by_xpath(
                '//*[@id="tabZoneId4"]/div/div/div/span[2]/div/span/span/span[2]/div[2]/div'
            )
            .click_on_last_element_found()
            .wait_for_number_of_elements(
                (By.XPATH, "//span[contains(text(), 'Deaths')]"), 6)
            .find_request(
                'deaths',
                find_by=lambda r: 'set-active-story-point' in r.path)
        )
        results = runner.run(dashboard_steps)

        # --- cases request ---
        cases_parser = tableau.TableauParser(
            request=results.requests['cases'])
        total_cases_column = cases_parser.extract_data_from_key('Cases')[
            'SUM(Number of Records)']
        cases = pydash.head(total_cases_column)

        race_case_shares = pd.DataFrame.from_dict(
            cases_parser.extract_data_from_key('Race_Cases'))
        race_case_shares = race_case_shares.set_index('Assigned Race')
        # Per-race values are scaled by the total and rounded; this assumes
        # they are fractions of the total -- TODO confirm.
        case_counts = [
            round(share * cases)
            for share in race_case_shares['SUM(Number of Records)'].values
        ]
        race_cases = race_case_shares.assign(Count=case_counts)
        aa_cases = race_cases.loc['Black']['Count']
        unknown_cases = race_cases.loc['Unknown']['Count']
        known_race_cases = cases - unknown_cases

        # --- deaths request ---
        deaths_parser = tableau.TableauParser(
            request=results.requests['deaths'])
        total_deaths_column = deaths_parser.extract_data_from_key(
            'NumberDeaths')['SUM(Number of Records)']
        deaths = pydash.head(total_deaths_column)

        race_death_shares = pd.DataFrame.from_dict(
            deaths_parser.extract_data_from_key('Race_Deaths'))
        race_death_shares = race_death_shares.set_index('Assigned Race')
        death_counts = [
            round(share * deaths)
            for share in race_death_shares['SUM(Number of Records)'].values
        ]
        race_deaths = race_death_shares.assign(Count=death_counts)
        aa_deaths = race_deaths.loc['Black']['Count']
        unknown_deaths = race_deaths.loc['Unknown']['Count']
        known_race_deaths = deaths - unknown_deaths

        pct_aa_cases = misc.to_percentage(aa_cases, known_race_cases)
        pct_aa_deaths = misc.to_percentage(aa_deaths, known_race_deaths)

        series = self._make_series(
            date=date,
            cases=cases,
            deaths=deaths,
            aa_cases=aa_cases,
            aa_deaths=aa_deaths,
            pct_aa_cases=pct_aa_cases,
            pct_aa_deaths=pct_aa_deaths,
            pct_includes_unknown_race=False,
            pct_includes_hispanic_black=True,
            known_race_cases=known_race_cases,
            known_race_deaths=known_race_deaths,
        )
        return [series]