def pipe_add_latest_boosters(self, df: pd.DataFrame) -> pd.DataFrame: ts = TS() ts.loads( "https://public.tableau.com/views/DashboardVaksinKemkes/TotalVaksinasiDosis1" ) first_doses = ts.getWorkbook( ).worksheets[0].data["SUM(Divaksin 1)-alias"].values[0] ts.loads( "https://public.tableau.com/views/DashboardVaksinKemkes/TotalVaksinasiDosis2" ) second_doses = ts.getWorkbook( ).worksheets[0].data["SUM(Divaksin 2)-alias"].values[0] ts.loads( "https://public.tableau.com/views/DashboardVaksinKemkes/TotalVaksinasiDosis3" ) boosters = ts.getWorkbook( ).worksheets[0].data["SUM(Divaksin 3)-alias"].values[0] df.loc[df.date == df.date.max(), "total_boosters"] = boosters df.loc[df.date == df.date.max(), "total_vaccinations"] = first_doses + second_doses + boosters return df
def cli(db_path, tableau_views): """Fetch data from Tableau into a SQLite database Pass this command a SQLite databsae file and one or more Tableau views, where a Tableau view looks like this: OregonCOVID-19VaccineProviderEnrollment/COVID-19VaccineProviderEnrollment For example: tableau-to-sqlite tableau.db OregonCOVID-19VaccineProviderEnrollment/COVID-19VaccineProviderEnrollment You can also pass a full URL to a Tableau dashboard, for example: tableau-to-sqlite tableau.db https://results.mo.gov/t/COVID19/views/VaccinationsDashboard/Vaccinations """ for view in tableau_views: if not (view.startswith("http://") or view.startswith("https://")): url = "https://public.tableau.com/views/{}".format(view) else: url = view ts = TableauScraper() ts.loads(url) workboop = ts.getWorkbook() conn = sqlite3.connect(str(db_path)) for worksheet in workboop.worksheets: worksheet.data.to_sql(fix_name(worksheet.name), conn)
def test_Sheets(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.goToSheet", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() sheets = wb.getSheets() assert sheets == [{ "sheet": "[WORKSHEET1]", "isDashboard": False, "isVisible": True, "namesOfSubsheets": [], "windowId": "{XXXXX}" }] wbRes = wb.goToSheet("[WORKSHEET1]") assert type(wbRes) is TableauWorkbook assert len(wbRes.worksheets) == 1 assert wbRes.worksheets[0].name == "[WORKSHEET1]" # sheet not found wbRes = wb.goToSheet("XXXXXX") assert type(wbRes) is TableauWorkbook assert len(wbRes.worksheets) == 0
def _extract_data(self, shot_type: str) -> pd.DataFrame: ts = TableauScraper() ts.loads(self.url) # set type of metric and demographic (initiated is fetched by default) book = ts.getWorkbook() # get all possible paramaters params = pd.DataFrame(book.getParameters()) shot_vals = params.loc[params["column"] == "View By", "values"].iloc[0] demo_vals = params.loc[params["column"] == "Key Metrics", "values"].iloc[0] assert self.demographic_col_name in demo_vals book.setParameter("Key Metrics", self.demographic_col_name) if shot_type == "complete": print(f"set parameter to: {shot_vals[1]}") book.setParameter("View By", shot_vals[1]) parts = [] ws = ts.getWorksheet("New Map") counties = ws.getSelectableValues("county") for county in counties: print("working on county: ", county) wb = ws.select("county", county) try: df = wb.getWorksheet("New Demographics").data.assign( location_name=county) except TypeError: print(f"Could not fetch data for {county}, skipping...") df = df.rename( columns={"SUM(Chosen Dose Status Metric)-alias": shot_type}) parts.append(df) ws = ts.getWorksheet("New Map") return pd.concat(parts)
def _get_county(variable: str, county: str): engine = TableauScraper() engine.loads( url="https://www.nh.gov/t/DHHS/views/VaccineOperationalDashboard/VaccineDashboard" ) engine.getWorkbook().setParameter("ShowBy", "Race/Ethnicity") engine.getWorkbook().setParameter("Metric", variable) raw_data = [] worksheet = engine.getWorksheet("Count and Prop: Map (no R/E with town&rphn)") workbook = worksheet.select("CMN + Town + RPHN", county) raw_data.append( workbook.getWorksheet("Count: Bar Chart").data.assign(location_name=county) ) data = pd.concat(raw_data) return data
def test_TableauScraper_getWorksheets(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) dashboard = ts.getWorkbook() assert len(dashboard.worksheets) == 2 assert dashboard.worksheets[0].name == "[WORKSHEET1]" assert dashboard.worksheets[1].name == "[WORKSHEET2]"
def fetch(self): """ uses the tableauscraper module: https://github.com/bertrandmartel/tableau-scraping/blob/master/README.md """ ts = TS() ts.loads(self.fullUrl) workbook = ts.getWorkbook() workbook = workbook.setParameter("Demographic", self.demographic_col_name) return workbook.worksheets[0].data
def _get_demographic_data(self, category: str, demo_col_name: str): # open connection to dashboard ts = TS() ts.loads(self.fullUrl) counties = list( pd.read_csv( os.path.dirname(__file__) + "/../../../bootstrap_data/locations.csv").query( f"state == 55 and location != 55")["name"].replace({ "La Crosse": "La crosse", "Green Lake": "Green lake", "Fond du Lac": "Fond du lac", "St. Croix": "St croix", "Eau Claire": "Eau claire", })) # get worksheets and set filter type # returns initiated values by default -- specify if the filter needs to be changed to completed workbook = ts.getWorkbook() if category == "completed": workbook.setParameter( "Initiation or Completion", "Residents who have completed the vaccine series", ) elif category not in ["completed", "initiated"]: raise ValueError( "category expected 'completed' or 'iniated' but received neither" ) # get main sheet ws = workbook.getWorksheet("Map") dfs = [] # for each county, extract data from subsheet and rename columns, append to list of dfs for c in counties: county_ws = ws.select("County", f"{c} County") df = county_ws.getWorksheet(self.demographic_worksheet).data df = df[[ demo_col_name, "AGG(Geography TT)-alias", "SUM(Initiation or completed count for TT)-alias", ]] df = df.rename( columns={ "SUM(Initiation or completed count for TT)-alias": category }) dfs.append(df) return pd.concat(dfs)
def test_getStoryPoints(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponseWithStoryPointsNav) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() storyPointResult = wb.getStoryPoints() assert storyPointResult["storyBoard"] == "[WORKSHEET1]" assert len(storyPointResult["storyPoints"]) == 1 assert len(storyPointResult["storyPoints"][0]) == 12 # test no story point mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() storyPointResult = wb.getStoryPoints() assert len(storyPointResult["storyPoints"]) == 0
def test_levelDrill(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.levelDrill", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() wb = wb.getWorksheet("[WORKSHEET1]").levelDrill(drillDown=True) assert type(wb) is TableauWorkbook assert len(wb.worksheets) == 1 assert wb.worksheets[0].name == "[WORKSHEET1]"
def test_goToStoryPoint(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponseWithStoryPointsNav) mocker.patch("tableauscraper.api.setActiveStoryPoint", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() storyWb = wb.goToStoryPoint(storyPointId=1) assert type(storyWb) is TableauWorkbook assert len(storyWb.worksheets) == 1
def test_getCsvData(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.getCsvData", return_value=tableauDownloadableCsvData) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() data = wb.getCsvData("[WORKSHEET1]") assert data.shape[0] == 3 assert data.shape[1] == 1
def test_getDownloadableUnderlyingData(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.getDownloadableUnderlyingData", return_value=json.loads(tableauDownloadableUnderlyingData)) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() data = wb.getWorksheet("[WORKSHEET1]").getDownloadableUnderlyingData() assert data.shape[0] == 200 assert data.shape[1] == 42
def request_tableau_scraper(query): ts = TableauScraper() ts.loads(query.url) dashboard = ts.getWorkbook() dfs = [] prefixes = [] if not query.params else query.params.get('worksheet', []) if isinstance(prefixes, str): prefixes = [prefixes] for ws in dashboard.worksheets: if prefixes is None: dfs.append(ws.data) elif any([ws.name.startswith(n) for n in prefixes]): dfs.append(ws.data) return dfs
def main(): logging.basicConfig(filename='app.log', filemode='a', format='%(asctime)s - %(message)s', level=logging.INFO) url = "https://public.tableau.com/views/R2L2020/Division" ts = TS() ts.loads(url) workbook = ts.getWorkbook() logging.info("Received Virginia Data", exc_info=False) for t in workbook.worksheets: df = t.data df.to_csv("out/VA_" + datetime.now().strftime('%Y%m%d') + ".csv") logging.info("Wrote Virginia Data", exc_info=False)
def test_getDownloadableData(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.getDownloadableData", return_value=tableauVizHtmlResponse) ts = TS() ts.loads(fakeUri) dataFrameGroup = ts.getWorkbook() dataFrameGroup.getDownloadableData(sheetName="[WORKSHEET1]") assert type(dataFrameGroup) is TableauWorkbook assert "_originalData" in dataFrameGroup.__dict__ assert dataFrameGroup.__dict__["_scraper"] is ts assert not dataFrameGroup.cmdResponse assert len(dataFrameGroup.worksheets) == 2
def test_TableauWorkbook(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.select", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) dataFrameGroup = ts.getWorkbook() assert type(dataFrameGroup) is TableauWorkbook assert "_originalData" in dataFrameGroup.__dict__ assert dataFrameGroup.__dict__["_scraper"] is ts assert not dataFrameGroup.cmdResponse assert len(dataFrameGroup.worksheets) == 2 # get worksheet names (initial response) worksheetNames = dataFrameGroup.getWorksheetNames() assert type(worksheetNames) is list assert not dataFrameGroup.cmdResponse assert worksheetNames == ["[WORKSHEET1]", "[WORKSHEET2]"] # get worksheet names (vql response) dataFrameGroup = dataFrameGroup.worksheets[0].select("[FIELD1]", "2") assert dataFrameGroup.cmdResponse assert len(dataFrameGroup.worksheets) == 1 worksheetNames = dataFrameGroup.getWorksheetNames() assert type(worksheetNames) is list assert worksheetNames == ["[WORKSHEET1]"] # get worksheets (initial response) dataFrameGroup = ts.getWorkbook() dataFrameGroup = dataFrameGroup.getWorksheets() assert type(dataFrameGroup) is TableauWorkbook assert not dataFrameGroup.cmdResponse assert len(dataFrameGroup.worksheets) == 2 # get worksheets (vql response) dataFrameGroup = dataFrameGroup.worksheets[0].select("[FIELD1]", "2") dataFrameGroup = dataFrameGroup.getWorksheets() assert type(dataFrameGroup) is TableauWorkbook assert dataFrameGroup.cmdResponse assert len(dataFrameGroup.worksheets) == 1 # get single worksheet (initial response) dataFrameGroup = ts.getWorkbook() dataFrame = dataFrameGroup.getWorksheet("[WORKSHEET1]") assert type(dataFrame) is TableauWorksheet assert not dataFrameGroup.cmdResponse assert len(dataFrameGroup.worksheets) == 2 # get single worksheet (vql response) dataFrameGroup = ts.getWorkbook() dataFrame = (dataFrameGroup.worksheets[0].select( "[FIELD1]", "2").getWorksheet("[WORKSHEET1]")) assert type(dataFrame) is TableauWorksheet assert dataFrame.cmdResponse assert dataFrame.name == "[WORKSHEET1]" assert dataFrame.data.shape[0] == 4 assert dataFrame.data.shape[1] == 2 # get single worksheet (vql response) wrong sheet name dataFrameGroup = ts.getWorkbook() dataFrame = (dataFrameGroup.worksheets[0].select("[FIELD1]", "2").getWorksheet("XXXX")) assert type(dataFrame) is TableauWorksheet assert dataFrame.cmdResponse assert dataFrame.name == "XXXX" assert dataFrame.data.shape[0] == 0 assert dataFrame.data.shape[1] == 0 # get single worksheet (vql response) no data mocker.patch("tableauscraper.api.select", return_value=vqlCmdResponseEmptyValues) dataFrameGroup = ts.getWorkbook() dataFrame = (dataFrameGroup.worksheets[0].select( "[FIELD1]", "2").getWorksheet("[WORKSHEET1]")) assert type(dataFrame) is TableauWorksheet assert dataFrame.cmdResponse assert dataFrame.name == "[WORKSHEET1]" assert dataFrame.data.shape[0] == 0 assert dataFrame.data.shape[1] == 0 # get worksheet names (storypoints) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponseWithStoryPoints) ts = TS() ts.loads(fakeUri) dataFrameGroup = ts.getWorkbook() worksheetNames = dataFrameGroup.getWorksheetNames() assert type(worksheetNames) is list assert not dataFrameGroup.cmdResponse assert worksheetNames == ["[WORKSHEET1]"] # get parameters with storypoints parameters = dataFrameGroup.getParameters() assert type(parameters) is list assert parameters == [{ "column": "[INPUT_NAME1]", "values": [ "select1", "select2", "select3", ], "parameterName": "[Parameters].[Parameter 1]" }, { "column": "[INPUT_NAME2]", "values": [ "select4", "select5", "select6", ], "parameterName": "[Parameters].[Parameter 1]", }] # set parameter with story points on vql cmd response mocker.patch("tableauscraper.api.setParameterValue", return_value=storyPointsCmdResponse) wb = dataFrameGroup.setParameter("[INPUT_NAME1]", "select1") parameters = wb.getParameters() assert type(parameters) is list assert parameters == [{ "column": "[INPUT_NAME1]", "values": [ "select1", "select2", "select3", ], "parameterName": "[Parameters].[Parameter 1]" }, { "column": "[INPUT_NAME2]", "values": [ "select4", "select5", "select6", ], "parameterName": "[Parameters].[Parameter 1]", }]
# # vax_dash.worksheets[0].data.to_csv('data\\temp.csv') # col_rename = {'Region-value': 'Region', # 'County-value': 'County', # 'Measure Names-alias': 'Measure', # 'Measure Values-alias': 'value'} # county = vax_dash.worksheets[0].data[col_rename.keys()] # county = county.rename(columns=col_rename) #%% Get positives/tests pos_url = 'https://bi.wisconsin.gov/t/DHS/views/PercentPositivebyTestPersonandaComparisonandTestCapacity/PercentPositivebyTestDashboard?:embed_code_version=3&:embed=y&:loadOrderID=1&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link' # https://bi.wisconsin.gov/t/DHS/views/PercentPositivebyTestPersonandaComparisonandTestCapacity/PercentPositivebyTestDashboard?:embed_code_version=3&:embed=y&:loadOrderID=1&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link ts.loads(pos_url) pos_dash = ts.getWorkbook() pos_sheet = pos_dash.worksheets[0] # data here not in pos_sheet.data for some reason - that's all zeros - but in selectable items # Many of the columns are repeated, and appear to contain the same data but # reversed in time. The code below only keeps the second of each repeated # column, which I think is fine. data = pos_sheet.getSelectableItems() pos_dict = dict() pos_df = pd.DataFrame() for d in data[1::2]: if d['column'] != 'Measure Values': # this column is too long and also redundant pos_dict[d['column']] = d['values'] pos_df[d['column']] = d['values']
from tableauscraper import TableauScraper as TS url = "https://dataviz1.dc.gov/t/OCTO/views/Vaccine_Public/Coverage" ts = TS() ts.loads(url) wb = ts.getWorkbook() parameters = wb.getParameters() print(parameters) wb = wb.setParameter("MapChoice", "Count (#)") #wb = wb.setParameter("Special Pop", "DC residents") for ws in wb.worksheets: print(ws.data)
from tableauscraper import TableauScraper as TS url = "https://public.tableau.com/views/PlayerStats-Top5Leagues20192020/OnePlayerSummary" ts = TS() ts.loads(url) workbook = ts.getWorkbook() for t in workbook.worksheets: # show worksheet name print(f"WORKSHEET NAME : {t.name}") # show dataframe for this worksheet print(t.data)
def fetch(self): scraper_instance = TableauScraper() scraper_instance.loads(self.fetch_url) workbook = scraper_instance.getWorkbook() return workbook.getWorksheet("County Map").data
else: raise err return ts #%% vaccine allocation # url = 'https://bi.wisconsin.gov/t/DHS/views/VaccineDistribution/Allocated?:embed_code_version=3&:embed=y&:loadOrderID=0&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link' # updated url 18-May-2021: url = 'https://bi.wisconsin.gov/t/DHS/views/COVID-19VaccineAdministration/Allocated?:embed_code_version=3&:embed=y&:loadOrderID=0&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link' ts = TS() ts = loads_with_retries(ts, url, 3) # ts.loads(url) allocation_dash = ts.getWorkbook() #%% # Vaccine by county and age # url = 'https://bi.wisconsin.gov/t/DHS/views/VaccinesAdministeredtoWIResidents_16129838459350/VaccinatedWisconsin-County?:embed_code_version=3&:embed=y&:loadOrderID=1&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link' # updated url 18-May-2021: url = 'https://bi.wisconsin.gov/t/DHS/views/VaccinesAdministeredtoWIResidents_16212677845310/VaccinatedWisconsin-County?:embed_code_version=3&:embed=y&:loadOrderID=1&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link' ts = loads_with_retries(ts, url, 3) vax_dash = ts.getWorkbook() vax_complete = vax_dash.setParameter( 'Initiation or Completion', 'Total population who have completed the series') update_date = format_date(allocation_dash.worksheets[2].data.iloc[0, 2]) datafile = 'data\\vaccinations\\vax-dashboards_' + update_date + '.pkl'
t = '{:0.1f}'.format(p) + '%' else: t = '{:0.0f}'.format(p) + '%' return t #%% Get cases by age group # Pull from tableau instead of downloadable data, because the tableau plot has # the right age groups to match up with vaccinations. url = 'https://bi.wisconsin.gov/t/DHS/views/Agegroupovertime/Cases?:embed_code_version=3&:embed=y&:loadOrderID=3&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link' ts = TS() ts = loads_with_retries(ts, url, 3) age_dash = ts.getWorkbook() age_total = age_dash.worksheets[0].data col_rename = { 'New Age Groups-alias': 'Age group', 'CNTD(Incident ID)-alias': 'Cases', 'SUM(case rate by age for 100K )-alias': 'Cases per 100K', 'WEEK(Episode Date Trunc)-value': 'Week of' } age_total = age_total[col_rename.keys()] age_total = age_total.rename(columns=col_rename) age_total['Week of'] = pd.to_datetime(age_total['Week of']) # make sure certain columns are numbers
vax_race = pd.read_csv(vax_race_file, converters={'Reporting date': pd.to_datetime}) vax_race.pivot(index='Reporting date', columns='Race', values='Initiated %').plot() #%% administered by day from DHS url = 'https://bi.wisconsin.gov/t/DHS/views/COVID-19VaccineAdministration/Allocated?:embed_code_version=3&:embed=y&:loadOrderID=0&:display_spinner=no&:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link' ts = TS() # ts = loads_with_retries(ts, url, 3) ts.loads(url) allocation_dash = ts.getWorkbook() dhs = allocation_dash.getWorksheet('AdministeredByDay').data col_rename = { 'SUM(Immunization Count)-value': 'doses', # 'SUM(Immunization Count)-alias': 'doses 7-day', 'DAY(Vaccination Date)-value': 'Date' } dhs = dhs[col_rename.keys()] dhs = dhs.rename(columns=col_rename) dhs.Date = pd.to_datetime(dhs.Date) dhs['total doses'] = dhs.doses.cumsum() dhs = dhs.set_index('Date')
if len(sys.argv) > 1: currentPath = os.path.join(currentPath, sys.argv[1]) if len(sys.argv) > 2: currentFolder = os.path.join(currentPath, sys.argv[2]) else: currentFolder = os.path.join(currentPath, time.strftime("%Y-%m-%d")) url = 'https://public.tableau.com/views/COVID-19VaccineTrackerDashboard_16153822244270/DosesAdministered' ts = TS() ts.loads(url) os.makedirs(currentFolder, exist_ok=True) rootWb = ts.getWorkbook() def buildFilePath(currentFolder, name): return os.path.join(currentPath, currentFolder, name) for t in rootWb.worksheets: print(f"worksheet name : {t.name}") if not t.data.empty: t.data.to_csv(buildFilePath(currentFolder, f'{t.name}.csv'), index=False) dailyWb = rootWb.setParameter("Cumulative + Daily", "Daily trends") ws = dailyWb.getWorksheet("Daily trends (2)")