def _extract_data(self, shot_type: str) -> pd.DataFrame: ts = TableauScraper() ts.loads(self.url) # set type of metric and demographic (initiated is fetched by default) book = ts.getWorkbook() # get all possible paramaters params = pd.DataFrame(book.getParameters()) shot_vals = params.loc[params["column"] == "View By", "values"].iloc[0] demo_vals = params.loc[params["column"] == "Key Metrics", "values"].iloc[0] assert self.demographic_col_name in demo_vals book.setParameter("Key Metrics", self.demographic_col_name) if shot_type == "complete": print(f"set parameter to: {shot_vals[1]}") book.setParameter("View By", shot_vals[1]) parts = [] ws = ts.getWorksheet("New Map") counties = ws.getSelectableValues("county") for county in counties: print("working on county: ", county) wb = ws.select("county", county) try: df = wb.getWorksheet("New Demographics").data.assign( location_name=county) except TypeError: print(f"Could not fetch data for {county}, skipping...") df = df.rename( columns={"SUM(Chosen Dose Status Metric)-alias": shot_type}) parts.append(df) ws = ts.getWorksheet("New Map") return pd.concat(parts)
def test_TableauWorkbook_setFilter(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.filter", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) tableauDataFrameGroup = dashboard.getWorksheets(ts, data, info) assert type(tableauDataFrameGroup) is TableauWorkbook ws = tableauDataFrameGroup.getWorksheet("[WORKSHEET1]") tableauDataFrameGroup = ws.setFilter("FILTER_1", "FITLTER_VALUE_1") assert type(tableauDataFrameGroup) is TableauWorkbook assert len(tableauDataFrameGroup.worksheets) == 1 assert tableauDataFrameGroup.worksheets[0].name == "[WORKSHEET1]" assert tableauDataFrameGroup.worksheets[0].data.shape[0] == 4 assert tableauDataFrameGroup.worksheets[0].data.shape[1] == 2 assert list(tableauDataFrameGroup.worksheets[0].data.columns.values) == [ "[FIELD1]-value", "[FIELD2]-alias", ] # column not found tableauDataFrameGroup = ws.setFilter("UNKNOWN", "FITLTER_VALUE_1") assert type(tableauDataFrameGroup) is TableauWorkbook assert len(tableauDataFrameGroup.worksheets) == 0 # incorrect value tableauDataFrameGroup = ws.setFilter("FILTER_1", "FITLTER_VALUE_X") assert type(tableauDataFrameGroup) is TableauWorkbook assert len(tableauDataFrameGroup.worksheets) == 0
def cli(db_path, tableau_views): """Fetch data from Tableau into a SQLite database Pass this command a SQLite databsae file and one or more Tableau views, where a Tableau view looks like this: OregonCOVID-19VaccineProviderEnrollment/COVID-19VaccineProviderEnrollment For example: tableau-to-sqlite tableau.db OregonCOVID-19VaccineProviderEnrollment/COVID-19VaccineProviderEnrollment You can also pass a full URL to a Tableau dashboard, for example: tableau-to-sqlite tableau.db https://results.mo.gov/t/COVID19/views/VaccinationsDashboard/Vaccinations """ for view in tableau_views: if not (view.startswith("http://") or view.startswith("https://")): url = "https://public.tableau.com/views/{}".format(view) else: url = view ts = TableauScraper() ts.loads(url) workboop = ts.getWorkbook() conn = sqlite3.connect(str(db_path)) for worksheet in workboop.worksheets: worksheet.data.to_sql(fix_name(worksheet.name), conn)
def test_Sheets(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.goToSheet", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() sheets = wb.getSheets() assert sheets == [{ "sheet": "[WORKSHEET1]", "isDashboard": False, "isVisible": True, "namesOfSubsheets": [], "windowId": "{XXXXX}" }] wbRes = wb.goToSheet("[WORKSHEET1]") assert type(wbRes) is TableauWorkbook assert len(wbRes.worksheets) == 1 assert wbRes.worksheets[0].name == "[WORKSHEET1]" # sheet not found wbRes = wb.goToSheet("XXXXXX") assert type(wbRes) is TableauWorkbook assert len(wbRes.worksheets) == 0
def test_TableauWorkbook_getFilters(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.select", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) tableauDataFrameGroup = dashboard.getWorksheets(ts, data, info) assert type(tableauDataFrameGroup) is TableauWorkbook assert not tableauDataFrameGroup.cmdResponse ws = tableauDataFrameGroup.getWorksheet("[WORKSHEET1]") filters = ws.getFilters() assert type(filters) is list assert filters == [ { "column": "FILTER_1", "ordinal": 0, "values": ["FITLTER_VALUE_1", "FITLTER_VALUE_2", "FITLTER_VALUE_3"], "globalFieldName": "[FILTER].[FILTER_1]" }, ]
def test_TableauDashboard_getDropdownValues(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.select", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) tableauDataFrameGroup = dashboard.getWorksheets(ts, data, info) assert type(tableauDataFrameGroup) is TableauDashboard assert not tableauDataFrameGroup.cmdResponse dropDownValues = tableauDataFrameGroup.getDropdownValues("[INPUT_NAME1]") assert type(dropDownValues) is list assert dropDownValues == ["select1", "select2", "select3"] # missing parameter name dropDownValues = tableauDataFrameGroup.getDropdownValues("XXXXX") assert type(dropDownValues) is list assert dropDownValues == [] # in vql cmd response tableauDataFrameGroup = tableauDataFrameGroup.getWorksheet( "[WORKSHEET1]").select("[FIELD1]", "2") assert type(tableauDataFrameGroup) is TableauDashboard assert tableauDataFrameGroup.cmdResponse dropDownValues = tableauDataFrameGroup.getDropdownValues("[INPUT_NAME1]") assert type(dropDownValues) is list assert dropDownValues == ["select1", "select2", "select3"] # vql cmd response missing parameter name dropDownValues = tableauDataFrameGroup.getDropdownValues("XXXXX") assert type(dropDownValues) is list assert dropDownValues == []
def test_TableauWorkbook_getParameters(mocker: MockerFixture) -> None: mocker.patch( "tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse ) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.select", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) tableauDataFrameGroup = dashboard.getWorksheets(ts, data, info) assert type(tableauDataFrameGroup) is TableauWorkbook assert not tableauDataFrameGroup.cmdResponse parameters = tableauDataFrameGroup.getParameters() assert type(parameters) is list assert parameters == [{ "column": "[INPUT_NAME1]", "values": [ "select1", "select2", "select3", ], "parameterName": "[Parameters].[Parameter 1]" }, { "column": "[INPUT_NAME2]", "values": [ "select4", "select5", "select6", ], "parameterName": "[Parameters].[Parameter 1]", }] # in vql cmd response tableauDataFrameGroup = tableauDataFrameGroup.getWorksheet("[WORKSHEET1]").select( "[FIELD1]", "2" ) assert type(tableauDataFrameGroup) is TableauWorkbook assert tableauDataFrameGroup.cmdResponse parameters = tableauDataFrameGroup.getParameters() assert type(parameters) is list assert parameters == [{ "column": "[INPUT_NAME1]", "values": [ "select1", "select2", "select3", ], "parameterName": "[Parameters].[Parameter 1]" }, { "column": "[INPUT_NAME2]", "values": [ "select4", "select5", "select6", ], "parameterName": "[Parameters].[Parameter 1]", }]
def test_TableauScraper_loadsWithPlaceholderEmpty( mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauPlaceHolderDataEmpty) ts = TS() ts.loads(fakeUri) assert ts.__dict__["tableauData"] == {} assert ts.__dict__["data"] == {} assert ts.__dict__["info"] == {}
def test_TableauWorkbook_getSelectableItems(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.select", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) tableauDataFrameGroup = dashboard.getWorksheets(ts, data, info) assert type(tableauDataFrameGroup) is TableauWorkbook assert not tableauDataFrameGroup.cmdResponse ws = tableauDataFrameGroup.getWorksheet("[WORKSHEET1]") selection = ws.getSelectableItems() assert type(selection) is list assert selection == [{ "column": "[FIELD1]", "values": ["2", "3", "4", "5"] }, { "column": "[FIELD2]", "values": ["6", "7", "8", "9"] }] # in vql cmd response tableauDataFrameGroup = ws.select("[FIELD1]", "2") assert type(tableauDataFrameGroup) is TableauWorkbook assert tableauDataFrameGroup.cmdResponse ws = tableauDataFrameGroup.getWorksheet("[WORKSHEET1]") selection = ws.getSelectableItems() assert type(selection) is list assert selection == [{ "column": "[FIELD1]", "values": ["2", "3", "4", "5"] }, { "column": "[FIELD2]", "values": ["6", "7", "8", "9"] }] # story point tableauDataFrameGroup = dashboard.getWorksheets( ts, dataWithoutPresModelWithDictionary, storyPointsInfo) assert type(tableauDataFrameGroup) is TableauWorkbook assert not tableauDataFrameGroup.cmdResponse ws = tableauDataFrameGroup.getWorksheet("[WORKSHEET1]") selection = ws.getSelectableItems() assert type(selection) is list assert selection == [{ "column": "[FIELD1]", "values": ["2", "3", "4", "5"] }, { "column": "[FIELD2]", "values": ["6", "7", "8", "9"] }]
def test_setActiveStoryPoint(httpserver, mocker: MockerFixture): mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) httpserver.serve_content(json.dumps(vqlCmdResponse)) ts.host = httpserver.url + "/" result = api.setActiveStoryPoint(scraper=ts, storyBoard="", storyPointId=1) assert result == vqlCmdResponse
def main(): logging.basicConfig(filename='app.log', filemode='a', format='%(asctime)s - %(message)s', level=logging.INFO) url="https://public.tableau.com/views/COVIDPlanningTool-Embed700px/SimpleDashboard?:embed=y&:showVizHome=no&:host_url=https%3A%2F%2Fpublic.tableau.com%2F&:embed_code_version=3&:tabs=no&:toolbar=yes&:animate_transition=yes&:display_static_image=no&:display_spinner=no&:display_overlay=yes&:display_count=yes&:language=en&publish=yes&:loadOrderID=0" ts = TS() ts.loads(url) logging.info("Received Hawaii Data", exc_info=False); ws = ts.getWorksheet("CDC Map") ws.data.to_csv("HI_" + datetime.now().strftime('%Y%m%d') + ".csv") logging.info("Wrote Hawaii Data", exc_info=False);
def fetch(self): """ uses the tableauscraper module: https://github.com/bertrandmartel/tableau-scraping/blob/master/README.md """ ts = TS() ts.loads(self.fullUrl) workbook = ts.getWorkbook() workbook = workbook.setParameter("Demographic", self.demographic_col_name) return workbook.worksheets[0].data
def test_gotosheet(httpserver, mocker: MockerFixture): mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) httpserver.serve_content(json.dumps(vqlCmdResponse)) ts.host = httpserver.url + "/" result = api.goToSheet(scraper=ts, windowId="") assert result == vqlCmdResponse
def test_getCsvData(httpserver, mocker: MockerFixture): mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) httpserver.serve_content(tableauDownloadableCsvData) ts.host = httpserver.url + "/" result = api.getCsvData(scraper=ts, viewId="") assert result == tableauDownloadableCsvData
def test_TableauScraper_getWorksheets(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) dashboard = ts.getDashboard() assert len(dashboard.worksheets) == 2 assert dashboard.worksheets[0].name == "[WORKSHEET1]" assert dashboard.worksheets[1].name == "[WORKSHEET2]"
def main(): logging.basicConfig(filename='app.log', filemode='a', format='%(asctime)s - %(message)s', level=logging.INFO) url="https://analytics.la.gov/t/DOE/views/DigitalLearningModel/SchoolLevelModeofLearning/[email protected]/29ea44c5-4c98-44e0-87dc-2c2b907e7ac3?%3Adisplay_count=n&%3AshowVizHome=n&%3Aorigin=viz_share_link&%3AisGuestRedirectFromVizportal=y&%3Aembed=y" ts = TS() ts.loads(url) logging.info("Received LA Data", exc_info=False); ws = ts.getWorksheet("Site Map (2)") ws.data.to_csv("out/LA_" + datetime.now().strftime('%Y%m%d') + ".csv") logging.info("Wrote LA Data", exc_info=False);
def test_setParameterValue(httpserver, mocker: MockerFixture): mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) httpserver.serve_content(json.dumps(vqlCmdResponse)) ts.host = httpserver.url + "/" result = api.setParameterValue(scraper=ts, parameterName="", value="test") assert result == vqlCmdResponse
def test_select(httpserver, mocker: MockerFixture): mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) httpserver.serve_content(json.dumps(vqlCmdResponse)) ts.host = httpserver.url + "/" result = api.select(scraper=ts, worksheetName="", selection=[1]) assert result == vqlCmdResponse
def _get_demographic_data(self, category: str, demo_col_name: str): # open connection to dashboard ts = TS() ts.loads(self.fullUrl) counties = list( pd.read_csv( os.path.dirname(__file__) + "/../../../bootstrap_data/locations.csv").query( f"state == 55 and location != 55")["name"].replace({ "La Crosse": "La crosse", "Green Lake": "Green lake", "Fond du Lac": "Fond du lac", "St. Croix": "St croix", "Eau Claire": "Eau claire", })) # get worksheets and set filter type # returns initiated values by default -- specify if the filter needs to be changed to completed workbook = ts.getWorkbook() if category == "completed": workbook.setParameter( "Initiation or Completion", "Residents who have completed the vaccine series", ) elif category not in ["completed", "initiated"]: raise ValueError( "category expected 'completed' or 'iniated' but received neither" ) # get main sheet ws = workbook.getWorksheet("Map") dfs = [] # for each county, extract data from subsheet and rename columns, append to list of dfs for c in counties: county_ws = ws.select("County", f"{c} County") df = county_ws.getWorksheet(self.demographic_worksheet).data df = df[[ demo_col_name, "AGG(Geography TT)-alias", "SUM(Initiation or completed count for TT)-alias", ]] df = df.rename( columns={ "SUM(Initiation or completed count for TT)-alias": category }) dfs.append(df) return pd.concat(dfs)
def request_tableau_scraper(query): ts = TableauScraper() ts.loads(query.url) dashboard = ts.getDashboard() dfs = [] prefixes = [] if not query.params else query.params.get('worksheet', []) for ws in dashboard.worksheets: if prefixes is None: dfs.append(ws.data) elif any([ws.name.startswith(n) for n in prefixes]): dfs.append(ws.data) return dfs
def test_TableauScraper_loads(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) assert "vizql_root" in ts.__dict__["tableauData"] assert "sessionid" in ts.__dict__["tableauData"] assert "sheetId" in ts.__dict__["tableauData"] assert ts.__dict__["data"] == data assert ts.__dict__["info"] == info
def test_TableauScraper_getWorksheet(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) tableauDataFrame = ts.getWorksheet("[WORKSHEET1]") assert type(tableauDataFrame) is TableauWorksheet assert tableauDataFrame.name == "[WORKSHEET1]" assert tableauDataFrame.data.shape[0] == 4 assert tableauDataFrame.data.shape[1] == 2
def _parse_data(self, url: str) -> pd.DataFrame: """Parse data from url""" t_scraper = TableauScraper() t_scraper.loads(url) # Get the metrics count = self._parse_metrics(t_scraper) # Get the date date = self._parse_date(t_scraper) df = pd.DataFrame({ "Date": [date], "Cumulative total": [count], }) return df
def test_levelDrill(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.levelDrill", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() wb = wb.getWorksheet("[WORKSHEET1]").levelDrill(drillDown=True) assert type(wb) is TableauWorkbook assert len(wb.worksheets) == 1 assert wb.worksheets[0].name == "[WORKSHEET1]"
def test_goToStoryPoint(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponseWithStoryPointsNav) mocker.patch("tableauscraper.api.setActiveStoryPoint", return_value=vqlCmdResponse) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() storyWb = wb.goToStoryPoint(storyPointId=1) assert type(storyWb) is TableauWorkbook assert len(storyWb.worksheets) == 1
def test_getCsvData(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.getCsvData", return_value=tableauDownloadableCsvData) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() data = wb.getCsvData("[WORKSHEET1]") assert data.shape[0] == 3 assert data.shape[1] == 1
def test_getDownloadableUnderlyingData(mocker: MockerFixture) -> None: mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) mocker.patch("tableauscraper.api.getDownloadableUnderlyingData", return_value=json.loads(tableauDownloadableUnderlyingData)) ts = TS() ts.loads(fakeUri) wb = ts.getWorkbook() data = wb.getWorksheet("[WORKSHEET1]").getDownloadableUnderlyingData() assert data.shape[0] == 200 assert data.shape[1] == 42
def main(): logging.basicConfig(filename='app.log', filemode='a', format='%(asctime)s - %(message)s', level=logging.INFO) url = "https://results.mo.gov/t/DESE/views/LearningMethods-Opening-Enrollment-Public/LearningMethods?:showAppBanner=false&:display_count=n&:showVizHome=n&:origin=viz_share_link%22%20title%3D%22open%20Enrollment%20Public&:isGuestRedirectFromVizportal=y&:embed=y&:toolbar=no" ts = TS() ts.loads(url) logging.info("Received MO Data", exc_info=False) ws = ts.getWorksheet("MOmap-AllCategory") ws.data.to_csv("out/MO_" + datetime.now().strftime('%Y%m%d') + ".csv") logging.info("Wrote MO Data", exc_info=False)
def test_getDownloadableUnderlyingData(httpserver, mocker: MockerFixture): mocker.patch("tableauscraper.api.getTableauViz", return_value=tableauVizHtmlResponse) mocker.patch("tableauscraper.api.getTableauData", return_value=tableauDataResponse) ts = TS() ts.loads(fakeUri) httpserver.serve_content(json.dumps(tableauDownloadableUnderlyingData)) ts.host = httpserver.url + "/" ts.tableauData = {"vizql_root": "", "sessionid": "", "sheetId": ""} result = api.getDownloadableUnderlyingData(scraper=ts, worksheetName="", dashboardName="", numRows=200) assert result == tableauDownloadableUnderlyingData
def main(): logging.basicConfig(filename='app.log', filemode='a', format='%(asctime)s - %(message)s', level=logging.INFO) url = "https://public.tableau.com/views/R2L2020/Division" ts = TS() ts.loads(url) workbook = ts.getWorkbook() logging.info("Received Virginia Data", exc_info=False) for t in workbook.worksheets: df = t.data df.to_csv("out/VA_" + datetime.now().strftime('%Y%m%d') + ".csv") logging.info("Wrote Virginia Data", exc_info=False)