Ejemplo n.º 1
0
def parse_tableau(file_contents):
    """
    Convert a raw Tableau response blob into parsed site records.

    The blob holds *two* JSON-encoded dictionaries, each prefixed by its
    length in bytes and a semicolon. Instead of counting bytes, a regex
    captures both documents; only the second one is decoded here.
    This mirrors the approach taken by tableau-scraping:
    https://github.com/bertrandmartel/tableau-scraping/blob/9dba25af057ac29f921a75df374943060ab79b0a/tableauscraper/TableauScraper.py#L77-L84
    https://github.com/bertrandmartel/tableau-scraping/blob/9dba25af057ac29f921a75df374943060ab79b0a/tableauscraper/dashboard.py#L35

    NOTE: ``.`` is used without ``re.DOTALL``, so both JSON documents have to
    sit on a single line of ``file_contents`` for the pattern to match.

    Args:
        file_contents: Text of the Tableau response.

    Returns:
        A lazy generator yielding ``tableau_item_to_parsed_site(row)`` for
        each row of the "List" worksheet. Every row is a dict keyed by column
        name.

    Raises:
        AttributeError: If the pattern is not found (``re.search`` gives None).
    """
    blob_match = re.search(r"\d+;({.*})\d+;({.*})", file_contents,
                           re.MULTILINE)
    payload = json.loads(blob_match.group(2))
    pres_model_map = payload["secondaryInfo"]["presModelMap"]
    holder = pres_model_map["dataDictionary"]["presModelHolder"]
    segments = holder["genDataDictionaryPresModel"]["dataSegments"]

    everything = utils.getDataFull(pres_model_map, segments)
    column_indices = utils.getIndicesInfo(pres_model_map, "List")
    columns = utils.getData(everything, column_indices)

    # Pivot columns into rows without pulling in pandas, i.e.
    # {'a': [a1, a2], 'b': [b1, b2]} --> {'a': a1, 'b': b1}, {'a': a2, 'b': b2}
    column_names = columns.keys()
    rows = (dict(zip(column_names, cells)) for cells in zip(*columns.values()))
    return (tableau_item_to_parsed_site(row) for row in rows)
Ejemplo n.º 2
0
def parse_tableau(file_contents):
    """
    Convert a raw Tableau response blob into a list of parsed site records.

    The blob holds *two* JSON-encoded dictionaries, each prefixed by its
    length in bytes and a semicolon. Instead of counting bytes, a regex
    captures both documents; only the second one is decoded here.
    This mirrors the approach taken by tableau-scraping:
    https://github.com/bertrandmartel/tableau-scraping/blob/9dba25af057ac29f921a75df374943060ab79b0a/tableauscraper/TableauScraper.py#L77-L84
    https://github.com/bertrandmartel/tableau-scraping/blob/9dba25af057ac29f921a75df374943060ab79b0a/tableauscraper/dashboard.py#L35

    Rows of the "Vaccination Sites" worksheet come in adjacent pairs (some
    carry the map, some the website). Each pair is handed to
    ``tableau_item_to_parsed_site`` as a ``(main, extra)`` tuple: ``main`` is
    the complete first row, ``extra`` keeps only "Dimension-value" and
    "Value-alias" from the second row.

    Args:
        file_contents: Text of the Tableau response.

    Returns:
        A list with one ``tableau_item_to_parsed_site`` result per row pair.

    Raises:
        AttributeError: If the pattern is not found (``re.search`` gives None).
        IndexError: If the number of rows is odd, since the last row then has
            no partner.
    """
    blob_match = re.search(r"\d+;({.*})\d+;({.*})", file_contents,
                           re.MULTILINE)
    payload = json.loads(blob_match.group(2))
    pres_model_map = payload["secondaryInfo"]["presModelMap"]
    holder = pres_model_map["dataDictionary"]["presModelHolder"]
    segments = holder["genDataDictionaryPresModel"]["dataSegments"]

    everything = utils.getDataFull(pres_model_map, segments)
    column_indices = utils.getIndicesInfo(pres_model_map, "Vaccination Sites")
    columns = utils.getData(everything, column_indices)
    row_count = len(columns["Site-value"])

    # Build every (main, extra) pair first; conversion happens only once all
    # pairs exist, so an indexing failure surfaces before any conversion runs.
    row_pairs = [
        (
            {name: cells[first] for name, cells in columns.items()},
            {
                "Dimension-value": columns["Dimension-value"][first + 1],
                "Value-alias": columns["Value-alias"][first + 1],
            },
        )
        for first in range(0, row_count, 2)
    ]
    return list(map(tableau_item_to_parsed_site, row_pairs))
Ejemplo n.º 3
0
def parse_tableau(file_contents):
    """
    Convert a raw Tableau response blob into parsed site records.

    The blob holds *two* JSON-encoded dictionaries, each prefixed by its
    length in bytes and a semicolon. Instead of counting bytes, a regex
    captures both documents; only the second one is decoded here.
    This mirrors the approach taken by tableau-scraping:
    https://github.com/bertrandmartel/tableau-scraping/blob/9dba25af057ac29f921a75df374943060ab79b0a/tableauscraper/TableauScraper.py#L77-L84
    https://github.com/bertrandmartel/tableau-scraping/blob/9dba25af057ac29f921a75df374943060ab79b0a/tableauscraper/dashboard.py#L35

    Args:
        file_contents: Text of the Tableau response.

    Returns:
        A lazy generator yielding ``tableau_item_to_parsed_site(pair)`` where
        ``pair`` is a 2-tuple of consecutive row dicts from the
        "Vaccination Sites" worksheet, after rows whose "Site-value" is
        "%null%" have been dropped. A trailing unpaired row is silently
        discarded by ``zip``.

    Raises:
        AttributeError: If the pattern is not found (``re.search`` gives None).
    """
    blob_match = re.search(r"\d+;({.*})\d+;({.*})", file_contents,
                           re.MULTILINE)
    payload = json.loads(blob_match.group(2))
    pres_model_map = payload["secondaryInfo"]["presModelMap"]
    holder = pres_model_map["dataDictionary"]["presModelHolder"]
    segments = holder["genDataDictionaryPresModel"]["dataSegments"]

    everything = utils.getDataFull(pres_model_map, segments)
    column_indices = utils.getIndicesInfo(pres_model_map, "Vaccination Sites")
    columns = utils.getData(everything, column_indices)

    # Pivot columns into rows without pulling in pandas, i.e.
    # {'a': [a1, a2], 'b': [b1, b2]} --> {'a': a1, 'b': b1}, {'a': a2, 'b': b2}
    column_names = columns.keys()
    rows = (dict(zip(column_names, cells)) for cells in zip(*columns.values()))

    # The data contains at least one bad value; drop it. See
    # https://github.com/CAVaccineInventory/vaccine-feed-ingest/issues/621
    usable_rows = (row for row in rows if row["Site-value"] != "%null%")

    # Adjacent rows describe the same site (some have map, some have website).
    # Zipping the *same* iterator with itself consumes two rows per step, which
    # yields (row0, row1), (row2, row3), ...
    row_pairs = zip(usable_rows, usable_rows)
    return (tableau_item_to_parsed_site(pair) for pair in row_pairs)
Ejemplo n.º 4
0
def test_getData():
    """Check that utils.getData yields the two expected columns for [WORKSHEET1].

    The fixture ``data`` is expected to produce exactly one "-value" column
    for [FIELD1] and one "-alias" column for [FIELD2], four entries each.
    """
    pres_model = utils.getPresModelVizData(data)
    full_data = utils.getDataFull(pres_model, {})
    column_indices = utils.getIndicesInfo(pres_model, "[WORKSHEET1]")
    frame = utils.getData(full_data, column_indices)

    expected_columns = {
        "[FIELD1]-value": ["2", "3", "4", "5"],
        "[FIELD2]-alias": ["6", "7", "8", "9"],
    }

    assert len(frame.keys()) == 2
    for column_name, expected_values in expected_columns.items():
        assert column_name in frame
        assert len(frame[column_name]) == 4
        assert frame[column_name] == expected_values
Ejemplo n.º 5
0
def test_getIndicesInfo():
    """Check the structure and contents of utils.getIndicesInfo for [WORKSHEET1].

    With default arguments two column descriptors are expected; with
    ``noSelectFilter=False`` only one is.
    """
    pres_model = utils.getPresModelVizData(data)
    descriptors = utils.getIndicesInfo(pres_model, "[WORKSHEET1]")
    assert len(descriptors) == 2

    first, second = descriptors[0], descriptors[1]

    # Every descriptor key the rest of the library relies on must be present.
    required_keys = (
        "fieldCaption",
        "valueIndices",
        "aliasIndices",
        "dataType",
        "paneIndices",
        "columnIndices",
    )
    for key in required_keys:
        assert key in first

    # [FIELD1] is backed by value indices only, [FIELD2] by alias indices only.
    expected_lengths = (
        (first, "valueIndices", 4),
        (first, "aliasIndices", 0),
        (second, "valueIndices", 0),
        (second, "aliasIndices", 4),
    )
    for descriptor, key, length in expected_lengths:
        assert len(descriptor[key]) == length

    assert first["fieldCaption"] == "[FIELD1]"
    assert second["fieldCaption"] == "[FIELD2]"

    # check noSelectFilter parameter
    filtered_descriptors = utils.getIndicesInfo(
        pres_model, "[WORKSHEET1]", noSelectFilter=False)
    assert len(filtered_descriptors) == 1
Ejemplo n.º 6
0
def getWorksheet(TS, data, info, worksheet) -> TableauWorksheet:
    """Build a TableauWorksheet holding the data of one named worksheet.

    Args:
        TS: Scraper instance, stored on the result as ``scraper``.
        data: Decoded data payload; must contain
            ``data["secondaryInfo"]["presModelMap"]``.
        info: Decoded info payload, stored on the result unchanged.
        worksheet: Name of the worksheet to extract.

    Returns:
        A TableauWorksheet whose ``dataFrame`` has one column per extracted
        field, with missing cells replaced by 0.
    """
    pres_model_map = data["secondaryInfo"]["presModelMap"]

    column_indices = utils.getIndicesInfo(pres_model_map, worksheet)
    full_data = utils.getDataFull(pres_model_map)
    columns = utils.getData(full_data, column_indices)

    # Load each field as a row first, zero-fill the gaps, then transpose so
    # that fields end up as columns.
    fields_as_rows = pd.DataFrame.from_dict(columns, orient="index")
    frame = fields_as_rows.fillna(0).T

    return TableauWorksheet(
        scraper=TS,
        originalData=data,
        originalInfo=info,
        worksheetName=worksheet,
        dataFrame=frame,
    )
Ejemplo n.º 7
0
def get(TS, data, info, logger):
    """Interactively select a value of a worksheet field and send the selection.

    The user is prompted on stdin twice: first for the index of a field of
    the chosen worksheet, then for the index of one of that field's values.
    The candidates are listed through ``logger.info`` before each prompt.

    Args:
        TS: Scraper instance passed on to the API and response helpers.
        data: Decoded data payload of the dashboard.
        info: Decoded info payload of the dashboard.
        logger: Logger used to print the available choices.

    Returns:
        An empty TableauDashboard when no worksheet is selected, otherwise
        the result of ``dashboard.getCmdResponse`` for the select command.

    Raises:
        Exception: If either prompt is answered with an empty string, or if
            no data could be extracted for the selected field.
        ValueError: If an answer is not an integer.
        IndexError: If an answer is outside the listed range.
    """
    worksheets = utils.selectWorksheet(data, logger, single=True)
    if len(worksheets) == 0:
        return TableauDashboard(
            scraper=TS,
            originalData=data,
            originalInfo=info,
            data=[],
        )
    worksheet_name = worksheets[0]

    pres_model = utils.getPresModelVizData(data)
    fields = utils.getIndicesInfo(
        pres_model, worksheet_name, noSelectFilter=False)

    # Step 1: let the user choose a field.
    for position, candidate in enumerate(fields):
        logger.info(f"[{position}] {candidate['fieldCaption']}")

    field_answer = input("select field by index : ")
    if field_answer is None or field_answer == "":
        raise Exception("you must select at least one field")
    field = fields[int(field_answer)]
    logger.info(f"you have selected {field['fieldCaption']}")

    full_data = utils.getDataFull(pres_model)
    columns = utils.getData(full_data, [field])
    column_names = list(columns.keys())
    if len(column_names) == 0:
        raise Exception("no data extracted")

    # Step 2: let the user choose one value of the first extracted column.
    values = columns[column_names[0]]
    for position, candidate in enumerate(values):
        logger.info(f"[{position}] {candidate}")

    value_answer = input("select value by index : ")
    if value_answer is None or value_answer == "":
        raise Exception("you must select at least one value")
    value = values[int(value_answer)]
    logger.info(f"you have selected {value}")

    # The selection sent to the API is the chosen index shifted by one
    # (presumably because Tableau's ids are 1-based; confirm against the API).
    response = api.select(TS, worksheet_name, [int(value_answer) + 1])
    return dashboard.getCmdResponse(TS, response, logger)
Ejemplo n.º 8
0
def getWorksheet(TS, data, info, worksheet) -> TableauWorksheet:
    """Build a TableauWorksheet holding the data of one named worksheet.

    The presentation model is normally taken from ``data``. When that is not
    available, the one from ``info`` is used with the story-point index
    lookup; if that model has no "dataDictionary", the data dictionary is
    read through ``utils.getPresModelVizDataWithoutViz(data)`` instead.

    Args:
        TS: Scraper instance; its ``dataSegments`` are passed to
            ``utils.getDataFull``.
        data: Decoded data payload of the dashboard.
        info: Decoded info payload of the dashboard.
        worksheet: Name of the worksheet to extract.

    Returns:
        A TableauWorksheet whose ``dataFrame`` has one column per extracted
        field, with missing cells replaced by 0.
    """
    pres_model_map = utils.getPresModelVizData(data)

    if pres_model_map is not None:
        column_indices = utils.getIndicesInfo(pres_model_map, worksheet)
    else:
        pres_model_map = utils.getPresModelVizInfo(info)
        column_indices = utils.getIndicesInfoStoryPoint(
            pres_model_map, worksheet)

        # The indices come from the info model, but the values may have to be
        # looked up in the data payload.
        if "dataDictionary" not in pres_model_map:
            pres_model_map = utils.getPresModelVizDataWithoutViz(data)

    full_data = utils.getDataFull(pres_model_map, TS.dataSegments)
    columns = utils.getData(full_data, column_indices)

    # Load each field as a row first, zero-fill the gaps, then transpose so
    # that fields end up as columns.
    fields_as_rows = pd.DataFrame.from_dict(columns, orient="index")
    frame = fields_as_rows.fillna(0).T

    return TableauWorksheet(
        scraper=TS,
        originalData=data,
        originalInfo=info,
        worksheetName=worksheet,
        dataFull=full_data,
        dataFrame=frame,
    )