Ejemplo n.º 1
0
def test_vis_collection_set_intent():
    """Re-setting the intent of a VisList must update the filter value of every vis."""
    olympic_url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
    df = pd.read_csv(olympic_url)
    vlist = VisList(["Height", "SportType=Ice", "?"], df)
    vlist.set_intent(["Height", "SportType=Boat", "?"])
    for vis in vlist._collection:
        # The clause with an empty channel is the one this test treats as the filter.
        channelless = [clause for clause in vis._inferred_intent if clause.channel == ""]
        filter_vspec = channelless[0]
        assert filter_vspec.value == "Boat"
Ejemplo n.º 2
0
    def exported(self) -> Union[Dict[str, VisList], VisList]:
        """
        Get selected visualizations as exported Vis List

        Notes
        -----
        Convert the _exportedVisIdxs dictionary into a programmable VisList
        Example _exportedVisIdxs :
            {'Correlation': [0, 2], 'Occurrence': [1]}
        indicating the 0th and 2nd vis from the `Correlation` tab is selected,
        and the 1st vis from the `Occurrence` tab is selected.

        Returns
        -------
        Union[Dict[str,VisList], VisList]
            When there is no widget or no exported vis, warn and return an empty list -> []
            When only the "currentVis" entry is exported, return `self.current_vis`.
            When all the exported vis is from the same tab, return a VisList of
            selected visualizations. -> VisList(v1, v2...)
            When the exported vis is from the different tabs, return a dictionary
            with the action name as key and selected visualizations in the VisList.
            -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..}
            In the dictionary form the "currentVis" entry is stored under "Current Vis".
        """
        help_line = "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips"
        # warnings.warn is called directly here (not through a helper) so that
        # stacklevel=2 keeps pointing at the caller of `exported`.
        if not hasattr(self, "_widget"):
            warnings.warn(
                "\nNo widget attached to the dataframe. "
                "Please assign dataframe to an output variable.\n" + help_line,
                stacklevel=2)
            return []
        exported_vis_lst = self._widget._exportedVisIdxs
        if not exported_vis_lst:
            warnings.warn(
                "\nNo visualization selected to export.\n" + help_line,
                stacklevel=2)
            return []

        def selected_from(action):
            # Pick the exported indices out of the recommendation tab `action`.
            tab = self.recommendation[action]
            return VisList([tab[idx] for idx in exported_vis_lst[action]])

        if len(exported_vis_lst) == 1:
            if "currentVis" in exported_vis_lst:
                return self.current_vis
            export_action = list(exported_vis_lst.keys())[0]
            return selected_from(export_action)

        # Several entries: return one VisList per tab, keyed by action name.
        exported_vis = {}
        if "currentVis" in exported_vis_lst:
            exported_vis["Current Vis"] = self.current_vis
        for export_action in exported_vis_lst:
            if export_action != "currentVis":
                exported_vis[export_action] = selected_from(export_action)
        return exported_vis
Ejemplo n.º 3
0
def test_specified_vis_collection(global_var):
    """Fully specified intents on the SQL `cars` table expand to the expected vis count.

    The filter clause lists two origin values, so the test expects one vis per
    value for a single measure (2) and one per measure/value pair for two
    measures (4), with no chart for the unlisted "Europe" value.
    """
    sql_df = lux.LuxSQLTable(table_name="cars")

    vlst = VisList(
        [
            lux.Clause(attribute="horsepower"),
            lux.Clause(attribute="brand"),
            lux.Clause(attribute="origin", value=["Japan", "USA"]),
        ],
        sql_df,
    )
    assert len(vlst) == 2

    vlst = VisList(
        [
            lux.Clause(attribute=["horsepower", "weight"]),
            lux.Clause(attribute="brand"),
            lux.Clause(attribute="origin", value=["Japan", "USA"]),
        ],
        sql_df,
    )
    assert len(vlst) == 4

    # test if z axis has been filtered correctly
    chart_titles = [vis.title for vis in vlst]
    # Each title is checked separately: `"a" and "b" in xs` only tests "b".
    assert "origin = USA" in chart_titles
    assert "origin = Japan" in chart_titles
    assert "origin = Europe" not in chart_titles
Ejemplo n.º 4
0
def test_parse(global_var):
    """Check that a wildcard filter clause parses into three visualizations.

    The same intent is exercised on the Pandas executor (shared fixture and a
    freshly loaded CSV) and on a SQL table, with the measure clause given both
    through the ``attribute=`` keyword and as a positional string.
    """
    lux.config.set_executor_type("Pandas")

    # Shared fixture, measure passed by keyword.
    car_df = pytest.car_df
    intent = [lux.Clause("Origin=?"), lux.Clause(attribute="MilesPerGal")]
    vlst = VisList(intent, car_df)
    assert len(vlst) == 3

    # Shared fixture, measure passed positionally.
    car_df = pytest.car_df
    intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    vlst = VisList(intent, car_df)
    assert len(vlst) == 3

    # Dataframe read straight from the bundled CSV.
    csv_df = pd.read_csv("lux/data/car.csv")
    intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    vlst = VisList(intent, csv_df)
    assert len(vlst) == 3

    # SQL table, measure passed by keyword.
    dsn = "host=localhost dbname=postgres user=postgres password=lux"
    connection = psycopg2.connect(dsn)
    lux.config.set_SQL_connection(connection)
    sql_df = lux.LuxSQLTable(table_name="cars")
    intent = [lux.Clause("origin=?"), lux.Clause(attribute="milespergal")]
    vlst = VisList(intent, sql_df)
    assert len(vlst) == 3

    # SQL table again on a fresh connection, measure passed positionally.
    connection = psycopg2.connect(dsn)
    lux.config.set_SQL_connection(connection)
    sql_df = lux.LuxSQLTable(table_name="cars")
    intent = [lux.Clause("origin=?"), lux.Clause("milespergal")]
    vlst = VisList(intent, sql_df)
    assert len(vlst) == 3
Ejemplo n.º 5
0
def test_underspecified_vis_collection_zval(global_var):
    """A wildcard filter value should produce three charts on both Pandas and SQL data."""
    lux.config.set_executor_type("Pandas")
    # check if the number of charts is correct
    df = pytest.car_df
    origin_any = lux.Clause(attribute="Origin", filter_op="=", value="?")
    mpg = lux.Clause(attribute="MilesPerGal")
    vlst = VisList([origin_any, mpg], df)
    assert len(vlst) == 3

    # does not work
    # df = pd.read_csv("lux/data/car.csv")
    # vlst = VisList([lux.Clause(attribute = ["Origin","Cylinders"], filter_op="=",value="?"),lux.Clause(attribute = ["Horsepower"]),lux.Clause(attribute = "Weight")],df)
    # assert len(vlst) == 8

    dsn = "host=localhost dbname=postgres user=postgres password=lux"
    connection = psycopg2.connect(dsn)
    lux.config.set_SQL_connection(connection)
    sql_df = lux.LuxSQLTable(table_name="cars")
    origin_any = lux.Clause(attribute="origin", filter_op="=", value="?")
    mpg = lux.Clause(attribute="milespergal")
    vlst = VisList([origin_any, mpg], sql_df)
    assert len(vlst) == 3
Ejemplo n.º 6
0
def test_specified_vis_collection():
    """Fully specified intents on the cars dataset expand to the expected vis count.

    The filter clause lists two Origin values, so the test expects 2 charts for
    one measure and 4 charts for two measures, and no chart for "Europe".
    """
    url = 'https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true'
    df = pd.read_csv(url)
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format='%Y')

    vlst = VisList([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Brand"),
        lux.Clause(attribute="Origin", value=["Japan", "USA"])
    ], df)
    assert len(vlst) == 2

    vlst = VisList([
        lux.Clause(attribute=["Horsepower", "Weight"]),
        lux.Clause(attribute="Brand"),
        lux.Clause(attribute="Origin", value=["Japan", "USA"])
    ], df)
    assert len(vlst) == 4

    # test if z axis has been filtered correctly
    chart_titles = [vis.title for vis in vlst]
    # Each title is checked separately: `"a" and "b" in xs` only tests "b".
    assert "Origin = USA" in chart_titles
    assert "Origin = Japan" in chart_titles
    assert "Origin = Europe" not in chart_titles
Ejemplo n.º 7
0
def test_specified_vis_collection(global_var):
    """Fully specified intents on the shared car fixture expand to the expected vis count.

    The filter clause lists two Origin values, so the test expects 2 charts for
    one measure and 4 charts for two measures, and no chart for "Europe".
    """
    df = pytest.car_df
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    vlst = VisList(
        [
            lux.Clause(attribute="Horsepower"),
            lux.Clause(attribute="Brand"),
            lux.Clause(attribute="Origin", value=["Japan", "USA"]),
        ],
        df,
    )
    assert len(vlst) == 2

    vlst = VisList(
        [
            lux.Clause(attribute=["Horsepower", "Weight"]),
            lux.Clause(attribute="Brand"),
            lux.Clause(attribute="Origin", value=["Japan", "USA"]),
        ],
        df,
    )
    assert len(vlst) == 4

    # test if z axis has been filtered correctly
    chart_titles = [vis.title for vis in vlst]
    # Each title is checked separately: `"a" and "b" in xs` only tests "b".
    assert "Origin = USA" in chart_titles
    assert "Origin = Japan" in chart_titles
    assert "Origin = Europe" not in chart_titles
Ejemplo n.º 8
0
def test_parse(global_var):
    """A wildcard filter on the SQL `cars` table should parse into three visualizations."""
    # Measure clause given through the `attribute` keyword.
    sql_df = lux.LuxSQLTable(table_name="cars")
    intent = [lux.Clause("origin=?"), lux.Clause(attribute="milespergal")]
    vlst = VisList(intent, sql_df)
    assert len(vlst) == 3

    # Measure clause given as a positional string, on a freshly built table.
    sql_df = lux.LuxSQLTable(table_name="cars")
    intent = [lux.Clause("origin=?"), lux.Clause("milespergal")]
    vlst = VisList(intent, sql_df)
    assert len(vlst) == 3
Ejemplo n.º 9
0
def test_vis_collection_set_intent(global_var):
    """Re-setting the intent of a VisList must update the filter value of every vis."""
    df = pytest.olympic
    vlist = VisList(["Height", "SportType=Ice", "?"], df)
    vlist.set_intent(["Height", "SportType=Boat", "?"])
    for vis in vlist._collection:
        # The clause with an empty channel is the one this test treats as the filter.
        channelless = [clause for clause in vis._inferred_intent if clause.channel == ""]
        filter_vspec = channelless[0]
        assert filter_vspec.value == "Boat"
    # Reset the intent on the fixture dataframe once the checks are done.
    df.clear_intent()
Ejemplo n.º 10
0
def test_vis_collection_set_intent():
    """Re-setting the intent of a VisList must update the filter value of every vis."""
    from lux.vis.VisList import VisList

    df = pd.read_csv("lux/data/olympic.csv")
    vc = VisList(["Height", "SportType=Ice", "?"], df)
    vc.set_intent(["Height", "SportType=Boat", "?"])
    for vis in vc.collection:
        # The clause with an empty channel is the one this test treats as the filter.
        channelless = [clause for clause in vis._inferred_intent if clause.channel == ""]
        filter_vspec = channelless[0]
        assert filter_vspec.value == "Boat"
Ejemplo n.º 11
0
def test_vis_collection():
    """Wildcard intents on the olympic dataset expand to the expected number of charts."""
    olympic_url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
    df = pd.read_csv(olympic_url)

    vlist = VisList(["Height", "SportType=Ball", "?"], df)
    year_charts = [vis for vis in vlist if vis.get_attr_by_attr_name("Year") != []]
    vis_with_year = year_charts[0]
    x_clause = vis_with_year.get_attr_by_channel("x")[0]
    assert x_clause.attribute == "Year"
    # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute
    assert len(vlist) == len(df.columns) - 1 - 1

    vlist = VisList(["Height", "?"], df)
    # remove 1 for vis with for same attribute
    assert len(vlist) == len(df.columns) - 1
Ejemplo n.º 12
0
def test_vis_collection(global_var):
    """Wildcard intents on the olympic fixture expand to the expected number of charts."""
    df = pytest.olympic

    vlist = VisList(["Height", "SportType=Ball", "?"], df)
    year_charts = [vis for vis in vlist if vis.get_attr_by_attr_name("Year") != []]
    vis_with_year = year_charts[0]
    x_clause = vis_with_year.get_attr_by_channel("x")[0]
    assert x_clause.attribute == "Year"
    # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute
    assert len(vlist) == len(df.columns) - 1 - 1

    vlist = VisList(["Height", "?"], df)
    # remove 1 for vis with for same attribute
    assert len(vlist) == len(df.columns) - 1
Ejemplo n.º 13
0
def test_parse():
    """A wildcard filter on the car CSV should parse into three visualizations."""
    # Measure clause given through the `attribute` keyword.
    df = pd.read_csv("lux/data/car.csv")
    intent = [lux.Clause("Origin=?"), lux.Clause(attribute="MilesPerGal")]
    vlst = VisList(intent, df)
    assert len(vlst) == 3

    # Measure clause given as a positional string, on a freshly loaded dataframe.
    df = pd.read_csv("lux/data/car.csv")
    intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    vlst = VisList(intent, df)
    assert len(vlst) == 3
Ejemplo n.º 14
0
def test_parse(global_var):
    """A wildcard filter on the car fixture should parse into three visualizations."""
    # Measure clause given through the `attribute` keyword.
    df = pytest.car_df
    intent = [lux.Clause("Origin=?"), lux.Clause(attribute="MilesPerGal")]
    vlst = VisList(intent, df)
    assert len(vlst) == 3

    # Measure clause given as a positional string.
    df = pytest.car_df
    intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    vlst = VisList(intent, df)
    assert len(vlst) == 3
Ejemplo n.º 15
0
 def random_categorical(ldf):
     """Build a "bars" recommendation from nominal-attribute visualizations of `ldf`.

     Every vis generated from a wildcard nominal clause gets a fixed score of
     10, and the result of ``topK(15)`` on that list is returned as the
     recommendation's collection.
     """
     nominal_wildcard = lux.Clause("?", data_type="nominal")
     candidates = VisList([nominal_wildcard], ldf)
     for vis in candidates:
         vis.score = 10
     top_candidates = candidates.topK(15)
     return {
         "action": "bars",
         "description": "Random list of Bar charts",
         "collection": top_candidates,
     }
Ejemplo n.º 16
0
def test_vis_collection():
    """Wildcard intents on the olympic CSV expand to the expected number of charts."""
    from lux.vis.VisList import VisList

    df = pd.read_csv("lux/data/olympic.csv")

    vc = VisList(["Height", "SportType=Ball", "?"], df)
    year_charts = [vis for vis in vc if vis.get_attr_by_attr_name("Year") != []]
    vis_with_year = year_charts[0]
    x_clause = vis_with_year.get_attr_by_channel("x")[0]
    assert x_clause.attribute == "Year"
    # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute
    assert len(vc) == len(df.columns) - 1 - 1

    vc = VisList(["Height", "?"], df)
    # remove 1 for vis with for same attribute
    assert len(vc) == len(df.columns) - 1
Ejemplo n.º 17
0
def test_parse(global_var):
    """A wildcard filter should parse into three visualizations with the Pandas executor."""
    lux.config.set_executor_type("Pandas")

    # Shared fixture, measure passed by keyword.
    car_df = pytest.car_df
    intent = [lux.Clause("Origin=?"), lux.Clause(attribute="MilesPerGal")]
    vlst = VisList(intent, car_df)
    assert len(vlst) == 3

    # Shared fixture, measure passed positionally.
    car_df = pytest.car_df
    intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    vlst = VisList(intent, car_df)
    assert len(vlst) == 3

    # Dataframe read straight from the bundled CSV.
    csv_df = pd.read_csv("lux/data/car.csv")
    intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    vlst = VisList(intent, csv_df)
    assert len(vlst) == 3
Ejemplo n.º 18
0
    def remove_all_invalid(vis_collection: VisList) -> VisList:
        """
        Given an expanded vis list, keep only the visualizations that are valid.
        A visualization is dropped when its inferred intent has any of:
        - the same attribute in more than one clause (this includes a filter
          attribute that is also a visualized attribute),
        - two or more temporal clauses,
        - exactly one temporal clause together with ``_nmsr == 2``.
        Parameters
        ----------
        vis_collection : list[lux.vis.Vis]
                expanded visualizations to be screened.
        Returns
        -------
        lux.vis.VisList
                new vis list containing only the visualizations that passed.
        """
        kept = []
        for candidate in vis_collection:
            clauses = candidate._inferred_intent
            distinct_attributes = {clause.attribute for clause in clauses}
            temporal_count = sum(
                1 for clause in clauses if clause.data_type == "temporal")
            if temporal_count >= 2:
                continue
            if len(clauses) != len(distinct_attributes):
                # At least one attribute is repeated across clauses.
                continue
            if candidate._nmsr == 2 and temporal_count == 1:
                continue
            kept.append(candidate)

        return VisList(kept)
Ejemplo n.º 19
0
    def get_graphing_code(self, change):
        """Push the code for the vis shown in full screen onto the widget.

        Reads ``self._widget.selectedFullScreenIndex``, takes its first key as
        the action, and looks up the selected indices in
        ``self._recommendation[action]``. With the "vegalite" backend the
        widget's ``visGraphCode`` is set to a pandas import plus the first
        selected vis' Altair code and ``visStyleCode`` to the configured
        plotting style code; for any other backend a "not supported" comment
        and an empty style string are written.

        Parameters
        ----------
        change
            Not used by this method. Presumably the change notification handed
            over by the widget's observer mechanism; confirm against the caller.
        """
        full_screen_vis_idx = self._widget.selectedFullScreenIndex
        full_screen_action = list(self._widget.selectedFullScreenIndex.keys())[0]
        # Using visList to support eventual full view display of multiple graphs
        action_recommendations = self._recommendation[full_screen_action]
        full_screen_vis = VisList(
            [action_recommendations[idx] for idx in full_screen_vis_idx[full_screen_action]]
        )
        if lux.config.plotting_backend == "vegalite":
            # The handler is detached before the code traits are written and
            # re-attached afterwards (presumably so these writes do not trigger it).
            self._widget.unobserve(self.apply_full_view_changes)
            self._widget.visGraphCode = (
                "import pandas as pd\n" + full_screen_vis[0].get_Altair_vis_code()
            )
            self._widget.visStyleCode = lux.config.plotting_style_code
            self._widget.observe(self.apply_full_view_changes, names="visGraphCode")
            self._widget.observe(self.apply_full_view_changes, names="visStyleCode")
            self._widget.observe(self.change_style_config, names="configPlottingStyle")
        else:
            self._widget.visGraphCode = (
                "# " + lux.config.plotting_backend + " not supported in full screen yet"
            )
            self._widget.visStyleCode = ""
Ejemplo n.º 20
0
def column_group(ldf):
    """Build the "Column Groups" recommendation for a dataframe.

    For a dataframe with a single-level index, one vis is created per column,
    pairing the index column with that column (no aggregation). All of them
    share one ``reset_index()`` copy of the dataframe.

    Parameters
    ----------
    ldf
        Dataframe to visualize. NOTE: when its columns are a
        ``pd.DatetimeIndex`` they are replaced in place by their formatted
        string labels, so the caller's dataframe is relabelled too.

    Returns
    -------
    dict
        Recommendation with the keys "action", "description" and "collection"
        (a VisList, empty when the index has more than one level).
    """
    recommendation = {
        "action":
        "Column Groups",
        "description":
        "Shows charts of possible visualizations with respect to the column-wise index."
    }
    collection = []
    ldf_flat = ldf
    if isinstance(ldf.columns, pd.DatetimeIndex):
        # ldf_flat is the same object as ldf, so this relabels ldf.columns as
        # well; a second DatetimeIndex conversion further down could never run
        # and has been removed.
        ldf_flat.columns = ldf_flat.columns.format()
    # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
    ldf_flat = ldf_flat.reset_index()
    if ldf.index.nlevels == 1:
        # reset_index() stores an unnamed index under the column name "index",
        # so fall back to that name instead of passing None to Vis.
        index_column_name = ldf.index.name if ldf.index.name is not None else "index"
        for attribute in ldf.columns:
            vis = Vis([
                index_column_name,
                lux.Clause(str(attribute), aggregation=None)
            ], ldf_flat)
            collection.append(vis)
    vlst = VisList(collection)
    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated ldf

    recommendation["collection"] = vlst
    return recommendation
Ejemplo n.º 21
0
def row_group(ldf):
    """Build the "Row Groups" recommendation for a dataframe.

    For a dataframe with a single-level index, one vis is created per row: the
    row is turned into its own dataframe with ``reset_index()`` and plotted as
    the column dimension against the row's values (a measure, no aggregation).
    The dimension is named after ``ldf.columns.name``, or "index" when the
    columns have no name.

    Returns a dict with the keys "action", "description" and "collection"
    (a VisList, empty when the index has more than one level).
    """
    collection = []

    if ldf.index.nlevels == 1:
        dim_name = "index" if ldf.columns.name is None else ldf.columns.name
        for position in range(len(ldf)):
            row = ldf.iloc[position, ]
            rowdf = row.reset_index()
            # TODO: the data type / data model of the "index" dimension is not
            # set explicitly here; this needs to change to auto-detect
            # (nominal dimension, or temporal when the columns are a DatetimeIndex).
            row_measure = lux.Clause(row.name, data_model="measure", aggregation=None)
            collection.append(Vis([dim_name, row_measure], rowdf))
    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated data
    vlst = VisList(collection)

    return {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
        "collection": vlst,
    }
Ejemplo n.º 22
0
 def show_all_column_vis(self):
     """Set `current_vis` to a single vis of all columns for small, intent-free frames.

     The vis is only built when the dataframe has 2 or 3 columns AND no intent
     is set (`intent` is an empty list or None). It is stored, wrapped in a
     VisList, only if it resolved to a non-empty mark.
     """
     # The intent test is grouped explicitly: written unparenthesized,
     # `a and b and c or d` lets `intent is None` bypass the column-count guard.
     no_intent = self.intent is None or self.intent == []
     if 1 < len(self.columns) < 4 and no_intent:
         vis = Vis(list(self.columns), self)
         if vis.mark != "":
             vis._all_column = True
             self.current_vis = VisList([vis])
Ejemplo n.º 23
0
    def remove_all_invalid(vis_collection: VisList) -> VisList:
        """
        Given an expanded vis list, keep only the visualizations that are valid.
        A visualization is dropped when its inferred intent repeats an attribute
        across clauses (this includes a filter attribute that is also a
        visualized attribute) or contains two or more temporal clauses.
        Parameters
        ----------
        vis_collection : list[lux.vis.Vis]
            expanded visualizations to be screened.
        Returns
        -------
        lux.vis.VisList
            new vis list containing only the visualizations that passed.
        """

        def is_valid(vis):
            # Valid: fewer than two temporal clauses and no repeated attribute.
            clauses = vis._inferred_intent
            distinct_attributes = {clause.attribute for clause in clauses}
            temporal_count = sum(
                1 for clause in clauses if clause.data_type == "temporal")
            return temporal_count < 2 and len(clauses) == len(distinct_attributes)

        return VisList([vis for vis in vis_collection if is_valid(vis)])
Ejemplo n.º 24
0
def test_underspecified_vis_collection_zval():
    """A wildcard filter value on Origin should produce three charts."""
    # check if the number of charts is correct
    df = pd.read_csv("lux/data/car.csv")
    origin_any = lux.Clause(attribute="Origin", filter_op="=", value="?")
    mpg = lux.Clause(attribute="MilesPerGal")
    vlst = VisList([origin_any, mpg], df)
    assert len(vlst) == 3
Ejemplo n.º 25
0
def test_specified_channel_enforced_vis_collection(global_var):
    """Every vis generated with `milespergal` pinned to x must keep it on the x channel."""
    sql_df = lux.LuxSQLTable(table_name="cars")

    any_attribute = lux.Clause(attribute="?")
    mpg_on_x = lux.Clause(attribute="milespergal", channel="x")
    visList = VisList([any_attribute, mpg_on_x], sql_df)
    for vis in visList:
        check_attribute_on_channel(vis, "milespergal", "x")
Ejemplo n.º 26
0
def test_underspecified_vis_collection_zval(global_var):
    """A wildcard filter value on the SQL `cars` table should produce three charts."""
    sql_df = lux.LuxSQLTable(table_name="cars")
    origin_any = lux.Clause(attribute="origin", filter_op="=", value="?")
    mpg = lux.Clause(attribute="milespergal")
    vlst = VisList([origin_any, mpg], sql_df)
    assert len(vlst) == 3
Ejemplo n.º 27
0
def test_autoencoding_scatter(global_var):
    """Check channel assignment for two-measure visualizations.

    Covers no channel given, one channel given, both channels given, a
    duplicate channel (expected to raise ValueError from `set_intent`), and a
    SQL-backed VisList with one attribute pinned to the x channel.
    """
    lux.config.set_executor_type("Pandas")
    df = pytest.car_df
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    # No channel specified
    mpg = lux.Clause(attribute="MilesPerGal")
    weight = lux.Clause(attribute="Weight")
    vis = Vis([mpg, weight], df)
    check_attribute_on_channel(vis, "MilesPerGal", "x")
    check_attribute_on_channel(vis, "Weight", "y")

    # Partial channel specified
    mpg = lux.Clause(attribute="MilesPerGal", channel="y")
    weight = lux.Clause(attribute="Weight")
    vis = Vis([mpg, weight], df)
    check_attribute_on_channel(vis, "MilesPerGal", "y")
    check_attribute_on_channel(vis, "Weight", "x")

    # Full channel specified
    mpg = lux.Clause(attribute="MilesPerGal", channel="y")
    weight = lux.Clause(attribute="Weight", channel="x")
    vis = Vis([mpg, weight], df)
    check_attribute_on_channel(vis, "MilesPerGal", "y")
    check_attribute_on_channel(vis, "Weight", "x")

    # Duplicate channel specified
    with pytest.raises(ValueError):
        # Should throw error because there should not be columns with the same channel specified
        clashing_intent = [
            lux.Clause(attribute="MilesPerGal", channel="x"),
            lux.Clause(attribute="Weight", channel="x"),
        ]
        df.set_intent(clashing_intent)
    df.clear_intent()

    dsn = "host=localhost dbname=postgres user=postgres password=lux"
    connection = psycopg2.connect(dsn)
    lux.config.set_SQL_connection(connection)
    sql_df = lux.LuxSQLTable(table_name="cars")
    any_attribute = lux.Clause(attribute="?")
    mpg_on_x = lux.Clause(attribute="milespergal", channel="x")
    visList = VisList([any_attribute, mpg_on_x], sql_df)
    for vis in visList:
        check_attribute_on_channel(vis, "milespergal", "x")
Ejemplo n.º 28
0
def test_vis_list_set_intent():
    """Assigning a new intent to a VisList must regenerate its visualizations."""
    from lux.vis.VisList import VisList

    df = pd.read_csv("lux/data/car.csv")

    vislist = VisList(["Horsepower", "?"], df)
    vislist._repr_html_()
    for vis in vislist:
        horsepower_clauses = vis.get_attr_by_attr_name("Horsepower")
        assert horsepower_clauses != []

    vislist.intent = ["Weight", "?"]
    vislist._repr_html_()
    for vis in vislist:
        weight_clauses = vis.get_attr_by_attr_name("Weight")
        assert weight_clauses != []
Ejemplo n.º 29
0
def test_underspecified_vis_collection_zval(global_var):
    """A wildcard filter value on Origin should produce three charts."""
    # check if the number of charts is correct
    df = pytest.car_df
    origin_any = lux.Clause(attribute="Origin", filter_op="=", value="?")
    mpg = lux.Clause(attribute="MilesPerGal")
    vlst = VisList([origin_any, mpg], df)
    assert len(vlst) == 3
Ejemplo n.º 30
0
def test_selection():
    """Vis data built from a SQL table are LuxDataFrames holding only the selected columns."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    measures = lux.Clause(attribute=["horsepower", "weight", "acceleration"])
    year = lux.Clause(attribute="year")
    intent = [measures, year]
    vislist = VisList(intent, tbl)

    data_is_lux_frame = [type(vis.data) == lux.core.frame.LuxDataFrame for vis in vislist]
    assert all(data_is_lux_frame)

    # The third vis is the one expected to pair "year" with "acceleration".
    third_columns = vislist[2].data.columns
    assert all(third_columns == ["year", "acceleration"])