Beispiel #1
0
def test_filter_aggregation_fillzero_aligned(global_var):
    """Check that the 5- and 8-cylinder groups report 0 milespergal for origin=Japan."""
    sql_table = lux.LuxSQLTable()
    sql_table.set_SQL_table("cars")

    vis = Vis(
        [
            lux.Clause(attribute="cylinders"),
            lux.Clause(attribute="milespergal"),
            lux.Clause("origin=Japan"),
        ],
        sql_table,
    )
    aggregated = vis.data
    # Both cylinder counts are expected to show up in the result with a value of 0.
    for cylinder_count in (5, 8):
        group_rows = aggregated[aggregated["cylinders"] == cylinder_count]
        assert group_rows["milespergal"].values[0] == 0
Beispiel #2
0
def test_vis_collection_via_list_of_vis(global_var):
    """A VisList built from five Vis objects plus a dataframe has length five."""
    olympic = pytest.olympic
    # change pandas dtype for the column "Year" to datetype
    olympic["Year"] = pd.to_datetime(olympic["Year"], format="%Y")
    from lux.vis.VisList import VisList
    from lux.vis.Vis import Vis

    paired_attributes = ["Sport", "Year", "Height", "HostRegion", "SportType"]
    # Each Vis pairs "Weight" with one other attribute and is created without a source.
    vis_objects = [
        Vis([lux.Clause("Weight"), lux.Clause(attribute)])
        for attribute in paired_attributes
    ]
    collection = VisList(vis_objects, olympic)
    assert len(collection) == 5
Beispiel #3
0
def test_lazy_execution(global_var):
    """Vis data is None until SQLExecutor.execute is run against the table."""
    sql_table = lux.LuxSQLTable()
    sql_table.set_SQL_table("cars")

    lazy_vis = Vis(
        [
            lux.Clause(attribute="horsepower", aggregation="mean"),
            lux.Clause(attribute="origin"),
        ]
    )
    # The Vis was created without a source, so its data must still be unset.
    assert lazy_vis.data is None
    SQLExecutor.execute([lazy_vis], sql_table)
    assert type(lazy_vis.data) == lux.core.frame.LuxDataFrame
Beispiel #4
0
def test_apply_nan_filter():
    """Filtering on ``some_nan2=nan`` and displaying the Vis yields a bar mark."""
    from lux.vis.Vis import Vis

    import numpy as np

    some_nan = [3.0, 15.0, np.nan, 7.0, 2.0, 3.0, 1.0, 1.0, 2.0, 11.0]
    some_nan2 = [np.nan, 3.0, 3.0, 0.0, 2.0, np.nan, 1.0, 1.0, 0.0, 0.0]
    # One record per row; the "fully_nan" column is NaN everywhere.
    records = [
        {"fully_nan": np.nan, "some_nan": first, "some_nan2": second}
        for first, second in zip(some_nan, some_nan2)
    ]
    frame = pd.DataFrame(records)

    nan_filtered = Vis(["some_nan", "some_nan2=nan"], frame)
    nan_filtered._ipython_display_()
    assert nan_filtered.mark == "bar"
Beispiel #5
0
    def execute_scatter(view: Vis, tbl: LuxSQLTable):
        """
        Given a scatterplot vis and a LuxSQLTable, fetch the data required to render the vis.
        1) Collect the attributes named in the vis's inferred intent (except "Record")
        2) Generate WHERE clause for the SQL query
        3) Count the rows that match the WHERE clause
        4) If the count exceeds lux.config.sampling_cap, query a random sample of 10000 rows,
           otherwise query every matching row
        5) Store the result on the vis as a Lux dataframe

        Parameters
        ----------
        view: lux.Vis
            scatterplot vis whose ``_inferred_intent`` determines the queried columns.
        tbl : LuxSQLTable
            table whose ``table_name`` is queried.

        Returns
        -------
        None
            The queried data is written to ``view._vis_data``.
        """

        attributes = {
            clause.attribute
            for clause in view._inferred_intent
            if clause.attribute and clause.attribute != "Record"
        }
        where_clause, filterVars = SQLExecutor.execute_filter(view)

        def add_quotes(var_name):
            return '"' + var_name + '"'

        # Columns used by filters must be selected as well as the plotted attributes.
        required_variables = ",".join(map(add_quotes, attributes | set(filterVars)))

        # A single COUNT query is enough; the result column is read back as "count".
        row_count = list(
            pandas.read_sql(
                f"SELECT COUNT(*) FROM {tbl.table_name} {where_clause}",
                lux.config.SQLconnection,
            )["count"]
        )[0]
        if row_count > lux.config.sampling_cap:
            # NOTE(review): the sample size is fixed at 10000 while the threshold is
            # lux.config.sampling_cap -- confirm whether the two are meant to differ.
            query = f"SELECT {required_variables} FROM {tbl.table_name} {where_clause} ORDER BY random() LIMIT 10000"
        else:
            query = "SELECT {} FROM {} {}".format(required_variables, tbl.table_name, where_clause)
        data = pandas.read_sql(query, lux.config.SQLconnection)
        view._vis_data = utils.pandas_to_lux(data)
Beispiel #6
0
def test_autoencoding_line_chart(global_var):
    """Channel assignment for Year/Acceleration with no, partial and full channel hints."""
    car_df = pytest.car_df
    # change pandas dtype for the column "Year" to datetype
    car_df["Year"] = pd.to_datetime(car_df["Year"], format="%Y")

    # No channel specified
    unhinted = Vis(
        [lux.Clause(attribute="Year"), lux.Clause(attribute="Acceleration")],
        car_df,
    )
    check_attribute_on_channel(unhinted, "Year", "x")
    check_attribute_on_channel(unhinted, "Acceleration", "y")

    # Partial channel specified
    partial_intent = [
        lux.Clause(attribute="Year", channel="y"),
        lux.Clause(attribute="Acceleration"),
    ]
    partially_hinted = Vis(partial_intent, car_df)
    check_attribute_on_channel(partially_hinted, "Year", "y")
    check_attribute_on_channel(partially_hinted, "Acceleration", "x")

    # Full channel specified
    full_intent = [
        lux.Clause(attribute="Year", channel="y"),
        lux.Clause(attribute="Acceleration", channel="x"),
    ]
    fully_hinted = Vis(full_intent, car_df)
    check_attribute_on_channel(fully_hinted, "Year", "y")
    check_attribute_on_channel(fully_hinted, "Acceleration", "x")

    # Should throw error because there should not be columns with the same channel specified
    with pytest.raises(ValueError):
        car_df.set_intent(
            [
                lux.Clause(attribute="Year", channel="x"),
                lux.Clause(attribute="Acceleration", channel="x"),
            ]
        )
Beispiel #7
0
def test_refresh_inplace():
    """After 'date' is converted to datetime, refreshing the Vis gives a line chart of date vs value."""
    frame = pd.DataFrame(
        {
            "date": ["2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"],
            "value": [10.5, 15.2, 20.3, 25.2],
        }
    )
    temporal_hint = "Lux detects that the attribute 'date' may be temporal."
    with pytest.warns(UserWarning, match=temporal_hint):
        frame._repr_html_()
    assert frame.data_type_lookup["date"] == "temporal"

    from lux.vis.Vis import Vis

    stale_vis = Vis(["date", "value"], frame)

    # Convert the string column in place, then recompute the frame's metadata.
    frame["date"] = pd.to_datetime(frame["date"], format="%Y-%m-%d")
    frame.maintain_metadata()
    assert frame.data_type["temporal"][0] == "date"

    stale_vis.refresh_source(frame)
    assert stale_vis.mark == "line"
    for channel, expected_attribute in (("x", "date"), ("y", "value")):
        assert stale_vis.get_attr_by_channel(channel)[0].attribute == expected_attribute
Beispiel #8
0
def test_autoencoding_color_line_chart():
    """Year, Acceleration and Origin are expected on the x, y and color channels."""
    cars = pd.read_csv("lux/data/car.csv")
    # change pandas dtype for the column "Year" to datetype
    cars["Year"] = pd.to_datetime(cars["Year"], format="%Y")
    vis = Vis(
        [
            lux.Clause(attribute="Year"),
            lux.Clause(attribute="Acceleration"),
            lux.Clause(attribute="Origin"),
        ],
        cars,
    )
    expected_channels = (("Year", "x"), ("Acceleration", "y"), ("Origin", "color"))
    for attribute, channel in expected_channels:
        check_attribute_on_channel(vis, attribute, channel)
Beispiel #9
0
def test_vis_list_custom_title_override():
    """Every Vis in the VisList keeps the title it was constructed with."""
    olympic = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
    olympic["Year"] = pd.to_datetime(olympic["Year"], format="%Y")

    paired_attributes = ["Sport", "Year", "Height", "HostRegion", "SportType"]
    titled = [
        Vis(
            [lux.Clause("Weight"), lux.Clause(attribute)],
            title="overriding dummy title",
        )
        for attribute in paired_attributes
    ]
    collection = VisList(titled, olympic)
    for member in collection:
        assert member.title == "overriding dummy title"
Beispiel #10
0
def test_vis_list_custom_title_override(global_var):
    """Every Vis in the VisList keeps the title it was constructed with."""
    olympic = pytest.olympic
    olympic["Year"] = pd.to_datetime(olympic["Year"], format="%Y")

    paired_attributes = ["Sport", "Year", "Height", "HostRegion", "SportType"]
    titled = [
        Vis(
            [lux.Clause("Weight"), lux.Clause(attribute)],
            title="overriding dummy title",
        )
        for attribute in paired_attributes
    ]
    collection = VisList(titled, olympic)
    for member in collection:
        assert member.title == "overriding dummy title"
Beispiel #11
0
    def execute_filter(vis: Vis):
        """
        Apply each filter clause found in the vis's inferred intent to the vis data.

        Parameters
        ----------
        vis : Vis
            vis whose ``data`` is already populated; the filtered result is
            written to ``vis._vis_data``.

        Returns
        -------
        bool
            True when at least one filter clause was found, False otherwise.
        """
        assert (
            vis.data is not None
        ), "execute_filter assumes input vis.data is populated (if not, populate with LuxDataFrame values)"
        filter_specs = utils.get_filter_specs(vis._inferred_intent)
        if not filter_specs:
            return False

        # TODO: Need to handle OR logic
        for spec in filter_specs:
            vis._vis_data = PandasExecutor.apply_filter(
                vis.data, spec.attribute, spec.filter_op, spec.value
            )
        return True
Beispiel #12
0
def test_autoencoding_color_scatter_chart(global_var):
    """Origin lands on color by default; an explicit channel hint moves Acceleration there."""
    car_df = pytest.car_df
    # change pandas dtype for the column "Year" to datetype
    car_df["Year"] = pd.to_datetime(car_df["Year"], format="%Y")

    default_intent = [
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration"),
        lux.Clause(attribute="Origin"),
    ]
    default_vis = Vis(default_intent, car_df)
    check_attribute_on_channel(default_vis, "Origin", "color")

    # Same attributes, but Acceleration is explicitly requested on the color channel.
    hinted_intent = [
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration", channel="color"),
        lux.Clause(attribute="Origin"),
    ]
    hinted_vis = Vis(hinted_intent, car_df)
    check_attribute_on_channel(hinted_vis, "Acceleration", "color")
Beispiel #13
0
def test_autoencoding_color_line_chart(global_var):
    """With the Pandas executor, Year/Acceleration/Origin are expected on x/y/color."""
    lux.config.set_executor_type("Pandas")
    car_df = pytest.car_df
    # change pandas dtype for the column "Year" to datetype
    car_df["Year"] = pd.to_datetime(car_df["Year"], format="%Y")
    vis = Vis(
        [
            lux.Clause(attribute="Year"),
            lux.Clause(attribute="Acceleration"),
            lux.Clause(attribute="Origin"),
        ],
        car_df,
    )
    expected_channels = (("Year", "x"), ("Acceleration", "y"), ("Origin", "color"))
    for attribute, channel in expected_channels:
        check_attribute_on_channel(vis, attribute, channel)
Beispiel #14
0
def test_vis_collection_via_list_of_vis():
    """A VisList built from five Vis objects plus a dataframe has length five."""
    olympic = pd.read_csv(
        "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
    )
    # change pandas dtype for the column "Year" to datetype
    olympic["Year"] = pd.to_datetime(olympic["Year"], format="%Y")
    from lux.vis.VisList import VisList
    from lux.vis.Vis import Vis

    paired_attributes = ["Sport", "Year", "Height", "HostRegion", "SportType"]
    # Each Vis pairs "Weight" with one other attribute and is created without a source.
    vis_objects = [
        Vis([lux.Clause("Weight"), lux.Clause(attribute)])
        for attribute in paired_attributes
    ]
    collection = VisList(vis_objects, olympic)
    assert len(collection) == 5
Beispiel #15
0
def test_multi_vis():
    """Intents that expand to several visualizations raise SyntaxError when rendered as a Vis."""
    college = pd.read_csv("lux/data/college.csv")
    expected_message = "The intent that you specified corresponds to more than one visualization."
    ambiguous_intents = (
        ["SATAverage", "AverageCost", "Geography=?"],
        ["SATAverage", "?"],
        ["SATAverage", "AverageCost", "Region=New England|Southeast"],
    )
    for intent in ambiguous_intents:
        with pytest.raises(SyntaxError, match=expected_message):
            Vis(intent, college)._repr_html_()
Beispiel #16
0
def test_autoencoding_line_chart(global_var):
    """Channel assignment for year/acceleration on a SQL table with no, partial and full hints."""
    # test for sql executor
    cars_table = lux.LuxSQLTable(table_name="cars")

    # No channel specified
    unhinted = Vis(
        [lux.Clause(attribute="year"), lux.Clause(attribute="acceleration")],
        cars_table,
    )
    check_attribute_on_channel(unhinted, "year", "x")
    check_attribute_on_channel(unhinted, "acceleration", "y")

    # Partial channel specified
    partial_intent = [
        lux.Clause(attribute="year", channel="y"),
        lux.Clause(attribute="acceleration"),
    ]
    partially_hinted = Vis(partial_intent, cars_table)
    check_attribute_on_channel(partially_hinted, "year", "y")
    check_attribute_on_channel(partially_hinted, "acceleration", "x")

    # Full channel specified
    full_intent = [
        lux.Clause(attribute="year", channel="y"),
        lux.Clause(attribute="acceleration", channel="x"),
    ]
    fully_hinted = Vis(full_intent, cars_table)
    check_attribute_on_channel(fully_hinted, "year", "y")
    check_attribute_on_channel(fully_hinted, "acceleration", "x")

    # Should throw error because there should not be columns with the same channel specified
    with pytest.raises(ValueError):
        cars_table.set_intent(
            [
                lux.Clause(attribute="year", channel="x"),
                lux.Clause(attribute="acceleration", channel="x"),
            ]
        )
Beispiel #17
0
def test_sort_bar():
    """Bar charts: the Origin dimension is expected unsorted, the Name dimension sorted ascending."""
    from lux.processor.Compiler import Compiler
    from lux.vis.Vis import Vis

    def measure_clause():
        # Acceleration is declared explicitly as a quantitative measure.
        return lux.Clause(
            attribute="Acceleration",
            data_model="measure",
            data_type="quantitative",
        )

    def dimension_clause(name):
        # The paired attribute is declared explicitly as a nominal dimension.
        return lux.Clause(attribute=name, data_model="dimension", data_type="nominal")

    cars = pd.read_csv("lux/data/car.csv")
    by_origin = Vis([measure_clause(), dimension_clause("Origin")], cars)
    assert by_origin.mark == "bar"
    assert by_origin._inferred_intent[1].sort == ''

    # The file is read again so the second Vis gets its own dataframe.
    cars = pd.read_csv("lux/data/car.csv")
    by_name = Vis([measure_clause(), dimension_clause("Name")], cars)
    assert by_name.mark == "bar"
    assert by_name._inferred_intent[1].sort == 'ascending'
Beispiel #18
0
def test_autoencoding_color_scatter_chart():
    """Origin lands on color by default; an explicit channel hint moves Acceleration there."""
    cars = pd.read_csv("lux/data/car.csv")
    # change pandas dtype for the column "Year" to datetype
    cars["Year"] = pd.to_datetime(cars["Year"], format="%Y")

    default_intent = [
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration"),
        lux.Clause(attribute="Origin"),
    ]
    default_vis = Vis(default_intent, cars)
    check_attribute_on_channel(default_vis, "Origin", "color")

    # Same attributes, but Acceleration is explicitly requested on the color channel.
    hinted_intent = [
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration", channel="color"),
        lux.Clause(attribute="Origin"),
    ]
    hinted_vis = Vis(hinted_intent, cars)
    check_attribute_on_channel(hinted_vis, "Acceleration", "color")
Beispiel #19
0
def test_lazy_execution():
    """Vis data is None until SQLExecutor.execute is run against the SQL table."""
    pg_connection = psycopg2.connect(
        "host=localhost dbname=postgres user=postgres password=lux"
    )
    car_table = lux.LuxSQLTable()
    lux.config.set_SQL_connection(pg_connection)
    car_table.set_SQL_table("car")

    lazy_vis = Vis(
        [
            lux.Clause(attribute="Horsepower", aggregation="mean"),
            lux.Clause(attribute="Origin"),
        ]
    )
    # The Vis was created without a source, so its data must still be unset.
    assert lazy_vis.data is None
    SQLExecutor.execute([lazy_vis], car_table)
    assert type(lazy_vis.data) == lux.core.frame.LuxDataFrame
Beispiel #20
0
def test_filter_aggregation_fillzero_aligned():
    """Check that the 5- and 8-cylinder groups report 0 MilesPerGal for Origin=Japan."""
    pg_connection = psycopg2.connect(
        "host=localhost dbname=postgres user=postgres password=lux"
    )
    car_table = lux.LuxSQLTable()
    lux.config.set_SQL_connection(pg_connection)
    car_table.set_SQL_table("car")

    vis = Vis(
        [
            lux.Clause(attribute="Cylinders"),
            lux.Clause(attribute="MilesPerGal"),
            lux.Clause("Origin=Japan"),
        ],
        car_table,
    )
    aggregated = vis.data
    # Both cylinder counts are expected to show up in the result with a value of 0.
    for cylinder_count in (5, 8):
        group_rows = aggregated[aggregated["Cylinders"] == cylinder_count]
        assert group_rows["MilesPerGal"].values[0] == 0
Beispiel #21
0
def test_colored_bar_chart():
    """A bar chart colored by Cylinders produces three columns and fifteen rows."""
    from lux.vis.Vis import Vis
    from lux.vis.Vis import Clause

    cars = pd.read_csv("lux/data/car.csv")

    intent = [
        Clause(attribute="MilesPerGal", channel="x"),
        Clause(attribute="Origin", channel="y"),
        Clause(attribute='Cylinders', channel="color"),
    ]
    colored_vis = Vis(intent, cars)

    # make sure dimension of the data is correct
    n_colors = len(cars.unique_values['Cylinders'])
    n_groups = len(cars.unique_values['Origin'])
    assert len(colored_vis.data.columns) == 3
    # The three checks below are the links of the original chained comparison.
    assert len(colored_vis.data) == 15
    assert 15 > n_groups
    assert n_groups < n_colors * n_groups
Beispiel #22
0
def test_vis_private_properties():
    """The data, code, min_max and mark properties of a Vis are readable but not assignable."""
    from lux.vis.Vis import Vis

    # Python < 3.11 reports "can't set attribute"; Python 3.11+ reports
    # "property '...' of '...' object has no setter". Accept either wording.
    read_only_error = r"can't set attribute|has no setter"

    df = pd.read_csv("lux/data/car.csv")
    vis = Vis(["Horsepower", "Weight"], df)
    vis._repr_html_()
    assert isinstance(vis.data, lux.core.frame.LuxDataFrame)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.data = "some val"

    assert isinstance(vis.code, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.code = "some val"

    assert isinstance(vis.min_max, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.min_max = "some val"

    assert vis.mark == "scatter"
    with pytest.raises(AttributeError, match=read_only_error):
        vis.mark = "some val"
Beispiel #23
0
def test_filter_aggregation_fillzero_aligned(global_var):
    """Compare the Vis's per-cylinder MilesPerGal for Origin=Japan with a direct pandas group-by.

    Cylinder counts 5 and 8 are expected to be present with a value of 0;
    counts 3, 4 and 6 are expected to equal the pandas group mean.
    """
    df = pytest.car_df
    intent = [
        lux.Clause(attribute="Cylinders"),
        lux.Clause(attribute="MilesPerGal"),
        lux.Clause("Origin=Japan"),
    ]
    vis = Vis(intent, df)
    result = vis.data
    # Select the column before aggregating: averaging the whole frame first
    # raises on non-numeric columns with pandas >= 2.0 and does needless work.
    externalValidation = df[df["Origin"] == "Japan"].groupby("Cylinders")["MilesPerGal"].mean()
    for cylinders in (5, 8):
        assert result[result["Cylinders"] == cylinders]["MilesPerGal"].values[0] == 0
    for cylinders in (3, 4, 6):
        observed = result[result["Cylinders"] == cylinders]["MilesPerGal"].values[0]
        assert observed == externalValidation[cylinders]
Beispiel #24
0
def row_group(ldf):
    """
    Build the "Row Groups" recommendation for a dataframe.

    When the index has a single level, one Vis is created per row from that
    row's reset-index frame. Otherwise the collection is empty.

    Parameters
    ----------
    ldf : dataframe
        Frame whose rows are visualized one at a time.

    Returns
    -------
    dict
        Recommendation with "action", "description", "long_description" and
        "collection" (a VisList) entries.
    """
    recommendation = {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
        "long_description":
        'A row index can be thought of as an extra row that indicates the values that the user is interested in. \
            Lux focuses on visualizing named dataframe indices, i.e., indices with a non-null name property, as a proxy of the attribute \
                that the user is interested in or have operated on (e.g., group-by attribute). In particular, dataframes with named indices \
                    are often pre-aggregated, so Lux visualizes exactly the values that the dataframe portrays. \
                        <a href="https://lux-api.readthedocs.io/en/latest/source/advanced/indexgroup.html" target="_blank">More details</a>',
    }
    row_visualizations = []

    if ldf.index.nlevels == 1:
        # Use the name of the column index as the dimension, or "index" when it has none.
        dim_name = ldf.columns.name if ldf.columns.name is not None else "index"
        # TODO: need to auto-detect the data type/model of the dimension column
        for position in range(len(ldf)):
            row = ldf.iloc[position, ]
            row_frame = row.reset_index()
            measure = lux.Clause(row.name, data_model="measure", aggregation=None)
            row_visualizations.append(Vis([dim_name, measure], row_frame))

    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated data
    recommendation["collection"] = VisList(row_visualizations)
    return recommendation
Beispiel #25
0
def test_colored_bar_chart():
    """A bar chart colored by cylinders on the SQL table produces three columns and fifteen rows."""
    from lux.vis.Vis import Vis
    from lux.vis.Vis import Clause

    cars_table = lux.LuxSQLTable()
    cars_table.set_SQL_table("cars")

    intent = [
        Clause(attribute="milespergal", channel="x"),
        Clause(attribute="origin", channel="y"),
        Clause(attribute="cylinders", channel="color"),
    ]
    colored_vis = Vis(intent, cars_table)

    # make sure dimension of the data is correct
    n_colors = len(cars_table.unique_values["cylinders"])
    n_groups = len(cars_table.unique_values["origin"])
    assert len(colored_vis.data.columns) == 3
    # The three checks below are the links of the original chained comparison.
    assert len(colored_vis.data) == 15
    assert 15 > n_groups
    assert n_groups < n_colors * n_groups
Beispiel #26
0
def test_filter_aggregation_fillzero_aligned():
    """Compare the Vis's per-cylinder MilesPerGal for Origin=Japan with a direct pandas group-by.

    Cylinder counts 5 and 8 are expected to be present with a value of 0;
    counts 3, 4 and 6 are expected to equal the pandas group mean.
    """
    df = pd.read_csv("lux/data/car.csv")
    intent = [
        lux.Clause(attribute="Cylinders"),
        lux.Clause(attribute="MilesPerGal"),
        lux.Clause("Origin=Japan"),
    ]
    vis = Vis(intent, df)
    result = vis.data
    # Select the column before aggregating: averaging the whole frame first
    # raises on non-numeric columns with pandas >= 2.0 and does needless work.
    externalValidation = df[df["Origin"] == "Japan"].groupby("Cylinders")["MilesPerGal"].mean()
    for cylinders in (5, 8):
        assert result[result["Cylinders"] == cylinders]["MilesPerGal"].values[0] == 0
    for cylinders in (3, 4, 6):
        observed = result[result["Cylinders"] == cylinders]["MilesPerGal"].values[0]
        assert observed == externalValidation[cylinders]
Beispiel #27
0
def test_vis_private_properties(global_var):
    """The data, code, min_max and mark properties of a Vis are readable but not assignable."""
    from lux.vis.Vis import Vis

    # Python < 3.11 reports "can't set attribute"; Python 3.11+ reports
    # "property '...' of '...' object has no setter". Accept either wording.
    read_only_error = r"can't set attribute|has no setter"

    df = pytest.car_df
    vis = Vis(["Horsepower", "Weight"], df)
    vis._ipython_display_()
    assert isinstance(vis.data, lux.core.frame.LuxDataFrame)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.data = "some val"

    assert isinstance(vis.code, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.code = "some val"

    assert isinstance(vis.min_max, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.min_max = "some val"

    assert vis.mark == "scatter"
    with pytest.raises(AttributeError, match=read_only_error):
        vis.mark = "some val"
Beispiel #28
0
def column_group(ldf):
    """
    Build the "Column Groups" recommendation for a dataframe.

    When the index has a single level, one Vis is created for every column
    whose dtype is not "object" (and which is not called "index"), pairing the
    index column with that column. All Vis share one reset-index copy of the
    frame as their source.

    Parameters
    ----------
    ldf : dataframe
        Frame whose columns are visualized against its index.

    Returns
    -------
    dict
        Recommendation with "action", "description" and "collection"
        (a VisList) entries.
    """
    recommendation = {
        "action":
        "Column Groups",
        "description":
        "Shows charts of possible visualizations with respect to the column-wise index.",
    }
    collection = []
    # NOTE: ldf_flat is the same object as ldf at this point, so relabelling a
    # DatetimeIndex below also relabels the caller's frame.
    ldf_flat = ldf
    if isinstance(ldf.columns, pd.DatetimeIndex):
        ldf_flat.columns = ldf_flat.columns.format()

    # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
    ldf_flat = ldf_flat.reset_index()
    if ldf.index.nlevels == 1:
        index_column_name = ldf.index.name if ldf.index.name else "index"
        # The columns were already relabelled above if they were a DatetimeIndex,
        # so no second conversion is needed here.
        for attribute in ldf.columns:
            if ldf[attribute].dtype != "object" and (attribute != "index"):
                vis = Vis([
                    lux.Clause(
                        attribute=index_column_name,
                        data_type="nominal",
                        data_model="dimension",
                        aggregation=None,
                    ),
                    lux.Clause(
                        attribute=str(attribute),
                        data_type="quantitative",
                        data_model="measure",
                        aggregation=None,
                    ),
                ])
                collection.append(vis)
    vlst = VisList(collection, ldf_flat)
    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated ldf

    recommendation["collection"] = vlst
    return recommendation
Beispiel #29
0
def test_colored_bar_chart(global_var):
    """A bar chart colored by Cylinders produces three columns and fifteen rows."""
    from lux.vis.Vis import Vis
    from lux.vis.Vis import Clause

    car_df = pytest.car_df

    intent = [
        Clause(attribute="MilesPerGal", channel="x"),
        Clause(attribute="Origin", channel="y"),
        Clause(attribute="Cylinders", channel="color"),
    ]
    colored_vis = Vis(intent, car_df)

    # make sure dimension of the data is correct
    n_colors = len(car_df.unique_values["Cylinders"])
    n_groups = len(car_df.unique_values["Origin"])
    assert len(colored_vis.data.columns) == 3
    # The three checks below are the links of the original chained comparison.
    assert len(colored_vis.data) == 15
    assert 15 > n_groups
    assert n_groups < n_colors * n_groups
Beispiel #30
0
def column_group(ldf):
	"""
	Build the "Column Groups" recommendation for a dataframe.

	When the index has a single level, one Vis is created per column, pairing
	the index column with that column. All Vis share one reset-index copy of
	the frame as their source.

	Parameters
	----------
	ldf : dataframe
		Frame whose columns are visualized against its index.

	Returns
	-------
	dict
		Recommendation with "action", "description" and "collection"
		(a VisList) entries.
	"""
	recommendation = {"action":"Column Groups",
					"description":"Shows charts of possible visualizations with respect to the column-wise index."}
	collection = []
	# NOTE: ldf_flat is the same object as ldf at this point, so relabelling a
	# DatetimeIndex below also relabels the caller's frame.
	ldf_flat = ldf
	if isinstance(ldf.columns,pd.DatetimeIndex):
		ldf_flat.columns = ldf_flat.columns.format()
	ldf_flat = ldf_flat.reset_index() #use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
	if (ldf.index.nlevels==1):
		# reset_index() names the column of an unnamed index "index", so fall back to that name.
		index_column_name = ldf.index.name if ldf.index.name else "index"
		# The columns were already relabelled above if they were a DatetimeIndex,
		# so no second conversion is needed here.
		for attribute in ldf.columns:
			vis = Vis([index_column_name,lux.Clause(str(attribute),aggregation=None)],ldf_flat)
			collection.append(vis)
	vlst = VisList(collection)
	# Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated ldf

	recommendation["collection"] = vlst
	return recommendation