コード例 #1
0
def test_filter_aggregation_fillzero_aligned(global_var):
    """Check that the aggregated vis data reports 0 for cylinder groups 5 and 8 under origin=Japan."""
    sql_table = lux.LuxSQLTable()
    sql_table.set_SQL_table("cars")

    vis = Vis(
        [
            lux.Clause(attribute="cylinders"),
            lux.Clause(attribute="milespergal"),
            lux.Clause("origin=Japan"),
        ],
        sql_table,
    )
    aggregated = vis.data
    # Both cylinder groups are expected to be present in the result with a value of 0
    for cylinders in (5, 8):
        group_rows = aggregated[aggregated["cylinders"] == cylinders]
        assert group_rows["milespergal"].values[0] == 0
コード例 #2
0
def test_vis_collection_via_list_of_vis(global_var):
    """A VisList built from five source-less Vis objects plus a dataframe has length five."""
    from lux.vis.VisList import VisList
    from lux.vis.Vis import Vis

    olympic = pytest.olympic
    # Convert the "Year" column to a pandas datetime dtype
    olympic["Year"] = pd.to_datetime(olympic["Year"], format="%Y")

    paired_attributes = ["Sport", "Year", "Height", "HostRegion", "SportType"]
    # Each Vis pairs "Weight" with one other attribute and is created without a data source
    standalone = [Vis([lux.Clause("Weight"), lux.Clause(name)]) for name in paired_attributes]
    collection = VisList(standalone, olympic)
    assert len(collection) == 5
コード例 #3
0
def test_lazy_execution(global_var):
    """Vis.data is None until SQLExecutor.execute is called, then holds a LuxDataFrame."""
    sql_table = lux.LuxSQLTable()
    sql_table.set_SQL_table("cars")

    vis = Vis(
        [
            lux.Clause(attribute="horsepower", aggregation="mean"),
            lux.Clause(attribute="origin"),
        ]
    )
    # The Vis was created without a source, so its data field must still be empty
    assert vis.data is None
    SQLExecutor.execute([vis], sql_table)
    assert type(vis.data) == lux.core.frame.LuxDataFrame
コード例 #4
0
ファイル: test_nan.py プロジェクト: siwefe/lux
def test_apply_nan_filter():
    """Filtering on ``some_nan2=nan`` should still produce a bar chart for ``some_nan``."""
    import numpy as np

    from lux.vis.Vis import Vis

    nan = np.nan
    # Column-wise layout of the ten-row fixture: one all-NaN column and two partially-NaN columns
    frame = pd.DataFrame(
        {
            "fully_nan": [nan] * 10,
            "some_nan": [3.0, 15.0, nan, 7.0, 2.0, 3.0, 1.0, 1.0, 2.0, 11.0],
            "some_nan2": [nan, 3.0, 3.0, 0.0, 2.0, nan, 1.0, 1.0, 0.0, 0.0],
        }
    )

    vis = Vis(["some_nan", "some_nan2=nan"], frame)
    vis._ipython_display_()
    assert vis.mark == "bar"
コード例 #5
0
    def execute_scatter(view: Vis, tbl: LuxSQLTable):
        """
        Given a scatterplot vis and a LuxSQLTable, fetch the data required to render the vis.
        1) Generate WHERE clause for the SQL query
        2) Count the rows matched by the WHERE clause
        3) If the count exceeds lux.config.sampling_cap, query a random sample of 10000 rows
        4) Otherwise query every matching row
        5) Store the result on the vis as a Lux dataframe

        Parameters
        ----------
        view: lux.Vis
            Scatterplot vis whose data should be populated.
        tbl : lux.LuxSQLTable
            SQL table to query; only its ``table_name`` is read here.

        Returns
        -------
        None
            The fetched data is written to ``view._vis_data``.
        """

        # Every attribute named in the intent is selected, except "Record"
        # (presumably a synthetic placeholder rather than a table column -- TODO confirm).
        attributes = {
            clause.attribute
            for clause in view._inferred_intent
            if clause.attribute and clause.attribute != "Record"
        }
        where_clause, filterVars = SQLExecutor.execute_filter(view)

        def add_quotes(var_name):
            return '"' + var_name + '"'

        # Columns used by the filters are selected alongside the plotted attributes
        required_variables = ",".join(map(add_quotes, attributes | set(filterVars)))

        # A single COUNT query decides whether sampling is needed (the previous
        # implementation issued the same count twice and discarded the first result).
        row_count = list(
            pandas.read_sql(
                f"SELECT COUNT(*) FROM {tbl.table_name} {where_clause}",
                lux.config.SQLconnection,
            )["count"]
        )[0]
        if row_count > lux.config.sampling_cap:
            query = f"SELECT {required_variables} FROM {tbl.table_name} {where_clause} ORDER BY random() LIMIT 10000"
        else:
            query = "SELECT {} FROM {} {}".format(required_variables, tbl.table_name, where_clause)
        data = pandas.read_sql(query, lux.config.SQLconnection)
        view._vis_data = utils.pandas_to_lux(data)
コード例 #6
0
def test_autoencoding_line_chart(global_var):
    """Check x/y channel placement of Year and Acceleration with none, one, or both channels given."""
    cars = pytest.car_df
    # Convert the "Year" column to a pandas datetime dtype
    cars["Year"] = pd.to_datetime(cars["Year"], format="%Y")

    # No channel specified
    free_intent = [lux.Clause(attribute="Year"), lux.Clause(attribute="Acceleration")]
    vis = Vis(free_intent, cars)
    check_attribute_on_channel(vis, "Year", "x")
    check_attribute_on_channel(vis, "Acceleration", "y")

    # Partial channel specified
    partial_intent = [
        lux.Clause(attribute="Year", channel="y"),
        lux.Clause(attribute="Acceleration"),
    ]
    vis = Vis(partial_intent, cars)
    check_attribute_on_channel(vis, "Year", "y")
    check_attribute_on_channel(vis, "Acceleration", "x")

    # Full channel specified
    full_intent = [
        lux.Clause(attribute="Year", channel="y"),
        lux.Clause(attribute="Acceleration", channel="x"),
    ]
    vis = Vis(full_intent, cars)
    check_attribute_on_channel(vis, "Year", "y")
    check_attribute_on_channel(vis, "Acceleration", "x")

    # Two attributes competing for the same channel must be rejected
    conflicting_intent = [
        lux.Clause(attribute="Year", channel="x"),
        lux.Clause(attribute="Acceleration", channel="x"),
    ]
    with pytest.raises(ValueError):
        cars.set_intent(conflicting_intent)
コード例 #7
0
ファイル: test_dates.py プロジェクト: piyushg9794/lux
def test_refresh_inplace():
    """After converting "date" to datetime and refreshing the source, the vis is a line chart of value over date."""
    from lux.vis.Vis import Vis

    raw_dates = ["2020-01-01", "2020-02-01", "2020-03-01", "2020-04-01"]
    readings = [10.5, 15.2, 20.3, 25.2]
    df = pd.DataFrame({"date": raw_dates, "value": readings})

    # Displaying the frame with string dates should warn and still classify "date" as temporal
    temporal_hint = "Lux detects that the attribute 'date' may be temporal."
    with pytest.warns(UserWarning, match=temporal_hint):
        df._repr_html_()
    assert df.data_type_lookup["date"] == "temporal"

    vis = Vis(["date", "value"], df)

    # Convert the column in place and recompute the dataframe metadata
    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
    df.maintain_metadata()
    assert df.data_type["temporal"][0] == "date"

    vis.refresh_source(df)
    assert vis.mark == "line"
    x_clause = vis.get_attr_by_channel("x")[0]
    assert x_clause.attribute == "date"
    y_clause = vis.get_attr_by_channel("y")[0]
    assert y_clause.attribute == "value"
コード例 #8
0
def test_autoencoding_color_line_chart():
    """Year, Acceleration and Origin should land on the x, y and color channels respectively."""
    cars = pd.read_csv("lux/data/car.csv")
    # Convert the "Year" column to a pandas datetime dtype
    cars["Year"] = pd.to_datetime(cars["Year"], format="%Y")

    vis = Vis(
        [
            lux.Clause(attribute="Year"),
            lux.Clause(attribute="Acceleration"),
            lux.Clause(attribute="Origin"),
        ],
        cars,
    )
    expected_channels = (("Year", "x"), ("Acceleration", "y"), ("Origin", "color"))
    for attribute, channel in expected_channels:
        check_attribute_on_channel(vis, attribute, channel)
コード例 #9
0
ファイル: test_vis.py プロジェクト: piyushg9794/lux
def test_vis_list_custom_title_override():
    """Each Vis in a VisList keeps the title it was constructed with."""
    olympic = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
    olympic["Year"] = pd.to_datetime(olympic["Year"], format="%Y")

    custom_title = "overriding dummy title"
    paired_attributes = ["Sport", "Year", "Height", "HostRegion", "SportType"]
    titled = [
        Vis([lux.Clause("Weight"), lux.Clause(name)], title=custom_title)
        for name in paired_attributes
    ]
    for vis in VisList(titled, olympic):
        assert vis.title == custom_title
コード例 #10
0
ファイル: test_vis.py プロジェクト: oddyolau/lux
def test_vis_list_custom_title_override(global_var):
    """Each Vis in a VisList keeps the title it was constructed with."""
    olympic = pytest.olympic
    olympic["Year"] = pd.to_datetime(olympic["Year"], format="%Y")

    custom_title = "overriding dummy title"
    paired_attributes = ["Sport", "Year", "Height", "HostRegion", "SportType"]
    titled = [
        Vis([lux.Clause("Weight"), lux.Clause(name)], title=custom_title)
        for name in paired_attributes
    ]
    for vis in VisList(titled, olympic):
        assert vis.title == custom_title
コード例 #11
0
ファイル: PandasExecutor.py プロジェクト: whmz/lux
    def execute_filter(vis: Vis):
        """
        Apply every filter clause found in the vis intent to the vis data.

        Each filter is applied in turn via PandasExecutor.apply_filter and the
        result is stored back on ``vis._vis_data``.

        Parameters
        ----------
        vis : lux.Vis
            Vis whose ``data`` is already populated.

        Returns
        -------
        bool
            True if the intent contained at least one filter, False otherwise.
        """
        assert (
            vis.data is not None
        ), "execute_filter assumes input vis.data is populated (if not, populate with LuxDataFrame values)"
        filter_specs = utils.get_filter_specs(vis._inferred_intent)
        if not filter_specs:
            return False

        # TODO: Need to handle OR logic
        for spec in filter_specs:
            vis._vis_data = PandasExecutor.apply_filter(
                vis.data, spec.attribute, spec.filter_op, spec.value
            )
        return True
コード例 #12
0
def test_autoencoding_color_scatter_chart(global_var):
    """Check which attribute lands on the color channel, with and without an explicit channel."""
    cars = pytest.car_df
    # Convert the "Year" column to a pandas datetime dtype
    cars["Year"] = pd.to_datetime(cars["Year"], format="%Y")

    # No channel specified: Origin is expected on color
    unpinned_intent = [
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration"),
        lux.Clause(attribute="Origin"),
    ]
    vis = Vis(unpinned_intent, cars)
    check_attribute_on_channel(vis, "Origin", "color")

    # Acceleration explicitly pinned to color
    pinned_intent = [
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration", channel="color"),
        lux.Clause(attribute="Origin"),
    ]
    vis = Vis(pinned_intent, cars)
    check_attribute_on_channel(vis, "Acceleration", "color")
コード例 #13
0
def test_autoencoding_color_line_chart(global_var):
    """With the Pandas executor, Year/Acceleration/Origin should land on x/y/color."""
    lux.config.set_executor_type("Pandas")
    cars = pytest.car_df
    # Convert the "Year" column to a pandas datetime dtype
    cars["Year"] = pd.to_datetime(cars["Year"], format="%Y")

    vis = Vis(
        [
            lux.Clause(attribute="Year"),
            lux.Clause(attribute="Acceleration"),
            lux.Clause(attribute="Origin"),
        ],
        cars,
    )
    expected_channels = (("Year", "x"), ("Acceleration", "y"), ("Origin", "color"))
    for attribute, channel in expected_channels:
        check_attribute_on_channel(vis, attribute, channel)
コード例 #14
0
ファイル: test_vis.py プロジェクト: piyushg9794/lux
def test_vis_collection_via_list_of_vis():
    """A VisList built from five source-less Vis objects plus a dataframe has length five."""
    from lux.vis.VisList import VisList
    from lux.vis.Vis import Vis

    url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
    olympic = pd.read_csv(url)
    # Convert the "Year" column to a pandas datetime dtype
    olympic["Year"] = pd.to_datetime(olympic["Year"], format="%Y")

    paired_attributes = ["Sport", "Year", "Height", "HostRegion", "SportType"]
    # Each Vis pairs "Weight" with one other attribute and is created without a data source
    standalone = [Vis([lux.Clause("Weight"), lux.Clause(name)]) for name in paired_attributes]
    collection = VisList(standalone, olympic)
    assert len(collection) == 5
コード例 #15
0
def test_multi_vis():
    """Intents that expand to more than one visualization must raise SyntaxError when rendered as a Vis."""
    df = pd.read_csv("lux/data/college.csv")
    expected_message = "The intent that you specified corresponds to more than one visualization."
    ambiguous_intents = [
        # wildcard filter value
        ["SATAverage", "AverageCost", "Geography=?"],
        # wildcard attribute
        ["SATAverage", "?"],
        # multiple filter values
        ["SATAverage", "AverageCost", "Region=New England|Southeast"],
    ]
    for intent in ambiguous_intents:
        with pytest.raises(SyntaxError, match=expected_message):
            Vis(intent, df)._repr_html_()
コード例 #16
0
def test_autoencoding_line_chart(global_var):
    """SQL-table variant: check x/y placement of year and acceleration with none, one, or both channels given."""
    # test for sql executor
    cars_table = lux.LuxSQLTable(table_name="cars")

    # No channel specified
    free_intent = [lux.Clause(attribute="year"), lux.Clause(attribute="acceleration")]
    vis = Vis(free_intent, cars_table)
    check_attribute_on_channel(vis, "year", "x")
    check_attribute_on_channel(vis, "acceleration", "y")

    # Partial channel specified
    partial_intent = [
        lux.Clause(attribute="year", channel="y"),
        lux.Clause(attribute="acceleration"),
    ]
    vis = Vis(partial_intent, cars_table)
    check_attribute_on_channel(vis, "year", "y")
    check_attribute_on_channel(vis, "acceleration", "x")

    # Full channel specified
    full_intent = [
        lux.Clause(attribute="year", channel="y"),
        lux.Clause(attribute="acceleration", channel="x"),
    ]
    vis = Vis(full_intent, cars_table)
    check_attribute_on_channel(vis, "year", "y")
    check_attribute_on_channel(vis, "acceleration", "x")

    # Two attributes competing for the same channel must be rejected
    conflicting_intent = [
        lux.Clause(attribute="year", channel="x"),
        lux.Clause(attribute="acceleration", channel="x"),
    ]
    with pytest.raises(ValueError):
        cars_table.set_intent(conflicting_intent)
コード例 #17
0
def test_sort_bar():
    """Bar charts of Acceleration: no sort when grouped by Origin, ascending sort when grouped by Name."""
    from lux.processor.Compiler import Compiler
    from lux.vis.Vis import Vis

    # (dimension attribute, sort expected on its clause)
    cases = (("Origin", ""), ("Name", "ascending"))
    for dimension, expected_sort in cases:
        # The CSV is re-read for every case so each Vis gets a fresh dataframe
        df = pd.read_csv("lux/data/car.csv")
        measure_clause = lux.Clause(
            attribute="Acceleration", data_model="measure", data_type="quantitative"
        )
        dimension_clause = lux.Clause(
            attribute=dimension, data_model="dimension", data_type="nominal"
        )
        vis = Vis([measure_clause, dimension_clause], df)
        assert vis.mark == "bar"
        assert vis._inferred_intent[1].sort == expected_sort
コード例 #18
0
def test_autoencoding_color_scatter_chart():
    """Check which attribute lands on the color channel, with and without an explicit channel."""
    cars = pd.read_csv("lux/data/car.csv")
    # Convert the "Year" column to a pandas datetime dtype
    cars["Year"] = pd.to_datetime(cars["Year"], format="%Y")

    # No channel specified: Origin is expected on color
    unpinned_intent = [
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration"),
        lux.Clause(attribute="Origin"),
    ]
    vis = Vis(unpinned_intent, cars)
    check_attribute_on_channel(vis, "Origin", "color")

    # Acceleration explicitly pinned to color
    pinned_intent = [
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration", channel="color"),
        lux.Clause(attribute="Origin"),
    ]
    vis = Vis(pinned_intent, cars)
    check_attribute_on_channel(vis, "Acceleration", "color")
コード例 #19
0
def test_lazy_execution():
    """Vis.data is None until SQLExecutor.execute is called, then holds a LuxDataFrame."""
    dsn = "host=localhost dbname=postgres user=postgres password=lux"
    connection = psycopg2.connect(dsn)
    car_table = lux.LuxSQLTable()
    lux.config.set_SQL_connection(connection)
    car_table.set_SQL_table("car")

    vis = Vis(
        [
            lux.Clause(attribute="Horsepower", aggregation="mean"),
            lux.Clause(attribute="Origin"),
        ]
    )
    # The Vis was created without a source, so its data field must still be empty
    assert vis.data is None
    SQLExecutor.execute([vis], car_table)
    assert type(vis.data) == lux.core.frame.LuxDataFrame
コード例 #20
0
def test_filter_aggregation_fillzero_aligned():
    """Check that the aggregated vis data reports 0 for cylinder groups 5 and 8 under Origin=Japan."""
    dsn = "host=localhost dbname=postgres user=postgres password=lux"
    connection = psycopg2.connect(dsn)
    car_table = lux.LuxSQLTable()
    lux.config.set_SQL_connection(connection)
    car_table.set_SQL_table("car")

    vis = Vis(
        [
            lux.Clause(attribute="Cylinders"),
            lux.Clause(attribute="MilesPerGal"),
            lux.Clause("Origin=Japan"),
        ],
        car_table,
    )
    aggregated = vis.data
    # Both cylinder groups are expected to be present in the result with a value of 0
    for cylinders in (5, 8):
        group_rows = aggregated[aggregated["Cylinders"] == cylinders]
        assert group_rows["MilesPerGal"].values[0] == 0
コード例 #21
0
def test_colored_bar_chart():
    """A bar chart colored by Cylinders yields a 3-column, 15-row data table."""
    from lux.vis.Vis import Vis
    from lux.vis.Vis import Clause

    df = pd.read_csv("lux/data/car.csv")

    clauses = [
        Clause(attribute="MilesPerGal", channel="x"),
        Clause(attribute="Origin", channel="y"),
        Clause(attribute="Cylinders", channel="color"),
    ]
    new_vis = Vis(clauses, df)

    # Make sure the dimensions of the data are correct
    color_cardinality = len(df.unique_values["Cylinders"])
    group_by_cardinality = len(df.unique_values["Origin"])
    assert len(new_vis.data.columns) == 3

    # Same three checks as the chained comparison `len == 15 > group < color * group`
    row_count = len(new_vis.data)
    assert row_count == 15
    assert 15 > group_by_cardinality
    assert group_by_cardinality < color_cardinality * group_by_cardinality
コード例 #22
0
def test_vis_private_properties():
    """Vis exposes data, code, min_max and mark for reading but rejects assignment to them."""
    from lux.vis.Vis import Vis

    df = pd.read_csv("lux/data/car.csv")
    vis = Vis(["Horsepower", "Weight"], df)
    vis._repr_html_()

    # The AttributeError text for a setter-less property depends on the interpreter:
    # CPython <= 3.10 says "can't set attribute", 3.11+ says "property ... has no setter".
    # Accept either so the test does not fail purely because of the Python version.
    read_only_error = "can't set attribute|has no setter"

    assert isinstance(vis.data, lux.core.frame.LuxDataFrame)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.data = "some val"

    assert isinstance(vis.code, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.code = "some val"

    assert isinstance(vis.min_max, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.min_max = "some val"

    assert vis.mark == "scatter"
    with pytest.raises(AttributeError, match=read_only_error):
        vis.mark = "some val"
コード例 #23
0
def test_filter_aggregation_fillzero_aligned(global_var):
    """Filtered aggregation reports 0 for cylinder groups 5 and 8 and matches a pandas groupby mean for 3, 4 and 6."""
    df = pytest.car_df
    intent = [
        lux.Clause(attribute="Cylinders"),
        lux.Clause(attribute="MilesPerGal"),
        lux.Clause("Origin=Japan"),
    ]
    vis = Vis(intent, df)
    result = vis.data

    # Select the measure BEFORE aggregating: calling .mean() on the whole grouped frame
    # also averages every other column, and raises on non-numeric columns in pandas >= 2.0.
    japanese_cars = df[df["Origin"] == "Japan"]
    externalValidation = japanese_cars.groupby("Cylinders")["MilesPerGal"].mean()

    # Cylinder groups expected to be reported with a value of 0
    for cylinders in (5, 8):
        assert result[result["Cylinders"] == cylinders]["MilesPerGal"].values[0] == 0

    # Remaining groups must match the independently computed group means
    for cylinders in (3, 4, 6):
        observed = result[result["Cylinders"] == cylinders]["MilesPerGal"].values[0]
        assert observed == externalValidation[cylinders]
コード例 #24
0
def row_group(ldf):
    """
    Build the "Row Groups" recommendation: one Vis per row of ``ldf``.

    Each row is turned into its own small dataframe via ``reset_index()`` and
    plotted as the row's values (treated as a measure, no aggregation) against
    a dimension named after ``ldf.columns.name``, or "index" when that is unset.
    Rows are only processed when the dataframe index has a single level.

    Parameters
    ----------
    ldf : dataframe
        Dataframe whose rows are visualized.

    Returns
    -------
    dict
        Recommendation with "action", "description", "long_description" and
        "collection" (a VisList) entries.
    """
    recommendation = {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
        "long_description": 'A row index can be thought of as an extra row that indicates the values that the user is interested in. \
            Lux focuses on visualizing named dataframe indices, i.e., indices with a non-null name property, as a proxy of the attribute \
                that the user is interested in or have operated on (e.g., group-by attribute). In particular, dataframes with named indices \
                    are often pre-aggregated, so Lux visualizes exactly the values that the dataframe portrays. \
                        <a href="https://lux-api.readthedocs.io/en/latest/source/advanced/indexgroup.html" target="_blank">More details</a>',
    }
    collection = []

    if ldf.index.nlevels == 1:
        dim_name = "index" if ldf.columns.name is None else ldf.columns.name
        # TODO: auto-detect the metadata of the per-row frame (e.g. nominal "index",
        # temporal dimension for datetime columns) instead of relying on defaults.
        for position in range(len(ldf)):
            row = ldf.iloc[position, ]
            rowdf = row.reset_index()
            measure_clause = lux.Clause(row.name, data_model="measure", aggregation=None)
            collection.append(Vis([dim_name, measure_clause], rowdf))

    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated data
    recommendation["collection"] = VisList(collection)
    return recommendation
コード例 #25
0
def test_colored_bar_chart():
    """SQL-table variant: a bar chart colored by cylinders yields a 3-column, 15-row data table."""
    from lux.vis.Vis import Vis
    from lux.vis.Vis import Clause

    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    clauses = [
        Clause(attribute="milespergal", channel="x"),
        Clause(attribute="origin", channel="y"),
        Clause(attribute="cylinders", channel="color"),
    ]
    new_vis = Vis(clauses, tbl)

    # Make sure the dimensions of the data are correct
    color_cardinality = len(tbl.unique_values["cylinders"])
    group_by_cardinality = len(tbl.unique_values["origin"])
    assert len(new_vis.data.columns) == 3

    # Same three checks as the chained comparison `len == 15 > group < color * group`
    row_count = len(new_vis.data)
    assert row_count == 15
    assert 15 > group_by_cardinality
    assert group_by_cardinality < color_cardinality * group_by_cardinality
コード例 #26
0
def test_filter_aggregation_fillzero_aligned():
    """Filtered aggregation reports 0 for cylinder groups 5 and 8 and matches a pandas groupby mean for 3, 4 and 6."""
    df = pd.read_csv("lux/data/car.csv")
    intent = [
        lux.Clause(attribute="Cylinders"),
        lux.Clause(attribute="MilesPerGal"),
        lux.Clause("Origin=Japan"),
    ]
    vis = Vis(intent, df)
    result = vis.data

    # Select the measure BEFORE aggregating: calling .mean() on the whole grouped frame
    # also averages every other column, and raises on non-numeric columns in pandas >= 2.0.
    japanese_cars = df[df["Origin"] == "Japan"]
    externalValidation = japanese_cars.groupby("Cylinders")["MilesPerGal"].mean()

    # Cylinder groups expected to be reported with a value of 0
    for cylinders in (5, 8):
        assert result[result["Cylinders"] == cylinders]["MilesPerGal"].values[0] == 0

    # Remaining groups must match the independently computed group means
    for cylinders in (3, 4, 6):
        observed = result[result["Cylinders"] == cylinders]["MilesPerGal"].values[0]
        assert observed == externalValidation[cylinders]
コード例 #27
0
def test_vis_private_properties(global_var):
    """Vis exposes data, code, min_max and mark for reading but rejects assignment to them."""
    from lux.vis.Vis import Vis

    df = pytest.car_df
    vis = Vis(["Horsepower", "Weight"], df)
    vis._ipython_display_()

    # The AttributeError text for a setter-less property depends on the interpreter:
    # CPython <= 3.10 says "can't set attribute", 3.11+ says "property ... has no setter".
    # Accept either so the test does not fail purely because of the Python version.
    read_only_error = "can't set attribute|has no setter"

    assert isinstance(vis.data, lux.core.frame.LuxDataFrame)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.data = "some val"

    assert isinstance(vis.code, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.code = "some val"

    assert isinstance(vis.min_max, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.min_max = "some val"

    assert vis.mark == "scatter"
    with pytest.raises(AttributeError, match=read_only_error):
        vis.mark = "some val"
コード例 #28
0
def column_group(ldf):
    """
    Build the "Column Groups" recommendation: one Vis per non-object column of ``ldf``.

    Each Vis plots a column (as a quantitative measure, no aggregation) against
    the dataframe index (as a nominal dimension). All Vis objects share a single
    flattened copy of the dataframe produced by ``reset_index()``. Columns are
    only processed when the dataframe index has a single level.

    Parameters
    ----------
    ldf : dataframe
        Dataframe whose columns are visualized.

    Returns
    -------
    dict
        Recommendation with "action", "description" and "collection" (a VisList) entries.
    """
    recommendation = {
        "action": "Column Groups",
        "description": "Shows charts of possible visualizations with respect to the column-wise index.",
    }
    collection = []

    ldf_flat = ldf
    if isinstance(ldf.columns, pd.DatetimeIndex):
        # NOTE: ldf_flat is still the same object as ldf here, so this also relabels ldf's columns
        ldf_flat.columns = ldf_flat.columns.format()

    # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
    ldf_flat = ldf_flat.reset_index()

    if ldf.index.nlevels == 1:
        index_column_name = ldf.index.name or "index"
        if isinstance(ldf.columns, pd.DatetimeIndex):
            ldf.columns = ldf.columns.to_native_types()
        for attribute in ldf.columns:
            # Skip object-typed columns and any column literally named "index"
            if ldf[attribute].dtype == "object" or attribute == "index":
                continue
            index_clause = lux.Clause(
                attribute=index_column_name,
                data_type="nominal",
                data_model="dimension",
                aggregation=None,
            )
            value_clause = lux.Clause(
                attribute=str(attribute),
                data_type="quantitative",
                data_model="measure",
                aggregation=None,
            )
            collection.append(Vis([index_clause, value_clause]))

    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated ldf
    recommendation["collection"] = VisList(collection, ldf_flat)
    return recommendation
コード例 #29
0
def test_colored_bar_chart(global_var):
    """A bar chart colored by Cylinders yields a 3-column, 15-row data table."""
    from lux.vis.Vis import Vis
    from lux.vis.Vis import Clause

    df = pytest.car_df

    clauses = [
        Clause(attribute="MilesPerGal", channel="x"),
        Clause(attribute="Origin", channel="y"),
        Clause(attribute="Cylinders", channel="color"),
    ]
    new_vis = Vis(clauses, df)

    # Make sure the dimensions of the data are correct
    color_cardinality = len(df.unique_values["Cylinders"])
    group_by_cardinality = len(df.unique_values["Origin"])
    assert len(new_vis.data.columns) == 3

    # Same three checks as the chained comparison `len == 15 > group < color * group`
    row_count = len(new_vis.data)
    assert row_count == 15
    assert 15 > group_by_cardinality
    assert group_by_cardinality < color_cardinality * group_by_cardinality
コード例 #30
0
ファイル: column_group.py プロジェクト: rahmansunny071/lux
def column_group(ldf):
	"""
	Build the "Column Groups" recommendation: one Vis per column of ``ldf``.

	Each Vis plots a column (no aggregation) against the dataframe index, using a
	single flattened copy of the dataframe produced by ``reset_index()``. Columns
	are only processed when the dataframe index has a single level.

	Parameters
	----------
	ldf : dataframe
		Dataframe whose columns are visualized.

	Returns
	-------
	dict
		Recommendation with "action", "description" and "collection" (a VisList) entries.
	"""
	recommendation = {"action":"Column Groups",
					"description":"Shows charts of possible visualizations with respect to the column-wise index."}
	collection = []
	ldf_flat = ldf
	if isinstance(ldf.columns,pd.DatetimeIndex):
		# NOTE: ldf_flat is still the same object as ldf here, so this also relabels ldf's columns
		ldf_flat.columns = ldf_flat.columns.format()
	ldf_flat = ldf_flat.reset_index() #use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
	if (ldf.index.nlevels==1):
		# An unnamed index has name None, which is not a usable intent entry; reset_index()
		# stores such an index under the column name "index", so fall back to that.
		index_column_name = "index" if ldf.index.name is None else ldf.index.name
		if isinstance(ldf.columns,pd.DatetimeIndex):
			ldf.columns = ldf.columns.to_native_types()
		for attribute in ldf.columns:
			vis = Vis([index_column_name,lux.Clause(str(attribute),aggregation=None)],ldf_flat)
			collection.append(vis)
	vlst = VisList(collection)
	# Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated ldf
	
	recommendation["collection"] = vlst
	return recommendation