def test_binning():
    """The executed Horsepower view has one data row per bin.

    The expected row count is the ``bin_size`` of the first spec in the
    view's ``spec_lst`` whose ``bin_size`` is non-zero.
    """
    df = pd.read_csv("lux/data/car.csv")
    df.set_context([lux.Spec(attribute="Horsepower")])
    PandasExecutor.execute(df.current_view, df)

    view = df.current_view[0]
    binned_specs = [spec for spec in view.spec_lst if spec.bin_size != 0]
    expected_rows = binned_specs[0].bin_size
    assert len(view.data) == expected_rows
def custom(ldf):
    """
    Build the "Current Vis" recommendation from the dataframe's current visualizations.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame whose ``current_vis`` is executed, scored and sorted.

    Returns
    -------
    recommendations : Dict[str,obj]
        Dictionary with the keys ``"action"``, ``"description"`` and
        ``"collection"``; ``"collection"`` holds ``ldf.current_vis``.
    """
    recommendation = {
        "action": "Current Vis",
        "description": "Shows the list of visualizations generated based on user specified intent",
        "collection": ldf.current_vis,
    }

    current = ldf.current_vis
    PandasExecutor.execute(current, ldf)
    # Score every vis before sorting.
    for candidate in current:
        candidate.score = interestingness(candidate, ldf)
    # NOTE(review): a call to ldf.clear_intent() was disabled at this point in the original.
    current.sort(remove_invalid=True)
    return recommendation
def test_binning():
    """The executed Horsepower view has one data row per bin.

    The expected row count is the ``binSize`` of the first spec in the
    view's ``specLst`` whose ``binSize`` is non-zero.
    """
    df = pd.read_csv("lux/data/car.csv")
    df.setContext([lux.Spec(attribute="Horsepower")])
    PandasExecutor.execute(df.viewCollection, df)

    view = df.viewCollection[0]
    binned_specs = [spec for spec in view.specLst if spec.binSize != 0]
    expected_rows = binned_specs[0].binSize
    assert len(view.data) == expected_rows
def user_defined(ldf):
    '''
    Build the "Current Views" recommendation from the dataframe's current view collection.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame whose ``current_view`` is executed, scored and sorted.

    Returns
    -------
    recommendations : Dict[str,obj]
        Dictionary with the keys ``"action"``, ``"description"`` and
        ``"collection"``; ``"collection"`` holds ``ldf.current_view``.
    '''
    recommendation = {
        "action": "Current Views",
        "description": "Shows a view collection defined by the context",
        "collection": ldf.current_view,
    }

    views = ldf.current_view
    PandasExecutor.execute(views, ldf)
    # Score every view before sorting.
    for candidate in views:
        candidate.score = interestingness(candidate, ldf)
    # NOTE(review): a call to ldf.clear_context() was disabled at this point in the original.
    views.sort(remove_invalid=True)
    return recommendation
def load(self, ldf) -> View:
    """
    Materialize this view against a dataframe.

    The view's ``specLst`` is parsed and validated against ``ldf``, the view
    is compiled together with ``ldf.context``, and the compiled collection is
    executed with ``PandasExecutor``.

    Parameters
    ----------
    ldf : LuxDataframe
        Input Dataframe to be attached to the view

    Returns
    -------
    View
        First element of the compiled and executed collection.

    See Also
    --------
    lux.view.ViewCollection.load
    """
    from lux.compiler.Parser import Parser
    from lux.compiler.Validator import Validator
    from lux.compiler.Compiler import Compiler
    # TODO: temporary (generalize to executor)
    from lux.executor.PandasExecutor import PandasExecutor

    # TODO: handle case when user input vanilla Pandas dataframe
    parsed_specs = Parser.parse(self.specLst)
    self.specLst = parsed_specs
    Validator.validateSpec(self.specLst, ldf)

    compiled = Compiler.compile(
        ldf,
        ldf.context,
        [self],
        enumerateCollection=False,
    )
    PandasExecutor.execute(compiled, ldf)
    return compiled[0]
def __init__(self, *args, **kw):
    """Set up Lux bookkeeping state and delegate construction to the parent class.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor. The executor defaults to a ``PandasExecutor``.
    """
    from lux.executor.PandasExecutor import PandasExecutor

    # State assigned before the parent constructor runs.
    self._history = History()
    self._intent = []
    self._recommendation = {}
    self._saved_export = None
    self._current_vis = []
    self._prev = None

    super(LuxDataFrame, self).__init__(*args, **kw)

    # Executor configuration: Pandas by default, with no SQL connection or table.
    self.executor_type = "Pandas"
    self.executor = PandasExecutor()
    self.SQLconnection = ""
    self.table_name = ""

    # Display and messaging state.
    self._sampled = None
    self._toggle_pandas_display = True
    self._message = Message()
    self._pandas_only = False

    # Metadata, all unset at construction time.
    self.data_type_lookup = None
    self.data_type = None
    self.data_model_lookup = None
    self.data_model = None
    self.unique_values = None
    self.cardinality = None
    self._min_max = None
    self.pre_aggregated = None

    # Replaces the warnings module's formatter for the whole process.
    warnings.formatwarning = lux.warning_format
def test_binning():
    """The executed Horsepower vis has one data row per bin.

    The expected row count is the ``bin_size`` of the first clause in the
    vis's ``_inferred_intent`` whose ``bin_size`` is non-zero.
    """
    df = pd.read_csv("lux/data/car.csv")
    df.set_intent([lux.Clause(attribute="Horsepower")])
    PandasExecutor.execute(df.current_vis, df)

    vis = df.current_vis[0]
    binned_clauses = [clause for clause in vis._inferred_intent if clause.bin_size != 0]
    expected_rows = binned_clauses[0].bin_size
    assert len(vis.data) == expected_rows
def test_lazyExecution():
    """A view holds no data until the executor runs, then holds a LuxDataFrame."""
    df = pd.read_csv("lux/data/car.csv")
    df.setContext([
        lux.Spec(attribute="Horsepower", aggregation="mean"),
        lux.Spec(attribute="Origin"),
    ])
    # Check data field in view is empty before calling executor.
    # Identity comparison: `== None` on a dataframe is evaluated element-wise
    # and cannot be used as an assertion condition.
    assert df.viewCollection[0].data is None
    PandasExecutor.execute(df.viewCollection, df)
    # Exact type check kept from the original test.
    assert type(df.viewCollection[0].data) == lux.luxDataFrame.LuxDataframe.LuxDataFrame
def test_histogram_code_export(global_var):
    """Code exported for a Horsepower vis can be executed and used to build chart data."""
    df = pytest.car_df
    vis = Vis([lux.Clause("Horsepower")], df)
    PandasExecutor.execute([vis], df)
    code = vis.to_code("python")
    try:
        # The exported code is expected to define `create_chart_data` in globals().
        exec(code, globals())
        create_chart_data(df, vis)
    except Exception as exc:
        # Keep the underlying error attached instead of a bare `assert False`.
        raise AssertionError(f"exported code failed to run: {exc!r}") from exc
def test_color_barchart_code_export(global_var):
    """Code exported for an Origin/Cylinders vis can be executed and used to build chart data."""
    df = pytest.car_df
    vis = Vis([lux.Clause("Origin"), lux.Clause("Cylinders")], df)
    PandasExecutor.execute([vis], df)
    code = vis.to_code("python")
    try:
        # The exported code is expected to define `create_chart_data` in globals().
        exec(code, globals())
        create_chart_data(df, vis)
    except Exception as exc:
        # Keep the underlying error attached instead of a bare `assert False`.
        raise AssertionError(f"exported code failed to run: {exc!r}") from exc
def test_lazy_execution():
    """A vis holds no data until the executor runs, then holds a LuxDataFrame."""
    df = pd.read_csv("lux/data/car.csv")
    df.set_intent([
        lux.Clause(attribute="Horsepower", aggregation="mean"),
        lux.Clause(attribute="Origin"),
    ])
    # Check data field in vis is empty before calling executor.
    # Identity comparison: `== None` on a dataframe is evaluated element-wise
    # and cannot be used as an assertion condition.
    assert df.current_vis[0].data is None
    PandasExecutor.execute(df.current_vis, df)
    # Exact type check kept from the original test.
    assert type(df.current_vis[0].data) == lux.luxDataFrame.LuxDataframe.LuxDataFrame
def test_lazy_execution():
    """A Vis built from an intent alone has no data until it is executed."""
    df = pd.read_csv("lux/data/car.csv")
    mean_horsepower = lux.Clause(attribute="Horsepower", aggregation="mean")
    origin = lux.Clause(attribute="Origin")
    vis = Vis([mean_horsepower, origin])

    # Check data field in vis is empty before calling executor
    assert vis.data is None

    PandasExecutor.execute([vis], df)
    executed_type = type(vis.data)
    assert executed_type == lux.core.frame.LuxDataFrame
def test_period_filter():
    """The third clause of the third "Filter" recommendation carries a ``pd.Period`` value."""
    ldf = pd.read_csv("lux/data/car.csv")
    # Convert "Year" to datetime, then to an annual period.
    year_as_datetime = pd.to_datetime(ldf["Year"], format="%Y")
    ldf["Year"] = year_as_datetime
    ldf["Year"] = pd.DatetimeIndex(ldf["Year"]).to_period(freq="A")

    intent = [lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")]
    ldf.set_intent(intent)
    PandasExecutor.execute(ldf.current_vis, ldf)
    # NOTE(review): presumably this call is what populates ldf.recommendation - confirm.
    ldf._repr_html_()

    filter_value = ldf.recommendation["Filter"][2]._inferred_intent[2].value
    assert isinstance(filter_value, pd.Period)
def test_exclude_attribute():
    """Excluded attributes ("Name", "Year") never appear on the x or y channel."""
    df = pd.read_csv("lux/data/car.csv")
    df.setContext([lux.Spec("?", exclude=["Name", "Year"]), lux.Spec("Horsepower")])
    view = df.viewCollection[0]
    view.data = df
    PandasExecutor.executeFilter(view)
    for vc in df.viewCollection:
        # Check both excluded names on both channels; the original checked
        # y/"Year" twice and never checked y/"Name".
        for channel in ("x", "y"):
            attribute = vc.getAttrByChannel(channel)[0].attribute
            assert attribute != "Year"
            assert attribute != "Name"
def test_exclude_attribute():
    """Excluded attributes ("Name", "Year") never appear on the x or y channel."""
    df = pd.read_csv("lux/data/car.csv")
    df.set_intent([lux.Clause("?", exclude=["Name", "Year"]), lux.Clause("Horsepower")])
    vis = df.current_vis[0]
    vis.data = df
    PandasExecutor.execute_filter(vis)
    for vc in df.current_vis:
        # Check both excluded names on both channels; the original checked
        # y/"Year" twice and never checked y/"Name".
        for channel in ("x", "y"):
            attribute = vc.get_attr_by_channel(channel)[0].attribute
            assert attribute != "Year"
            assert attribute != "Name"
def test_filter():
    """``execute_filter`` keeps exactly the rows where Origin equals "USA"."""
    df = pd.read_csv("lux/data/car.csv")
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    usa_only = lux.Clause(attribute="Origin", filter_op="=", value="USA")
    intent = [lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Year"), usa_only]
    vis = Vis(intent, df)
    vis._vis_data = df
    PandasExecutor.execute_filter(vis)

    expected_rows = len(df[df["Origin"] == "USA"])
    assert len(vis.data) == expected_rows
def set_executor_type(self, exe):
    """
    Select the executor used by this dataframe.

    Parameters
    ----------
    exe : str
        ``"SQL"`` selects ``SQLExecutor`` (requires ``psycopg2``); any other
        value selects ``PandasExecutor``. The value is stored unchanged in
        ``self.executor_type``.

    Raises
    ------
    ImportError
        If ``exe`` is ``"SQL"`` and ``psycopg2`` cannot be found.
    """
    if exe == "SQL":
        # importlib.util.find_spec replaces the deprecated pkgutil.find_loader.
        import importlib.util

        if importlib.util.find_spec("psycopg2") is None:
            raise ImportError(
                "psycopg2 is not installed. Run `pip install psycopg2` to install psycopg2 to enable the Postgres connection."
            )
        # Imported eagerly, as before, so a broken installation fails here.
        import psycopg2  # noqa: F401
        from lux.executor.SQLExecutor import SQLExecutor

        # Store an instance, matching the Pandas branch below.
        self.executor = SQLExecutor()
    else:
        from lux.executor.PandasExecutor import PandasExecutor

        self.executor = PandasExecutor()
    self.executor_type = exe
def test_filter():
    """``execute_filter`` keeps exactly the rows where Origin equals "USA"."""
    df = pd.read_csv("lux/data/car.csv")
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format='%Y')

    usa_only = lux.Clause(attribute="Origin", filter_op="=", value="USA")
    df.set_intent([lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Year"), usa_only])

    vis = df.current_vis[0]
    vis.data = df
    PandasExecutor.execute_filter(vis)

    expected_rows = len(df[df["Origin"] == "USA"])
    assert len(vis.data) == expected_rows
def test_filter():
    """``executeFilter`` keeps exactly the rows where Origin equals "USA"."""
    df = pd.read_csv("lux/data/car.csv")
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format='%Y')

    usa_only = lux.Spec(attribute="Origin", filterOp="=", value="USA")
    df.setContext([lux.Spec(attribute="Horsepower"), lux.Spec(attribute="Year"), usa_only])

    view = df.viewCollection[0]
    view.data = df
    PandasExecutor.executeFilter(view)

    expected_rows = len(df[df["Origin"] == "USA"])
    assert len(view.data) == expected_rows
def test_period_to_altair():
    """Altair code exported for the third "Filter" recommendation mentions "Year = 1972"."""
    df = pd.read_csv("lux/data/car.csv")
    # Convert "Year" to datetime, then to an annual period.
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df["Year"] = pd.DatetimeIndex(df["Year"]).to_period(freq="A")
    df.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")])
    PandasExecutor.execute(df.current_vis, df)
    # NOTE(review): presumably this call is what populates df.recommendation - confirm.
    df._repr_html_()
    exported_code = df.recommendation["Filter"][2].to_Altair()
    assert "Year = 1972" in exported_code
def test_period_filter():
    """The third spec of the third 'Filter' recommendation carries a ``pd.Period`` value."""
    ldf = pd.read_csv("lux/data/car.csv")
    # Convert "Year" to datetime, then to an annual period.
    year_as_datetime = pd.to_datetime(ldf["Year"], format='%Y')
    ldf["Year"] = year_as_datetime
    ldf["Year"] = pd.DatetimeIndex(ldf["Year"]).to_period(freq='A')

    context = [lux.Spec(attribute="Acceleration"), lux.Spec(attribute="Horsepower")]
    ldf.set_context(context)
    PandasExecutor.execute(ldf.current_view, ldf)
    # NOTE(review): presumably this call is what populates ldf.recommendation - confirm.
    ldf.show_more()

    filter_value = ldf.recommendation['Filter'][2].spec_lst[2].value
    assert isinstance(filter_value, pd.Period)
def test_color_scatter_code_export(global_var):
    """Code exported for a Horsepower/Acceleration/Origin vis can be executed and used to build chart data."""
    df = pytest.car_df
    vis = Vis(
        [
            lux.Clause("Horsepower"),
            lux.Clause("Acceleration"),
            lux.Clause("Origin"),
        ],
        df,
    )
    PandasExecutor.execute([vis], df)
    code = vis.to_code("python")
    try:
        # The exported code is expected to define `create_chart_data` in globals().
        exec(code, globals())
        create_chart_data(df, vis)
    except Exception as exc:
        # Keep the underlying error attached instead of a bare `assert False`.
        raise AssertionError(f"exported code failed to run: {exc!r}") from exc
def __init__(self, *args, **kw):
    """Set up Lux bookkeeping state and delegate construction to the parent class.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor. ``lux.config.executor`` is set to a ``PandasExecutor``
    when ``lux.config.SQLconnection`` is the empty string, and to a
    ``SQLExecutor`` otherwise.
    """
    # State assigned before the parent constructor runs.
    self._history = History()
    self._intent = []
    self._inferred_intent = []
    self._recommendation = {}
    self._saved_export = None
    self._current_vis = []
    self._prev = None
    self._widget = None

    super(LuxDataFrame, self).__init__(*args, **kw)

    self.table_name = ""
    # The executor lives on the global lux.config, not on the instance.
    if lux.config.SQLconnection != "":
        from lux.executor.SQLExecutor import SQLExecutor

        lux.config.executor = SQLExecutor()
    else:
        from lux.executor.PandasExecutor import PandasExecutor

        lux.config.executor = PandasExecutor()

    # Display and messaging state.
    self._sampled = None
    self._toggle_pandas_display = True
    self._message = Message()
    self._pandas_only = False

    # Metadata
    self._data_type = {}
    self.unique_values = None
    self.cardinality = None
    self._min_max = None
    self.pre_aggregated = None
    self._type_override = {}

    # Replaces the warnings module's formatter for the whole process.
    warnings.formatwarning = lux.warning_format
def test_period_selection():
    """Every vis for a multi-attribute intent is executed into a LuxDataFrame.

    The third vis's data must have exactly the columns "Year" and "Acceleration".
    """
    ldf = pd.read_csv("lux/data/car.csv")
    # Convert "Year" to datetime, then to an annual period.
    year_as_datetime = pd.to_datetime(ldf["Year"], format="%Y")
    ldf["Year"] = year_as_datetime
    ldf["Year"] = pd.DatetimeIndex(ldf["Year"]).to_period(freq="A")

    measures = lux.Clause(attribute=["Horsepower", "Weight", "Acceleration"])
    ldf.set_intent([measures, lux.Clause(attribute="Year")])
    PandasExecutor.execute(ldf.current_vis, ldf)

    data_types = [type(vis.data) for vis in ldf.current_vis]
    assert all(kind == lux.core.frame.LuxDataFrame for kind in data_types)

    third_columns = ldf.current_vis[2].data.columns
    assert all(third_columns == ["Year", "Acceleration"])
def test_period_to_altair():
    """Altair code exported for the third 'Filter' recommendation mentions 'Year = 1971'."""
    df = pd.read_csv("lux/data/car.csv")
    # Convert "Year" to datetime, then to an annual period.
    df["Year"] = pd.to_datetime(df["Year"], format='%Y')
    df["Year"] = pd.DatetimeIndex(df["Year"]).to_period(freq='A')
    df.set_context([lux.Spec(attribute="Acceleration"), lux.Spec(attribute="Horsepower")])
    PandasExecutor.execute(df.current_view, df)
    # NOTE(review): presumably this call is what populates df.recommendation - confirm.
    df.show_more()
    exported_code = df.recommendation['Filter'][2].to_Altair()
    assert 'Year = 1971' in exported_code
def test_heatmap_code_export(global_var):
    """Code exported for a price/longitude vis can be executed and used to build chart data.

    ``lux.config._heatmap_start`` is lowered to 100 for the duration of the
    test and set back to 5000 afterwards, whether or not the test passes.
    """
    df = pd.read_csv(
        "https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/airbnb_nyc.csv"
    )
    lux.config._heatmap_start = 100
    try:
        vis = Vis(["price", "longitude"], df)
        PandasExecutor.execute([vis], df)
        code = vis.to_code("python")
        try:
            # The exported code is expected to define `create_chart_data` in globals().
            exec(code, globals())
            create_chart_data(df, vis)
        except Exception as exc:
            # Keep the underlying error attached instead of a bare `assert False`.
            raise AssertionError(f"exported code failed to run: {exc!r}") from exc
    finally:
        # Restore the setting even on failure so later tests are not affected.
        lux.config._heatmap_start = 5000
def test_selection():
    """Every view for a multi-attribute context is executed into a LuxDataFrame.

    The third view's data must have exactly the columns "Year" and 'Acceleration'.
    """
    df = pd.read_csv("lux/data/car.csv")
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format='%Y')

    measures = lux.Spec(attribute=["Horsepower", "Weight", "Acceleration"])
    df.setContext([measures, lux.Spec(attribute="Year")])
    PandasExecutor.execute(df.viewCollection, df)

    data_types = [type(view.data) for view in df.viewCollection]
    assert all(kind == lux.luxDataFrame.LuxDataframe.LuxDataFrame for kind in data_types)

    third_columns = df.viewCollection[2].data.columns
    assert all(third_columns == ["Year", 'Acceleration'])
def test_period_selection():
    """Every view for a multi-attribute context is executed into a LuxDataFrame.

    The third view's data must have exactly the columns "Year" and 'Acceleration'.
    """
    ldf = pd.read_csv("lux/data/car.csv")
    # Convert "Year" to datetime, then to an annual period.
    year_as_datetime = pd.to_datetime(ldf["Year"], format='%Y')
    ldf["Year"] = year_as_datetime
    ldf["Year"] = pd.DatetimeIndex(ldf["Year"]).to_period(freq='A')

    measures = lux.Spec(attribute=["Horsepower", "Weight", "Acceleration"])
    ldf.set_context([measures, lux.Spec(attribute="Year")])
    PandasExecutor.execute(ldf.current_view, ldf)

    data_types = [type(view.data) for view in ldf.current_view]
    assert all(kind == lux.luxDataFrame.LuxDataframe.LuxDataFrame for kind in data_types)

    third_columns = ldf.current_view[2].data.columns
    assert all(third_columns == ["Year", 'Acceleration'])
def test_filter_aggregation_fillzero_aligned():
    """Filtered aggregation reports 0 for some Cylinders groups and the pandas mean for the rest.

    With the filter ``Origin=Japan`` the executed view must report a
    MilesPerGal of 0 for Cylinders 5 and 8, and for Cylinders 3, 4 and 6 the
    value must equal the mean computed directly with pandas.
    """
    df = pd.read_csv("lux/data/car.csv")
    df.setContext([
        lux.Spec(attribute="Cylinders"),
        lux.Spec(attribute="MilesPerGal"),
        lux.Spec("Origin=Japan"),
    ])
    PandasExecutor.execute(df.viewCollection, df)
    result = df.viewCollection[0].data

    # Select the column before aggregating: averaging the whole frame also
    # tries to average string columns such as "Origin", which newer pandas
    # versions reject.
    japan_rows = df[df["Origin"] == "Japan"]
    externalValidation = japan_rows.groupby("Cylinders")["MilesPerGal"].mean()

    def mpg_for(cylinders):
        # MilesPerGal reported by the executed view for one Cylinders group.
        return result[result["Cylinders"] == cylinders]["MilesPerGal"].values[0]

    for cylinders in (5, 8):
        assert mpg_for(cylinders) == 0
    for cylinders in (3, 4, 6):
        assert mpg_for(cylinders) == externalValidation[cylinders]
def set_executor_type(self, exe):
    """
    Switch the executor used by this dataframe.

    Parameters
    ----------
    exe : str
        ``"SQL"`` installs a ``SQLExecutor``. ``"Pandas"`` installs a
        ``PandasExecutor`` and resets ``self.SQLconnection`` to ``""``.

    Raises
    ------
    ValueError
        If ``exe`` is neither ``"Pandas"`` nor ``"SQL"``.
    """
    if exe == "Pandas":
        from lux.executor.PandasExecutor import PandasExecutor

        self.SQLconnection = ""
        self.executor = PandasExecutor()
        return

    if exe == "SQL":
        from lux.executor.SQLExecutor import SQLExecutor

        self.executor = SQLExecutor()
        return

    raise ValueError("Executor type must be either 'Pandas' or 'SQL'")