def test_interestingness_deviation_nan():
    """Score two filtered visualizations over a frame whose dates include NaN.

    The frame has 15 rows: two distinct timestamps plus six rows whose
    ``date`` is NaN. The ``category=A`` vis is expected to score about 0.29,
    the ``category=B`` vis about 0.94, and A must score below B.
    """
    import numpy as np

    aug_stamp = "2017-08-25 09:06:11+00:00"
    jul_stamp = "2017-07-25 15:06:11+00:00"
    # (date, category, value) triples, in the row order the frame is built in.
    rows = [
        (aug_stamp, "A", 25.0),
        (aug_stamp, "B", 1.2),
        (aug_stamp, "C", 1.3),
        (aug_stamp, "D", 1.4),
        (aug_stamp, "E", 1.5),
        (aug_stamp, "F", 0.1),
        (np.nan, "C", 0.2),
        (np.nan, "B", 0.2),
        (np.nan, "F", 0.3),
        (np.nan, "E", 0.3),
        (np.nan, "D", 0.4),
        (np.nan, "A", 10.4),
        (jul_stamp, "A", 15.5),
        (jul_stamp, "F", 1.0),
        (jul_stamp, "B", 0.1),
    ]
    dataset = [
        {"date": date, "category": category, "value": value}
        for date, category, value in rows
    ]
    test = pd.DataFrame(dataset)

    # The lux imports stay after the frame is built, as in the original.
    from lux.vis.Vis import Vis

    vis = Vis(["date", "value", "category=A"], test)
    vis2 = Vis(["date", "value", "category=B"], test)

    from lux.interestingness.interestingness import interestingness

    smaller_diff_score = interestingness(vis, test)
    bigger_diff_score = interestingness(vis2, test)

    assert np.isclose(smaller_diff_score, 0.29, rtol=0.1)
    assert np.isclose(bigger_diff_score, 0.94, rtol=0.1)
    assert smaller_diff_score < bigger_diff_score
def test_interestingness_1_1_1(global_var):
    """Check Enhance ordering for a Horsepower intent filtered to Origin = USA.

    Among Enhance recommendations whose third inferred clause has value
    "USA", the one whose second clause is Cylinders must rank ahead of
    Weight, which must rank ahead of Horsepower. The top Enhance and Filter
    recommendations must both have a score.
    """
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Origin", filter_op="=", value="USA", bin_size=20),
    ])
    df._repr_html_()

    enhance_recs = df.recommendation["Enhance"]
    # Top recommended Enhance vis must have a score.
    assert interestingness(enhance_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank stays at -1 and
    # can still satisfy the ordering assertion below.
    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(enhance_recs):
        clauses = vis._inferred_intent
        if str(clauses[2].value) != "USA":
            continue
        second_attr = str(clauses[1].attribute)
        if second_attr == "Cylinders":
            rank1 = idx
        elif second_attr == "Weight":
            rank2 = idx
        elif second_attr == "Horsepower":
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # Top recommended Filter vis must have a score.
    assert interestingness(df.recommendation["Filter"][0], df) is not None
def test_interestingness_0_2_0(global_var):
    """Check Enhance ordering for a Horsepower + Acceleration intent.

    The scatter vis whose third inferred clause is Origin must rank ahead of
    the scatter vis whose third clause is Displacement. The top Enhance,
    Filter and Generalize recommendations must each have a score.
    """
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration"),
    ])
    df._repr_html_()

    enhance_recs = df.recommendation["Enhance"]
    # Top recommended Enhance vis must have a score.
    assert interestingness(enhance_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank1 stays at -1 and
    # still satisfies the ordering assertion below.
    rank1 = rank2 = -1
    for idx, vis in enumerate(enhance_recs):
        third_attr = str(vis._inferred_intent[2].attribute)
        if str(vis.mark) != "scatter":
            continue
        if third_attr == "Origin":
            rank1 = idx
        elif third_attr == "Displacement":
            rank2 = idx
    assert rank1 < rank2

    # Top recommended Filter vis must have a score.
    assert interestingness(df.recommendation["Filter"][0], df) is not None
    # Top recommended Generalize vis must have a score.
    assert interestingness(df.recommendation["Generalize"][0], df) is not None
    df.clear_intent()
def test_interestingness_1_1_0():
    """Check Filter ordering for a Horsepower + Year intent on the car CSV.

    Expected order among Filter recommendations: Cylinders value 6, then
    Origin "Europe", then Cylinders value 5. The top Enhance and Filter
    recommendations must both have a score.
    """
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Year"),
    ])
    df._repr_html_()

    # Top recommended Enhance vis must have a score (all graphs here have same score).
    assert interestingness(df.recommendation["Enhance"][0], df) is not None

    filter_recs = df.recommendation["Filter"]
    # Top recommended Filter vis must have a score.
    assert interestingness(filter_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank stays at -1 and
    # can still satisfy the ordering assertion below.
    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(filter_recs):
        if len(vis.get_attr_by_attr_name("Cylinders")) > 0:
            cylinders = int(vis._inferred_intent[2].value)
            if cylinders == 6:
                rank1 = idx
            if cylinders == 5:
                rank3 = idx
        if len(vis.get_attr_by_attr_name("Origin")) > 0:
            if str(vis._inferred_intent[2].value) == "Europe":
                rank2 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # NOTE(review): the original comment said "generalize", but the assertion
    # re-checks the "Filter" recommendation; kept as written.
    assert interestingness(df.recommendation["Filter"][0], df) is not None
def test_interestingness_1_1_1():
    """Check Enhance ordering for Horsepower filtered to Origin = USA (car CSV).

    Among Enhance recommendations whose third inferred clause has value
    "USA", the one whose second clause is Cylinders must rank ahead of
    Weight, which must rank ahead of Horsepower. The top Enhance and Filter
    recommendations must both have a score.
    """
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Origin", filter_op="=", value="USA", bin_size=20),
    ])
    df.show_more()

    enhance_recs = df.recommendation["Enhance"]
    # Top recommended Enhance vis must have a score.
    assert interestingness(enhance_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank stays at -1 and
    # can still satisfy the ordering assertion below.
    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(enhance_recs):
        clauses = vis._inferred_intent
        if str(clauses[2].value) != "USA":
            continue
        second_attr = str(clauses[1].attribute)
        if second_attr == "Cylinders":
            rank1 = idx
        elif second_attr == "Weight":
            rank2 = idx
        elif second_attr == "Horsepower":
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # Top recommended Filter vis must have a score.
    assert interestingness(df.recommendation["Filter"][0], df) is not None
def test_interestingness_0_1_0():
    """Check Enhance and Filter ordering for a Horsepower-only intent (car CSV).

    Enhance: scatter with Weight, then scatter with Acceleration, then the
    line chart whose first clause is Year.
    Filter: value 4, then "Europe", then a value containing "1971".
    """
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([lux.Clause(attribute="Horsepower")])
    df._repr_html_()

    enhance_recs = df.recommendation["Enhance"]
    # Top recommended Enhance vis must have a score.
    assert interestingness(enhance_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank stays at -1 and
    # can still satisfy the ordering assertions below.
    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(enhance_recs):
        if vis.mark == "scatter":
            second_attr = vis._inferred_intent[1].attribute
            if second_attr == "Weight":
                rank1 = idx
            if second_attr == "Acceleration":
                rank2 = idx
        if vis.mark == "line" and vis._inferred_intent[0].attribute == "Year":
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    filter_recs = df.recommendation["Filter"]
    # Top recommended Filter vis must have a score.
    assert interestingness(filter_recs[0], df) is not None

    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(filter_recs):
        filter_value = vis._inferred_intent[2].value
        if filter_value == 4:
            rank1 = idx
        if str(filter_value) == "Europe":
            rank2 = idx
        if "1971" in str(filter_value):
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3
def test_interestingness_1_1_0(global_var):
    """Check Filter ordering for a Horsepower + Year intent (Pandas executor).

    Expected order among Filter recommendations: Cylinders value 6, then
    Cylinders value 8, then Origin "Europe". The top Enhance and Filter
    recommendations must both have a score.
    """
    lux.config.set_executor_type("Pandas")
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Year"),
    ])
    df._ipython_display_()

    # Top recommended Enhance vis must have a score (all graphs here have same score).
    assert interestingness(df.recommendation["Enhance"][0], df) is not None

    filter_recs = df.recommendation["Filter"]
    # Top recommended Filter vis must have a score.
    assert interestingness(filter_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank stays at -1 and
    # can still satisfy the ordering assertion below.
    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(filter_recs):
        if len(vis.get_attr_by_attr_name("Cylinders")) > 0:
            cylinders = int(vis._inferred_intent[2].value)
            if cylinders == 6:
                rank1 = idx
            if cylinders == 8:
                rank2 = idx
        if len(vis.get_attr_by_attr_name("Origin")) > 0:
            if str(vis._inferred_intent[2].value) == "Europe":
                rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # NOTE(review): the original comment said "generalize", but the assertion
    # re-checks the "Filter" recommendation; kept as written.
    assert interestingness(df.recommendation["Filter"][0], df) is not None
    df.clear_intent()
def filter(dobj):
    """Build the "Filter" recommendation for a data object.

    When the data object has "Row" specs, one candidate is produced for each
    unique value of each distinct filtered attribute, skipping values already
    collected from the Row specs handled so far. Otherwise one candidate is
    produced for every unique value of every categorical variable listed in
    ``dobj.dataset.dataType``. Each candidate is scored before it is
    compiled, and the collection is cut down with ``topK(5)``.

    Parameters
    ----------
    dobj : data object exposing ``getObjByRowColType`` and ``dataset``.

    Returns
    -------
    The ``lux.Result`` the recommendation was added to.
    """
    result = lux.Result()
    recommendation = {"action":"Filter",
                      "description":"Shows possible visualizations when filtered by categorical variables in the data object's dataset."}
    row_specs = dobj.getObjByRowColType("Row")
    column_spec = dobj.getObjByRowColType("Column")
    seen_values = []
    compiled_views = []

    def compiled_with_filter(attribute, value):
        # Column specs plus one Row filter; score first, then compile.
        spec = column_spec.copy()
        spec.append(lux.Row(fAttribute = attribute, fVal = value))
        candidate = lux.DataObj(dobj.dataset, spec)
        candidate.score = interestingness(candidate)
        candidate.compile()
        return candidate.compiled

    if len(row_specs) > 0:
        # Row given: swap in every other value of each filtered attribute.
        handled_attributes = []
        for row in row_specs:
            if row.fAttribute in handled_attributes:
                continue
            candidates = dobj.dataset.df[row.fAttribute].unique()
            seen_values.append(row.fVal)
            for value in candidates:
                if value not in seen_values:
                    compiled_views.append(compiled_with_filter(row.fAttribute, value))
            handled_attributes.append(row.fAttribute)
    else:
        # No Row given: filter on every value of every categorical variable.
        for cat in dobj.dataset.dataType['categorical']:
            for value in dobj.dataset.df[cat].unique():
                compiled_views.append(compiled_with_filter(cat, value))

    collection = lux.DataObjCollection(compiled_views)
    recommendation["collection"] = collection.topK(5)
    result.addResult(recommendation,dobj)
    return result
def test_interestingness_0_2_0():
    """Check Enhance and Filter ordering for Horsepower + Acceleration (car CSV).

    Enhance (scatter marks, by third inferred clause): Origin, then
    Displacement, then Year.
    Filter: a value containing "1973", then one containing "1976", then
    "Europe". The top Generalize recommendation must have a score.
    """
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration"),
    ])
    df.show_more()

    enhance_recs = df.recommendation["Enhance"]
    # Top recommended Enhance vis must have a score.
    assert interestingness(enhance_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank stays at -1 and
    # can still satisfy the ordering assertions below.
    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(enhance_recs):
        third_attr = str(vis._inferred_intent[2].attribute)
        if str(vis.mark) != "scatter":
            continue
        if third_attr == "Origin":
            rank1 = idx
        elif third_attr == "Displacement":
            rank2 = idx
        elif third_attr == "Year":
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    filter_recs = df.recommendation["Filter"]
    # Top recommended Filter vis must have a score.
    assert interestingness(filter_recs[0], df) is not None

    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(filter_recs):
        filter_value = str(vis._inferred_intent[2].value)
        if "1973" in filter_value:
            rank1 = idx
        if "1976" in filter_value:
            rank2 = idx
        if filter_value == "Europe":
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # Top recommended Generalize vis must have a score.
    assert interestingness(df.recommendation["Generalize"][0], df) is not None
def user_defined(ldf):
    '''
    Scores and sorts the views currently defined on the dataframe.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame whose ``current_view`` collection is executed and scored.

    Returns
    -------
    recommendations : Dict[str,obj]
        "Current Views" recommendation whose "collection" entry is
        ``ldf.current_view``.
    '''
    recommendation = {
        "action": "Current Views",
        "description": "Shows a view collection defined by the context",
        "collection": ldf.current_view,
    }

    current_views = ldf.current_view
    PandasExecutor.execute(current_views, ldf)
    for candidate in current_views:
        candidate.score = interestingness(candidate, ldf)
    # Sorted in place; presumably the same object stored under "collection".
    current_views.sort(remove_invalid=True)
    return recommendation
def test_interestingness_1_1_1(global_var):
    """Check that top Enhance and Filter recommendations on a SQL table are scored.

    The intent is horsepower filtered to origin = USA on the "cars" table.
    """
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")
    tbl.set_intent(
        [
            lux.Clause(attribute="horsepower"),
            lux.Clause(attribute="origin", filter_op="=", value="USA", bin_size=20),
        ]
    )
    tbl._repr_html_()

    # Top recommended Enhance vis must have a score.
    assert interestingness(tbl.recommendation["Enhance"][0], tbl) is not None
    # Top recommended Filter vis must have a score.
    assert interestingness(tbl.recommendation["Filter"][0], tbl) is not None
    tbl.clear_intent()
def custom(ldf):
    """
    Scores and sorts the visualizations currently defined on the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame whose ``current_vis`` list is executed and scored.

    Returns
    -------
    recommendations : Dict[str,obj]
        "Current Vis" recommendation whose "collection" entry is
        ``ldf.current_vis``.
    """
    recommendation = {
        "action": "Current Vis",
        "description": "Shows the list of visualizations generated based on user specified intent",
        "collection": ldf.current_vis,
    }

    current = ldf.current_vis
    PandasExecutor.execute(current, ldf)
    for candidate in current:
        candidate.score = interestingness(candidate, ldf)
    # Sorted in place; presumably the same object stored under "collection".
    current.sort(remove_invalid=True)
    return recommendation
def enhance(ldf):
    """
    Given a set of vis, generates possible visualizations when an additional
    attribute is added to the current vis.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
        Object with a collection of visualizations that result from the
        Enhance action. The collection is empty, and no description is set,
        unless the intent holds exactly one or two attribute clauses.
    """
    filters = utils.get_filter_specs(ldf._intent)
    # Attribute clauses already in the intent. A comprehension is used instead
    # of the builtin ``filter`` so that a module-level function named
    # ``filter`` cannot shadow it.
    attr_specs = [
        clause for clause in ldf._intent
        if clause.value == "" and clause.attribute != "Record"
    ]
    fltr_str = [f"{fltr.attribute}{fltr.filter_op}{fltr.value}" for fltr in filters]
    attr_str = [str(clause.attribute) for clause in attr_specs]
    intended_attrs = f'<p class="highlight-intent">{", ".join(attr_str + fltr_str)}</p>'

    if len(attr_specs) == 1:
        recommendation = {
            "action": "Enhance",
            "description": f"Augmenting current {intended_attrs} intent with additional attribute.",
        }
    elif len(attr_specs) == 2:
        recommendation = {
            "action": "Enhance",
            "description": f"Further breaking down current {intended_attrs} intent by additional attribute.",
        }
    else:
        # Too many attribute clauses, or none at all: nothing to enhance.
        # The zero-clause case previously fell through with ``recommendation``
        # unbound and raised UnboundLocalError.
        return {"action": "Enhance", "collection": []}

    # Clear channel so that channel is not enforced based on input vis intent.
    # NOTE(review): the clause objects are shared with ``ldf._intent`` (the
    # copy looks shallow), so this also clears the channel on the stored intent.
    for clause in ldf._intent.copy():
        clause.channel = ""

    intent = filters + attr_specs
    intent.append("?")
    vlist = lux.vis.VisList.VisList(intent, ldf)

    # Use the data populated in the vis list to compute each score.
    for vis in vlist:
        vis.score = interestingness(vis, ldf)

    vlist.sort()
    recommendation["collection"] = vlist.showK()
    return recommendation
def test_interestingness_0_1_1():
    """Check the top "Current Vis" for a wildcard Origin filter on MilesPerGal.

    The top-ranked vis must have a score, and its third inferred clause must
    have the value "USA".
    """
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Origin", filter_op="=", value="?"),
        lux.Clause(attribute="MilesPerGal"),
    ])
    df._repr_html_()

    assert interestingness(df.recommendation["Current Vis"][0], df) is not None
    assert str(df.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA"
def test_interestingness_0_2_1():
    """Check that the top Generalize recommendation is scored.

    The intent is Horsepower and MilesPerGal with an Acceleration > 10 filter.
    """
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="MilesPerGal"),
        lux.Clause(attribute="Acceleration", filter_op=">", value=10),
    ])
    df._repr_html_()

    # Top recommended Generalize vis must have a score.
    assert interestingness(df.recommendation["Generalize"][0], df) is not None
def enhance(dobj):
    """Build the "Enhance" recommendation for a data object.

    For every quantitative and then every categorical variable of the dataset
    that is not already named by a Column or Row entry in ``dobj.spec``, a
    copy of the spec with that variable appended as a ``Column`` is turned
    into a new data object, scored, compiled and collected.

    Parameters
    ----------
    dobj : data object exposing ``spec`` and ``dataset``.

    Returns
    -------
    The ``lux.Result`` the recommendation was added to.
    """
    result = lux.Result()
    recommendation = {
        "action": "Enhance",
        "description": "Shows possible visualizations when an additional attribute is added to the current view."
    }
    quantitative = dobj.dataset.dataType['quantitative']
    categorical = dobj.dataset.dataType['categorical']

    # Variable names the current spec already refers to.
    already_used = []
    for entry in dobj.spec:
        if entry.className == "Column":
            already_used.append(entry.columnName)
        elif entry.className == "Row":
            already_used.append(entry.fAttribute)

    # Quantitative candidates first, then categorical ones.
    compiled_views = []
    for group in (quantitative, categorical):
        for name in group:
            if name in already_used:
                continue
            extended_spec = dobj.spec.copy()
            extended_spec.append(Column(name))
            candidate = DataObj(dobj.dataset, extended_spec)
            candidate.score = interestingness(candidate)
            candidate.compile()
            compiled_views.append(candidate.compiled)

    recommendation["collection"] = DataObjCollection(compiled_views)
    result.addResult(recommendation, dobj)
    return result
def test_interestingness_0_1_0(global_var):
    """Check Enhance and Filter ordering for a Horsepower-only intent.

    Enhance: scatter with Weight, then scatter with Acceleration, then the
    line chart whose first clause is Year.
    Filter: value 4, then "Europe", then a value containing "1970".
    """
    lux.config.set_executor_type("Pandas")
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([lux.Clause(attribute="Horsepower")])
    df._ipython_display_()

    enhance_recs = df.recommendation["Enhance"]
    # Top recommended Enhance vis must have a score.
    assert interestingness(enhance_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank stays at -1 and
    # can still satisfy the ordering assertions below.
    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(enhance_recs):
        if vis.mark == "scatter":
            second_attr = vis._inferred_intent[1].attribute
            if second_attr == "Weight":
                rank1 = idx
            if second_attr == "Acceleration":
                rank2 = idx
        if vis.mark == "line" and vis._inferred_intent[0].attribute == "Year":
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    filter_recs = df.recommendation["Filter"]
    # Top recommended Filter vis must have a score.
    assert interestingness(filter_recs[0], df) is not None

    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(filter_recs):
        filter_value = vis._inferred_intent[2].value
        if filter_value == 4:
            rank1 = idx
        if str(filter_value) == "Europe":
            rank2 = idx
        if "1970" in str(filter_value):
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3
    df.clear_intent()
def test_interestingness_0_1_1(global_var):
    """Check the top "Current Vis" for a wildcard Origin filter on MilesPerGal.

    The top-ranked vis must have a score, and its third inferred clause must
    have the value "USA".
    """
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Origin", filter_op="=", value="?"),
        lux.Clause(attribute="MilesPerGal"),
    ])
    df._repr_html_()

    assert interestingness(df.recommendation["Current Vis"][0], df) is not None
    assert str(df.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA"
def test_interestingness_0_2_1(global_var):
    """Check that the top Generalize recommendation is scored.

    The intent is Horsepower and MilesPerGal with an Acceleration > 10 filter.
    """
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="MilesPerGal"),
        lux.Clause(attribute="Acceleration", filter_op=">", value=10),
    ])
    df._repr_html_()

    # Top recommended Generalize vis must have a score.
    assert interestingness(df.recommendation["Generalize"][0], df) is not None
def test_interestingness_1_0_0(global_var):
    """Check Enhance and Filter ordering for an Origin-only intent.

    Enhance (by the attribute on the x channel): Displacement, then Weight,
    then Acceleration.
    Filter: Cylinders value 8, then a value containing "ford", then
    Cylinders value 6.
    """
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([lux.Clause(attribute="Origin")])
    df._repr_html_()

    enhance_recs = df.recommendation["Enhance"]
    # Top recommended Enhance vis must have a score.
    assert interestingness(enhance_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank stays at -1 and
    # can still satisfy the ordering assertions below.
    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(enhance_recs):
        x_attr = vis.get_attr_by_channel("x")[0].attribute
        if x_attr == "Displacement":
            rank1 = idx
        elif x_attr == "Weight":
            rank2 = idx
        elif x_attr == "Acceleration":
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    filter_recs = df.recommendation["Filter"]
    # Top recommended Filter vis must have a score.
    assert interestingness(filter_recs[0], df) is not None

    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(filter_recs):
        if len(vis.get_attr_by_attr_name("Cylinders")) > 0:
            cylinders = int(vis._inferred_intent[2].value)
            if cylinders == 8:
                rank1 = idx
            if cylinders == 6:
                rank3 = idx
        if "ford" in str(vis._inferred_intent[2].value):
            rank2 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3
    df.clear_intent()
def test_interestingness_0_1_1(global_var):
    """Check the top "Current Vis" for a wildcard origin filter on a SQL table.

    The top-ranked vis must have a score, and its third inferred clause must
    have the value "USA".
    """
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")
    tbl.set_intent(
        [
            lux.Clause(attribute="origin", filter_op="=", value="?"),
            lux.Clause(attribute="milespergal"),
        ]
    )
    tbl._repr_html_()

    assert interestingness(tbl.recommendation["Current Vis"][0], tbl) is not None
    assert str(tbl.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA"
    tbl.clear_intent()
def enhance(ldf):
    '''
    Given a set of views, generates possible visualizations when an additional
    attribute is added to the current view.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        Object with a collection of visualizations that result from the
        Enhance action. The collection is empty when the context already
        holds more than two attribute clauses.
    '''
    # Read the flag once so the timer start and the report cannot disagree.
    benchmarking = bool(ldf.toggle_benchmarking)
    if benchmarking:
        tic = time.perf_counter()

    recommendation = {
        "action": "Enhance",
        "description": "Shows possible visualizations when an additional attribute is added to the current view."
    }
    filters = utils.get_filter_specs(ldf.context)
    # Attribute clauses already in the context. A comprehension is used
    # instead of the builtin ``filter`` so that a module-level function named
    # ``filter`` cannot shadow it.
    attr_specs = [
        clause for clause in ldf.context
        if clause.value == "" and clause.attribute != "Record"
    ]
    # With too many column attributes, do not generate Enhance
    # recommendations (this early exit is not timed, as before).
    if len(attr_specs) > 2:
        recommendation["collection"] = []
        return recommendation

    query = filters + attr_specs
    query.append("?")
    vc = lux.view.ViewCollection.ViewCollection(query)
    vc = vc.load(ldf)

    # Use the data populated in the view collection to compute each score.
    for view in vc:
        view.score = interestingness(view, ldf)

    recommendation["collection"] = vc.topK(15)

    if benchmarking:
        toc = time.perf_counter()
        print(f"Performed enhance action in {toc - tic:0.4f} seconds")
    return recommendation
def test_interestingness_1_0_0():
    """Check Enhance and Filter ordering for an Origin-only intent (car CSV).

    Enhance (by the first inferred clause): Displacement, then Weight, then
    Acceleration.
    Filter: integer value 8, then integer value 6, then a value containing
    "1972".
    """
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")
    df.set_intent([lux.Clause(attribute="Origin")])
    df.show_more()

    enhance_recs = df.recommendation["Enhance"]
    # Top recommended Enhance vis must have a score.
    assert interestingness(enhance_recs[0], df) is not None

    # -1 means "not found". NOTE(review): an unmatched rank stays at -1 and
    # can still satisfy the ordering assertions below.
    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(enhance_recs):
        first_attr = vis._inferred_intent[0].attribute
        if first_attr == "Displacement":
            rank1 = idx
        elif first_attr == "Weight":
            rank2 = idx
        elif first_attr == "Acceleration":
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    filter_recs = df.recommendation["Filter"]
    # Top recommended Filter vis must have a score.
    assert interestingness(filter_recs[0], df) is not None

    rank1 = rank2 = rank3 = -1
    for idx, vis in enumerate(filter_recs):
        filter_value = vis._inferred_intent[2].value
        # As in the original, every filter value is converted with int().
        as_int = int(filter_value)
        if as_int == 8:
            rank1 = idx
        if as_int == 6:
            rank2 = idx
        if "1972" in str(filter_value):
            rank3 = idx
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3
def test_interestingness_1_2_0():
    """A Name-on-y vis colored by Cylinders must score below 0.01 (car CSV)."""
    from lux.vis.Vis import Clause, Vis
    from lux.interestingness.interestingness import interestingness

    df = pd.read_csv("lux/data/car.csv")
    clauses = [
        Clause(attribute="Name", channel="y"),
        Clause(attribute="Cylinders", channel="color"),
    ]
    new_vis = Vis(clauses)
    new_vis.refresh_source(df)

    score = interestingness(new_vis, df)
    assert score < 0.01
def test_interestingness_1_2_0(global_var):
    """A Name-on-y vis colored by Cylinders must score below 0.01."""
    from lux.vis.Vis import Clause, Vis
    from lux.interestingness.interestingness import interestingness

    df = pytest.car_df
    clauses = [
        Clause(attribute="Name", channel="y"),
        Clause(attribute="Cylinders", channel="color"),
    ]
    new_vis = Vis(clauses)
    new_vis.refresh_source(df)

    score = interestingness(new_vis, df)
    assert score < 0.01
def distribution(ldf,dataTypeConstraint="quantitative"):
    '''
    Generates bar chart distributions of different attributes in the dataset.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    dataTypeConstraint: str
        The variable that controls the type of distribution chart that will
        be rendered; either "quantitative" or "nominal".

    Returns
    -------
    recommendations : Dict[str,obj]
        Object with a collection of visualizations that result from the
        Distribution action.

    Raises
    ------
    ValueError
        If ``dataTypeConstraint`` is not one of the supported values.
    '''
    # Action name and description for each supported constraint.
    actions = {
        "quantitative": (
            "Distribution",
            "Show univariate count distributions of different attributes in the dataset.",
        ),
        "nominal": (
            "Category",
            "Show bar chart distributions of different attributes in the dataset.",
        ),
    }
    # Reject unsupported constraints before any work is done. Previously this
    # fell through and failed later on an unbound local variable.
    if dataTypeConstraint not in actions:
        raise ValueError(
            f"Unsupported dataTypeConstraint {dataTypeConstraint!r}; "
            f"expected one of {sorted(actions)}."
        )
    action, description = actions[dataTypeConstraint]

    # Read the flag once so the timer start and the report cannot disagree.
    benchmarking = bool(ldf.toggleBenchmarking)
    if benchmarking:
        tic = time.perf_counter()

    query = [lux.Spec("?", dataType=dataTypeConstraint)]
    query.extend(ldf.filterSpecs)
    recommendation = {"action": action, "description": description}

    vc = ViewCollection(query)
    vc = vc.load(ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    recommendation["collection"] = vc.topK(15)

    if benchmarking:
        toc = time.perf_counter()
        print(f"Performed distribution action in {toc - tic:0.4f} seconds")
    return recommendation
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    """
    Generates bivariate visualizations that represent all pairwise
    relationships in the data.

    Parameters
    ----------
    ldf : LuxDataFrame
        LuxDataFrame with underspecified intent.

    ignore_transpose: bool
        Boolean flag to ignore pairs of attributes whose transpose are already
        computed (i.e., {X,Y} will be ignored if {Y,X} is already computed)

    Returns
    -------
    recommendations : Dict[str,obj]
        Object with a collection of visualizations that result from the
        Correlation action. The collection is empty when the frame has fewer
        than 5 rows.

    Raises
    ------
    ValueError
        If a generated vis has fewer than two measure attributes.
    """
    filter_specs = utils.get_filter_specs(ldf._intent)
    intent = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    intent.extend(filter_specs)
    vlist = VisList(intent, ldf)
    recommendation = {
        "action": "Correlation",
        "description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
    }
    # With fewer than 5 rows the result is discarded after scoring.
    ignore_rec_flag = len(ldf) < 5

    # Use the data populated in the vis list to compute each score.
    for vis in vlist:
        measures = vis.get_attr_by_data_model("measure")
        if len(measures) < 2:
            # Report the column labels directly. The previous message read
            # ``.attribute`` from each label, which presumably fails with
            # AttributeError before this ValueError can be raised.
            raise ValueError(
                f"Can not compute correlation between {list(ldf.columns)} since less than 2 measure values present."
            )
        msr1 = measures[0].attribute
        msr2 = measures[1].attribute
        # Score the pair unless its transpose is being ignored as already done.
        should_score = (not ignore_transpose) or check_transpose_not_computed(vlist, msr1, msr2)
        vis.score = interestingness(vis, ldf) if should_score else -1

    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation

    vlist.sort()
    recommendation["collection"] = vlist.showK()
    return recommendation
def enhance(ldf):
    '''
    Given a set of views, generates possible visualizations when an additional
    attribute is added to the current vis.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
        Object with a collection of visualizations that result from the
        Enhance action. The collection is empty, and no description is set,
        unless the intent holds exactly one or two attribute clauses.
    '''
    # Read the flag once so the timer start and the report cannot disagree.
    benchmarking = bool(ldf.toggle_benchmarking)
    if benchmarking:
        tic = time.perf_counter()

    filters = utils.get_filter_specs(ldf.intent)
    # Attribute clauses already in the intent. A comprehension is used instead
    # of the builtin ``filter`` so that a module-level function named
    # ``filter`` cannot shadow it.
    attr_specs = [
        clause for clause in ldf.intent
        if clause.value == "" and clause.attribute != "Record"
    ]
    fltr_str = [f"{fltr.attribute}{fltr.filter_op}{fltr.value}" for fltr in filters]
    # str() keeps the join below from failing on non-string attributes.
    attr_str = [str(clause.attribute) for clause in attr_specs]
    intended_attrs = '<p class="highlight-intent">' + ', '.join(attr_str + fltr_str) + '</p>'

    if len(attr_specs) == 1:
        recommendation = {
            "action": "Enhance",
            "description": f"Augmenting current {intended_attrs} intent with additional attribute."
        }
    elif len(attr_specs) == 2:
        recommendation = {
            "action": "Enhance",
            "description": f"Further breaking down current {intended_attrs} intent by additional attribute."
        }
    else:
        # Too many attribute clauses, or none at all: nothing to enhance.
        # The zero-clause case previously fell through with ``recommendation``
        # unbound. This early exit is not timed, as before.
        return {"action": "Enhance", "collection": []}

    intent = filters + attr_specs
    intent.append("?")
    vc = lux.vis.VisList.VisList(intent, ldf)

    # Use the data populated in the vis list to compute each score.
    for view in vc:
        view.score = interestingness(view, ldf)

    recommendation["collection"] = vc.topK(15)

    if benchmarking:
        toc = time.perf_counter()
        print(f"Performed enhance action in {toc - tic:0.4f} seconds")
    return recommendation
def filter(ldf):
    """
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Filter action.
        The collection is empty (and no description is set) when the single
        existing filter is on an attribute that is neither nominal nor quantitative.
    """
    filters = utils.get_filter_specs(ldf._intent)
    filter_values = []
    output = []
    # if fltr is specified, create visualizations where data is filtered by all values of the fltr's categorical variable
    column_spec = utils.get_attrs_specs(ldf.current_vis[0]._inferred_intent)
    # Materialized as a list: membership is tested once per dataframe column
    # below, and a bare map() iterator would be exhausted by the first test.
    column_spec_attr = [clause.attribute for clause in column_spec]
    if len(filters) == 1:
        # get unique values for all categorical values specified and creates corresponding filters
        fltr = filters[0]
        fltr_type = ldf.data_type_lookup[fltr.attribute]

        if fltr_type == "nominal":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative value.",
            }
            unique_values = ldf.unique_values[fltr.attribute]
            filter_values.append(fltr.value)
            # creates vis with new filters
            for val in unique_values:
                if val not in filter_values:
                    new_spec = column_spec.copy()
                    new_filter = lux.Clause(attribute=fltr.attribute, value=val)
                    new_spec.append(new_filter)
                    temp_vis = Vis(new_spec)
                    output.append(temp_vis)
        elif fltr_type == "quantitative":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative inequality operation.",
            }

            # Maps each inequality operator to its logical complement. Any other
            # operator (e.g. "=") has no entry and yields None.
            # TODO: need to support case where fltr_op is "=" --> auto-binned ranges
            complementary_ops = {">": "<=", "<": ">=", ">=": "<", "<=": ">"}

            # Create vis with complementary filter operations
            new_spec = column_spec.copy()
            new_filter = lux.Clause(
                attribute=fltr.attribute,
                filter_op=complementary_ops.get(fltr.filter_op),
                value=fltr.value,
            )
            new_spec.append(new_filter)
            temp_vis = Vis(new_spec, score=1)
            output.append(temp_vis)
        else:
            # No alternative-filter strategy exists for this attribute type;
            # return an empty recommendation instead of reaching the code below
            # with `recommendation` unbound.
            return {"action": "Filter", "collection": []}
    # if no existing filters, create filters using unique values from all categorical variables in the dataset
    else:
        intended_attrs = ", ".join([
            clause.attribute for clause in ldf._intent
            if clause.value == "" and clause.attribute != "Record"
        ])
        recommendation = {
            "action": "Filter",
            "description": f"Applying filters to the <p class='highlight-intent'>{intended_attrs}</p> intent.",
        }
        categorical_vars = []
        for col in list(ldf.columns):
            # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
            if ldf.cardinality[col] < 30 and col not in column_spec_attr:
                categorical_vars.append(col)
        for cat in categorical_vars:
            for val in ldf.unique_values[cat]:
                new_spec = column_spec.copy()
                new_filter = lux.Clause(attribute=cat, filter_op="=", value=val)
                new_spec.append(new_filter)
                temp_vis = Vis(new_spec)
                output.append(temp_vis)

    vlist = lux.vis.VisList.VisList(output, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist = vlist.topK(15)
    recommendation["collection"] = vlist
    return recommendation
def univariate(ldf, data_type_constraint="quantitative"):
    '''
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    data_type_constraint: str
        Controls the type of distribution chart that will be rendered.
        One of "quantitative", "nominal" or "temporal".

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Distribution action.
        The collection is empty when the dataframe has too few rows for the
        requested chart type (< 5 for quantitative, < 3 for temporal).

    Raises
    ------
    ValueError
        If data_type_constraint is not one of the supported values.
    '''
    # Validate up front: an unsupported constraint would otherwise leave
    # `intent` and `recommendation` unbound and fail with UnboundLocalError.
    if data_type_constraint not in ("quantitative", "nominal", "temporal"):
        raise ValueError(
            f"Unsupported data_type_constraint {data_type_constraint!r}; "
            "expected 'quantitative', 'nominal' or 'temporal'."
        )

    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False
    if data_type_constraint == "quantitative":
        intent = [
            lux.Clause("?",
                       data_type="quantitative",
                       exclude="Number of Records")
        ]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes."
        }
        # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
        if len(ldf) < 5:
            ignore_rec_flag = True
    elif data_type_constraint == "nominal":
        intent = [lux.Clause("?", data_type="nominal")]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes."
        }
    elif data_type_constraint == "temporal":
        intent = [lux.Clause("?", data_type="temporal")]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes."
        }
        # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
        if len(ldf) < 3:
            ignore_rec_flag = True

    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation

    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    # vlist = vlist.topK(15) # Basic visualizations should not be capped
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation