def generalize(ldf):
    '''
    Generates all possible visualizations when one attribute or filter from the current vis is removed.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Generalize action.
        Keys are "action", "description" and "collection"; "collection" is an empty
        list when the intent has fewer than 1 or more than 4 attribute clauses.
    '''
    # for benchmarking: read the flag once so the start/stop timestamps stay paired
    benchmarking = bool(ldf.toggle_benchmarking)
    if benchmarking:
        tic = time.perf_counter()

    # Attribute clauses are those without a filter value; the "Record" clause is skipped.
    attributes = [
        clause for clause in ldf.intent
        if clause.value == "" and clause.attribute != "Record"
    ]
    filters = utils.get_filter_specs(ldf.intent)

    fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters]
    # clause.attribute may be a list (handled below), so stringify it before joining
    attr_str = [str(clause.attribute) for clause in attributes]
    intended_attrs = '<p class="highlight-intent">' + ', '.join(attr_str + fltr_str) + '</p>'
    recommendation = {
        "action": "Generalize",
        "description": f"Remove an attribute or filter from {intended_attrs}.",
    }
    # to observe a more general trend
    # if we do not have enough column attributes or too many, return no views.
    if len(attributes) < 1 or len(attributes) > 4:
        recommendation["collection"] = []
        return recommendation

    output = []
    # for each column specification, create a copy of the ldf's view and remove the column specification
    # then append the view to the output
    if len(attributes) > 1:
        excluded_columns = []
        for clause in attributes:
            attribute = clause.attribute
            if isinstance(attribute, list):
                columns = attribute
            elif isinstance(attribute, str):
                columns = [attribute]
            else:
                # only list and str attributes are handled here
                continue
            for column in columns:
                if column in excluded_columns:
                    continue
                temp_view = Vis(ldf.copy_intent(), score=1)
                temp_view.remove_column_from_spec(column, remove_first=True)
                excluded_columns.append(column)
                output.append(temp_view)

    # for each filter specification, create a copy of the ldf's current vis and remove the filter specification,
    # then append the view to the output
    for clause in filters:
        temp_view = Vis(
            ldf.current_vis[0]._inferred_intent.copy(),
            source=ldf,
            title="Overall",
            score=0,
        )
        temp_view.remove_filter_from_spec(clause.value)
        output.append(temp_view)

    vc = lux.vis.VisList.VisList(output, source=ldf)
    # Ignore interestingness sorting since Generalize yields very few vis
    # (preserve order of remove attribute, then remove filters)
    vc.remove_duplicates()
    vc.sort(remove_invalid=True)
    recommendation["collection"] = vc

    # for benchmarking
    if benchmarking:
        toc = time.perf_counter()
        print(f"Performed generalize action in {toc - tic:0.4f} seconds")
    return recommendation
def determine_encoding(ldf: LuxDataFrame, vis: Vis):
    """
    Populates Vis with the appropriate mark type and channel information based on ShowMe logic
    Currently support up to 3 dimensions or measures

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent
    vis : lux.vis.Vis
        Vis to populate. It is modified in place: `_mark`, `_min_max` and
        `_inferred_intent` are written by this function.

    Returns
    -------
    None

    Notes
    -----
    Implementing automatic encoding from Tableau's VizQL
    Mackinlay, J. D., Hanrahan, P., & Stolte, C. (2007).
    Show Me: Automatic presentation for visual analysis.
    IEEE Transactions on Visualization and Computer Graphics, 13(6), 1137–1144.
    https://doi.org/10.1109/TVCG.2007.70594

    If no branch below matches the (ndim, nmsr) combination, no mark or
    channel is assigned and only `vis._min_max` is reset to an empty dict.
    """
    # Count number of measures and dimensions
    ndim = vis._ndim
    nmsr = vis._nmsr
    # preserve to add back to _inferred_intent later
    filters = utils.get_filter_specs(vis._inferred_intent)

    # Helper function (TODO: Move this into utils)
    def line_or_bar_or_geo(ldf, dimension: Clause, measure: Clause):
        dim_type = dimension.data_type
        # If no aggregation function is specified, then default as average
        if measure.aggregation == "":
            measure.set_aggregation("mean")
        # "oridinal" is the historical misspelling this check used; it is kept
        # alongside the correct "ordinal" so existing callers are unaffected.
        if dim_type in ("temporal", "ordinal", "oridinal"):
            return "line", {"x": dimension, "y": measure}
        # unordered categorical
        # if cardinality large than 5 then sort bars
        if ldf.cardinality[dimension.attribute] > 5:
            dimension.sort = "ascending"
        if utils.like_geo(dimension.get_attr()):
            return "geographical", {"x": dimension, "y": measure}
        return "bar", {"x": measure, "y": dimension}

    # ShowMe logic + additional heuristics
    count_col = Clause(
        attribute="Record",
        aggregation="count",
        data_model="measure",
        data_type="quantitative",
    )
    auto_channel = {}
    if ndim == 0 and nmsr == 1:
        # Histogram with Count
        measure = vis.get_attr_by_data_model("measure", exclude_record=True)[0]
        # Add the count clause only when no "Record" clause is present yet.
        # (The previous `< 0` comparison on a length could never be true.)
        if len(vis.get_attr_by_attr_name("Record")) == 0:
            vis._inferred_intent.append(count_col)
        # If no bin specified, then default as 10
        if measure.bin_size == 0:
            measure.bin_size = 10
        auto_channel = {"x": measure, "y": count_col}
        vis._mark = "histogram"
    elif ndim == 1 and (nmsr == 0 or nmsr == 1):
        # Line or Bar Chart
        if nmsr == 0:
            vis._inferred_intent.append(count_col)
        dimension = vis.get_attr_by_data_model("dimension")[0]
        measure = vis.get_attr_by_data_model("measure")[0]
        vis._mark, auto_channel = line_or_bar_or_geo(ldf, dimension, measure)
    elif ndim == 2 and (nmsr == 0 or nmsr == 1):
        # Line or Bar chart broken down by the dimension
        dimensions = vis.get_attr_by_data_model("dimension")
        d1 = dimensions[0]
        d2 = dimensions[1]
        # The lower-cardinality dimension becomes the color channel.
        if ldf.cardinality[d1.attribute] < ldf.cardinality[d2.attribute]:
            vis.remove_column_from_spec(d1.attribute)
            dimension = d2
            color_attr = d1
        else:
            # if same attribute then remove_column_from_spec will remove both dims, we only want to remove one
            if d1.attribute == d2.attribute:
                vis._inferred_intent.pop(0)
            else:
                vis.remove_column_from_spec(d2.attribute)
            dimension = d1
            color_attr = d2
        # Colored Bar/Line chart with Count as default measure
        if not ldf.pre_aggregated:
            if nmsr == 0:
                vis._inferred_intent.append(count_col)
            measure = vis.get_attr_by_data_model("measure")[0]
            vis._mark, auto_channel = line_or_bar_or_geo(ldf, dimension, measure)
            auto_channel["color"] = color_attr
    elif ndim == 0 and nmsr == 2:
        # Scatterplot
        vis._mark = "scatter"
        vis._inferred_intent[0].set_aggregation(None)
        vis._inferred_intent[1].set_aggregation(None)
        auto_channel = {
            "x": vis._inferred_intent[0],
            "y": vis._inferred_intent[1],
        }
    elif ndim == 1 and nmsr == 2:
        # Scatterplot broken down by the dimension
        measure = vis.get_attr_by_data_model("measure")
        m1 = measure[0]
        m2 = measure[1]
        # NOTE(review): this clears aggregation on the first two intent clauses,
        # which are m1/m2 only if the dimension is not listed first — confirm.
        vis._inferred_intent[0].set_aggregation(None)
        vis._inferred_intent[1].set_aggregation(None)
        color_attr = vis.get_attr_by_data_model("dimension")[0]
        # NOTE(review): a Clause is passed here, whereas the other calls in this
        # function pass `.attribute` — verify against remove_column_from_spec.
        vis.remove_column_from_spec(color_attr)
        vis._mark = "scatter"
        auto_channel = {"x": m1, "y": m2, "color": color_attr}
    elif ndim == 0 and nmsr == 3:
        # Scatterplot with color
        vis._mark = "scatter"
        auto_channel = {
            "x": vis._inferred_intent[0],
            "y": vis._inferred_intent[1],
            "color": vis._inferred_intent[2],
        }

    # Keep only the min/max entries for attributes that ended up on a channel.
    relevant_attributes = [clause.attribute for clause in auto_channel.values()]
    vis._min_max = {
        attr: ldf._min_max[attr]
        for attr in relevant_attributes
        if attr != "Record" and attr in ldf._min_max
    }
    if auto_channel:
        vis = Compiler.enforce_specified_channel(vis, auto_channel)
        # add back the preserved filters
        vis._inferred_intent.extend(filters)
def test_remove(global_var):
    """Removing "Horsepower" from a two-clause Vis leaves "Acceleration" as the first clause."""
    intent = [lux.Clause("Horsepower"), lux.Clause("Acceleration")]
    chart = Vis(intent, pytest.car_df)

    chart.remove_column_from_spec("Horsepower", remove_first=False)

    leading_clause = chart._inferred_intent[0]
    assert leading_clause.attribute == "Acceleration"
def test_remove():
    """Removing "Horsepower" from a two-clause Vis leaves "Acceleration" as the first clause."""
    from lux.vis.Vis import Vis

    cars = pd.read_csv("lux/data/car.csv")
    intent = [lux.Clause("Horsepower"), lux.Clause("Acceleration")]
    chart = Vis(intent, cars)

    chart.remove_column_from_spec("Horsepower", remove_first=False)

    leading_clause = chart._inferred_intent[0]
    assert leading_clause.attribute == "Acceleration"
def generalize(ldf):
    """
    Generates all possible visualizations when one attribute or filter from the current vis is removed.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Generalize action.
        Keys are "action", "description", "long_description" and "collection";
        "collection" is an empty list when the intent has fewer than 1 or more
        than 4 attribute clauses.
    """
    # Attribute clauses are those without a filter value; the "Record" clause is skipped.
    attributes = [
        clause for clause in ldf._intent
        if clause.value == "" and clause.attribute != "Record"
    ]
    filters = utils.get_filter_specs(ldf._intent)

    fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters]
    attr_str = [str(clause.attribute) for clause in attributes]
    intended_attrs = f'<p class="highlight-intent">{", ".join(attr_str + fltr_str)}</p>'

    recommendation = {
        "action": "Generalize",
        "description": f"Remove an attribute or filter from {intended_attrs}.",
        "long_description": f"Remove one aspect of the Current Vis. We can either remove an attribute or filter from {intended_attrs}.",
    }
    # to observe a more general trend
    # if we do not have enough column attributes or too many, return no vis.
    if len(attributes) < 1 or len(attributes) > 4:
        recommendation["collection"] = []
        return recommendation

    output = []
    # for each column specification, create a copy of the ldf's vis and remove the column specification
    # then append the vis to the output
    if len(attributes) > 1:
        excluded_columns = []
        for clause in attributes:
            attribute = clause.attribute
            # A clause may name one column or a list of columns; treat both uniformly.
            columns = attribute if isinstance(attribute, list) else [attribute]
            for column in columns:
                if column in excluded_columns:
                    continue
                temp_vis = Vis(ldf.copy_intent(), score=1)
                temp_vis.remove_column_from_spec(column, remove_first=True)
                excluded_columns.append(column)
                output.append(temp_vis)

    # for each filter specification, create a copy of the ldf's current vis and remove the filter specification,
    # then append the vis to the output
    for clause in filters:
        temp_vis = Vis(
            ldf.current_vis[0]._inferred_intent.copy(),
            source=ldf,
            title="Overall",
            score=0,
        )
        temp_vis.remove_filter_from_spec(clause.value)
        output.append(temp_vis)

    vlist = lux.vis.VisList.VisList(output, source=ldf)
    # Ignore interestingness sorting since Generalize yields very few vis
    # (preserve order of remove attribute, then remove filters)
    vlist.remove_duplicates()
    vlist.sort(remove_invalid=True)
    # Drop any vis whose score is -1 after sorting.
    vlist._collection = [vis for vis in vlist._collection if vis.score != -1]
    recommendation["collection"] = vlist
    return recommendation