def intent(self, inten):
    """Set the intent of the dataframe.

    Parameters
    ----------
    inten : list or Vis
        Either a list of attribute names / lux.Clause objects, or a
        single Vis object whose intent should be adopted.

    Raises
    ------
    TypeError
        If the input is neither a list nor a Vis.
    """
    # Fix: the original branched per element of `inten` and, when it saw a
    # short string element, wrapped the *entire* input into a single Clause
    # via f"{inten}" (stringifying the whole list) — and only raised the
    # TypeError when the first element happened to be a Clause. The simple
    # list/Vis dispatch below is sufficient: set_intent() is responsible for
    # normalizing string entries inside the list into lux.Clause objects.
    is_list_input = isinstance(inten, list)
    is_vis_input = isinstance(inten, Vis)
    if not (is_list_input or is_vis_input):
        raise TypeError(
            "Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object."
            "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html"
        )
    if is_list_input:
        self.set_intent(inten)
    elif is_vis_input:
        self.set_intent_as_vis(inten)
def test_case2():
    # Setting an intent that mixes a plain attribute string with an explicit
    # Clause should normalize both entries into lux.Clause objects.
    df = pd.read_csv("lux/data/car.csv")
    df.set_intent(["Horsepower", lux.Clause("MilesPerGal", channel="x")])
    for pos, expected_attr in enumerate(["Horsepower", "MilesPerGal"]):
        assert type(df._intent[pos]) is lux.Clause
        assert df._intent[pos].attribute == expected_attr
def row_group(ldf):
    """Build a 'Row Groups' recommendation: one vis per dataframe row,
    plotted against the row-wise index."""
    rec = {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
    }
    collection = []
    if ldf.index.nlevels == 1:
        # Name the shared x dimension after the columns index when it has one.
        dim_name = ldf.columns.name if ldf.columns.name is not None else "index"
        for row_id in range(len(ldf)):
            single_row = ldf.iloc[row_id, ]
            rowdf = single_row.reset_index()
            # TODO (carried over): auto-detect index data type / temporal columns.
            measure = lux.Clause(single_row.name, data_model="measure", aggregation=None)
            collection.append(Vis([dim_name, measure], rowdf))
    # Interestingness is deliberately not computed so the aggregated data
    # keeps its original arrangement.
    rec["collection"] = VisList(collection)
    return rec
def column_group(ldf):
    """Build a 'Column Groups' recommendation: one vis per column, plotted
    against the row index of the dataframe.

    Parameters
    ----------
    ldf : LuxDataFrame
        Dataframe whose columns should be visualized against its index.

    Returns
    -------
    recommendation : dict
        Action dict with the resulting VisList under "collection".
    """
    recommendation = {
        "action": "Column Groups",
        "description": "Shows charts of possible visualizations with respect to the column-wise index.",
    }
    collection = []
    ldf_flat = ldf
    if isinstance(ldf.columns, pd.DatetimeIndex):
        ldf_flat.columns = ldf_flat.columns.format()
    # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
    ldf_flat = ldf_flat.reset_index()
    if ldf.index.nlevels == 1:
        # Fix: an unnamed index used to leave index_column_name as None,
        # producing a Clause on attribute None. Fall back to the "index"
        # column that reset_index() creates (matches the sibling column_group).
        if ldf.index.name:
            index_column_name = ldf.index.name
        else:
            index_column_name = "index"
        if isinstance(ldf.columns, pd.DatetimeIndex):
            ldf.columns = ldf.columns.to_native_types()
        for attribute in ldf.columns:
            vis = Vis(
                [index_column_name, lux.Clause(str(attribute), aggregation=None)],
                ldf_flat,
            )
            collection.append(vis)
    # Interestingness deliberately not computed: preserve aggregated order.
    vlst = VisList(collection)
    recommendation["collection"] = vlst
    return recommendation
def test_case2(global_var):
    """Intent mixing a string and a Clause should normalize to Clause objects."""
    car_df = pytest.car_df
    car_df.set_intent(["Horsepower", lux.Clause("MilesPerGal", channel="x")])
    for idx, attr in enumerate(("Horsepower", "MilesPerGal")):
        assert type(car_df._intent[idx]) is lux.Clause
        assert car_df._intent[idx].attribute == attr
    car_df.clear_intent()
def column_group(ldf):
    """Build a 'Column Groups' recommendation: one vis per numeric column,
    plotted against the (named or default) row index."""
    recommendation = {
        "action": "Column Groups",
        "description": "Shows charts of possible visualizations with respect to the column-wise index.",
    }
    collection = []
    ldf_flat = ldf
    if isinstance(ldf.columns, pd.DatetimeIndex):
        ldf_flat.columns = ldf_flat.columns.format()
    # A single flattened frame is shared by every vis so its metadata is
    # computed only once.
    ldf_flat = ldf_flat.reset_index()
    if ldf.index.nlevels == 1:
        index_column_name = ldf.index.name if ldf.index.name else "index"
        if isinstance(ldf.columns, pd.DatetimeIndex):
            ldf.columns = ldf.columns.to_native_types()
        for attribute in ldf.columns:
            # Skip string-typed columns and the synthetic "index" column.
            if ldf[attribute].dtype == "object" or attribute == "index":
                continue
            index_clause = lux.Clause(
                attribute=index_column_name,
                data_type="nominal",
                data_model="dimension",
                aggregation=None,
            )
            value_clause = lux.Clause(
                attribute=str(attribute),
                data_type="quantitative",
                data_model="measure",
                aggregation=None,
            )
            collection.append(Vis([index_clause, value_clause]))
    # No interestingness scoring: keep the aggregated column arrangement.
    recommendation["collection"] = VisList(collection, ldf_flat)
    return recommendation
def random_categorical(ldf):
    """Return up to 15 bar charts over nominal attributes, each given a
    fixed score of 10."""
    vlist = VisList([lux.Clause("?", data_type="nominal")], ldf)
    for vis in vlist:
        vis.score = 10
    return {
        "action": "bars",
        "description": "Random list of Bar charts",
        "collection": vlist.topK(15),
    }
def row_group(ldf):
    """Build a 'Row Groups' recommendation: one vis per dataframe row,
    plotted against the row-wise index."""
    recommendation = {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
        "long_description": "A row index can be thought of as an extra row that indicates the values that the user is interested in. "
        "Lux focuses on visualizing named dataframe indices, i.e., indices with a non-null name property, as a proxy of the attribute "
        "that the user is interested in or have operated on (e.g., group-by attribute). In particular, dataframes with named indices "
        "are often pre-aggregated, so Lux visualizes exactly the values that the dataframe portrays. "
        '<a href="https://lux-api.readthedocs.io/en/latest/source/advanced/indexgroup.html" target="_blank">More details</a>',
    }
    collection = []
    if ldf.index.nlevels == 1:
        # Name the shared x dimension after the columns index when it has one.
        dim_name = ldf.columns.name if ldf.columns.name is not None else "index"
        for row_id in range(len(ldf)):
            single_row = ldf.iloc[row_id, ]
            rowdf = single_row.reset_index()
            # TODO (carried over): auto-detect index data type / temporal columns.
            measure = lux.Clause(single_row.name, data_model="measure", aggregation=None)
            collection.append(Vis([dim_name, measure], rowdf))
    # No interestingness scoring: preserve the aggregated row arrangement.
    recommendation["collection"] = VisList(collection)
    return recommendation
def row_group(ldf):
    """'Row Groups' action (benchmarked variant): one vis per row, plotted
    against the row-wise index."""
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()
    recommendation = {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
    }
    collection = []
    if ldf.index.nlevels == 1:
        dim_name = "index" if ldf.columns.name is None else ldf.columns.name
        for row_id in range(len(ldf)):
            single_row = ldf.iloc[row_id, ]
            rowdf = single_row.reset_index()
            # Datetime column labels are tagged as a temporal dimension.
            if isinstance(ldf.columns, pd.DatetimeIndex):
                rowdf.data_type_lookup[dim_name] = "temporal"
            clause = lux.Clause(single_row.name, aggregation=None)
            collection.append(Vis([dim_name, clause], rowdf))
    # No interestingness scoring: preserve the aggregated row arrangement.
    recommendation["collection"] = VisList(collection)
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed enhance action in {toc - tic:0.4f} seconds")
    return recommendation
def column_group(ldf):
    """'Column Groups' action (benchmarked variant): one vis per column,
    plotted against the row index.

    Parameters
    ----------
    ldf : LuxDataFrame
        Dataframe whose columns should be visualized against its index.

    Returns
    -------
    recommendation : dict
        Action dict with the resulting VisList under "collection".
    """
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()
    recommendation = {
        "action": "Column Groups",
        "description": "Shows charts of possible visualizations with respect to the column-wise index.",
    }
    collection = []
    data = ldf.copy()
    if ldf.index.nlevels == 1:
        # Fix: an unnamed index previously left index_column_name as None,
        # yielding a Clause with attribute None. Fall back to the "index"
        # column created by reset_index() (matches the sibling column_group).
        index_column_name = ldf.index.name if ldf.index.name else "index"
        if isinstance(ldf.columns, pd.DatetimeIndex):
            data.columns = ldf.columns.to_native_types()
        for attribute in data.columns:
            vis = Vis(
                [index_column_name, lux.Clause(str(attribute), aggregation=None)],
                data[attribute].reset_index(),
            )
            collection.append(vis)
    # Note that we are not computing interestingness score here because we
    # want to preserve the arrangement of the aggregated data.
    vlst = VisList(collection)
    recommendation["collection"] = vlst
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed enhance action in {toc - tic:0.4f} seconds")
    return recommendation
def add_filter(ldf):
    """
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Filter action.
    """
    filters = utils.get_filter_specs(ldf._intent)
    filter_values = []
    output = []
    # if fltr is specified, create visualizations where data is filtered by all values of the fltr's categorical
    # variable
    column_spec = utils.get_attrs_specs(ldf.current_vis[0].intent)
    # Materialized as a list so membership tests below can run repeatedly.
    column_spec_attr = list(map(lambda x: x.attribute, column_spec))
    if len(filters) == 1:
        # get unique values for all categorical values specified and creates corresponding filters
        fltr = filters[0]
        if ldf.data_type[fltr.attribute] == "nominal":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an "
                f"alternative value.",
                "long_description": f"Swap out the filter value for {fltr.attribute} to other possible values, while "
                "keeping all else the same. Visualizations are ranked based on interestingness",
            }
            unique_values = ldf.unique_values[fltr.attribute]
            # Remember the currently-filtered value so it is not re-proposed.
            filter_values.append(fltr.value)
            # creates vis with new filters
            for val in unique_values:
                if val not in filter_values:
                    new_spec = column_spec.copy()
                    new_filter = lux.Clause(attribute=fltr.attribute, value=val)
                    new_spec.append(new_filter)
                    temp_vis = Vis(new_spec)
                    output.append(temp_vis)
        elif ldf.data_type[fltr.attribute] == "quantitative":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an "
                f"alternative inequality operation.",
                "long_description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an "
                f"alternative inequality operation.",
            }
            # Create vis with complementary filter operations
            # NOTE: This section of code has been modified to allow for the rendering of multiple vis
            for op in get_complementary_ops(fltr.filter_op):
                new_spec = column_spec.copy()
                new_filter = lux.Clause(
                    attribute=fltr.attribute,
                    filter_op=op,
                    value=fltr.value,
                )
                new_spec.append(new_filter)
                temp_vis = Vis(new_spec, score=1)
                output.append(temp_vis)
        # NOTE(review): if the single filter's attribute is neither nominal
        # nor quantitative, `recommendation` stays unbound and the lookup at
        # the bottom raises UnboundLocalError — confirm upstream guarantees.
    # if no existing filters, create filters using unique values from all categorical variables in the dataset
    else:
        intended_attrs = ", ".join(
            [
                str(clause.attribute)
                for clause in ldf._intent
                if clause.value == "" and clause.attribute != "Record"
            ]
        )
        recommendation = {
            "action": "Filter",
            "description": f"Applying filters to the <p class='highlight-intent'>{intended_attrs}</p> intent.",
            "long_description": f"Adding any filter while keeping the attributes on the x and y axes fixed. "
            f"Visualizations are ranked based on interestingness",
        }
        categorical_vars = []
        for col in list(ldf.columns):
            # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
            if 1 < ldf.cardinality[col] < 30 and col not in column_spec_attr:
                categorical_vars.append(col)
        for cat in categorical_vars:
            unique_values = ldf.unique_values[cat]
            for val in unique_values:
                new_spec = column_spec.copy()
                new_filter = lux.Clause(attribute=cat, filter_op="=", value=val)
                new_spec.append(new_filter)
                temp_vis = Vis(new_spec)
                output.append(temp_vis)
    # Special case: a single line-chart current vis with at least one filter
    # is rebranded as a "Similarity" action; the candidate list built above is
    # REPLACED by the intent minus its last filter, plus a Clause spanning all
    # values of that filter's attribute.
    if (
        ldf.current_vis is not None
        and len(ldf.current_vis) == 1
        and ldf.current_vis[0].mark == "line"
        and len(get_filter_specs(ldf.intent)) > 0
    ):
        recommendation = {
            "action": "Similarity",
            "description": "Show other charts that are visually similar to the Current vis.",
            "long_description": "Show other charts that are visually similar to the Current vis.",
        }
        last = get_filter_specs(ldf.intent)[-1]
        output = ldf.intent.copy()[0:-1]
        # array of possible values for attribute
        arr = ldf[last.attribute].unique().tolist()
        output.append(lux.Clause(last.attribute, last.attribute, arr))
    # Score on a copy so scoring side effects don't perturb the rendered list.
    vlist = lux.vis.VisList.VisList(output, ldf)
    vlist_copy = lux.vis.VisList.VisList(output, ldf)
    for i in range(len(vlist_copy)):
        vlist[i].score = interestingness(vlist_copy[i], ldf)
    vlist.sort()
    vlist = vlist.showK()
    # The first Similarity result is the current vis itself, so drop it.
    if recommendation["action"] == "Similarity":
        recommendation["collection"] = vlist[1:]
    else:
        recommendation["collection"] = vlist
    return recommendation
def filter(ldf):
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()
    '''
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
            LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Filter action.
    '''
    filters = utils.get_filter_specs(ldf.intent)
    filter_values = []
    output = []
    # if fltr is specified, create visualizations where data is filtered by all values of the fltr's categorical variable
    column_spec = utils.get_attrs_specs(ldf.current_vis[0]._inferred_intent)
    # FIX: materialize the attribute names. The original bound a raw `map`
    # iterator here; each `col not in column_spec_attr` membership test below
    # consumes it, so every test after the first ran against an exhausted
    # iterator and wrongly succeeded. A list supports repeated tests.
    column_spec_attr = list(map(lambda x: x.attribute, column_spec))
    if len(filters) == 1:
        # get unique values for all categorical values specified and creates corresponding filters
        fltr = filters[0]
        unique_values = ldf.unique_values[fltr.attribute]
        # Remember the currently-filtered value so it is not re-proposed.
        filter_values.append(fltr.value)
        # creates views with new filters
        for val in unique_values:
            if val not in filter_values:
                new_spec = column_spec.copy()
                new_filter = lux.Clause(attribute=fltr.attribute, value=val)
                new_spec.append(new_filter)
                temp_view = Vis(new_spec)
                output.append(temp_view)
        recommendation = {
            "action": "Filter",
            "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative value.",
        }
    else:
        # if no existing filters, create filters using unique values from all categorical variables in the dataset
        intended_attrs = '<b>' + ', '.join(
            [clause.attribute for clause in ldf.intent if clause.value == '' and clause.attribute != "Record"]
        ) + '</b>'
        recommendation = {
            "action": "Filter",
            "description": f"Applying filters to the <p class='highlight-intent'>{intended_attrs}</p> intent.",
        }
        categorical_vars = []
        for col in list(ldf.columns):
            # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
            if ldf.cardinality[col] < 30 and col not in column_spec_attr:
                categorical_vars.append(col)
        for cat in categorical_vars:
            unique_values = ldf.unique_values[cat]
            for i in range(0, len(unique_values)):
                new_spec = column_spec.copy()
                new_filter = lux.Clause(attribute=cat, filter_op="=", value=unique_values[i])
                new_spec.append(new_filter)
                temp_view = Vis(new_spec)
                output.append(temp_view)
    vc = lux.vis.VisList.VisList(output, ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed filter action in {toc - tic:0.4f} seconds")
    return recommendation
def univariate(ldf, data_type_constraint="quantitative"):
    '''
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
            LuxDataFrame with underspecified intent.
    data_type_constraint: str
            Controls the type of distribution chart that will be rendered.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Distribution action.
    '''
    import scipy.stats
    import numpy as np

    # for benchmarking
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()
    filter_specs = utils.get_filter_specs(ldf.intent)
    ignore_rec_flag = False
    if data_type_constraint == "quantitative":
        intent = [lux.Clause("?", data_type="quantitative")]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes."
        }
        # Histograms need at least 5 datapoints (pre-aggregated).
        ignore_rec_flag = len(ldf) < 5
    elif data_type_constraint == "nominal":
        intent = [lux.Clause("?", data_type="nominal")]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes."
        }
    elif data_type_constraint == "temporal":
        intent = [lux.Clause("?", data_type="temporal")]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes."
        }
        # Line charts need at least 3 datapoints (pre-aggregated).
        ignore_rec_flag = len(ldf) < 3
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vc = VisList(intent, ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed distribution action in {toc - tic:0.4f} seconds")
    return recommendation
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    """
    Generates bivariate visualizations that represent all pairwise relationships in the data.

    Parameters
    ----------
    ldf : LuxDataFrame
        LuxDataFrame with underspecified intent.
    ignore_transpose: bool
        Boolean flag to ignore pairs of attributes whose transpose are already computed (i.e., {X,Y} will be ignored if {Y,X} is already computed)

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Correlation action.
    """
    import numpy as np

    filter_specs = utils.get_filter_specs(ldf._intent)
    intent = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    intent.extend(filter_specs)
    vlist = VisList(intent, ldf)
    recommendation = {
        "action": "Correlation",
        "description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
    }
    # Correlation is not meaningful on fewer than ~5 rows.
    ignore_rec_flag = len(ldf) < 5
    # Score every pair; transposed duplicates get a sentinel score of -1.
    for vis in vlist:
        measures = vis.get_attr_by_data_model("measure")
        if len(measures) < 2:
            raise ValueError(
                f"Can not compute correlation between {[x.attribute for x in ldf.columns]} since less than 2 measure values present."
            )
        msr1, msr2 = measures[0].attribute, measures[1].attribute
        if ignore_transpose and not check_transpose_not_computed(vlist, msr1, msr2):
            vis.score = -1
        else:
            vis.score = interestingness(vis, ldf)
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vlist.sort()
    recommendation["collection"] = vlist.showK()
    return recommendation
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    '''
    Generates bivariate visualizations that represent all pairwise relationships in the data.

    Parameters
    ----------
    ldf : LuxDataFrame
            LuxDataFrame with underspecified intent.
    ignore_transpose: bool
            Boolean flag to ignore pairs of attributes whose transpose are already computed (i.e., {X,Y} will be ignored if {Y,X} is already computed)

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Correlation action.
    '''
    import numpy as np

    # for benchmarking
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()
    filter_specs = utils.get_filter_specs(ldf.intent)
    intent = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    intent.extend(filter_specs)
    vc = VisList(intent, ldf)
    recommendation = {
        "action": "Correlation",
        "description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes."
    }
    # Correlation is not meaningful on fewer than ~5 rows.
    ignore_rec_flag = len(ldf) < 5
    # Score every pair; transposed duplicates get a sentinel score of -1.
    for view in vc:
        measures = view.get_attr_by_data_model("measure")
        if len(measures) < 2:
            raise ValueError(
                f"Can not compute correlation between {[x.attribute for x in ldf.columns]} since less than 2 measure values present."
            )
        msr1, msr2 = measures[0].attribute, measures[1].attribute
        transpose_ok = check_transpose_not_computed(vc, msr1, msr2) if ignore_transpose else True
        view.score = interestingness(view, ldf) if transpose_ok else -1
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vc = vc.topK(15)
    recommendation["collection"] = vc
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed correlation action in {toc - tic:0.4f} seconds")
    return recommendation
def univariate(ldf, *args):
    """
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.
    data_type_constraint: str
            Controls the type of distribution chart that will be rendered.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Distribution action.
    """
    import numpy as np

    # When extra args are supplied, the first one is a sequence whose first
    # item carries the data-type constraint; default to "quantitative".
    if len(args) == 0:
        data_type_constraint = "quantitative"
    else:
        data_type_constraint = args[0][0]
    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False
    if data_type_constraint == "quantitative":
        # Only sufficiently-varied numeric columns (cardinality > 5),
        # excluding the synthetic count column.
        possible_attributes = [
            c
            for c in ldf.columns
            if ldf.data_type[c] == "quantitative" and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]
        intent = [lux.Clause(possible_attributes)]
        intent.extend(filter_specs)
        examples = ""
        if len(possible_attributes) >= 1:
            examples = f" (e.g., {possible_attributes[0]})"
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes.",
            "long_description": f"Distribution displays univariate histogram distributions of all quantitative attributes{examples}. "
            "Visualizations are ranked from most to least skewed.",
        }
        # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
        # NOTE(review): ldf.length appears to be a row-count property of
        # LuxDataFrame (cf. len(ldf) in sibling variants) — confirm.
        if ldf.length < 5:
            ignore_rec_flag = True
    elif data_type_constraint == "nominal":
        possible_attributes = [
            c
            for c in ldf.columns
            if ldf.data_type[c] == "nominal" and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]
        examples = ""
        if len(possible_attributes) >= 1:
            examples = f" (e.g., {possible_attributes[0]})"
        intent = [lux.Clause("?", data_type="nominal")]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
            "long_description": f"Occurence displays bar charts of counts for all categorical attributes{examples}. Visualizations are ranked from most to least uneven across the bars. ",
        }
    elif data_type_constraint == "geographical":
        possible_attributes = [
            c
            for c in ldf.columns
            if ldf.data_type[c] == "geographical" and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]
        examples = ""
        if len(possible_attributes) >= 1:
            examples = f" (e.g., {possible_attributes[0]})"
        # Geographic attribute paired with some measure for the choropleth.
        intent = [
            lux.Clause("?", data_type="geographical"),
            lux.Clause("?", data_model="measure"),
        ]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Geographical",
            "description": "Show choropleth maps of <p class='highlight-descriptor'>geographic</p> attributes",
            "long_description": f"Occurence displays choropleths of averages for some geographic attribute{examples}. "
            "Visualizations are ranked by diversity of the geographic attribute.",
        }
    elif data_type_constraint == "temporal":
        intent = [lux.Clause("?", data_type="temporal")]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
            "long_description": "Temporal displays line charts for all attributes related to datetimes in the dataframe.",
        }
        # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
        if ldf.length < 3:
            ignore_rec_flag = True
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
def univariate(ldf, data_type_constraint="quantitative"):
    '''
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.
    data_type_constraint: str
            Controls the type of distribution chart that will be rendered.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Distribution action.
    '''
    import numpy as np

    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False
    if data_type_constraint == "quantitative":
        intent = [lux.Clause("?", data_type="quantitative", exclude="Number of Records")]
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes."
        }
        # Histograms need at least 5 datapoints (pre-aggregated).
        ignore_rec_flag = len(ldf) < 5
    elif data_type_constraint == "nominal":
        intent = [lux.Clause("?", data_type="nominal")]
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes."
        }
    elif data_type_constraint == "temporal":
        intent = [lux.Clause("?", data_type="temporal")]
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes."
        }
        # Line charts need at least 3 datapoints (pre-aggregated).
        ignore_rec_flag = len(ldf) < 3
    intent.extend(filter_specs)
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    # Basic visualizations should not be capped, so no topK here.
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
def univariate(ldf, *args):
    """
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.
    data_type_constraint: str
            Controls the type of distribution chart that will be rendered.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Distribution action.
    """
    import numpy as np

    # When extra args are supplied, the first is a sequence whose first item
    # carries the data-type constraint; otherwise default to quantitative.
    data_type_constraint = args[0][0] if len(args) != 0 else "quantitative"
    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False
    if data_type_constraint == "quantitative":
        # Sufficiently-varied numeric columns only, excluding the count column.
        possible_attributes = [
            c
            for c in ldf.columns
            if ldf.data_type[c] == "quantitative" and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]
        intent = [lux.Clause(possible_attributes)]
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes.",
        }
        # Histograms need at least 5 datapoints (pre-aggregated).
        ignore_rec_flag = len(ldf) < 5
    elif data_type_constraint == "nominal":
        intent = [lux.Clause("?", data_type="nominal")]
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
        }
    elif data_type_constraint == "temporal":
        intent = [lux.Clause("?", data_type="temporal")]
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
        }
        # Line charts need at least 3 datapoints (pre-aggregated).
        ignore_rec_flag = len(ldf) < 3
    intent.extend(filter_specs)
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
def filter(ldf):
    """
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Filter action.
    """
    filters = utils.get_filter_specs(ldf._intent)
    filter_values = []
    output = []
    # if fltr is specified, create visualizations where data is filtered by all values of the fltr's categorical variable
    column_spec = utils.get_attrs_specs(ldf.current_vis[0]._inferred_intent)
    # FIX: materialize the attribute names into a list. The original bound a
    # raw `map` iterator; each `col not in column_spec_attr` membership test
    # in the else-branch below consumes it, so every test after the first ran
    # against an exhausted iterator and wrongly succeeded (the sibling
    # add_filter action already uses list(map(...))).
    column_spec_attr = list(map(lambda x: x.attribute, column_spec))
    if len(filters) == 1:
        # get unique values for all categorical values specified and creates corresponding filters
        fltr = filters[0]
        if ldf.data_type_lookup[fltr.attribute] == "nominal":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative value.",
            }
            unique_values = ldf.unique_values[fltr.attribute]
            # Remember the currently-filtered value so it is not re-proposed.
            filter_values.append(fltr.value)
            # creates vis with new filters
            for val in unique_values:
                if val not in filter_values:
                    new_spec = column_spec.copy()
                    new_filter = lux.Clause(attribute=fltr.attribute, value=val)
                    new_spec.append(new_filter)
                    temp_vis = Vis(new_spec)
                    output.append(temp_vis)
        elif ldf.data_type_lookup[fltr.attribute] == "quantitative":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative inequality operation.",
            }

            def get_complementary_ops(fltr_op):
                # Maps an inequality operator to its logical complement.
                if fltr_op == ">":
                    return "<="
                elif fltr_op == "<":
                    return ">="
                elif fltr_op == ">=":
                    return "<"
                elif fltr_op == "<=":
                    return ">"
                # TODO: need to support case where fltr_op is "=" --> auto-binned ranges

            # Create vis with complementary filter operations
            new_spec = column_spec.copy()
            new_filter = lux.Clause(
                attribute=fltr.attribute,
                filter_op=get_complementary_ops(fltr.filter_op),
                value=fltr.value,
            )
            new_spec.append(new_filter)
            temp_vis = Vis(new_spec, score=1)
            output.append(temp_vis)
        # NOTE(review): if the filter attribute is neither nominal nor
        # quantitative, `recommendation` stays unbound and the assignment at
        # the bottom raises UnboundLocalError — confirm upstream data types.
    # if no existing filters, create filters using unique values from all categorical variables in the dataset
    else:
        intended_attrs = ", ".join([
            clause.attribute
            for clause in ldf._intent
            if clause.value == "" and clause.attribute != "Record"
        ])
        recommendation = {
            "action": "Filter",
            "description": f"Applying filters to the <p class='highlight-intent'>{intended_attrs}</p> intent.",
        }
        categorical_vars = []
        for col in list(ldf.columns):
            # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
            if ldf.cardinality[col] < 30 and col not in column_spec_attr:
                categorical_vars.append(col)
        for cat in categorical_vars:
            unique_values = ldf.unique_values[cat]
            for i in range(0, len(unique_values)):
                new_spec = column_spec.copy()
                new_filter = lux.Clause(attribute=cat, filter_op="=", value=unique_values[i])
                new_spec.append(new_filter)
                temp_vis = Vis(new_spec)
                output.append(temp_vis)
    vlist = lux.vis.VisList.VisList(output, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist = vlist.topK(15)
    recommendation["collection"] = vlist
    return recommendation
start = time.perf_counter() vis = Vis(test, df) end = time.perf_counter() t = end - start trial.append([nPts, t, test[0], test[1]]) ################# Color Scatterplot ############################ elif (experiment == "colorscatter"): lux.config.heatmap = False for attr in [ 'host_id', 'host_name', 'neighbourhood_group', 'neighbourhood', 'room_type', 'number_of_reviews' ]: start = time.perf_counter() vis = Vis( ['price', 'minimum_nights', lux.Clause(attr, channel="color")], df) end = time.perf_counter() t = end - start trial.append([nPts, t, attr]) ################# Regular Histogram ############################ elif (experiment == "histogram"): for b in list(range(5, 205, 10)): start = time.perf_counter() vis = Vis([lux.Clause("number_of_reviews", bin_size=b)], df) end = time.perf_counter() t = end - start trial.append([nPts, t, b]) # ################# Regular bar ############################ elif (experiment == "bar"): for attr in [