def random_categorical(ldf):
    """
    Builds the "bars" recommendation from a wildcard clause over nominal attributes.

    Every visualization produced for the wildcard intent receives the same
    fixed score (10) before the list is sorted and reduced via ``showK()``.

    Parameters
    ----------
    ldf : dataframe object
        Passed to ``VisList`` together with the nominal wildcard intent.

    Returns
    -------
    recommendations : Dict[str,obj]
        dict with the keys "action", "description" and "collection".
    """
    nominal_wildcard = lux.Clause("?", data_type="nominal")
    candidates = VisList([nominal_wildcard], ldf)
    # All candidates are scored identically; no interestingness is computed here.
    for candidate in candidates:
        candidate.score = 10
    candidates.sort()
    recommendation = {
        "action": "bars",
        "description": "Random list of Bar charts",
    }
    recommendation["collection"] = candidates.showK()
    return recommendation
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    """
    Generates bivariate visualizations that represent all pairwise relationships in the data.

    Parameters
    ----------
    ldf : LuxDataFrame
        LuxDataFrame with underspecified intent.

    ignore_transpose: bool
        Boolean flag to ignore pairs of attributes whose transpose are already computed
        (i.e., {X,Y} will be ignored if {Y,X} is already computed)

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Correlation action.
        The collection is an empty list when ``len(ldf) < 5``.

    Raises
    ------
    ValueError
        If a generated visualization exposes fewer than two measure attributes.
    """
    recommendation = {
        "action": "Correlation",
        "description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
    }
    # Doesn't make sense to compute correlation with fewer than 5 data values.
    # Return before the VisList is built so nothing is scored only to be discarded.
    if len(ldf) < 5:
        recommendation["collection"] = []
        return recommendation

    intent = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    intent.extend(utils.get_filter_specs(ldf._intent))
    vlist = VisList(intent, ldf)
    # Then use the data populated in the vis list to compute score
    for vis in vlist:
        measures = vis.get_attr_by_data_model("measure")
        if len(measures) < 2:
            # ldf.columns holds column labels, not clause objects, so report the
            # labels directly (reading `.attribute` off a label would itself fail).
            raise ValueError(
                f"Can not compute correlation between {list(ldf.columns)} since less than 2 measure values present."
            )
        msr1 = measures[0].attribute
        msr2 = measures[1].attribute
        if not ignore_transpose or check_transpose_not_computed(vlist, msr1, msr2):
            vis.score = interestingness(vis, ldf)
        else:
            # The transposed pair was already handled: mark with the sentinel score.
            vis.score = -1
    vlist.sort()
    recommendation["collection"] = vlist.showK()
    return recommendation
def univariate(ldf, data_type_constraint="quantitative"):
    """
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    data_type_constraint: str
        Controls the type of distribution chart that will be rendered.
        Supported values are "quantitative" (default), "nominal" and "temporal".

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Distribution action.
        The collection is an empty list when ``len(ldf)`` is below the minimum for the
        requested chart type (5 for "quantitative", 3 for "temporal").

    Raises
    ------
    ValueError
        If ``data_type_constraint`` is not one of the supported values.
    """
    # Minimum number of datapoints (pre-aggregated) for the chart to make sense;
    # None means no minimum is enforced for that chart type.
    min_datapoints = None
    if data_type_constraint == "quantitative":
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes.",
        }
        min_datapoints = 5  # histogram
    elif data_type_constraint == "nominal":
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
        }
    elif data_type_constraint == "temporal":
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
        }
        min_datapoints = 3  # line chart
    else:
        # An unknown constraint used to fall through every branch and fail later
        # with an UnboundLocalError; reject it explicitly instead.
        raise ValueError(
            f"Unsupported data_type_constraint {data_type_constraint!r}; "
            "expected 'quantitative', 'nominal' or 'temporal'."
        )

    if min_datapoints is not None and len(ldf) < min_datapoints:
        recommendation["collection"] = []
        return recommendation

    # The intent is only built once we know the recommendation will be generated.
    if data_type_constraint == "quantitative":
        wildcard = lux.Clause("?", data_type="quantitative", exclude="Number of Records")
    else:
        wildcard = lux.Clause("?", data_type=data_type_constraint)
    intent = [wildcard]
    intent.extend(utils.get_filter_specs(ldf._intent))

    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    # Basic visualizations should not be capped, so the full sorted list is returned.
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
def univariate(ldf, *args):
    """
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    data_type_constraint: str
        Controls the type of distribution chart that will be rendered.
        It is read from ``args[0][0]``, i.e. the first extra positional argument must be
        a sequence whose first element is the constraint. Defaults to "quantitative" when
        no extra argument is given. Supported values are "quantitative", "nominal",
        "geographical" and "temporal".

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Distribution action.
        The collection is an empty list when ``ldf.length`` is below the minimum for the
        requested chart type (5 for "quantitative", 3 for "temporal").

    Raises
    ------
    ValueError
        If the data type constraint is not one of the supported values.
    """
    data_type_constraint = args[0][0] if args else "quantitative"
    supported_constraints = ("quantitative", "nominal", "geographical", "temporal")
    if data_type_constraint not in supported_constraints:
        # An unknown constraint used to fall through every branch and fail later
        # with an UnboundLocalError; reject it explicitly instead.
        raise ValueError(
            f"Unsupported data_type_constraint {data_type_constraint!r}; "
            f"expected one of {supported_constraints}."
        )

    # Attributes of the requested type with more than 5 distinct values. Used as the
    # intent for "quantitative" and as the example shown in the long description.
    # The temporal description mentions no example, so the scan is skipped there.
    possible_attributes = []
    if data_type_constraint != "temporal":
        possible_attributes = [
            c
            for c in ldf.columns
            if ldf.data_type[c] == data_type_constraint
            and ldf.cardinality[c] > 5
            and c != "Number of Records"
        ]
    examples = f" (e.g., {possible_attributes[0]})" if possible_attributes else ""

    # Minimum number of datapoints (pre-aggregated) for the chart to make sense;
    # None means no minimum is enforced for that chart type.
    min_length = None
    if data_type_constraint == "quantitative":
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes.",
            "long_description": f"Distribution displays univariate histogram distributions of all quantitative attributes{examples}. Visualizations are ranked from most to least skewed.",
        }
        min_length = 5  # histogram
    elif data_type_constraint == "nominal":
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
            "long_description": f"Occurence displays bar charts of counts for all categorical attributes{examples}. Visualizations are ranked from most to least uneven across the bars. ",
        }
    elif data_type_constraint == "geographical":
        recommendation = {
            "action": "Geographical",
            "description": "Show choropleth maps of <p class='highlight-descriptor'>geographic</p> attributes",
            "long_description": f"Occurence displays choropleths of averages for some geographic attribute{examples}. Visualizations are ranked by diversity of the geographic attribute.",
        }
    else:  # "temporal"
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
            "long_description": "Temporal displays line charts for all attributes related to datetimes in the dataframe.",
        }
        min_length = 3  # line chart

    if min_length is not None and ldf.length < min_length:
        recommendation["collection"] = []
        return recommendation

    # The intent is only built once we know the recommendation will be generated.
    if data_type_constraint == "quantitative":
        intent = [lux.Clause(possible_attributes)]
    elif data_type_constraint == "geographical":
        intent = [
            lux.Clause("?", data_type="geographical"),
            lux.Clause("?", data_model="measure"),
        ]
    else:
        intent = [lux.Clause("?", data_type=data_type_constraint)]
    intent.extend(utils.get_filter_specs(ldf._intent))

    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
def univariate(ldf, *args):
    """
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    data_type_constraint: str
        Controls the type of distribution chart that will be rendered.
        It is read from ``args[0][0]``, i.e. the first extra positional argument must be
        a sequence whose first element is the constraint. Defaults to "quantitative" when
        no extra argument is given. Supported values are "quantitative", "nominal" and
        "temporal".

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Distribution action.
        The collection is an empty list when ``len(ldf)`` is below the minimum for the
        requested chart type (5 for "quantitative", 3 for "temporal").

    Raises
    ------
    ValueError
        If the data type constraint is not one of the supported values.
    """
    data_type_constraint = args[0][0] if args else "quantitative"

    # Minimum number of datapoints (pre-aggregated) for the chart to make sense;
    # None means no minimum is enforced for that chart type.
    min_datapoints = None
    if data_type_constraint == "quantitative":
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes.",
        }
        min_datapoints = 5  # histogram
    elif data_type_constraint == "nominal":
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
        }
    elif data_type_constraint == "temporal":
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
        }
        min_datapoints = 3  # line chart
    else:
        # An unknown constraint used to fall through every branch and fail later
        # with an UnboundLocalError; reject it explicitly instead.
        raise ValueError(
            f"Unsupported data_type_constraint {data_type_constraint!r}; "
            "expected 'quantitative', 'nominal' or 'temporal'."
        )

    if min_datapoints is not None and len(ldf) < min_datapoints:
        recommendation["collection"] = []
        return recommendation

    # The intent is only built once we know the recommendation will be generated.
    if data_type_constraint == "quantitative":
        # Restrict histograms to quantitative columns with more than 5 distinct values.
        possible_attributes = [
            c
            for c in ldf.columns
            if ldf.data_type[c] == "quantitative"
            and ldf.cardinality[c] > 5
            and c != "Number of Records"
        ]
        intent = [lux.Clause(possible_attributes)]
    else:
        intent = [lux.Clause("?", data_type=data_type_constraint)]
    intent.extend(utils.get_filter_specs(ldf._intent))

    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation