def removeAllInvalid(viewCollection: ViewCollection) -> ViewCollection:
    """
    Given an expanded view collection, remove all views that are invalid.

    A view is invalid when it maps the same attribute twice, or when it
    contains two or more temporal attributes.

    Parameters
    ----------
    viewCollection : ViewCollection
        Expanded collection of candidate lux.View objects to be pruned.

    Returns
    -------
    ViewCollection
        New collection containing only the valid views.
    """
    validViews = []
    for view in viewCollection:
        attributes = [spec.attribute for spec in view.specLst]
        temporalCount = sum(1 for spec in view.specLst if spec.dataType == "temporal")
        # Duplicate attributes collapse when placed in a set.
        hasDuplicateAttrs = len(set(attributes)) != len(attributes)
        if temporalCount < 2 and not hasDuplicateAttrs:
            validViews.append(view)
    return ViewCollection(validViews)
def distribution(ldf, dataTypeConstraint="quantitative"):
    '''
    Generates bar chart distributions of different attributes in the dataset.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    dataTypeConstraint : str
        The variable that controls the type of distribution chart that will
        be rendered; must be "quantitative" or "nominal".

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the
        Distribution action.

    Raises
    ------
    ValueError
        If dataTypeConstraint is neither "quantitative" nor "nominal".
        (Previously this fell through and crashed with a NameError on the
        undefined `query` variable.)
    '''
    # for benchmarking
    if ldf.toggleBenchmarking == True:
        tic = time.perf_counter()
    if dataTypeConstraint == "quantitative":
        query = [lux.Spec("?", dataType="quantitative")]
        recommendation = {"action": "Distribution",
                          "description": "Show univariate count distributions of different attributes in the dataset."}
    elif dataTypeConstraint == "nominal":
        query = [lux.Spec("?", dataType="nominal")]
        recommendation = {"action": "Category",
                          "description": "Show bar chart distributions of different attributes in the dataset."}
    else:
        # Fail fast with a clear message instead of a NameError further down.
        raise ValueError(f"Unsupported dataTypeConstraint: {dataTypeConstraint!r}")
    # Filters apply to both chart types, so extend once here instead of in
    # each branch (the removed scipy/numpy imports were unused).
    query.extend(ldf.filterSpecs)
    vc = ViewCollection(query)
    vc = vc.load(ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc
    # for benchmarking
    if ldf.toggleBenchmarking == True:
        toc = time.perf_counter()
        print(f"Performed distribution action in {toc - tic:0.4f} seconds")
    return recommendation
def similar_pattern(ldf, queryContext, topK=-1):
    '''
    Generates visualizations with similar patterns to a query visualization.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    queryContext: list[lux.Spec]
        context for specifying the visual query for the similarity search.

    topK: int
        number of visual recommendations to return; -1 returns all of them.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the
        Similarity action, or None when the query does not contain exactly
        one row (filter) value.
    '''
    row_specs = list(filter(lambda x: x.value != "", queryContext))
    if len(row_specs) == 1:
        search_space_vc = ViewCollection(ldf.current_view.collection.copy())
        search_space_vc = search_space_vc.load(ldf)

        query_vc = ViewCollection(queryContext)
        query_vc = query_vc.load(ldf)
        query_view = query_vc[0]
        # Bug fix: this previously called preprocess(queryView), an undefined
        # name, which raised a NameError on every invocation.
        preprocess(query_view)
        # for loop to create assign euclidean distance
        recommendation = {
            "action": "Similarity",
            "description": "Show other charts that are visually similar to the Current View."
        }
        for view in search_space_vc:
            preprocess(view)
            view.score = euclidean_dist(query_view, view)
        # Lower distance means more similar, hence the inverted ordering.
        search_space_vc.normalize_score(invert_order=True)
        if topK != -1:
            search_space_vc = search_space_vc.topK(topK)
        recommendation["collection"] = search_space_vc
        return recommendation
    else:
        print("Query needs to have 1 row value")
def getExported(self) -> typing.Union[typing.Dict[str, ViewCollection], ViewCollection]:
    """
    Get selected views as exported View Collection

    Notes
    -----
    Convert the _exportedVisIdxs dictionary into a programmable ViewCollection.
    Example _exportedVisIdxs :
        {'Correlation': [0, 2], 'Category': [1]}
    indicating the 0th and 2nd vis from the `Correlation` tab is selected,
    and the 1st vis from the `Category` tab is selected.

    Returns
    -------
    typing.Union[typing.Dict[str, ViewCollection], ViewCollection]
        When there are no exported vis, return empty list -> []
        When all the exported vis is from the same tab, return a
        ViewCollection of selected views. -> ViewCollection(v1, v2...)
        When the exported vis is from the different tabs, return a dictionary
        with the action name as key and selected views in the ViewCollection.
        -> {"Enhance": ViewCollection(v1, v2...),
            "Filter": ViewCollection(v5, v7...), ..}
    """
    selectedIdxs = self.widget._exportedVisIdxs
    if not selectedIdxs:
        import warnings
        warnings.warn("No visualization selected to export")
        return []

    def _viewsFor(action):
        # Resolve each stored index against that action's recommendation tab.
        tabViews = self.recommendation[action]
        return ViewCollection([tabViews[i] for i in selectedIdxs[action]])

    actions = list(selectedIdxs.keys())
    if len(actions) == 1:
        # Single tab: hand back the collection directly.
        return _viewsFor(actions[0])
    # Multiple tabs: key each collection by its action name.
    return {action: _viewsFor(action) for action in actions}
def enumerateCollection(specLst: List[Spec], ldf: LuxDataFrame) -> ViewCollection:
    """
    Given specifications that have been expanded through populateWildcardOptions,
    recursively iterate over the resulting list combinations to generate a View
    collection.

    Parameters
    ----------
    specLst : List[Spec]
        Expanded specifications (wildcards already populated).
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    ViewCollection: list[lux.View]
        view collection with compiled lux.View objects
        (an empty plain list when there are no attributes to enumerate,
        matching the original early-return behavior).
    """
    import copy
    specs = Compiler.populateWildcardOptions(specLst, ldf)
    attributes = specs['attributes']
    filters = specs['filters']
    if len(attributes) == 0:
        # Nothing to enumerate. Record any filters on the dataframe and bail
        # out. Bug fix: previously an empty attribute list with NO filters
        # fell through into combine([], []), which crashed with an IndexError
        # on colAttrs[0].
        if len(filters) > 0:
            ldf.filterSpecs = filters
        return []
    collection = []

    # Generate combinations of column attributes recursively by continuing to
    # accumulate one attribute per column list until all lists are consumed.
    def combine(colAttrs, accum):
        last = (len(colAttrs) == 1)
        for option in colAttrs[0]:
            columnList = copy.deepcopy(accum + [option])
            if last:
                if len(filters) > 0:
                    # If we have filters, generate combinations for each row.
                    # (Renamed from `specLst` to avoid shadowing the parameter.)
                    for row in filters:
                        rowSpecLst = copy.deepcopy(columnList + [row])
                        view = View(rowSpecLst,
                                    title=f"{row.attribute} {row.filterOp} {row.value}")
                        collection.append(view)
                else:
                    collection.append(View(columnList))
            else:
                combine(colAttrs[1:], columnList)

    combine(attributes, [])
    return ViewCollection(collection)
def test_viewCollection():
    """Check that loading a ViewCollection expands wildcards and drops invalid views."""
    df = pd.read_csv("lux/data/olympic.csv")
    from lux.view.ViewCollection import ViewCollection

    vc = ViewCollection(["Height", "SportType=Ball", "?"]).load(df)
    # Locate the view whose wildcard resolved to the "Year" attribute.
    yearViews = [v for v in vc if v.getAttrByAttrName("Year") != []]
    assert yearViews[0].getAttrByChannel("x")[0].attribute == "Year"
    # One view removed for sharing the filter attribute, another removed
    # for repeating the "Height" attribute.
    assert len(vc) == len(df.columns) - 1 - 1

    vc = ViewCollection(["Height", "?"]).load(df)
    # Only the duplicate-"Height" view is removed here.
    assert len(vc) == len(df.columns) - 1
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    '''
    Generates bivariate visualizations that represent all pairwise
    relationships in the data.

    Parameters
    ----------
    ldf : LuxDataFrame
        LuxDataFrame with underspecified context.
    ignore_transpose : bool
        Boolean flag to ignore pairs of attributes whose transpose are
        already computed (i.e., {X,Y} will be ignored if {Y,X} is already
        computed).

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the
        Correlation action.
    '''
    import numpy as np
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()
    query = [lux.Spec("?", data_model="measure"),
             lux.Spec("?", data_model="measure")]
    query.extend(ldf.filter_specs)
    recommendation = {
        "action": "Correlation",
        "description": "Show relationships between two quantitative attributes."
    }
    vc = ViewCollection(query).load(ldf)
    # Use the data populated in the view collection to score each view.
    for view in vc:
        measures = view.get_attr_by_data_model("measure")
        if len(measures) < 2:
            # NOTE(review): this message iterates ldf.columns assuming each
            # entry has an .attribute — confirm against LuxDataFrame.columns.
            raise ValueError(
                f"Can not compute correlation between {[x.attribute for x in ldf.columns]} since less than 2 measure values present."
            )
        msr1 = measures[0].attribute
        msr2 = measures[1].attribute
        # Skip the {Y,X} twin when {X,Y} has already been scored.
        if ignore_transpose:
            keep = check_transpose_not_computed(vc, msr1, msr2)
        else:
            keep = True
        view.score = interestingness(view, ldf) if keep else -1
    recommendation["collection"] = vc.topK(15)
    # for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed correlation action in {toc - tic:0.4f} seconds")
    return recommendation