def enforceSpecifiedChannel(view: View, autoChannel: Dict[str, object]):
    """
    Enforces that the channels specified in the View by users overrides
    the showMe autoChannels.

    Parameters
    ----------
    view : lux.view.View
        Input View without channel specification.
    autoChannel : Dict[str, object]
        Key-value pairs in the form {channel: spec} specifying the showMe
        recommended channel location.
        NOTE(review): the values are spec objects — the code below reads
        `.attribute` and `.channel` off them — not plain attribute-name
        strings as the previous ``Dict[str, str]`` annotation suggested.

    Returns
    -------
    view : lux.view.View
        View with channel specification combining both original and
        autoChannel specification.

    Raises
    ------
    ValueError
        Ensures no more than one attribute is placed in the same channel.
    """
    # result of enforcing specified channel will be stored in resultDict
    resultDict = {}
    # specifiedDict={"x":[],"y":[list of Dobj with y specified as channel]}
    specifiedDict = {}
    # create a dictionary of specified channels in the given dobj
    for val in autoChannel.keys():
        specifiedDict[val] = view.getAttrByChannel(val)
        resultDict[val] = ""
    # for every element, replace with what's in specifiedDict if specified
    for sVal, sAttr in specifiedDict.items():
        if (len(sAttr) == 1):  # if specified in dobj
            # remove the specified channel from autoChannel
            # (matching by value, since channel key may not be same)
            for i in list(autoChannel.keys()):
                if ((autoChannel[i].attribute == sAttr[0].attribute)
                        and (autoChannel[i].channel == sVal)):
                    # need to ensure that the channel is the same (edge case
                    # when duplicate Cols with same attribute name)
                    autoChannel.pop(i)
                    break
            sAttr[0].channel = sVal
            resultDict[sVal] = sAttr[0]
        elif (len(sAttr) > 1):
            raise ValueError(
                "There should not be more than one attribute specified in the same channel."
            )
    # For the leftover channels that are still unspecified in resultDict,
    # and the leftovers in the autoChannel specification,
    # step through them together and fill it automatically.
    leftover_channels = list(filter(lambda x: resultDict[x] == '', resultDict))
    for leftover_channel, leftover_encoding in zip(leftover_channels, autoChannel.values()):
        leftover_encoding.channel = leftover_channel
        resultDict[leftover_channel] = leftover_encoding
    view.specLst = list(resultDict.values())
    return view
def executeAggregate(view: View):
    '''
    Aggregate data points on an axis for bar or line charts.

    Groups view.data by the non-aggregated channel, applies the other
    channel's aggregation function, and pads group-by values that were
    filtered away entirely with an aggregate of 0. view.data is replaced
    in place with the aggregated, sorted result.

    Parameters
    ----------
    view: lux.View
        lux.View object that represents a visualization

    Returns
    -------
    None
    '''
    xAttr = view.getAttrByChannel("x")[0]
    yAttr = view.getAttrByChannel("y")[0]
    groupbyAttr = ""
    measureAttr = ""
    # Whichever channel carries an aggregation becomes the measure; the
    # other channel is grouped by. If both carry one, x wins as measure.
    if (yAttr.aggregation != ""):
        groupbyAttr = xAttr
        measureAttr = yAttr
        aggFunc = yAttr.aggregation
    if (xAttr.aggregation != ""):
        groupbyAttr = yAttr
        measureAttr = xAttr
        aggFunc = xAttr.aggregation
    # NOTE(review): if neither axis carries an aggregation, groupbyAttr is
    # still "" and the next line raises AttributeError — presumably callers
    # only invoke this for aggregated views; confirm upstream.
    allAttrVals = view.data.uniqueValues[groupbyAttr.attribute]
    if (measureAttr != ""):
        if (measureAttr.attribute == "Record"):
            # Count of records per group: the column produced by
            # reset_index ("index") doubles as the counted "Record" column.
            view.data = view.data.reset_index()
            view.data = view.data.groupby(
                groupbyAttr.attribute).count().reset_index()
            view.data = view.data.rename(columns={"index": "Record"})
            view.data = view.data[[groupbyAttr.attribute, "Record"]]
        else:
            groupbyResult = view.data.groupby(groupbyAttr.attribute)
            view.data = groupbyResult.agg(aggFunc).reset_index()
            resultVals = list(view.data[groupbyAttr.attribute])
            if (len(resultVals) != len(allAttrVals)):
                # For filtered aggregation that have missing groupby-attribute
                # values, set these aggregated value as 0, since no datapoints
                for vals in allAttrVals:
                    if (vals not in resultVals):
                        view.data.loc[len(view.data)] = [vals, 0]
            assert len(list(view.data[groupbyAttr.attribute])) == len(
                allAttrVals
            ), f"Aggregated data missing values compared to original range of values of `{groupbyAttr.attribute}`."
        view.data = view.data.sort_values(by=groupbyAttr.attribute, ascending=True)
        # Fix: the original did reset_index() followed by
        # drop(columns="index"); reset_index(drop=True) is the equivalent
        # single call. Also dropped an unused `import numpy as np`.
        view.data = view.data.reset_index(drop=True)
def executeAggregate(view: View, ldf: LuxDataFrame):
    """
    Aggregate data points on an axis for bar or line charts by pushing the
    group-by down into SQL.

    Parameters
    ----------
    view : lux.View
        View to populate; view.data is replaced by the aggregated result.
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame carrying the backing table name and SQL connection.

    Returns
    -------
    None
    """
    import pandas as pd
    xAttr = view.getAttrByChannel("x")[0]
    yAttr = view.getAttrByChannel("y")[0]
    groupbyAttr = ""
    measureAttr = ""
    if (yAttr.aggregation != ""):
        groupbyAttr = xAttr
        measureAttr = yAttr
        aggFunc = yAttr.aggregation
    if (xAttr.aggregation != ""):
        groupbyAttr = yAttr
        measureAttr = xAttr
        aggFunc = xAttr.aggregation
    if (measureAttr == ""):
        # No aggregation on either axis: nothing to do (matches the
        # original's silent fallthrough).
        return
    # NOTE(review): queries are assembled with str.format. The interpolated
    # names come from the dataframe schema rather than free-form user text,
    # but this is still unsafe if any attribute/table name is
    # attacker-controlled — consider proper identifier quoting.
    whereClause, filterVars = SQLExecutor.executeFilter(view)
    if (measureAttr.attribute == "Record"):
        # barchart case, need count data for each group
        countQuery = "SELECT {}, COUNT({}) FROM {} {} GROUP BY {}".format(
            groupbyAttr.attribute, groupbyAttr.attribute, ldf.table_name,
            whereClause, groupbyAttr.attribute)
        view.data = pd.read_sql(countQuery, ldf.SQLconnection)
        view.data = view.data.rename(columns={"count": "Record"})
        view.data = utils.pandasToLux(view.data)
    else:
        # Fix: the original duplicated one near-identical query per
        # aggregation function (and reused the name `meanQuery` for all of
        # them). Map the lux aggregation name onto its SQL function instead;
        # the generated SQL text is identical to before.
        sqlFunc = {"mean": "AVG", "sum": "SUM", "max": "MAX"}.get(aggFunc)
        if sqlFunc is not None:
            aggQuery = "SELECT {}, {}({}) as {} FROM {} {} GROUP BY {}".format(
                groupbyAttr.attribute, sqlFunc, measureAttr.attribute,
                measureAttr.attribute, ldf.table_name, whereClause,
                groupbyAttr.attribute)
            view.data = pd.read_sql(aggQuery, ldf.SQLconnection)
            view.data = utils.pandasToLux(view.data)
def interestingness(view: View, ldf: LuxDataFrame) -> int:
    """
    Compute the interestingness score of the view.

    The interestingness metric is dependent on the view type (bar chart,
    histogram, scatter plot, colored scatter plot).

    Parameters
    ----------
    view : View
    ldf : LuxDataFrame

    Returns
    -------
    int
        Interestingness Score (-1 when no metric is defined for the view).

    Raises
    ------
    Exception
        If view.data has not been populated yet.
    """
    if view.data is None:
        raise Exception("View.data needs to be populated before interestingness can be computed. Run Executor.execute(view,ldf).")
    n_dim = 0
    n_msr = 0
    filterSpecs = utils.getFilterSpecs(view.specLst)
    viewAttrsSpecs = utils.getAttrsSpecs(view.specLst)
    # Count dimensions/measures, ignoring the synthetic "Record" attribute.
    for spec in viewAttrsSpecs:
        if (spec.attribute != "Record"):
            if (spec.dataModel == 'dimension'):
                n_dim += 1
            if (spec.dataModel == 'measure'):
                n_msr += 1
    n_filter = len(filterSpecs)
    attr_specs = [spec for spec in viewAttrsSpecs if spec.attribute != "Record"]
    dimensionLst = view.getAttrByDataModel("dimension")
    measureLst = view.getAttrByDataModel("measure")
    # Bar Chart
    if (n_dim == 1 and (n_msr == 0 or n_msr == 1)):
        if (n_filter == 0):
            return unevenness(view, ldf, measureLst, dimensionLst)
        elif (n_filter == 1):
            # NOTE(review): with n_msr == 0 this indexes an empty measureLst
            # (IndexError) — confirm showMe never produces that combination.
            return deviationFromOverall(view, ldf, filterSpecs, measureLst[0].attribute)
        # Fix: previously fell through and implicitly returned None despite
        # the declared -> int; treat >1 filter as "no defined score".
        return -1
    # Histogram
    elif (n_dim == 0 and n_msr == 1):
        if (n_filter == 0):
            v = view.data["Count of Records"]
            return skewness(v)
        elif (n_filter == 1):
            return deviationFromOverall(view, ldf, filterSpecs, "Count of Records")
        return -1  # Fix: same implicit-None fallthrough as the bar-chart case.
    # Scatter Plot
    elif (n_dim == 0 and n_msr == 2):
        if (n_filter == 1):
            # Weight monotonicity by the fraction of data passing the filter.
            v_filter_size = getFilteredSize(filterSpecs, view.data)
            v_size = len(view.data)
            sig = v_filter_size / v_size
        else:
            sig = 1
        return sig * monotonicity(view, attr_specs)
    # Scatterplot colored by Dimension
    elif (n_dim == 1 and n_msr == 2):
        colorAttr = view.getAttrByChannel("color")[0].attribute
        C = ldf.cardinality[colorAttr]
        # Fewer color groups are easier to read, hence the higher score.
        if (C < 40):
            return 1 / C
        else:
            return -1
    # Fix: removed an unreachable duplicate `elif (n_dim == 1 and n_msr == 2)`
    # branch (returning 0.2) that was shadowed by the identical condition above.
    # Scatterplot colored by measure
    elif (n_msr == 3):
        return 0.1
    # Default
    else:
        return -1