def executeAggregate(view: View):
    '''
    Aggregate data points on an axis for bar or line charts.

    The channel ("x" or "y") whose attribute has a non-empty ``aggregation``
    is the measure; the attribute on the other channel is the group-by key.
    ``view.data`` is replaced with the aggregated frame, sorted ascending by
    the group-by attribute and re-indexed from 0. Group-by values listed in
    ``view.data.uniqueValues`` that have no rows in ``view.data`` are added
    with an aggregated value of 0. If neither channel specifies an
    aggregation, ``view.data`` is left untouched.

    Parameters
    ----------
    view: lux.View
        lux.View object that represents a visualization. ``view.data`` must
        expose ``uniqueValues``, indexable by attribute name and yielding the
        values expected along the group-by axis.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If, after zero-filling, the number of aggregated rows still differs
        from the number of expected group-by values.
    '''
    xAttr = view.getAttrByChannel("x")[0]
    yAttr = view.getAttrByChannel("y")[0]
    groupbyAttr = None
    measureAttr = None
    aggFunc = None
    if yAttr.aggregation != "":
        groupbyAttr, measureAttr, aggFunc = xAttr, yAttr, yAttr.aggregation
    # Checked last on purpose: when both channels carry an aggregation, x wins.
    if xAttr.aggregation != "":
        groupbyAttr, measureAttr, aggFunc = yAttr, xAttr, xAttr.aggregation
    if measureAttr is None:
        # No aggregation requested on either axis: there is no group-by
        # attribute to look up, so leave view.data as it is.
        return

    groupbyName = groupbyAttr.attribute
    allAttrVals = view.data.uniqueValues[groupbyName]

    if measureAttr.attribute == "Record":
        # Turn the row index into an "index" column so that count() on it
        # gives the number of rows in each group, then expose it as "Record".
        counted = view.data.reset_index().groupby(groupbyName).count().reset_index()
        counted = counted.rename(columns={"index": "Record"})
        view.data = counted[[groupbyName, "Record"]]
    else:
        view.data = view.data.groupby(groupbyName).agg(aggFunc).reset_index()

    resultVals = view.data[groupbyName]
    if len(resultVals) != len(allAttrVals):
        # For filtered aggregations that are missing some group-by values,
        # set the aggregated value to 0, since there are no datapoints.
        presentVals = set(resultVals)
        # The group-by column comes first in both branches above; every
        # remaining (measure) column is zero-filled.
        zeroFill = [0] * (len(view.data.columns) - 1)
        for val in allAttrVals:
            if val not in presentVals:
                view.data.loc[len(view.data)] = [val] + zeroFill

    assert len(view.data[groupbyName]) == len(allAttrVals), \
        f"Aggregated data missing values compared to original range of values of `{groupbyName}`."
    view.data = view.data.sort_values(by=groupbyName, ascending=True).reset_index(drop=True)
def executeFilter(view: View):
    '''
    Apply every filter specification of a view to its data, in order.

    The filter specs are obtained from ``utils.getFilterSpecs(view.specLst)``.
    Each one is passed to ``PandasExecutor.applyFilter`` together with the
    current ``view.data``, and the result is stored back on ``view.data``, so
    each filter operates on the output of the previous one. Nothing happens
    when there are no filter specs.

    Parameters
    ----------
    view: lux.View
        View whose ``data`` is already populated and whose ``specLst`` holds
        the specifications to filter by.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If ``view.data`` is None.
    '''
    assert view.data is not None, "executeFilter assumes input view.data is populated (if not, populate with LuxDataFrame values)"
    filterSpecs = utils.getFilterSpecs(view.specLst)
    if not filterSpecs:
        return
    # TODO: Need to handle OR logic
    for spec in filterSpecs:
        view.data = PandasExecutor.applyFilter(view.data, spec.attribute, spec.filterOp, spec.value)
def executeAggregate(view:View, ldf:LuxDataFrame):
    '''
    Compute the aggregated data of a view with a SQL GROUP BY query.

    The channel ("x" or "y") whose attribute has a non-empty ``aggregation``
    is the measure; the attribute on the other channel is the group-by
    column. The query is built against ``ldf.table_name`` with the where
    clause returned by ``SQLExecutor.executeFilter(view)``, run through
    ``pd.read_sql`` on ``ldf.SQLconnection``, converted with
    ``utils.pandasToLux`` and stored on ``view.data``.

    Only the "mean", "sum" and "max" aggregations (and the special "Record"
    count measure) produce a query; for any other aggregation ``view.data``
    is left untouched. When neither channel has an aggregation nothing is
    done at all.

    Parameters
    ----------
    view: lux.View
        View to aggregate; its ``data`` attribute receives the result.
    ldf: LuxDataFrame
        Provides ``table_name`` and ``SQLconnection`` for the query.

    Returns
    -------
    None
    '''
    import pandas as pd

    xSpec = view.getAttrByChannel("x")[0]
    ySpec = view.getAttrByChannel("y")[0]
    dimension = ""
    measure = ""
    if ySpec.aggregation != "":
        dimension, measure, aggregation = xSpec, ySpec, ySpec.aggregation
    # Checked last: when both channels carry an aggregation, x wins.
    if xSpec.aggregation != "":
        dimension, measure, aggregation = ySpec, xSpec, xSpec.aggregation
    if measure == "":
        return

    isRecordCount = measure.attribute == "Record"
    whereClause, _filterVars = SQLExecutor.executeFilter(view)

    if isRecordCount:
        # barchart case, need count data for each group
        # NOTE(review): identifiers are interpolated into the SQL text
        # unescaped -- confirm they never come from untrusted input.
        countQuery = "SELECT {}, COUNT({}) FROM {} {} GROUP BY {}".format(
            dimension.attribute, dimension.attribute, ldf.table_name,
            whereClause, dimension.attribute)
        rawCounts = pd.read_sql(countQuery, ldf.SQLconnection)
        # NOTE(review): the rename only takes effect if the backend labels the
        # un-aliased COUNT(...) column "count" -- TODO confirm for each
        # supported database.
        view.data = utils.pandasToLux(rawCounts.rename(columns={"count":"Record"}))
        return

    # One query template per supported aggregation name.
    aggTemplates = (
        ("mean", "SELECT {}, AVG({}) as {} FROM {} {} GROUP BY {}"),
        ("sum", "SELECT {}, SUM({}) as {} FROM {} {} GROUP BY {}"),
        ("max", "SELECT {}, MAX({}) as {} FROM {} {} GROUP BY {}"),
    )
    for aggName, template in aggTemplates:
        if aggregation == aggName:
            aggQuery = template.format(
                dimension.attribute, measure.attribute, measure.attribute,
                ldf.table_name, whereClause, dimension.attribute)
            view.data = utils.pandasToLux(pd.read_sql(aggQuery, ldf.SQLconnection))