예제 #1
    def executeAggregate(view: View):
        """
        Aggregate data points on an axis for bar or line charts.

        The channel ("x" or "y") whose attribute has a non-empty
        ``aggregation`` is the measure; the attribute on the other channel is
        the group-by key. If both channels are aggregated, the "x" channel
        wins as the measure. ``view.data`` is replaced with the aggregated,
        sorted frame; nothing is returned. When neither channel is
        aggregated, ``view.data`` is left untouched.

        Parameters
        ----------
        view: lux.View
            lux.View object that represents a visualization. ``view.data``
            must already be populated and expose ``uniqueValues``, indexed by
            attribute name.

        Raises
        ------
        AssertionError
            If the aggregated frame does not end up with as many rows as
            there are known values of the group-by attribute.
        """
        xAttr = view.getAttrByChannel("x")[0]
        yAttr = view.getAttrByChannel("y")[0]
        groupbyAttr = ""
        measureAttr = ""
        if (yAttr.aggregation != ""):
            groupbyAttr = xAttr
            measureAttr = yAttr
            aggFunc = yAttr.aggregation
        if (xAttr.aggregation != ""):
            groupbyAttr = yAttr
            measureAttr = xAttr
            aggFunc = xAttr.aggregation
        if (measureAttr == ""):
            # Neither channel is aggregated: there is no group-by key to
            # dereference, so bail out before touching view.data.
            return

        groupbyName = groupbyAttr.attribute
        # Must be read now: the frames assigned to view.data below are fresh
        # pandas results.
        allAttrVals = view.data.uniqueValues[groupbyName]

        if (measureAttr.attribute == "Record"):
            # Count rows per group: materialise the index as a column, count
            # it within each group, then expose that count as "Record".
            view.data = view.data.reset_index()
            view.data = view.data.groupby(groupbyName).count().reset_index()
            view.data = view.data.rename(columns={"index": "Record"})
            view.data = view.data[[groupbyName, "Record"]]
        else:
            groupbyResult = view.data.groupby(groupbyName)
            view.data = groupbyResult.agg(aggFunc).reset_index()

        resultVals = list(view.data[groupbyName])
        if (len(resultVals) != len(allAttrVals)):
            # Filtered aggregations can be missing some group-by values; add
            # those back with an aggregated value of 0, since they have no
            # datapoints.
            # NOTE(review): the appended row has exactly two cells, so this
            # assumes view.data has two columns here -- confirm for the
            # non-"Record" branch.
            for vals in allAttrVals:
                if (vals not in resultVals):
                    view.data.loc[len(view.data)] = [vals, 0]
        assert len(view.data[groupbyName]) == len(allAttrVals), (
            f"Aggregated data missing values compared to original range of values of `{groupbyAttr.attribute}`."
        )
        view.data = view.data.sort_values(by=groupbyName, ascending=True)
        # Renumber rows 0..n-1 after sorting without keeping the old index.
        view.data = view.data.reset_index(drop=True)
예제 #2
    def executeFilter(view: View):
        """
        Apply the filter specs of ``view`` to ``view.data``.

        The filter specs are obtained with
        ``utils.getFilterSpecs(view.specLst)``. Each one is passed in turn,
        together with the current ``view.data``, to
        ``PandasExecutor.applyFilter``, and the result replaces ``view.data``.
        When there are no filter specs, ``view.data`` is left as it is.

        Parameters
        ----------
        view: lux.View
            View whose ``data`` is already populated.

        Raises
        ------
        AssertionError
            If ``view.data`` is None.
        """
        assert view.data is not None, "executeFilter assumes input view.data is populated (if not, populate with LuxDataFrame values)"
        filters = utils.getFilterSpecs(view.specLst)

        if (filters):
            # TODO: Need to handle OR logic
            # Each filter is applied to the output of the previous one.
            # NOTE(review): the loop variable `filter` shadows the builtin.
            for filter in filters:
                view.data = PandasExecutor.applyFilter(view.data,
예제 #3
    def executeAggregate(view:View, ldf:LuxDataFrame):
        """
        Aggregate data points on an axis for bar or line charts by running a
        GROUP BY query against the SQL table behind ``ldf``.

        The channel ("x" or "y") whose attribute has a non-empty
        ``aggregation`` is the measure; the attribute on the other channel is
        the group-by key. If both channels are aggregated, the "x" channel
        wins as the measure. The query result is converted with
        ``utils.pandasToLux`` and stored in ``view.data``; nothing is
        returned.

        * Measure ``"Record"``: rows are counted per group and the resulting
          ``count`` column is renamed to ``Record``.
        * Any other measure: the aggregation ``"mean"``, ``"sum"`` or
          ``"max"`` is translated to ``AVG``, ``SUM`` or ``MAX``. Any other
          aggregation name leaves ``view.data`` untouched.

        When neither channel is aggregated, nothing is executed.

        Parameters
        ----------
        view: lux.View
            lux.View object that represents a visualization.
        ldf : lux.luxDataFrame.LuxDataFrame
            Supplies ``table_name`` and ``SQLconnection`` for the query.
        """
        import pandas as pd
        # SQL aggregate function for each supported aggregation name.
        sqlAggFuncs = {"mean": "AVG", "sum": "SUM", "max": "MAX"}

        xAttr = view.getAttrByChannel("x")[0]
        yAttr = view.getAttrByChannel("y")[0]
        groupbyAttr =""
        measureAttr =""
        if (yAttr.aggregation!=""):
            groupbyAttr = xAttr
            measureAttr = yAttr
            aggFunc = yAttr.aggregation
        if (xAttr.aggregation!=""):
            groupbyAttr = yAttr
            measureAttr = xAttr
            aggFunc = xAttr.aggregation
        if (measureAttr==""):
            return

        # NOTE(review): attribute names, the table name and whereClause are
        # interpolated straight into the SQL text. Identifiers cannot be
        # bound as query parameters, so confirm they never carry untrusted
        # input.
        # The second element of the returned pair is not needed here.
        whereClause, _ = SQLExecutor.executeFilter(view)
        if (measureAttr.attribute=="Record"):
            # barchart case, need count data for each group
            countQuery = "SELECT {}, COUNT({}) FROM {} {} GROUP BY {}".format(groupbyAttr.attribute, groupbyAttr.attribute, ldf.table_name, whereClause, groupbyAttr.attribute)
            view.data = pd.read_sql(countQuery, ldf.SQLconnection)
            # presumably the database names the COUNT(...) column "count" -- verify per backend
            view.data = view.data.rename(columns={"count":"Record"})
            view.data = utils.pandasToLux(view.data)
        elif aggFunc in sqlAggFuncs:
            aggQuery = "SELECT {}, {}({}) as {} FROM {} {} GROUP BY {}".format(groupbyAttr.attribute, sqlAggFuncs[aggFunc], measureAttr.attribute, measureAttr.attribute, ldf.table_name, whereClause, groupbyAttr.attribute)
            view.data = pd.read_sql(aggQuery, ldf.SQLconnection)
            view.data = utils.pandasToLux(view.data)