Ejemplo n.º 1
0
    def where(self, dataTable, functionTable, performanceTable):
        """Turn this element's C{sqlWhere} attribute into a row selector.

        The attribute text is handed to the Formula class, which
        provides a C{between} operator and other SQL-like methods but
        is not syntactically identical to SQL.  See the Formula class
        for more.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: 1d Numpy array of bool, or None
        @return: The result as a Numpy selector, or None if this element has no C{sqlWhere} attribute.
        @raise PmmlValidationError: If the expression does not evaluate to a boolean.
        """

        expressionText = self.get("sqlWhere")
        if expressionText is None:
            # Nothing to filter on; the timer is deliberately never started.
            return None

        timerLabel = "Aggregate sqlWhere"
        performanceTable.begin(timerLabel)

        evaluated = Formula().evaluate(dataTable, functionTable, performanceTable, expressionText)
        resultType = evaluated.fieldType.dataType
        if resultType != "boolean":
            raise defs.PmmlValidationError("Aggregate sqlWhere must evaluate to a boolean expression, not \"%s\"" % expressionText)

        # The result array is overwritten in place below, so it is unlocked first.
        evaluated._unlock()

        if evaluated.mask is not None:
            # Combine the boolean result with "mask == VALID", writing back into the data array.
            validRows = NP(evaluated.mask == defs.VALID)
            NP("logical_and", evaluated.data, validRows, evaluated.data)

        performanceTable.end(timerLabel)
        return evaluated.data
Ejemplo n.º 2
0
    def where(self, dataTable, functionTable, performanceTable):
        """Evaluate the C{sqlWhere} attribute as a boolean row filter.

        This is an approximate implementation of SQL "where" built on
        the Formula class.  It has a C{between} operator and various
        other SQL-like methods, but it is not syntactically identical
        to SQL.  See the Formula class for more.

        @type dataTable: DataTable
        @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
        @type functionTable: FunctionTable
        @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
        @type performanceTable: PerformanceTable
        @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
        @rtype: 1d Numpy array of bool, or None
        @return: The result as a Numpy selector, or None if this element has no C{sqlWhere} attribute.
        @raise PmmlValidationError: If the expression does not evaluate to a boolean.
        """

        sqlWhere = self.get("sqlWhere")
        if sqlWhere is None:
            return None

        performanceTable.begin("Aggregate sqlWhere")

        column = Formula().evaluate(
            dataTable, functionTable, performanceTable, sqlWhere)

        if column.fieldType.dataType != "boolean":
            message = "Aggregate sqlWhere must evaluate to a boolean expression, not \"%s\"" % sqlWhere
            raise defs.PmmlValidationError(message)

        # Unlock before the in-place update of the result array.
        column._unlock()

        rowMask = column.mask
        if rowMask is not None:
            # Keep only rows that are both true and flagged VALID; the
            # output argument is the data array itself.
            NP("logical_and", column.data, NP(rowMask == defs.VALID),
               column.data)

        performanceTable.end("Aggregate sqlWhere")
        return column.data
Ejemplo n.º 3
0
    def evaluate(self, dataTable, functionTable, performanceTable):
        """Parse this element's text as a Formula and evaluate it
        against the given data (most likely a grid of sample points)
        and function table.

        @type dataTable: DataTable
        @param dataTable: Contains the data to plot.
        @type functionTable: FunctionTable
        @param functionTable: Defines functions that may be used to transform data for plotting.
        @type performanceTable: PerformanceTable
        @param performanceTable: Measures and records performance (time and memory consumption) of the drawing process.
        @rtype: DataColumn
        @return: The result of the expression as a DataColumn.
        """

        # The text is re-parsed on every call; nothing is cached here.
        return Formula.parse(self.text).evaluate(
            dataTable, functionTable, performanceTable)
Ejemplo n.º 4
0
    def expressionsToPoints(cls, expression, derivative, samples, loop,
                            functionTable, performanceTable):
        """Evaluate a set of given string-based formulae to generate
        numeric points.

        This is used to plot mathematical curves.

        The returned C{dxlist} and C{dylist} are displacements, not
        slopes: when no derivative formula is given they are central
        differences of the points, and when one is given the evaluated
        derivative is multiplied by the central-difference step in the
        independent variable (x or t).

        @type expression: 1- or 2-tuple of strings
        @param expression: If a 1-tuple, the string is passed to Formula and interpreted as y(x); if a 2-tuple, the strings are passed to Formula and interpreted as x(t), y(t).
        @type derivative: 1- or 2-tuple of strings (same length as C{expression})
        @param derivative: Strings are passed to Formula and interpreted as dy/dx (if a 1-tuple) or dx/dt, dy/dt (if a 2-tuple).  If the first item is None, derivatives are estimated numerically.
        @type samples: 1d Numpy array
        @param samples: Values of x or t at which to evaluate the expression or expressions.
        @type loop: bool
        @param loop: If False, disconnect the end of the set of points from the beginning.
        @type functionTable: FunctionTable
        @param functionTable: Functions that may be used to perform the calculation.
        @type performanceTable: PerformanceTable
        @param performanceTable: Measures and records performance (time and memory consumption) of the process.
        @rtype: 6-tuple
        @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} (1d Numpy arrays), xfieldType, yfieldType (FieldTypes).
        @raise PmmlValidationError: If a formula does not return a numeric or temporal type, or if C{expression} is neither a 1-tuple nor a 2-tuple.
        """

        def evaluateNumeric(text, sampleTable, label):
            # Parse and evaluate one formula; reject non-numeric, non-temporal results.
            dataColumn = Formula.parse(text).evaluate(sampleTable, functionTable,
                                                      performanceTable)
            if not dataColumn.fieldType.isnumeric(
            ) and not dataColumn.fieldType.istemporal():
                raise defs.PmmlValidationError(
                    "PlotFormula %s must return a numeric expression, not %r"
                    % (label, dataColumn.fieldType))
            return dataColumn

        def restrictToValid(selection, dataColumn):
            # Fold a column's validity mask into the running selection (None means "all rows").
            if dataColumn.mask is None:
                return selection
            valid = NP(dataColumn.mask == defs.VALID)
            if selection is None:
                return valid
            NP("logical_and", selection, valid, selection)
            return selection

        def select(array, selection):
            # Apply the selection, or pass the array through when nothing is masked.
            if selection is None:
                return array
            return array[selection]

        def centralDifference(values):
            # Half the difference between each point's two (cyclic) neighbours.
            return NP((NP("roll", values, -1) - NP("roll", values, 1)) / 2.0)

        if len(expression) == 1:
            sampleTable = DataTable({"x": "double"}, {"x": samples})
            ydataColumn = evaluateNumeric(expression[0], sampleTable, "y(x)")

            xfieldType = cls.xfieldType
            yfieldType = ydataColumn.fieldType

            selection = restrictToValid(None, ydataColumn)

            if derivative[0] is None:
                xlist = select(samples, selection)
                ylist = select(ydataColumn.data, selection)
                dxlist = centralDifference(xlist)
                dylist = centralDifference(ylist)

            else:
                dydataColumn = evaluateNumeric(derivative[0], sampleTable,
                                               "dy/dx")
                selection = restrictToValid(selection, dydataColumn)

                xlist = select(samples, selection)
                ylist = select(ydataColumn.data, selection)
                dxlist = centralDifference(xlist)
                # dy = (dy/dx) * dx.  The unmasked case used to return the
                # raw slope here, unlike the masked case; multiplying also
                # yields a fresh array, so the endpoint reset below no
                # longer writes into the evaluated column's data.
                dylist = NP(select(dydataColumn.data, selection) * dxlist)

        elif len(expression) == 2:
            sampleTable = DataTable({"t": "double"}, {"t": samples})
            xdataColumn = evaluateNumeric(expression[0], sampleTable, "x(t)")
            ydataColumn = evaluateNumeric(expression[1], sampleTable, "y(t)")

            xfieldType = xdataColumn.fieldType
            yfieldType = ydataColumn.fieldType

            selection = restrictToValid(None, xdataColumn)
            selection = restrictToValid(selection, ydataColumn)

            if derivative[0] is None:
                xlist = select(xdataColumn.data, selection)
                ylist = select(ydataColumn.data, selection)
                dxlist = centralDifference(xlist)
                dylist = centralDifference(ylist)

            else:
                dxdataColumn = evaluateNumeric(derivative[0], sampleTable,
                                               "dx/dt")
                dydataColumn = evaluateNumeric(derivative[1], sampleTable,
                                               "dy/dt")
                selection = restrictToValid(selection, dxdataColumn)
                selection = restrictToValid(selection, dydataColumn)

                # Step in the parameter t between the surviving samples.
                dt = centralDifference(select(samples, selection))

                xlist = select(xdataColumn.data, selection)
                ylist = select(ydataColumn.data, selection)
                dxlist = NP(select(dxdataColumn.data, selection) * dt)
                dylist = NP(select(dydataColumn.data, selection) * dt)

        else:
            # Previously this fell through to an UnboundLocalError at the return.
            raise defs.PmmlValidationError(
                "PlotFormula expression must be a 1-tuple (y(x)) or a 2-tuple (x(t), y(t)), not a %d-tuple"
                % len(expression))

        if not loop:
            # The cyclic differences wrap around at both ends; for an open
            # curve those wrapped values are replaced with zero.  Skipped
            # when every sample was masked out and the arrays are empty.
            for displacements in (dxlist, dylist):
                if len(displacements) > 0:
                    displacements[0] = 0.0
                    displacements[-1] = 0.0

        return xlist, ylist, dxlist, dylist, xfieldType, yfieldType
Ejemplo n.º 5
0
    def prepare(self, state, dataTable, functionTable, performanceTable,
                plotRange):
        """Prepare a plot element for drawing.

        This stage consists of calculating all quantities and
        determining the bounds of the data.  These bounds may be unioned
        with bounds from other plot elements that overlay this plot
        element, so the drawing (which requires a finalized coordinate
        system) cannot begin yet.

        Two modes are supported, chosen by which child roles are
        present: a single C{z(x,y)} formula sampled on a regular grid,
        or C{x} and C{y} expressions that are binned into a
        two-dimensional histogram (optionally weighted by C{zweight},
        or averaged over C{zmean}).

        This method modifies C{plotRange} and fills C{state} with
        C{zdata}, C{zmask}, C{xbins}, C{xlow}, C{xhigh}, C{ybins},
        C{ylow} and C{yhigh}.

        @type state: ad-hoc Python object
        @param state: State information that persists long enough to use quantities computed in C{prepare} in the C{draw} stage.  This is a work-around of lxml's refusal to let its Python instances maintain C{self} and it is unrelated to DataTableState.
        @type dataTable: DataTable
        @param dataTable: Contains the data to plot.
        @type functionTable: FunctionTable
        @param functionTable: Defines functions that may be used to transform data for plotting.
        @type performanceTable: PerformanceTable
        @param performanceTable: Measures and records performance (time and memory consumption) of the drawing process.
        @type plotRange: PlotRange
        @param plotRange: The bounding box of plot coordinates that this function will expand.
        @raise PmmlValidationError: If required attributes are missing, the x or y range is empty or inverted, the x or y axis is strictly positive, the formula is not numeric, or the combination of child roles is not one of the supported ones.
        """

        self.checkRoles(["z(x,y)", "x", "y", "zmean", "zweight"])

        performanceTable.begin("PlotHeatMap prepare")
        self._saveContext(dataTable)

        zofxy = self.xpath("pmml:PlotFormula[@role='z(x,y)']")
        xexpr = self.xpath("pmml:PlotNumericExpression[@role='x']")
        yexpr = self.xpath("pmml:PlotNumericExpression[@role='y']")
        zmean = self.xpath("pmml:PlotNumericExpression[@role='zmean']")
        zweight = self.xpath("pmml:PlotNumericExpression[@role='zweight']")
        cutExpression = self.xpath("pmml:PlotSelection")

        if len(zofxy) == 1 and len(xexpr) == 0 and len(yexpr) == 0 and len(
                zmean) == 0 and len(zweight) == 0:
            # Mode 1: a mathematical function z(x,y) sampled on a grid.
            xbins = self.get("xbins", convertType=True)
            xlow = self.get("xlow", convertType=True)
            xhigh = self.get("xhigh", convertType=True)
            ybins = self.get("ybins", convertType=True)
            ylow = self.get("ylow", convertType=True)
            yhigh = self.get("yhigh", convertType=True)

            if xbins is None or xlow is None or xhigh is None or ybins is None or ylow is None or yhigh is None:
                raise defs.PmmlValidationError(
                    "xbins, xlow, xhigh, ybins, ylow, and yhigh are required for HeatMaps of a mathematical formula"
                )

            if xlow >= xhigh or ylow >= yhigh:
                raise defs.PmmlValidationError(
                    "xlow must be less than xhigh and ylow must be less than yhigh"
                )

            if plotRange.xStrictlyPositive or plotRange.yStrictlyPositive:
                raise defs.PmmlValidationError(
                    "PlotHeatMap can only be properly displayed in linear x, y coordinates"
                )

            # NOTE(review): the bin widths are not read again within this
            # method; they are kept because the division still rejects a
            # zero bin count early.
            xbinWidth = (xhigh - xlow) / float(xbins)
            ybinWidth = (yhigh - ylow) / float(ybins)

            # Flattened grid of sample points: x cycles fastest (tile),
            # each y value is held for a full row of x (repeat).
            xarray = NP("tile",
                        NP("linspace", xlow, xhigh, xbins, endpoint=True),
                        ybins)
            yarray = NP("repeat",
                        NP("linspace", ylow, yhigh, ybins, endpoint=True),
                        xbins)

            sampleTable = DataTable({
                "x": "double",
                "y": "double"
            }, {
                "x": xarray,
                "y": yarray
            })
            parsed = Formula.parse(zofxy[0].text)

            # The formula evaluation is timed separately from this method.
            performanceTable.pause("PlotHeatMap prepare")
            zdataColumn = parsed.evaluate(sampleTable, functionTable,
                                          performanceTable)
            performanceTable.unpause("PlotHeatMap prepare")
            if not zdataColumn.fieldType.isnumeric():
                raise defs.PmmlValidationError(
                    "PlotFormula z(x,y) must return a numeric expression, not %r"
                    % zdataColumn.fieldType)

            # Usable points: finite, flagged VALID, and (if the z axis is
            # strictly positive) greater than zero.
            selection = NP("isfinite", zdataColumn.data)
            if zdataColumn.mask is not None:
                NP("logical_and", selection,
                   NP(zdataColumn.mask == defs.VALID), selection)
            if plotRange.zStrictlyPositive:
                NP("logical_and", selection, NP(zdataColumn.data > 0.0),
                   selection)

            # A formula can yield no usable points at all; min() and max()
            # of an empty array raise ValueError, so z bounds are only
            # pushed when something survived the selection (the same guard
            # the zmean branch below uses).
            if NP("count_nonzero", selection) > 0:
                gooddata = zdataColumn.data[selection]
                plotRange.zminPush(gooddata.min(),
                                   zdataColumn.fieldType,
                                   sticky=False)
                plotRange.zmaxPush(gooddata.max(),
                                   zdataColumn.fieldType,
                                   sticky=False)

            state.zdata = zdataColumn.data
            # Rejected points are marked INVALID; accepted points get 0.
            state.zmask = NP("logical_not", selection) * defs.INVALID

        elif len(zofxy) == 0 and len(xexpr) == 1 and len(yexpr) == 1:
            # Mode 2: bin the data's (x, y) values into a 2d histogram.
            performanceTable.pause("PlotHeatMap prepare")
            xdataColumn = xexpr[0].evaluate(dataTable, functionTable,
                                            performanceTable)
            ydataColumn = yexpr[0].evaluate(dataTable, functionTable,
                                            performanceTable)
            performanceTable.unpause("PlotHeatMap prepare")

            xbins = self.get("xbins", convertType=True)
            xlow = self.get("xlow", convertType=True)
            xhigh = self.get("xhigh", convertType=True)
            ybins = self.get("ybins", convertType=True)
            ylow = self.get("ylow", convertType=True)
            yhigh = self.get("yhigh", convertType=True)

            # Missing limits default to the data's extent, or to the unit
            # interval when there is no data.
            if len(xdataColumn) > 0:
                if xlow is None: xlow = NP("nanmin", xdataColumn.data)
                if xhigh is None: xhigh = NP("nanmax", xdataColumn.data)
                if ylow is None: ylow = NP("nanmin", ydataColumn.data)
                if yhigh is None: yhigh = NP("nanmax", ydataColumn.data)
            else:
                if xlow is None: xlow = 0.0
                if xhigh is None: xhigh = 1.0
                if ylow is None: ylow = 0.0
                if yhigh is None: yhigh = 1.0

            # Missing bin counts are estimated with the Freedman-Diaconis
            # rule (bin width = 2 * IQR / n^(1/3)), with a floor of 10
            # bins.  With no data there is nothing to take a percentile
            # of (and n^(1/3) is zero), so fall straight to the default.
            if xbins is None:
                if len(xdataColumn.data) > 0:
                    q1, q3 = NP("percentile", xdataColumn.data, [25.0, 75.0])
                    binWidth = 2.0 * (q3 - q1) / math.pow(
                        len(xdataColumn.data), 1.0 / 3.0)
                else:
                    binWidth = 0.0
                if binWidth > 0.0:
                    xbins = max(10, int(math.ceil((xhigh - xlow) / binWidth)))
                else:
                    xbins = 10

            if ybins is None:
                if len(ydataColumn.data) > 0:
                    q1, q3 = NP("percentile", ydataColumn.data, [25.0, 75.0])
                    binWidth = 2.0 * (q3 - q1) / math.pow(
                        len(ydataColumn.data), 1.0 / 3.0)
                else:
                    binWidth = 0.0
                if binWidth > 0.0:
                    ybins = max(10, int(math.ceil((yhigh - ylow) / binWidth)))
                else:
                    ybins = 10

            if xlow >= xhigh or ylow >= yhigh:
                raise defs.PmmlValidationError(
                    "xlow must be less than xhigh and ylow must be less than yhigh"
                )

            if plotRange.xStrictlyPositive or plotRange.yStrictlyPositive:
                raise defs.PmmlValidationError(
                    "PlotHeatMap can only be properly displayed in linear x, y coordinates"
                )

            # With a stateId, accumulated quantities live in
            # dataTable.state under that id; without one, this dict is
            # local to the call.
            persistentState = {}
            stateId = self.get("stateId")
            if stateId is not None:
                if stateId in dataTable.state:
                    persistentState = dataTable.state[stateId]
                else:
                    dataTable.state[stateId] = persistentState

            if len(zmean) == 0:
                # Binning stored by an earlier call takes precedence over
                # the values just computed, and is stored back for the
                # next call.
                if "xbins" in persistentState: xbins = persistentState["xbins"]
                if "xlow" in persistentState: xlow = persistentState["xlow"]
                if "xhigh" in persistentState: xhigh = persistentState["xhigh"]
                if "ybins" in persistentState: ybins = persistentState["ybins"]
                if "ylow" in persistentState: ylow = persistentState["ylow"]
                if "yhigh" in persistentState: yhigh = persistentState["yhigh"]

                persistentState["xbins"] = xbins
                persistentState["xlow"] = xlow
                persistentState["xhigh"] = xhigh
                persistentState["ybins"] = ybins
                persistentState["ylow"] = ylow
                persistentState["yhigh"] = yhigh

            # NOTE(review): as in the formula branch, these widths are not
            # read again within this method.
            xbinWidth = (xhigh - xlow) / float(xbins)
            ybinWidth = (yhigh - ylow) / float(ybins)

            # Per-row weight: 1.0 for rows to count, 0.0 for rows with an
            # invalid x or y or rejected by the PlotSelection.
            mask = NP("ones", len(dataTable), dtype=NP.dtype(float))
            if xdataColumn.mask is not None:
                NP("multiply", mask, (xdataColumn.mask == defs.VALID), mask)
            if ydataColumn.mask is not None:
                NP("multiply", mask, (ydataColumn.mask == defs.VALID), mask)

            if len(cutExpression) == 1:
                performanceTable.pause("PlotHeatMap prepare")
                NP(
                    "multiply", mask,
                    cutExpression[0].select(dataTable, functionTable,
                                            performanceTable), mask)
                performanceTable.unpause("PlotHeatMap prepare")

            # In every histogram2d call below, y is passed first and x
            # second, matching bins=(ybins, xbins); the returned edge
            # arrays are not used.
            if len(zmean) == 0 and len(zweight) == 0:
                # Plain counts.
                histogram, xedges, yedges = NP("histogram2d",
                                               ydataColumn.data,
                                               xdataColumn.data,
                                               bins=(ybins, xbins),
                                               range=[[ylow, yhigh],
                                                      [xlow, xhigh]],
                                               weights=mask)
                if len(dataTable) == 0:
                    # work around Numpy <= 1.6.1 bug
                    histogram = NP("zeros", (ybins, xbins),
                                   dtype=NP.dtype(float))

                if "histogram" in persistentState:
                    persistentState["histogram"] = NP(
                        persistentState["histogram"] + histogram)
                else:
                    persistentState["histogram"] = histogram

                histogram = persistentState["histogram"]

                if plotRange.zStrictlyPositive:
                    zmin = 0.1
                else:
                    zmin = 0.0
                zmax = NP("nanmax", histogram)

                plotRange.zminPush(zmin, self.zfieldType, sticky=True)
                if zmax > zmin:
                    plotRange.zmaxPush(zmax, self.zfieldType, sticky=False)

            elif len(zmean) == 0 and len(zweight) == 1:
                # Weighted counts.
                performanceTable.pause("PlotHeatMap prepare")
                weightsDataColumn = zweight[0].evaluate(
                    dataTable, functionTable, performanceTable)
                performanceTable.unpause("PlotHeatMap prepare")

                if weightsDataColumn.mask is not None:
                    NP("multiply", mask,
                       (weightsDataColumn.mask == defs.VALID), mask)
                weights = NP(weightsDataColumn.data * mask)

                histogram, xedges, yedges = NP("histogram2d",
                                               ydataColumn.data,
                                               xdataColumn.data,
                                               bins=(ybins, xbins),
                                               range=[[ylow, yhigh],
                                                      [xlow, xhigh]],
                                               weights=weights)

                if "histogram" in persistentState:
                    persistentState["histogram"] = NP(
                        persistentState["histogram"] + histogram)
                else:
                    persistentState["histogram"] = histogram

                histogram = persistentState["histogram"]

                if plotRange.zStrictlyPositive:
                    # One tenth of the smallest positive weight, or 0.1
                    # when there is no positive weight.
                    w = weights[NP(weights > 0.0)]
                    if len(w) > 0:
                        zmin = 0.1 * NP("nanmin", w)
                    else:
                        zmin = 0.1
                else:
                    zmin = 0.0
                zmax = NP("nanmax", histogram)

                plotRange.zminPush(zmin, self.zfieldType, sticky=True)
                if zmax > zmin:
                    plotRange.zmaxPush(zmax, self.zfieldType, sticky=False)

            elif len(zmean) == 1 and len(zweight) == 0:
                # Mean of z per bin: sum of z divided by the count.
                performanceTable.pause("PlotHeatMap prepare")
                zdataColumn = zmean[0].evaluate(dataTable, functionTable,
                                                performanceTable)
                performanceTable.unpause("PlotHeatMap prepare")

                if zdataColumn.mask is not None:
                    NP("multiply", mask, (zdataColumn.mask == defs.VALID),
                       mask)
                weights = NP(zdataColumn.data * mask)

                numer, xedges, yedges = NP("histogram2d",
                                           ydataColumn.data,
                                           xdataColumn.data,
                                           bins=(ybins, xbins),
                                           range=[[ylow, yhigh], [xlow,
                                                                  xhigh]],
                                           weights=weights)
                denom, xedges, yedges = NP("histogram2d",
                                           ydataColumn.data,
                                           xdataColumn.data,
                                           bins=(ybins, xbins),
                                           range=[[ylow, yhigh], [xlow,
                                                                  xhigh]],
                                           weights=mask)

                if "numer" in persistentState:
                    persistentState["numer"] = NP(persistentState["numer"] +
                                                  numer)
                    persistentState["denom"] = NP(persistentState["denom"] +
                                                  denom)
                else:
                    persistentState["numer"] = numer
                    persistentState["denom"] = denom

                numer = persistentState["numer"]
                denom = persistentState["denom"]
                # Bins with a zero denominator come out non-finite and are
                # excluded from the z range by the isfinite selection.
                histogram = numer / denom

                selection = NP("isfinite", histogram)
                if plotRange.zStrictlyPositive:
                    NP("logical_and", selection, NP(histogram > 0.0),
                       selection)

                if NP("count_nonzero", selection) > 0:
                    gooddata = histogram[selection]
                    plotRange.zminPush(gooddata.min(),
                                       self.zfieldType,
                                       sticky=False)
                    plotRange.zmaxPush(gooddata.max(),
                                       self.zfieldType,
                                       sticky=False)

            else:
                raise defs.PmmlValidationError(
                    "The only allowed combinations of PlotFormula/PlotNumericExpressions are: \"z(x,y)\" (function), \"x y\" (histogram), \"x y zmean\" (mean of z in x y bins), \"x y zweight\" (weighted x y histogram)"
                )

            # Flatten the (ybins, xbins) histogram to one dimension.
            state.zdata = NP("reshape", histogram, xbins * ybins)
            state.zmask = None

        else:
            raise defs.PmmlValidationError(
                "The only allowed combinations of PlotFormula/PlotNumericExpressions are: \"z(x,y)\" (function), \"x y\" (histogram), \"x y zmean\" (mean of z in x y bins), \"x y zweight\" (weighted x y histogram)"
            )

        plotRange.xminPush(xlow, self.xyfieldType, sticky=True)
        plotRange.yminPush(ylow, self.xyfieldType, sticky=True)
        plotRange.xmaxPush(xhigh, self.xyfieldType, sticky=True)
        plotRange.ymaxPush(yhigh, self.xyfieldType, sticky=True)

        state.xbins = xbins
        state.xlow = xlow
        state.xhigh = xhigh
        state.ybins = ybins
        state.ylow = ylow
        state.yhigh = yhigh

        performanceTable.end("PlotHeatMap prepare")
Ejemplo n.º 6
0
    def expressionsToPoints(cls, expression, derivative, samples, loop, functionTable, performanceTable):
        """Evaluate a set of given string-based formulae to generate
        numeric points and per-point differentials.

        This is used to plot mathematical curves.

        For a 1-tuple C{expression}, the formula is evaluated with
        C{samples} bound to the field C{x}; for a 2-tuple, both
        formulae are evaluated with C{samples} bound to the field
        C{t}.  Points at which any evaluated formula is masked as not
        C{defs.VALID} are dropped from all four output arrays.

        If C{derivative[0]} is None, the differentials are central
        differences of the points themselves, computed with
        wrap-around.  Otherwise the derivative formulae are evaluated
        and multiplied by the central-difference spacing of the
        independent variable, so that C{dxlist} and C{dylist} are
        always displacements rather than slopes.

        @type expression: 1- or 2-tuple of strings
        @param expression: If a 1-tuple, the string is passed to Formula and interpreted as y(x); if a 2-tuple, the strings are passed to Formula and interpreted as x(t), y(t).
        @type derivative: 1- or 2-tuple of strings (same length as C{expression})
        @param derivative: Strings are passed to Formula and interpreted as dy/dx (if a 1-tuple) or dx/dt, dy/dt (if a 2-tuple).  If the first item is None, derivatives are estimated numerically.
        @type samples: 1d Numpy array
        @param samples: Values of x or t at which to evaluate the expression or expressions.
        @type loop: bool
        @param loop: If False, disconnect the end of the set of points from the beginning (the first and last differentials are set to zero).
        @type functionTable: FunctionTable
        @param functionTable: Functions that may be used to perform the calculation.
        @type performanceTable: PerformanceTable
        @param performanceTable: Measures and records performance (time and memory consumption) of the process.
        @rtype: 6-tuple
        @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} (1d Numpy arrays), xfieldType, yfieldType (FieldTypes).
        @raise PmmlValidationError: If a formula evaluates to a type that is neither numeric nor temporal, or if C{expression} is neither a 1-tuple nor a 2-tuple.
        """

        def evaluate(text, sampleTable, label):
            # Parse and evaluate one formula, insisting on a numeric or temporal result.
            dataColumn = Formula.parse(text).evaluate(sampleTable, functionTable, performanceTable)
            if not dataColumn.fieldType.isnumeric() and not dataColumn.fieldType.istemporal():
                raise defs.PmmlValidationError("PlotFormula %s must return a numeric expression, not %r" % (label, dataColumn.fieldType))
            return dataColumn

        def restrict(selection, dataColumn):
            # Narrow the selection to the rows where dataColumn is VALID; None means "all rows".
            if dataColumn.mask is None:
                return selection
            valid = NP(dataColumn.mask == defs.VALID)
            if selection is None:
                return valid
            NP("logical_and", selection, valid, selection)
            return selection

        def pick(array, selection):
            # Apply the selection, passing the array through untouched when nothing is masked.
            if selection is None:
                return array
            return array[selection]

        def centralDifference(array):
            # Half the difference between each point's two neighbors, wrapping around at the ends.
            return NP((NP("roll", array, -1) - NP("roll", array, 1)) / 2.0)

        if len(expression) == 1:
            sampleTable = DataTable({"x": "double"}, {"x": samples})
            ydataColumn = evaluate(expression[0], sampleTable, "y(x)")

            xfieldType = cls.xfieldType
            yfieldType = ydataColumn.fieldType

            selection = restrict(None, ydataColumn)

            if derivative[0] is None:
                xlist = pick(samples, selection)
                ylist = pick(ydataColumn.data, selection)
                dxlist = centralDifference(xlist)
                dylist = centralDifference(ylist)

            else:
                dydataColumn = evaluate(derivative[0], sampleTable, "dy/dx")
                selection = restrict(selection, dydataColumn)

                xlist = pick(samples, selection)
                ylist = pick(ydataColumn.data, selection)
                dxlist = centralDifference(xlist)
                # dy = (dy/dx) * dx, whether or not any points were masked
                # out.  Building a new array here also keeps the endpoint
                # zeroing below from writing into the evaluated column's
                # own data array.
                dylist = NP(pick(dydataColumn.data, selection) * dxlist)

        elif len(expression) == 2:
            sampleTable = DataTable({"t": "double"}, {"t": samples})
            xdataColumn = evaluate(expression[0], sampleTable, "x(t)")
            ydataColumn = evaluate(expression[1], sampleTable, "y(t)")

            xfieldType = xdataColumn.fieldType
            yfieldType = ydataColumn.fieldType

            selection = restrict(restrict(None, xdataColumn), ydataColumn)

            if derivative[0] is None:
                xlist = pick(xdataColumn.data, selection)
                ylist = pick(ydataColumn.data, selection)
                dxlist = centralDifference(xlist)
                dylist = centralDifference(ylist)

            else:
                dxdataColumn = evaluate(derivative[0], sampleTable, "dx/dt")
                dydataColumn = evaluate(derivative[1], sampleTable, "dy/dt")
                selection = restrict(restrict(selection, dxdataColumn), dydataColumn)

                # dx = (dx/dt) * dt and dy = (dy/dt) * dt
                dt = centralDifference(pick(samples, selection))

                xlist = pick(xdataColumn.data, selection)
                ylist = pick(ydataColumn.data, selection)
                dxlist = NP(pick(dxdataColumn.data, selection) * dt)
                dylist = NP(pick(dydataColumn.data, selection) * dt)

        else:
            raise defs.PmmlValidationError("PlotFormula expression must be a 1-tuple (y(x)) or a 2-tuple (x(t), y(t))")

        # An open curve has no neighbor beyond either end, so the wrapped-around
        # differences there are meaningless.  Skip this if every point was
        # masked out: there are no endpoints to reset.
        if not loop and len(dxlist) > 0:
            dxlist[0] = 0.0
            dxlist[-1] = 0.0
            dylist[0] = 0.0
            dylist[-1] = 0.0

        return xlist, ylist, dxlist, dylist, xfieldType, yfieldType
Ejemplo n.º 7
0
    def prepare(self, state, dataTable, functionTable, performanceTable, plotRange):
        """Prepare a plot element for drawing.

        This stage consists of calculating all quantities and
        determining the bounds of the data.  These bounds may be unioned
        with bounds from other plot elements that overlay this plot
        element, so the drawing (which requires a finalized coordinate
        system) cannot begin yet.

        Four combinations of child elements are accepted: C{z(x,y)}
        alone (a formula sampled on a grid), C{x y} (a 2d histogram),
        C{x y zweight} (a weighted 2d histogram), and C{x y zmean}
        (the mean of z in each x-y bin).  For the three histogram-like
        modes, results are accumulated in C{dataTable.state} under the
        element's C{stateId}, if it has one.

        This method modifies C{plotRange} and sets C{zdata}, C{zmask},
        C{xbins}, C{xlow}, C{xhigh}, C{ybins}, C{ylow}, and C{yhigh}
        on C{state}.

        @type state: ad-hoc Python object
        @param state: State information that persists long enough to use quantities computed in C{prepare} in the C{draw} stage.  This is a work-around of lxml's refusal to let its Python instances maintain C{self} and it is unrelated to DataTableState.
        @type dataTable: DataTable
        @param dataTable: Contains the data to plot.
        @type functionTable: FunctionTable
        @param functionTable: Defines functions that may be used to transform data for plotting.
        @type performanceTable: PerformanceTable
        @param performanceTable: Measures and records performance (time and memory consumption) of the drawing process.
        @type plotRange: PlotRange
        @param plotRange: The bounding box of plot coordinates that this function will expand.
        @raise PmmlValidationError: If the combination of child elements is not one of the four allowed, if required binning attributes are missing or inconsistent, if the axes are not linear, or if the z(x,y) formula is not numeric.
        """

        badCombination = "The only allowed combinations of PlotFormula/PlotNumericExpressions are: \"z(x,y)\" (function), \"x y\" (histogram), \"x y zmean\" (mean of z in x y bins), \"x y zweight\" (weighted x y histogram)"

        def readBinning():
            # xbins, xlow, xhigh, ybins, ylow, yhigh as given on the element;
            # the callers below treat None as "not specified".
            return [self.get(name, convertType=True) for name in ("xbins", "xlow", "xhigh", "ybins", "ylow", "yhigh")]

        def chooseBins(data, low, high):
            # Freedman-Diaconis bin width, 2*IQR/n^(1/3), with a floor of 10 bins.
            # Empty data has no quartiles (and n^(1/3) would be a zero divisor),
            # so it gets the same fallback as a zero-width interquartile range.
            if len(data) == 0:
                return 10
            q1, q3 = NP("percentile", data, [25.0, 75.0])
            binWidth = 2.0 * (q3 - q1) / math.pow(len(data), 1.0/3.0)
            if binWidth > 0.0:
                return max(10, int(math.ceil((high - low)/binWidth)))
            return 10

        def accumulate(persistentState, histogram):
            # Add this batch to the running histogram and return the running total.
            if "histogram" in persistentState:
                persistentState["histogram"] = NP(persistentState["histogram"] + histogram)
            else:
                persistentState["histogram"] = histogram
            return persistentState["histogram"]

        self.checkRoles(["z(x,y)", "x", "y", "zmean", "zweight"])

        performanceTable.begin("PlotHeatMap prepare")
        self._saveContext(dataTable)

        zofxy = self.xpath("pmml:PlotFormula[@role='z(x,y)']")
        xexpr = self.xpath("pmml:PlotNumericExpression[@role='x']")
        yexpr = self.xpath("pmml:PlotNumericExpression[@role='y']")
        zmean = self.xpath("pmml:PlotNumericExpression[@role='zmean']")
        zweight = self.xpath("pmml:PlotNumericExpression[@role='zweight']")
        cutExpression = self.xpath("pmml:PlotSelection")

        if len(zofxy) == 1 and len(xexpr) == 0 and len(yexpr) == 0 and len(zmean) == 0 and len(zweight) == 0:
            # Mode 1: sample a formula z(x,y) on a regular grid.
            xbins, xlow, xhigh, ybins, ylow, yhigh = readBinning()

            if xbins is None or xlow is None or xhigh is None or ybins is None or ylow is None or yhigh is None:
                raise defs.PmmlValidationError("xbins, xlow, xhigh, ybins, ylow, and yhigh are required for HeatMaps of a mathematical formula")

            if xlow >= xhigh or ylow >= yhigh:
                raise defs.PmmlValidationError("xlow must be less than xhigh and ylow must be less than yhigh")

            if plotRange.xStrictlyPositive or plotRange.yStrictlyPositive:
                raise defs.PmmlValidationError("PlotHeatMap can only be properly displayed in linear x, y coordinates")

            if xbins <= 0 or ybins <= 0:
                raise defs.PmmlValidationError("xbins and ybins must be positive")

            # x varies fastest and y slowest, so the flat arrays enumerate the
            # grid row by row (ybins rows of xbins samples each).
            xarray = NP("tile", NP("linspace", xlow, xhigh, xbins, endpoint=True), ybins)
            yarray = NP("repeat", NP("linspace", ylow, yhigh, ybins, endpoint=True), xbins)

            sampleTable = DataTable({"x": "double", "y": "double"}, {"x": xarray, "y": yarray})
            parsed = Formula.parse(zofxy[0].text)

            performanceTable.pause("PlotHeatMap prepare")
            zdataColumn = parsed.evaluate(sampleTable, functionTable, performanceTable)
            performanceTable.unpause("PlotHeatMap prepare")
            if not zdataColumn.fieldType.isnumeric():
                raise defs.PmmlValidationError("PlotFormula z(x,y) must return a numeric expression, not %r" % zdataColumn.fieldType)

            selection = NP("isfinite", zdataColumn.data)
            if zdataColumn.mask is not None:
                NP("logical_and", selection, NP(zdataColumn.mask == defs.VALID), selection)
            if plotRange.zStrictlyPositive:
                NP("logical_and", selection, NP(zdataColumn.data > 0.0), selection)

            # min() and max() of an empty array raise; if nothing is plottable,
            # leave the z range alone (as the zmean mode below does).
            if NP("count_nonzero", selection) > 0:
                gooddata = zdataColumn.data[selection]
                plotRange.zminPush(gooddata.min(), zdataColumn.fieldType, sticky=False)
                plotRange.zmaxPush(gooddata.max(), zdataColumn.fieldType, sticky=False)

            state.zdata = zdataColumn.data
            state.zmask = NP("logical_not", selection) * defs.INVALID

        elif len(zofxy) == 0 and len(xexpr) == 1 and len(yexpr) == 1:
            # Modes 2-4: bin the data in x and y.
            performanceTable.pause("PlotHeatMap prepare")
            xdataColumn = xexpr[0].evaluate(dataTable, functionTable, performanceTable)
            ydataColumn = yexpr[0].evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause("PlotHeatMap prepare")

            xbins, xlow, xhigh, ybins, ylow, yhigh = readBinning()

            # Unspecified limits default to the extent of the data, or to the
            # unit square if there is no data.
            if len(xdataColumn) > 0:
                if xlow is None: xlow = NP("nanmin", xdataColumn.data)
                if xhigh is None: xhigh = NP("nanmax", xdataColumn.data)
                if ylow is None: ylow = NP("nanmin", ydataColumn.data)
                if yhigh is None: yhigh = NP("nanmax", ydataColumn.data)
            else:
                if xlow is None: xlow = 0.0
                if xhigh is None: xhigh = 1.0
                if ylow is None: ylow = 0.0
                if yhigh is None: yhigh = 1.0

            if xbins is None:
                xbins = chooseBins(xdataColumn.data, xlow, xhigh)
            if ybins is None:
                ybins = chooseBins(ydataColumn.data, ylow, yhigh)

            if xlow >= xhigh or ylow >= yhigh:
                raise defs.PmmlValidationError("xlow must be less than xhigh and ylow must be less than yhigh")

            if plotRange.xStrictlyPositive or plotRange.yStrictlyPositive:
                raise defs.PmmlValidationError("PlotHeatMap can only be properly displayed in linear x, y coordinates")

            persistentState = {}
            stateId = self.get("stateId")
            if stateId is not None:
                if stateId in dataTable.state:
                    persistentState = dataTable.state[stateId]
                else:
                    dataTable.state[stateId] = persistentState

            if len(zmean) == 0:
                # A binning remembered from an earlier call overrides the one
                # just computed, so that accumulated histograms keep their shape.
                if "xbins" in persistentState: xbins = persistentState["xbins"]
                if "xlow" in persistentState: xlow = persistentState["xlow"]
                if "xhigh" in persistentState: xhigh = persistentState["xhigh"]
                if "ybins" in persistentState: ybins = persistentState["ybins"]
                if "ylow" in persistentState: ylow = persistentState["ylow"]
                if "yhigh" in persistentState: yhigh = persistentState["yhigh"]

                persistentState["xbins"] = xbins
                persistentState["xlow"] = xlow
                persistentState["xhigh"] = xhigh
                persistentState["ybins"] = ybins
                persistentState["ylow"] = ylow
                persistentState["yhigh"] = yhigh

            if xbins <= 0 or ybins <= 0:
                raise defs.PmmlValidationError("xbins and ybins must be positive")

            # A float mask (1.0 = use this row, 0.0 = skip it) doubles as the
            # per-row weight passed to histogram2d.
            mask = NP("ones", len(dataTable), dtype=NP.dtype(float))
            if xdataColumn.mask is not None:
                NP("multiply", mask, (xdataColumn.mask == defs.VALID), mask)
            if ydataColumn.mask is not None:
                NP("multiply", mask, (ydataColumn.mask == defs.VALID), mask)

            if len(cutExpression) == 1:
                performanceTable.pause("PlotHeatMap prepare")
                NP("multiply", mask, cutExpression[0].select(dataTable, functionTable, performanceTable), mask)
                performanceTable.unpause("PlotHeatMap prepare")

            def fill(weights):
                # y is the first axis, so the result has shape (ybins, xbins).
                return NP("histogram2d", ydataColumn.data, xdataColumn.data, bins=(ybins, xbins), range=[[ylow, yhigh], [xlow, xhigh]], weights=weights)[0]

            if len(zmean) == 0 and len(zweight) == 0:
                # Mode 2: plain counts.
                histogram = fill(mask)
                if len(dataTable) == 0:
                    # work around Numpy <= 1.6.1 bug
                    histogram = NP("zeros", (ybins, xbins), dtype=NP.dtype(float))

                histogram = accumulate(persistentState, histogram)

                if plotRange.zStrictlyPositive:
                    zmin = 0.1
                else:
                    zmin = 0.0
                zmax = NP("nanmax", histogram)

                plotRange.zminPush(zmin, self.zfieldType, sticky=True)
                if zmax > zmin:
                    plotRange.zmaxPush(zmax, self.zfieldType, sticky=False)

            elif len(zmean) == 0 and len(zweight) == 1:
                # Mode 3: sum of weights in each bin.
                performanceTable.pause("PlotHeatMap prepare")
                weightsDataColumn = zweight[0].evaluate(dataTable, functionTable, performanceTable)
                performanceTable.unpause("PlotHeatMap prepare")

                if weightsDataColumn.mask is not None:
                    NP("multiply", mask, (weightsDataColumn.mask == defs.VALID), mask)
                weights = NP(weightsDataColumn.data * mask)

                histogram = accumulate(persistentState, fill(weights))

                if plotRange.zStrictlyPositive:
                    # Start the z axis a decade below the smallest positive weight.
                    positiveWeights = weights[NP(weights > 0.0)]
                    if len(positiveWeights) > 0:
                        zmin = 0.1 * NP("nanmin", positiveWeights)
                    else:
                        zmin = 0.1
                else:
                    zmin = 0.0
                zmax = NP("nanmax", histogram)

                plotRange.zminPush(zmin, self.zfieldType, sticky=True)
                if zmax > zmin:
                    plotRange.zmaxPush(zmax, self.zfieldType, sticky=False)

            elif len(zmean) == 1 and len(zweight) == 0:
                # Mode 4: mean of z in each bin, kept as a running sum and count.
                performanceTable.pause("PlotHeatMap prepare")
                zdataColumn = zmean[0].evaluate(dataTable, functionTable, performanceTable)
                performanceTable.unpause("PlotHeatMap prepare")

                if zdataColumn.mask is not None:
                    NP("multiply", mask, (zdataColumn.mask == defs.VALID), mask)
                weights = NP(zdataColumn.data * mask)

                numer = fill(weights)
                denom = fill(mask)

                if "numer" in persistentState:
                    persistentState["numer"] = NP(persistentState["numer"] + numer)
                    persistentState["denom"] = NP(persistentState["denom"] + denom)
                else:
                    persistentState["numer"] = numer
                    persistentState["denom"] = denom

                numer = persistentState["numer"]
                denom = persistentState["denom"]
                # Bins with a zero denominator come out non-finite and are
                # excluded from the z range by the isfinite selection below.
                histogram = numer / denom

                selection = NP("isfinite", histogram)
                if plotRange.zStrictlyPositive:
                    NP("logical_and", selection, NP(histogram > 0.0), selection)

                if NP("count_nonzero", selection) > 0:
                    gooddata = histogram[selection]
                    plotRange.zminPush(gooddata.min(), self.zfieldType, sticky=False)
                    plotRange.zmaxPush(gooddata.max(), self.zfieldType, sticky=False)

            else:
                raise defs.PmmlValidationError(badCombination)

            state.zdata = NP("reshape", histogram, xbins*ybins)
            state.zmask = None

        else:
            raise defs.PmmlValidationError(badCombination)

        plotRange.xminPush(xlow, self.xyfieldType, sticky=True)
        plotRange.yminPush(ylow, self.xyfieldType, sticky=True)
        plotRange.xmaxPush(xhigh, self.xyfieldType, sticky=True)
        plotRange.ymaxPush(yhigh, self.xyfieldType, sticky=True)

        state.xbins = xbins
        state.xlow = xlow
        state.xhigh = xhigh
        state.ybins = ybins
        state.ylow = ylow
        state.yhigh = yhigh

        performanceTable.end("PlotHeatMap prepare")