def where(self, dataTable, functionTable, performanceTable):
    """Evaluate the C{sqlWhere} attribute as a row selector.

    This is an approximation of an SQL where clause, built on the
    Formula class.  Formula provides a C{between} operator and
    other SQL-like methods, but its syntax is not identical to SQL.
    See the Formula class for more.

    @type dataTable: DataTable
    @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
    @type functionTable: FunctionTable
    @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @rtype: 1d Numpy array of bool, or None
    @return: The result as a Numpy selector; None if this element has no C{sqlWhere} attribute.
    @raise PmmlValidationError: If the expression does not evaluate to a boolean.
    """

    sqlWhere = self.get("sqlWhere")
    if sqlWhere is None:
        return None

    performanceTable.begin("Aggregate sqlWhere")

    result = Formula().evaluate(dataTable, functionTable, performanceTable, sqlWhere)
    if result.fieldType.dataType != "boolean":
        raise defs.PmmlValidationError("Aggregate sqlWhere must evaluate to a boolean expression, not \"%s\"" % sqlWhere)

    # The selector is finished in place, so the column has to be writable.
    result._unlock()
    if result.mask is not None:
        # Rows that are not VALID can never be selected.
        NP("logical_and", result.data, NP(result.mask == defs.VALID), result.data)

    performanceTable.end("Aggregate sqlWhere")
    return result.data
def where(self, dataTable, functionTable, performanceTable):
    """Evaluate the C{sqlWhere} attribute as a row selector.

    This is an approximation of an SQL where clause, built on the
    Formula class.  Formula provides a C{between} operator and
    other SQL-like methods, but its syntax is not identical to SQL.
    See the Formula class for more.

    @type dataTable: DataTable
    @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
    @type functionTable: FunctionTable
    @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @rtype: 1d Numpy array of bool, or None
    @return: The result as a Numpy selector; None if this element has no C{sqlWhere} attribute.
    @raise PmmlValidationError: If the expression does not evaluate to a boolean.
    """

    sqlWhere = self.get("sqlWhere")
    if sqlWhere is None:
        return None

    performanceTable.begin("Aggregate sqlWhere")

    result = Formula().evaluate(dataTable, functionTable, performanceTable, sqlWhere)
    if result.fieldType.dataType != "boolean":
        raise defs.PmmlValidationError("Aggregate sqlWhere must evaluate to a boolean expression, not \"%s\"" % sqlWhere)

    # The selector is finished in place, so the column has to be writable.
    result._unlock()
    if result.mask is not None:
        # Rows that are not VALID can never be selected.
        NP("logical_and", result.data, NP(result.mask == defs.VALID), result.data)

    performanceTable.end("Aggregate sqlWhere")
    return result.data
def evaluate(self, dataTable, functionTable, performanceTable):
    """Parse this element's text as a Formula and evaluate it on the
    given data (most likely a grid of sample points).

    @type dataTable: DataTable
    @param dataTable: Contains the data to plot.
    @type functionTable: FunctionTable
    @param functionTable: Defines functions that may be used to transform data for plotting.
    @type performanceTable: PerformanceTable
    @param performanceTable: Measures and records performance (time and memory consumption) of the drawing process.
    @rtype: DataColumn
    @return: The result of the expression as a DataColumn.
    """

    return Formula.parse(self.text).evaluate(dataTable, functionTable, performanceTable)
def expressionsToPoints(cls, expression, derivative, samples, loop, functionTable, performanceTable):
    """Evaluate a set of given string-based formulae to generate
    numeric points.

    This is used to plot mathematical curves.

    Rows that are not VALID in any evaluated expression are dropped
    from every returned array.  When no derivative is supplied,
    C{dxlist} and C{dylist} are central differences of the selected
    points; when one is supplied, it is multiplied by the central
    difference of the independent variable, so that C{dxlist} and
    C{dylist} are always step-sized displacements.

    @type expression: 1- or 2-tuple of strings
    @param expression: If a 1-tuple, the string is passed to Formula and interpreted as y(x); if a 2-tuple, the strings are passed to Formula and interpreted as x(t), y(t).
    @type derivative: 1- or 2-tuple of strings (same length as C{expression})
    @param derivative: Strings are passed to Formula and interpreted as dy/dx (if a 1-tuple) or dx/dt, dy/dt (if a 2-tuple).  If the first item is None, derivatives are estimated numerically.
    @type samples: 1d Numpy array
    @param samples: Values of x or t at which to evaluate the expression or expressions.
    @type loop: bool
    @param loop: If False, disconnect the end of the set of points from the beginning.
    @type functionTable: FunctionTable
    @param functionTable: Functions that may be used to perform the calculation.
    @type performanceTable: PerformanceTable
    @param performanceTable: Measures and records performance (time and memory consumption) of the process.
    @rtype: 6-tuple
    @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} (1d Numpy arrays), xfieldType, yfieldType (FieldTypes).
    @raise PmmlValidationError: If any expression evaluates to something that is neither numeric nor temporal.
    """

    def evaluateNumeric(text, sampleTable, label):
        # Parse and evaluate one formula; only numeric or temporal results can be plotted.
        dataColumn = Formula.parse(text).evaluate(sampleTable, functionTable, performanceTable)
        if not dataColumn.fieldType.isnumeric() and not dataColumn.fieldType.istemporal():
            raise defs.PmmlValidationError("PlotFormula %s must return a numeric expression, not %r" % (label, dataColumn.fieldType))
        return dataColumn

    def restrictToValid(selection, dataColumn):
        # Fold a column's validity into the running selection (None means "nothing masked so far").
        if dataColumn.mask is None:
            return selection
        valid = NP(dataColumn.mask == defs.VALID)
        if selection is None:
            return valid
        NP("logical_and", selection, valid, selection)
        return selection

    def pick(array, selection):
        # Apply the selection, if there is one.
        if selection is None:
            return array
        return array[selection]

    def centralDifference(array):
        # Half the distance between each point's two neighbors, wrapping around at the ends.
        return NP((NP("roll", array, -1) - NP("roll", array, 1)) / 2.0)

    if len(expression) == 1:
        sampleTable = DataTable({"x": "double"}, {"x": samples})

        ydataColumn = evaluateNumeric(expression[0], sampleTable, "y(x)")

        xfieldType = cls.xfieldType
        yfieldType = ydataColumn.fieldType

        selection = restrictToValid(None, ydataColumn)

        if derivative[0] is None:
            xlist = pick(samples, selection)
            ylist = pick(ydataColumn.data, selection)

            dxlist = centralDifference(xlist)
            dylist = centralDifference(ylist)

        else:
            dydataColumn = evaluateNumeric(derivative[0], sampleTable, "dy/dx")
            selection = restrictToValid(selection, dydataColumn)

            xlist = pick(samples, selection)
            ylist = pick(ydataColumn.data, selection)

            dxlist = centralDifference(xlist)
            # Scale the slope by the step in x whether or not anything is
            # masked; the product is also a new array, so zeroing its
            # endpoints below never writes into the evaluated column.
            dylist = NP(pick(dydataColumn.data, selection) * dxlist)

    elif len(expression) == 2:
        sampleTable = DataTable({"t": "double"}, {"t": samples})

        xdataColumn = evaluateNumeric(expression[0], sampleTable, "x(t)")
        ydataColumn = evaluateNumeric(expression[1], sampleTable, "y(t)")

        xfieldType = xdataColumn.fieldType
        yfieldType = ydataColumn.fieldType

        selection = restrictToValid(None, xdataColumn)
        selection = restrictToValid(selection, ydataColumn)

        if derivative[0] is None:
            xlist = pick(xdataColumn.data, selection)
            ylist = pick(ydataColumn.data, selection)

            dxlist = centralDifference(xlist)
            dylist = centralDifference(ylist)

        else:
            dxdataColumn = evaluateNumeric(derivative[0], sampleTable, "dx/dt")
            dydataColumn = evaluateNumeric(derivative[1], sampleTable, "dy/dt")

            selection = restrictToValid(selection, dxdataColumn)
            selection = restrictToValid(selection, dydataColumn)

            dt = centralDifference(pick(samples, selection))

            xlist = pick(xdataColumn.data, selection)
            ylist = pick(ydataColumn.data, selection)

            dxlist = NP(pick(dxdataColumn.data, selection) * dt)
            dylist = NP(pick(dydataColumn.data, selection) * dt)

    if not loop:
        # An open curve has no neighbor beyond either end, so the
        # wrapped-around differences there are meaningless.
        dxlist[0] = 0.0
        dxlist[-1] = 0.0
        dylist[0] = 0.0
        dylist[-1] = 0.0

    return xlist, ylist, dxlist, dylist, xfieldType, yfieldType
def prepare(self, state, dataTable, functionTable, performanceTable, plotRange):
    """Prepare a plot element for drawing.

    This stage consists of calculating all quantities and
    determining the bounds of the data.  These bounds may be unioned
    with bounds from other plot elements that overlay this plot
    element, so the drawing (which requires a finalized coordinate
    system) cannot begin yet.

    This method modifies C{plotRange}.

    @type state: ad-hoc Python object
    @param state: State information that persists long enough to use quantities computed in C{prepare} in the C{draw} stage.  This is a work-around of lxml's refusal to let its Python instances maintain C{self} and it is unrelated to DataTableState.
    @type dataTable: DataTable
    @param dataTable: Contains the data to plot.
    @type functionTable: FunctionTable
    @param functionTable: Defines functions that may be used to transform data for plotting.
    @type performanceTable: PerformanceTable
    @param performanceTable: Measures and records performance (time and memory consumption) of the drawing process.
    @type plotRange: PlotRange
    @param plotRange: The bounding box of plot coordinates that this function will expand.
    @raise PmmlValidationError: If required binning attributes are missing or inconsistent, if the x or y axis is strictly positive, if z(x,y) is not numeric, or if the combination of child roles is not one of the supported ones.
    """

    def automaticBins(values, low, high):
        # Choose a number of bins from a bin width of 2*IQR/n^(1/3) (the
        # Freedman-Diaconis rule), never fewer than 10.  With no data
        # there are no quartiles, so go straight to the fallback.
        if len(values) == 0:
            return 10
        q1, q3 = NP("percentile", values, [25.0, 75.0])
        binWidth = 2.0 * (q3 - q1) / math.pow(len(values), 1.0/3.0)
        if binWidth > 0.0:
            return max(10, int(math.ceil((high - low) / binWidth)))
        return 10

    self.checkRoles(["z(x,y)", "x", "y", "zmean", "zweight"])

    performanceTable.begin("PlotHeatMap prepare")
    self._saveContext(dataTable)

    zofxy = self.xpath("pmml:PlotFormula[@role='z(x,y)']")
    xexpr = self.xpath("pmml:PlotNumericExpression[@role='x']")
    yexpr = self.xpath("pmml:PlotNumericExpression[@role='y']")
    zmean = self.xpath("pmml:PlotNumericExpression[@role='zmean']")
    zweight = self.xpath("pmml:PlotNumericExpression[@role='zweight']")
    cutExpression = self.xpath("pmml:PlotSelection")

    if len(zofxy) == 1 and len(xexpr) == 0 and len(yexpr) == 0 and len(zmean) == 0 and len(zweight) == 0:
        # Mathematical function z(x,y), sampled on an explicit grid.
        xbins = self.get("xbins", convertType=True)
        xlow = self.get("xlow", convertType=True)
        xhigh = self.get("xhigh", convertType=True)
        ybins = self.get("ybins", convertType=True)
        ylow = self.get("ylow", convertType=True)
        yhigh = self.get("yhigh", convertType=True)

        if xbins is None or xlow is None or xhigh is None or ybins is None or ylow is None or yhigh is None:
            raise defs.PmmlValidationError("xbins, xlow, xhigh, ybins, ylow, and yhigh are required for HeatMaps of a mathematical formula")

        if xlow >= xhigh or ylow >= yhigh:
            raise defs.PmmlValidationError("xlow must be less than xhigh and ylow must be less than yhigh")

        if plotRange.xStrictlyPositive or plotRange.yStrictlyPositive:
            raise defs.PmmlValidationError("PlotHeatMap can only be properly displayed in linear x, y coordinates")

        # x varies fastest: the grid is laid out row by row.
        xarray = NP("tile", NP("linspace", xlow, xhigh, xbins, endpoint=True), ybins)
        yarray = NP("repeat", NP("linspace", ylow, yhigh, ybins, endpoint=True), xbins)

        sampleTable = DataTable({"x": "double", "y": "double"}, {"x": xarray, "y": yarray})
        parsed = Formula.parse(zofxy[0].text)

        performanceTable.pause("PlotHeatMap prepare")
        zdataColumn = parsed.evaluate(sampleTable, functionTable, performanceTable)
        performanceTable.unpause("PlotHeatMap prepare")

        if not zdataColumn.fieldType.isnumeric():
            raise defs.PmmlValidationError("PlotFormula z(x,y) must return a numeric expression, not %r" % zdataColumn.fieldType)

        selection = NP("isfinite", zdataColumn.data)
        if zdataColumn.mask is not None:
            NP("logical_and", selection, NP(zdataColumn.mask == defs.VALID), selection)
        if plotRange.zStrictlyPositive:
            NP("logical_and", selection, NP(zdataColumn.data > 0.0), selection)

        gooddata = zdataColumn.data[selection]
        # min() and max() of an empty array raise, so only push z bounds
        # when at least one sample survived the selection.
        if len(gooddata) > 0:
            plotRange.zminPush(gooddata.min(), zdataColumn.fieldType, sticky=False)
            plotRange.zmaxPush(gooddata.max(), zdataColumn.fieldType, sticky=False)

        state.zdata = zdataColumn.data
        state.zmask = NP("logical_not", selection) * defs.INVALID

    elif len(zofxy) == 0 and len(xexpr) == 1 and len(yexpr) == 1:
        # Two-dimensional histogram of the data (plain, weighted, or mean of z).
        performanceTable.pause("PlotHeatMap prepare")
        xdataColumn = xexpr[0].evaluate(dataTable, functionTable, performanceTable)
        ydataColumn = yexpr[0].evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause("PlotHeatMap prepare")

        xbins = self.get("xbins", convertType=True)
        xlow = self.get("xlow", convertType=True)
        xhigh = self.get("xhigh", convertType=True)
        ybins = self.get("ybins", convertType=True)
        ylow = self.get("ylow", convertType=True)
        yhigh = self.get("yhigh", convertType=True)

        # Any bound that was not specified comes from the data, or from
        # the unit square if there is no data.
        if len(xdataColumn) > 0:
            if xlow is None: xlow = NP("nanmin", xdataColumn.data)
            if xhigh is None: xhigh = NP("nanmax", xdataColumn.data)
            if ylow is None: ylow = NP("nanmin", ydataColumn.data)
            if yhigh is None: yhigh = NP("nanmax", ydataColumn.data)
        else:
            if xlow is None: xlow = 0.0
            if xhigh is None: xhigh = 1.0
            if ylow is None: ylow = 0.0
            if yhigh is None: yhigh = 1.0

        if xbins is None:
            xbins = automaticBins(xdataColumn.data, xlow, xhigh)
        if ybins is None:
            ybins = automaticBins(ydataColumn.data, ylow, yhigh)

        if xlow >= xhigh or ylow >= yhigh:
            raise defs.PmmlValidationError("xlow must be less than xhigh and ylow must be less than yhigh")

        if plotRange.xStrictlyPositive or plotRange.yStrictlyPositive:
            raise defs.PmmlValidationError("PlotHeatMap can only be properly displayed in linear x, y coordinates")

        persistentState = {}
        stateId = self.get("stateId")
        if stateId is not None:
            if stateId in dataTable.state:
                persistentState = dataTable.state[stateId]
            else:
                dataTable.state[stateId] = persistentState

        if len(zmean) == 0:
            # Binning remembered from an earlier call overrides what was just computed.
            if "xbins" in persistentState: xbins = persistentState["xbins"]
            if "xlow" in persistentState: xlow = persistentState["xlow"]
            if "xhigh" in persistentState: xhigh = persistentState["xhigh"]
            if "ybins" in persistentState: ybins = persistentState["ybins"]
            if "ylow" in persistentState: ylow = persistentState["ylow"]
            if "yhigh" in persistentState: yhigh = persistentState["yhigh"]

        persistentState["xbins"] = xbins
        persistentState["xlow"] = xlow
        persistentState["xhigh"] = xhigh
        persistentState["ybins"] = ybins
        persistentState["ylow"] = ylow
        persistentState["yhigh"] = yhigh

        # A float mask (1.0 keeps a row, 0.0 drops it) doubles as histogram weights.
        mask = NP("ones", len(dataTable), dtype=NP.dtype(float))
        if xdataColumn.mask is not None:
            NP("multiply", mask, (xdataColumn.mask == defs.VALID), mask)
        if ydataColumn.mask is not None:
            NP("multiply", mask, (ydataColumn.mask == defs.VALID), mask)

        if len(cutExpression) == 1:
            performanceTable.pause("PlotHeatMap prepare")
            NP("multiply", mask, cutExpression[0].select(dataTable, functionTable, performanceTable), mask)
            performanceTable.unpause("PlotHeatMap prepare")

        if len(zmean) == 0 and len(zweight) == 0:
            histogram = NP("histogram2d", ydataColumn.data, xdataColumn.data, bins=(ybins, xbins), range=[[ylow, yhigh], [xlow, xhigh]], weights=mask)[0]
            if len(dataTable) == 0:
                # work around Numpy <= 1.6.1 bug
                histogram = NP("zeros", (ybins, xbins), dtype=NP.dtype(float))

            if "histogram" in persistentState:
                persistentState["histogram"] = NP(persistentState["histogram"] + histogram)
            else:
                persistentState["histogram"] = histogram

            histogram = persistentState["histogram"]

            if plotRange.zStrictlyPositive:
                zmin = 0.1
            else:
                zmin = 0.0
            zmax = NP("nanmax", histogram)

            plotRange.zminPush(zmin, self.zfieldType, sticky=True)
            if zmax > zmin:
                plotRange.zmaxPush(zmax, self.zfieldType, sticky=False)

        elif len(zmean) == 0 and len(zweight) == 1:
            performanceTable.pause("PlotHeatMap prepare")
            weightsDataColumn = zweight[0].evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause("PlotHeatMap prepare")

            if weightsDataColumn.mask is not None:
                NP("multiply", mask, (weightsDataColumn.mask == defs.VALID), mask)
            weights = NP(weightsDataColumn.data * mask)

            histogram = NP("histogram2d", ydataColumn.data, xdataColumn.data, bins=(ybins, xbins), range=[[ylow, yhigh], [xlow, xhigh]], weights=weights)[0]

            if "histogram" in persistentState:
                persistentState["histogram"] = NP(persistentState["histogram"] + histogram)
            else:
                persistentState["histogram"] = histogram

            histogram = persistentState["histogram"]

            if plotRange.zStrictlyPositive:
                w = weights[NP(weights > 0.0)]
                if len(w) > 0:
                    zmin = 0.1 * NP("nanmin", w)
                else:
                    zmin = 0.1
            else:
                zmin = 0.0
            zmax = NP("nanmax", histogram)

            plotRange.zminPush(zmin, self.zfieldType, sticky=True)
            if zmax > zmin:
                plotRange.zmaxPush(zmax, self.zfieldType, sticky=False)

        elif len(zmean) == 1 and len(zweight) == 0:
            performanceTable.pause("PlotHeatMap prepare")
            zdataColumn = zmean[0].evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause("PlotHeatMap prepare")

            if zdataColumn.mask is not None:
                NP("multiply", mask, (zdataColumn.mask == defs.VALID), mask)
            weights = NP(zdataColumn.data * mask)

            # Mean of z per bin = (sum of z per bin) / (count per bin).
            numer = NP("histogram2d", ydataColumn.data, xdataColumn.data, bins=(ybins, xbins), range=[[ylow, yhigh], [xlow, xhigh]], weights=weights)[0]
            denom = NP("histogram2d", ydataColumn.data, xdataColumn.data, bins=(ybins, xbins), range=[[ylow, yhigh], [xlow, xhigh]], weights=mask)[0]

            if "numer" in persistentState:
                persistentState["numer"] = NP(persistentState["numer"] + numer)
                persistentState["denom"] = NP(persistentState["denom"] + denom)
            else:
                persistentState["numer"] = numer
                persistentState["denom"] = denom

            numer = persistentState["numer"]
            denom = persistentState["denom"]
            histogram = numer / denom

            # Bins with a zero denominator are not finite and do not
            # contribute to the z range.
            selection = NP("isfinite", histogram)
            if plotRange.zStrictlyPositive:
                NP("logical_and", selection, NP(histogram > 0.0), selection)

            if NP("count_nonzero", selection) > 0:
                gooddata = histogram[selection]
                plotRange.zminPush(gooddata.min(), self.zfieldType, sticky=False)
                plotRange.zmaxPush(gooddata.max(), self.zfieldType, sticky=False)

        else:
            raise defs.PmmlValidationError("The only allowed combinations of PlotFormula/PlotNumericExpressions are: \"z(x,y)\" (function), \"x y\" (histogram), \"x y zmean\" (mean of z in x y bins), \"x y zweight\" (weighted x y histogram)")

        state.zdata = NP("reshape", histogram, xbins*ybins)
        state.zmask = None

    else:
        raise defs.PmmlValidationError("The only allowed combinations of PlotFormula/PlotNumericExpressions are: \"z(x,y)\" (function), \"x y\" (histogram), \"x y zmean\" (mean of z in x y bins), \"x y zweight\" (weighted x y histogram)")

    plotRange.xminPush(xlow, self.xyfieldType, sticky=True)
    plotRange.yminPush(ylow, self.xyfieldType, sticky=True)
    plotRange.xmaxPush(xhigh, self.xyfieldType, sticky=True)
    plotRange.ymaxPush(yhigh, self.xyfieldType, sticky=True)

    state.xbins = xbins
    state.xlow = xlow
    state.xhigh = xhigh
    state.ybins = ybins
    state.ylow = ylow
    state.yhigh = yhigh

    performanceTable.end("PlotHeatMap prepare")
def expressionsToPoints(cls, expression, derivative, samples, loop, functionTable, performanceTable):
    """Evaluate a set of given string-based formulae to generate
    numeric points.

    This is used to plot mathematical curves.

    Rows that are not VALID in any evaluated expression are dropped
    from every returned array.  When no derivative is supplied,
    C{dxlist} and C{dylist} are central differences of the selected
    points; when one is supplied, it is multiplied by the central
    difference of the independent variable, so that C{dxlist} and
    C{dylist} are always step-sized displacements.

    @type expression: 1- or 2-tuple of strings
    @param expression: If a 1-tuple, the string is passed to Formula and interpreted as y(x); if a 2-tuple, the strings are passed to Formula and interpreted as x(t), y(t).
    @type derivative: 1- or 2-tuple of strings (same length as C{expression})
    @param derivative: Strings are passed to Formula and interpreted as dy/dx (if a 1-tuple) or dx/dt, dy/dt (if a 2-tuple).  If the first item is None, derivatives are estimated numerically.
    @type samples: 1d Numpy array
    @param samples: Values of x or t at which to evaluate the expression or expressions.
    @type loop: bool
    @param loop: If False, disconnect the end of the set of points from the beginning.
    @type functionTable: FunctionTable
    @param functionTable: Functions that may be used to perform the calculation.
    @type performanceTable: PerformanceTable
    @param performanceTable: Measures and records performance (time and memory consumption) of the process.
    @rtype: 6-tuple
    @return: C{xlist}, C{ylist}, C{dxlist}, C{dylist} (1d Numpy arrays), xfieldType, yfieldType (FieldTypes).
    @raise PmmlValidationError: If any expression evaluates to something that is neither numeric nor temporal.
    """

    def evaluateNumeric(text, sampleTable, label):
        # Parse and evaluate one formula; only numeric or temporal results can be plotted.
        dataColumn = Formula.parse(text).evaluate(sampleTable, functionTable, performanceTable)
        if not dataColumn.fieldType.isnumeric() and not dataColumn.fieldType.istemporal():
            raise defs.PmmlValidationError("PlotFormula %s must return a numeric expression, not %r" % (label, dataColumn.fieldType))
        return dataColumn

    def restrictToValid(selection, dataColumn):
        # Fold a column's validity into the running selection (None means "nothing masked so far").
        if dataColumn.mask is None:
            return selection
        valid = NP(dataColumn.mask == defs.VALID)
        if selection is None:
            return valid
        NP("logical_and", selection, valid, selection)
        return selection

    def pick(array, selection):
        # Apply the selection, if there is one.
        if selection is None:
            return array
        return array[selection]

    def centralDifference(array):
        # Half the distance between each point's two neighbors, wrapping around at the ends.
        return NP((NP("roll", array, -1) - NP("roll", array, 1)) / 2.0)

    if len(expression) == 1:
        sampleTable = DataTable({"x": "double"}, {"x": samples})

        ydataColumn = evaluateNumeric(expression[0], sampleTable, "y(x)")

        xfieldType = cls.xfieldType
        yfieldType = ydataColumn.fieldType

        selection = restrictToValid(None, ydataColumn)

        if derivative[0] is None:
            xlist = pick(samples, selection)
            ylist = pick(ydataColumn.data, selection)

            dxlist = centralDifference(xlist)
            dylist = centralDifference(ylist)

        else:
            dydataColumn = evaluateNumeric(derivative[0], sampleTable, "dy/dx")
            selection = restrictToValid(selection, dydataColumn)

            xlist = pick(samples, selection)
            ylist = pick(ydataColumn.data, selection)

            dxlist = centralDifference(xlist)
            # Scale the slope by the step in x whether or not anything is
            # masked; the product is also a new array, so zeroing its
            # endpoints below never writes into the evaluated column.
            dylist = NP(pick(dydataColumn.data, selection) * dxlist)

    elif len(expression) == 2:
        sampleTable = DataTable({"t": "double"}, {"t": samples})

        xdataColumn = evaluateNumeric(expression[0], sampleTable, "x(t)")
        ydataColumn = evaluateNumeric(expression[1], sampleTable, "y(t)")

        xfieldType = xdataColumn.fieldType
        yfieldType = ydataColumn.fieldType

        selection = restrictToValid(None, xdataColumn)
        selection = restrictToValid(selection, ydataColumn)

        if derivative[0] is None:
            xlist = pick(xdataColumn.data, selection)
            ylist = pick(ydataColumn.data, selection)

            dxlist = centralDifference(xlist)
            dylist = centralDifference(ylist)

        else:
            dxdataColumn = evaluateNumeric(derivative[0], sampleTable, "dx/dt")
            dydataColumn = evaluateNumeric(derivative[1], sampleTable, "dy/dt")

            selection = restrictToValid(selection, dxdataColumn)
            selection = restrictToValid(selection, dydataColumn)

            dt = centralDifference(pick(samples, selection))

            xlist = pick(xdataColumn.data, selection)
            ylist = pick(ydataColumn.data, selection)

            dxlist = NP(pick(dxdataColumn.data, selection) * dt)
            dylist = NP(pick(dydataColumn.data, selection) * dt)

    if not loop:
        # An open curve has no neighbor beyond either end, so the
        # wrapped-around differences there are meaningless.
        dxlist[0] = 0.0
        dxlist[-1] = 0.0
        dylist[0] = 0.0
        dylist[-1] = 0.0

    return xlist, ylist, dxlist, dylist, xfieldType, yfieldType
def prepare(self, state, dataTable, functionTable, performanceTable, plotRange):
    """Prepare a plot element for drawing.

    This stage consists of calculating all quantities and
    determining the bounds of the data.  These bounds may be unioned
    with bounds from other plot elements that overlay this plot
    element, so the drawing (which requires a finalized coordinate
    system) cannot begin yet.

    This method modifies C{plotRange}.

    @type state: ad-hoc Python object
    @param state: State information that persists long enough to use quantities computed in C{prepare} in the C{draw} stage.  This is a work-around of lxml's refusal to let its Python instances maintain C{self} and it is unrelated to DataTableState.
    @type dataTable: DataTable
    @param dataTable: Contains the data to plot.
    @type functionTable: FunctionTable
    @param functionTable: Defines functions that may be used to transform data for plotting.
    @type performanceTable: PerformanceTable
    @param performanceTable: Measures and records performance (time and memory consumption) of the drawing process.
    @type plotRange: PlotRange
    @param plotRange: The bounding box of plot coordinates that this function will expand.
    @raise PmmlValidationError: If required binning attributes are missing or inconsistent, if the x or y axis is strictly positive, if z(x,y) is not numeric, or if the combination of child roles is not one of the supported ones.
    """

    def automaticBins(values, low, high):
        # Choose a number of bins from a bin width of 2*IQR/n^(1/3) (the
        # Freedman-Diaconis rule), never fewer than 10.  With no data
        # there are no quartiles, so go straight to the fallback.
        if len(values) == 0:
            return 10
        q1, q3 = NP("percentile", values, [25.0, 75.0])
        binWidth = 2.0 * (q3 - q1) / math.pow(len(values), 1.0/3.0)
        if binWidth > 0.0:
            return max(10, int(math.ceil((high - low) / binWidth)))
        return 10

    self.checkRoles(["z(x,y)", "x", "y", "zmean", "zweight"])

    performanceTable.begin("PlotHeatMap prepare")
    self._saveContext(dataTable)

    zofxy = self.xpath("pmml:PlotFormula[@role='z(x,y)']")
    xexpr = self.xpath("pmml:PlotNumericExpression[@role='x']")
    yexpr = self.xpath("pmml:PlotNumericExpression[@role='y']")
    zmean = self.xpath("pmml:PlotNumericExpression[@role='zmean']")
    zweight = self.xpath("pmml:PlotNumericExpression[@role='zweight']")
    cutExpression = self.xpath("pmml:PlotSelection")

    if len(zofxy) == 1 and len(xexpr) == 0 and len(yexpr) == 0 and len(zmean) == 0 and len(zweight) == 0:
        # Mathematical function z(x,y), sampled on an explicit grid.
        xbins = self.get("xbins", convertType=True)
        xlow = self.get("xlow", convertType=True)
        xhigh = self.get("xhigh", convertType=True)
        ybins = self.get("ybins", convertType=True)
        ylow = self.get("ylow", convertType=True)
        yhigh = self.get("yhigh", convertType=True)

        if xbins is None or xlow is None or xhigh is None or ybins is None or ylow is None or yhigh is None:
            raise defs.PmmlValidationError("xbins, xlow, xhigh, ybins, ylow, and yhigh are required for HeatMaps of a mathematical formula")

        if xlow >= xhigh or ylow >= yhigh:
            raise defs.PmmlValidationError("xlow must be less than xhigh and ylow must be less than yhigh")

        if plotRange.xStrictlyPositive or plotRange.yStrictlyPositive:
            raise defs.PmmlValidationError("PlotHeatMap can only be properly displayed in linear x, y coordinates")

        # x varies fastest: the grid is laid out row by row.
        xarray = NP("tile", NP("linspace", xlow, xhigh, xbins, endpoint=True), ybins)
        yarray = NP("repeat", NP("linspace", ylow, yhigh, ybins, endpoint=True), xbins)

        sampleTable = DataTable({"x": "double", "y": "double"}, {"x": xarray, "y": yarray})
        parsed = Formula.parse(zofxy[0].text)

        performanceTable.pause("PlotHeatMap prepare")
        zdataColumn = parsed.evaluate(sampleTable, functionTable, performanceTable)
        performanceTable.unpause("PlotHeatMap prepare")

        if not zdataColumn.fieldType.isnumeric():
            raise defs.PmmlValidationError("PlotFormula z(x,y) must return a numeric expression, not %r" % zdataColumn.fieldType)

        selection = NP("isfinite", zdataColumn.data)
        if zdataColumn.mask is not None:
            NP("logical_and", selection, NP(zdataColumn.mask == defs.VALID), selection)
        if plotRange.zStrictlyPositive:
            NP("logical_and", selection, NP(zdataColumn.data > 0.0), selection)

        gooddata = zdataColumn.data[selection]
        # min() and max() of an empty array raise, so only push z bounds
        # when at least one sample survived the selection.
        if len(gooddata) > 0:
            plotRange.zminPush(gooddata.min(), zdataColumn.fieldType, sticky=False)
            plotRange.zmaxPush(gooddata.max(), zdataColumn.fieldType, sticky=False)

        state.zdata = zdataColumn.data
        state.zmask = NP("logical_not", selection) * defs.INVALID

    elif len(zofxy) == 0 and len(xexpr) == 1 and len(yexpr) == 1:
        # Two-dimensional histogram of the data (plain, weighted, or mean of z).
        performanceTable.pause("PlotHeatMap prepare")
        xdataColumn = xexpr[0].evaluate(dataTable, functionTable, performanceTable)
        ydataColumn = yexpr[0].evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause("PlotHeatMap prepare")

        xbins = self.get("xbins", convertType=True)
        xlow = self.get("xlow", convertType=True)
        xhigh = self.get("xhigh", convertType=True)
        ybins = self.get("ybins", convertType=True)
        ylow = self.get("ylow", convertType=True)
        yhigh = self.get("yhigh", convertType=True)

        # Any bound that was not specified comes from the data, or from
        # the unit square if there is no data.
        if len(xdataColumn) > 0:
            if xlow is None: xlow = NP("nanmin", xdataColumn.data)
            if xhigh is None: xhigh = NP("nanmax", xdataColumn.data)
            if ylow is None: ylow = NP("nanmin", ydataColumn.data)
            if yhigh is None: yhigh = NP("nanmax", ydataColumn.data)
        else:
            if xlow is None: xlow = 0.0
            if xhigh is None: xhigh = 1.0
            if ylow is None: ylow = 0.0
            if yhigh is None: yhigh = 1.0

        if xbins is None:
            xbins = automaticBins(xdataColumn.data, xlow, xhigh)
        if ybins is None:
            ybins = automaticBins(ydataColumn.data, ylow, yhigh)

        if xlow >= xhigh or ylow >= yhigh:
            raise defs.PmmlValidationError("xlow must be less than xhigh and ylow must be less than yhigh")

        if plotRange.xStrictlyPositive or plotRange.yStrictlyPositive:
            raise defs.PmmlValidationError("PlotHeatMap can only be properly displayed in linear x, y coordinates")

        persistentState = {}
        stateId = self.get("stateId")
        if stateId is not None:
            if stateId in dataTable.state:
                persistentState = dataTable.state[stateId]
            else:
                dataTable.state[stateId] = persistentState

        if len(zmean) == 0:
            # Binning remembered from an earlier call overrides what was just computed.
            if "xbins" in persistentState: xbins = persistentState["xbins"]
            if "xlow" in persistentState: xlow = persistentState["xlow"]
            if "xhigh" in persistentState: xhigh = persistentState["xhigh"]
            if "ybins" in persistentState: ybins = persistentState["ybins"]
            if "ylow" in persistentState: ylow = persistentState["ylow"]
            if "yhigh" in persistentState: yhigh = persistentState["yhigh"]

        persistentState["xbins"] = xbins
        persistentState["xlow"] = xlow
        persistentState["xhigh"] = xhigh
        persistentState["ybins"] = ybins
        persistentState["ylow"] = ylow
        persistentState["yhigh"] = yhigh

        # A float mask (1.0 keeps a row, 0.0 drops it) doubles as histogram weights.
        mask = NP("ones", len(dataTable), dtype=NP.dtype(float))
        if xdataColumn.mask is not None:
            NP("multiply", mask, (xdataColumn.mask == defs.VALID), mask)
        if ydataColumn.mask is not None:
            NP("multiply", mask, (ydataColumn.mask == defs.VALID), mask)

        if len(cutExpression) == 1:
            performanceTable.pause("PlotHeatMap prepare")
            NP("multiply", mask, cutExpression[0].select(dataTable, functionTable, performanceTable), mask)
            performanceTable.unpause("PlotHeatMap prepare")

        if len(zmean) == 0 and len(zweight) == 0:
            histogram = NP("histogram2d", ydataColumn.data, xdataColumn.data, bins=(ybins, xbins), range=[[ylow, yhigh], [xlow, xhigh]], weights=mask)[0]
            if len(dataTable) == 0:
                # work around Numpy <= 1.6.1 bug
                histogram = NP("zeros", (ybins, xbins), dtype=NP.dtype(float))

            if "histogram" in persistentState:
                persistentState["histogram"] = NP(persistentState["histogram"] + histogram)
            else:
                persistentState["histogram"] = histogram

            histogram = persistentState["histogram"]

            if plotRange.zStrictlyPositive:
                zmin = 0.1
            else:
                zmin = 0.0
            zmax = NP("nanmax", histogram)

            plotRange.zminPush(zmin, self.zfieldType, sticky=True)
            if zmax > zmin:
                plotRange.zmaxPush(zmax, self.zfieldType, sticky=False)

        elif len(zmean) == 0 and len(zweight) == 1:
            performanceTable.pause("PlotHeatMap prepare")
            weightsDataColumn = zweight[0].evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause("PlotHeatMap prepare")

            if weightsDataColumn.mask is not None:
                NP("multiply", mask, (weightsDataColumn.mask == defs.VALID), mask)
            weights = NP(weightsDataColumn.data * mask)

            histogram = NP("histogram2d", ydataColumn.data, xdataColumn.data, bins=(ybins, xbins), range=[[ylow, yhigh], [xlow, xhigh]], weights=weights)[0]

            if "histogram" in persistentState:
                persistentState["histogram"] = NP(persistentState["histogram"] + histogram)
            else:
                persistentState["histogram"] = histogram

            histogram = persistentState["histogram"]

            if plotRange.zStrictlyPositive:
                w = weights[NP(weights > 0.0)]
                if len(w) > 0:
                    zmin = 0.1 * NP("nanmin", w)
                else:
                    zmin = 0.1
            else:
                zmin = 0.0
            zmax = NP("nanmax", histogram)

            plotRange.zminPush(zmin, self.zfieldType, sticky=True)
            if zmax > zmin:
                plotRange.zmaxPush(zmax, self.zfieldType, sticky=False)

        elif len(zmean) == 1 and len(zweight) == 0:
            performanceTable.pause("PlotHeatMap prepare")
            zdataColumn = zmean[0].evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause("PlotHeatMap prepare")

            if zdataColumn.mask is not None:
                NP("multiply", mask, (zdataColumn.mask == defs.VALID), mask)
            weights = NP(zdataColumn.data * mask)

            # Mean of z per bin = (sum of z per bin) / (count per bin).
            numer = NP("histogram2d", ydataColumn.data, xdataColumn.data, bins=(ybins, xbins), range=[[ylow, yhigh], [xlow, xhigh]], weights=weights)[0]
            denom = NP("histogram2d", ydataColumn.data, xdataColumn.data, bins=(ybins, xbins), range=[[ylow, yhigh], [xlow, xhigh]], weights=mask)[0]

            if "numer" in persistentState:
                persistentState["numer"] = NP(persistentState["numer"] + numer)
                persistentState["denom"] = NP(persistentState["denom"] + denom)
            else:
                persistentState["numer"] = numer
                persistentState["denom"] = denom

            numer = persistentState["numer"]
            denom = persistentState["denom"]
            histogram = numer / denom

            # Bins with a zero denominator are not finite and do not
            # contribute to the z range.
            selection = NP("isfinite", histogram)
            if plotRange.zStrictlyPositive:
                NP("logical_and", selection, NP(histogram > 0.0), selection)

            if NP("count_nonzero", selection) > 0:
                gooddata = histogram[selection]
                plotRange.zminPush(gooddata.min(), self.zfieldType, sticky=False)
                plotRange.zmaxPush(gooddata.max(), self.zfieldType, sticky=False)

        else:
            raise defs.PmmlValidationError("The only allowed combinations of PlotFormula/PlotNumericExpressions are: \"z(x,y)\" (function), \"x y\" (histogram), \"x y zmean\" (mean of z in x y bins), \"x y zweight\" (weighted x y histogram)")

        state.zdata = NP("reshape", histogram, xbins*ybins)
        state.zmask = None

    else:
        raise defs.PmmlValidationError("The only allowed combinations of PlotFormula/PlotNumericExpressions are: \"z(x,y)\" (function), \"x y\" (histogram), \"x y zmean\" (mean of z in x y bins), \"x y zweight\" (weighted x y histogram)")

    plotRange.xminPush(xlow, self.xyfieldType, sticky=True)
    plotRange.yminPush(ylow, self.xyfieldType, sticky=True)
    plotRange.xmaxPush(xhigh, self.xyfieldType, sticky=True)
    plotRange.ymaxPush(yhigh, self.xyfieldType, sticky=True)

    state.xbins = xbins
    state.xlow = xlow
    state.xhigh = xhigh
    state.ybins = ybins
    state.ylow = ylow
    state.yhigh = yhigh

    performanceTable.end("PlotHeatMap prepare")