def evaluate(self, dataTable, functionTable, performanceTable):
    """Apply the referenced function to this element's child expressions.

    The function is looked up in C{functionTable} under the name stored
    in this element's C{function} attribute and receives the child
    C{PmmlExpression} elements as its arguments.  The
    C{invalidValueTreatment} and C{mapMissingTo} attributes are then
    applied to the function's output.

    @type dataTable: DataTable
    @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
    @type functionTable: FunctionTable
    @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @rtype: DataColumn
    @return: The result of the calculation as a DataColumn.
    @raise LookupError: If C{functionTable} has no entry for the requested function name.
    """
    performanceTable.begin("Apply")

    functionName = self.get("function")
    applied = functionTable.get(functionName)
    if applied is None:
        raise LookupError("Apply references function \"%s\", but it does not exist" % functionName)

    operands = self.childrenOfClass(PmmlExpression)

    # The "Apply" timer is paused around the call; presumably so that the
    # callee's own timing is not charged to "Apply".
    performanceTable.pause("Apply")
    result = applied.evaluate(dataTable, functionTable, performanceTable, operands)
    performanceTable.unpause("Apply")

    treatedMask = FieldCastMethods.applyInvalidValueTreatment(
        result.mask, self.get("invalidValueTreatment"))
    values, finalMask = FieldCastMethods.applyMapMissingTo(
        result.fieldType, result.data, treatedMask, self.get("mapMissingTo"))

    performanceTable.end("Apply")
    return DataColumn(result.fieldType, values, finalMask)
def evaluate(self, dataTable, functionTable, performanceTable, text=None):
    """Parse a formula and evaluate it, using a DataTable as input.

    The text is parsed with C{Formula.parse} and the parsed object is
    evaluated.  If the result carries a mask, the
    C{invalidValueTreatment} and C{mapMissingTo} attributes of this
    element are applied to it; a result whose mask is C{None} is
    returned unchanged.

    @type dataTable: DataTable
    @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
    @type functionTable: FunctionTable
    @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @type text: string or None
    @param text: If None, use the text of this Formula object; otherwise, use C{text} instead.
    @rtype: DataColumn
    @return: The result of the calculation as a DataColumn.
    """
    if text is None:
        text = self.text

    performanceTable.begin("Formula parse")
    parsed = Formula.parse(text)
    performanceTable.end("Formula parse")

    performanceTable.begin("Formula evaluate")
    dataColumn = parsed.evaluate(dataTable, functionTable, performanceTable)

    if dataColumn.mask is None:
        # Nothing to treat or map.  Close the timer before leaving so that
        # begin/end stay paired on this exit path as well.
        performanceTable.end("Formula evaluate")
        return dataColumn

    mask = FieldCastMethods.applyInvalidValueTreatment(
        dataColumn.mask, self.get("invalidValueTreatment"))
    data, mask = FieldCastMethods.applyMapMissingTo(
        dataColumn.fieldType, dataColumn.data, mask, self.get("mapMissingTo"))

    performanceTable.end("Formula evaluate")
    return DataColumn(dataColumn.fieldType, data, mask)
def evaluate(self, dataTable, functionTable, performanceTable):
    """Evaluate the child expression and cast its result.

    The single child C{PmmlExpression} is evaluated first (outside the
    "CastExpression" timer).  Its output is cast to C{FieldType(self)},
    after which the C{invalidValueTreatment} and C{mapMissingTo}
    attributes of this element are applied.

    @type dataTable: DataTable
    @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
    @type functionTable: FunctionTable
    @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @rtype: DataColumn
    @return: The result of the calculation as a DataColumn.
    """
    inner = self.childOfClass(PmmlExpression)
    uncast = inner.evaluate(dataTable, functionTable, performanceTable)

    performanceTable.begin("CastExpression")

    converted = FieldCastMethods.cast(FieldType(self), uncast)
    treatedMask = FieldCastMethods.applyInvalidValueTreatment(
        converted.mask, self.get("invalidValueTreatment"))
    values, finalMask = FieldCastMethods.applyMapMissingTo(
        converted.fieldType, converted.data, treatedMask, self.get("mapMissingTo"))

    performanceTable.end("CastExpression")
    return DataColumn(converted.fieldType, values, finalMask)
def evaluate(self, dataTable, functionTable, performanceTable):
    """Call the function named by this element on its child expressions.

    Looks up the C{function} attribute in C{functionTable}, evaluates
    that function with the child C{PmmlExpression} elements as
    arguments, and then applies this element's
    C{invalidValueTreatment} and C{mapMissingTo} attributes to the
    outcome.

    @type dataTable: DataTable
    @param dataTable: The input DataTable, containing any fields that might be used to evaluate this expression.
    @type functionTable: FunctionTable
    @param functionTable: The FunctionTable, containing any functions that might be called in this expression.
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    @rtype: DataColumn
    @return: The result of the calculation as a DataColumn.
    @raise LookupError: If the named function is not present in C{functionTable}.
    """
    timerKey = "Apply"
    performanceTable.begin(timerKey)

    callee = functionTable.get(self.get("function"))
    if callee is None:
        message = "Apply references function \"%s\", but it does not exist" % self.get("function")
        raise LookupError(message)

    childExpressions = self.childrenOfClass(PmmlExpression)

    # Suspend this element's timer for the duration of the nested call.
    performanceTable.pause(timerKey)
    output = callee.evaluate(dataTable, functionTable, performanceTable, childExpressions)
    performanceTable.unpause(timerKey)

    outMask = FieldCastMethods.applyInvalidValueTreatment(
        output.mask, self.get("invalidValueTreatment"))
    mapped = FieldCastMethods.applyMapMissingTo(
        output.fieldType, output.data, outMask, self.get("mapMissingTo"))
    outData, outMask = mapped

    performanceTable.end(timerKey)
    return DataColumn(output.fieldType, outData, outMask)
def replaceField(self, dataTable, functionTable, performanceTable):
    """Replace a field in the DataTable for outlier removal, missing value handling, and invalid value treatment.

    Does nothing if the DataTable has no field called C{self.name}.
    Otherwise the field is optionally cast to the C{optype} attribute,
    values beyond C{lowValue}/C{highValue} are handled according to the
    C{outliers} attribute ("asMissingValues" or "asExtremeValues"; any
    other setting leaves them alone), and finally
    C{invalidValueTreatment} and C{missingValueReplacement} are applied.
    The resulting column replaces the original one in C{dataTable.fields}.

    @type dataTable: DataTable
    @param dataTable: The pre-built DataTable.
    @type functionTable: FunctionTable
    @param functionTable: A table of functions (not used by this method).
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    """
    column = dataTable.fields.get(self.name)
    if column is None:
        return

    performanceTable.begin("MiningField")

    requestedOptype = self.get("optype", column.fieldType.optype)
    if requestedOptype != column.fieldType.optype:
        column = FieldCastMethods.cast(
            FakeFieldType(column.fieldType.dataType, requestedOptype), column)

    values = column.data
    missing = column.mask
    treatment = self.get("outliers")

    # Low and high bounds are processed the same way; only the direction
    # of the comparison differs.
    bounds = (
        ("lowValue", lambda array, limit: array < limit),
        ("highValue", lambda array, limit: array > limit),
    )
    for attributeName, isBeyond in bounds:
        limit = self.get(attributeName)
        if limit is None:
            continue
        limit = column.fieldType.stringToValue(limit)

        if treatment == "asMissingValues":
            outside = NP(isBeyond(column.data, limit))
            missing = FieldCastMethods.outliersAsMissing(missing, column.mask, outside)

        elif treatment == "asExtremeValues":
            outside = NP(isBeyond(column.data, limit))
            if values is column.data:
                # Work on a writable copy so the column's own array is
                # never modified in place.
                values = NP("copy", values)
                values.setflags(write=True)
            values[outside] = limit

    missing = FieldCastMethods.applyInvalidValueTreatment(
        missing, self.get("invalidValueTreatment"))
    values, missing = FieldCastMethods.applyMapMissingTo(
        column.fieldType, values, missing, self.get("missingValueReplacement"))

    dataTable.fields.replaceField(self.name, DataColumn(column.fieldType, values, missing))
    performanceTable.end("MiningField")
def replaceField(self, dataTable, functionTable, performanceTable):
    """Replace a field in the DataTable for outlier removal, missing value handling, and invalid value treatment.

    If C{dataTable.fields} has no entry for C{self.name}, the method
    returns without touching anything.  Otherwise it:

      1. casts the column when the C{optype} attribute differs from the
         column's current optype;
      2. handles values below C{lowValue} and above C{highValue} as the
         C{outliers} attribute requests ("asMissingValues" updates the
         mask via C{FieldCastMethods.outliersAsMissing},
         "asExtremeValues" overwrites them with the bound);
      3. applies C{invalidValueTreatment} and C{missingValueReplacement};
      4. stores the new column back under C{self.name}.

    @type dataTable: DataTable
    @param dataTable: The pre-built DataTable.
    @type functionTable: FunctionTable
    @param functionTable: A table of functions (not used by this method).
    @type performanceTable: PerformanceTable
    @param performanceTable: A PerformanceTable for measuring the efficiency of the calculation.
    """
    source = dataTable.fields.get(self.name)
    if source is None:
        return

    performanceTable.begin("MiningField")

    currentOptype = source.fieldType.optype
    wantedOptype = self.get("optype", currentOptype)
    if wantedOptype != currentOptype:
        castType = FakeFieldType(source.fieldType.dataType, wantedOptype)
        source = FieldCastMethods.cast(castType, source)

    newData = source.data
    newMask = source.mask

    outliers = self.get("outliers")
    asMissing = (outliers == "asMissingValues")
    asExtreme = (not asMissing) and (outliers == "asExtremeValues")

    lowValue = self.get("lowValue")
    if lowValue is not None:
        lowValue = source.fieldType.stringToValue(lowValue)
        if asMissing:
            tooLow = NP(source.data < lowValue)
            newMask = FieldCastMethods.outliersAsMissing(newMask, source.mask, tooLow)
        elif asExtreme:
            tooLow = NP(source.data < lowValue)
            if newData is source.data:
                # First modification: switch to a writable copy.
                newData = NP("copy", newData)
                newData.setflags(write=True)
            newData[tooLow] = lowValue

    highValue = self.get("highValue")
    if highValue is not None:
        highValue = source.fieldType.stringToValue(highValue)
        if asMissing:
            tooHigh = NP(source.data > highValue)
            newMask = FieldCastMethods.outliersAsMissing(newMask, source.mask, tooHigh)
        elif asExtreme:
            tooHigh = NP(source.data > highValue)
            if newData is source.data:
                # Still the column's own array: copy before writing.
                newData = NP("copy", newData)
                newData.setflags(write=True)
            newData[tooHigh] = highValue

    newMask = FieldCastMethods.applyInvalidValueTreatment(
        newMask, self.get("invalidValueTreatment"))
    newData, newMask = FieldCastMethods.applyMapMissingTo(
        source.fieldType, newData, newMask, self.get("missingValueReplacement"))

    replacement = DataColumn(source.fieldType, newData, newMask)
    dataTable.fields.replaceField(self.name, replacement)
    performanceTable.end("MiningField")