def _checkValues(self, data, mask):
    """Rebuild the mask for C{data} from the entries of C{self.values}.

    Every entry is queried with C{get("value")}, C{get("displayValue")}
    and C{get("property", "valid")}.  A non-None display value is
    remembered in C{self._displayValue}, keyed by the raw value.  The
    raw value is converted with C{self.stringToValue} and compared
    element-wise against C{data}; matching rows are flagged as valid,
    missing or invalid according to the entry's property.

    If at least one entry has property "valid", only rows matching such
    an entry (and not flagged missing) end up VALID and every other row
    is INVALID.  Otherwise rows stay as they are unless they are flagged
    missing or invalid.

    @param data: The values to check; must support C{len} and element-wise C{==}.
    @param mask: The current mask, or None.  Only its MISSING and INVALID entries are consulted.
    @rtype: 2-tuple
    @return: C{(data, mask)}.  The mask is None when nothing ends up missing or invalid; when C{self.values} is empty the incoming mask is returned untouched.
    @raise defs.PmmlValidationError: If C{self.stringToValue} rejects an entry's value with a ValueError.
    """

    entries = self.values
    if len(entries) == 0:
        return data, mask

    length = len(data)
    isValid = NP("zeros", length, dtype=NP.dtype(bool))
    if mask is not None:
        # fresh boolean arrays, so the in-place updates below never touch the caller's mask
        isMissing = NP(mask == defs.MISSING)
        isInvalid = NP(mask == defs.INVALID)
    else:
        isMissing = NP("zeros", length, dtype=NP.dtype(bool))
        isInvalid = NP("zeros", length, dtype=NP.dtype(bool))

    flagsByProperty = {"valid": isValid, "missing": isMissing, "invalid": isInvalid}

    validCount = 0
    for entry in entries:
        rawValue = entry.get("value")

        label = entry.get("displayValue")
        if label is not None:
            self._displayValue[rawValue] = label

        kind = entry.get("property", "valid")

        # the conversion is attempted (and may fail) even for an unrecognized property
        try:
            converted = self.stringToValue(rawValue)
        except ValueError:
            raise defs.PmmlValidationError("Improper value in Value specification: \"%s\"" % rawValue)

        flags = flagsByProperty.get(kind)
        if flags is None:
            continue

        # accumulate matches in place (third argument is the output array)
        NP("logical_or", flags, NP(data == converted), flags)
        if kind == "valid":
            validCount += 1

    if validCount > 0:
        # guilty until proven innocent: anything not explicitly valid is INVALID
        NP("logical_and", isValid, NP("logical_not", isMissing), isValid)
        if isValid.all():
            return data, None

        newMask = NP(NP("ones", length, dtype=defs.maskType) * defs.INVALID)
        newMask[isMissing] = defs.MISSING
        newMask[isValid] = defs.VALID
        return data, newMask

    # innocent until proven guilty: only flagged rows are marked
    NP("logical_and", isInvalid, NP("logical_not", isMissing), isInvalid)
    if not NP("logical_or", isInvalid, isMissing).any():
        return data, None

    # unflagged rows keep the zero fill value (presumably the VALID code; confirm in defs)
    newMask = NP("zeros", length, dtype=defs.maskType)
    newMask[isMissing] = defs.MISSING
    newMask[isInvalid] = defs.INVALID
    return data, newMask
def _selectMax(self, dataTable, functionTable, performanceTable, segmentation):
    """Used by C{calculateScore}.

    Walk the C{Segment} children of C{segmentation}; for each one whose
    predicate selects at least one row, calculate its sub-model on the
    selected rows and keep, row by row, the largest score seen so far.
    Output fields produced by the sub-models are copied into
    C{dataTable.output}, following the segment that supplied the
    winning score.

    @param dataTable: The table being scored; its C{output} mapping is updated in place.
    @param functionTable: Passed through to predicate evaluation and sub-model calculation.
    @param performanceTable: Receives begin/pause/unpause/end calls labelled "Segmentation max".
    @param segmentation: The element whose C{Segment} children are evaluated.
    @rtype: dict
    @return: C{{None: scores}}, where C{scores} is a DataColumn of C{self.scoreType}; rows that no segment filled are masked MISSING.
    @raise defs.PmmlValidationError: If a sub-model's score has dataType "string", "boolean" or "object".
    """

    performanceTable.begin("Segmentation max")

    numRows = len(dataTable)
    scoresData = NP("empty", numRows, dtype=NP.dtype(object))
    # complementary bookkeeping: rows that already hold a score / rows that do not yet
    filled = NP("zeros", numRows, dtype=NP.dtype(bool))
    unfilled = NP("ones", numRows, dtype=NP.dtype(bool))

    newOutputData = []
    for segment in segmentation.childrenOfTag("Segment", iterator=True):
        # the timer is paused around nested work (presumably so it is accounted for separately)
        performanceTable.pause("Segmentation max")
        selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause("Segmentation max")
        if not selection.any():
            continue

        subTable = dataTable.subTable(selection)
        subModel = segment.childOfClass(PmmlModel)
        performanceTable.pause("Segmentation max")
        subModel.calculate(subTable, functionTable, performanceTable)
        performanceTable.unpause("Segmentation max")

        if subTable.score.fieldType.dataType in ("string", "boolean", "object"):
            raise defs.PmmlValidationError("Segmentation with multipleModelMethod=\"max\" cannot be applied to models that produce dataType \"%s\"" % subTable.score.fieldType.dataType)

        # ignore invalid in matches (like the built-in "min" Apply function)
        # NOTE(review): this narrows the full-length selection using the sub-table's mask;
        # the lengths only agree if subTable keeps one row per dataTable row -- confirm
        # against DataTable.subTable before relying on masked sub-model scores.
        if subTable.score.mask is not None:
            NP("logical_and", selection, NP(subTable.score.mask == defs.VALID), selection)

        # full-length selectors into dataTable rows ...
        selectionFilled = NP("logical_and", selection, filled)
        selectionUnfilled = NP("logical_and", selection, unfilled)
        # ... and the matching selectors into the sub-table's rows
        filled_selection = filled[selection]
        unfilled_selection = unfilled[selection]

        # rows that already hold a score keep the larger of old and new
        left, right = subTable.score.data[filled_selection], scoresData[selectionFilled]
        condition = NP(left > right)
        scoresData[selectionFilled] = NP("where", condition, left, right)
        # rows without a score simply take the new one
        scoresData[selectionUnfilled] = subTable.score.data[unfilled_selection]

        for fieldName, dataColumn in subTable.output.items():
            # Restrict the sub-table column to its not-yet-filled rows so that the
            # number of values always matches the number of selectionUnfilled rows.
            if dataColumn.mask is None:
                unfilledMask = defs.VALID
            else:
                unfilledMask = dataColumn.mask[unfilled_selection]

            if fieldName not in dataTable.output:
                # first segment to produce this field: start from an all-MISSING column
                data = NP("empty", numRows, dtype=dataColumn.fieldType.dtype)
                data[selectionUnfilled] = dataColumn.data[unfilled_selection]

                mask = NP(NP("ones", numRows, dtype=defs.maskType) * defs.MISSING)
                mask[selectionUnfilled] = unfilledMask

                newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                newDataColumn._unlock()
                dataTable.output[fieldName] = newDataColumn
                newOutputData.append(newDataColumn)

            else:
                newDataColumn = dataTable.output[fieldName]

                # follow the winning score on rows that were already filled
                newDataColumn.data[selectionFilled] = NP("where", condition, dataColumn.data[filled_selection], newDataColumn.data[selectionFilled])
                newDataColumn.data[selectionUnfilled] = dataColumn.data[unfilled_selection]
                newDataColumn.mask[selectionUnfilled] = unfilledMask

        # Move the newly scored rows from "unfilled" to "filled".  Explicit logical
        # operations are used because NumPy does not support "-" on boolean arrays.
        NP("logical_or", filled, selectionUnfilled, filled)
        NP("logical_and", unfilled, NP("logical_not", selectionUnfilled), unfilled)

    for newDataColumn in newOutputData:
        # a mask with no nonzero entries is dropped altogether
        if not newDataColumn.mask.any():
            newDataColumn._mask = None
        newDataColumn._lock()

    if filled.all():
        scoresMask = None
    else:
        scoresMask = NP(NP("logical_not", filled) * defs.MISSING)

    scores = DataColumn(self.scoreType, scoresData, scoresMask)

    performanceTable.end("Segmentation max")
    return {None: scores}
def _selectMax(self, dataTable, functionTable, performanceTable, segmentation):
    """Used by C{calculateScore}.

    Walk the C{Segment} children of C{segmentation}; for each one whose
    predicate selects at least one row, calculate its sub-model on the
    selected rows and keep, row by row, the largest score seen so far.
    Output fields produced by the sub-models are copied into
    C{dataTable.output}, following the segment that supplied the
    winning score.

    @param dataTable: The table being scored; its C{output} mapping is updated in place.
    @param functionTable: Passed through to predicate evaluation and sub-model calculation.
    @param performanceTable: Receives begin/pause/unpause/end calls labelled "Segmentation max".
    @param segmentation: The element whose C{Segment} children are evaluated.
    @rtype: dict
    @return: C{{None: scores}}, where C{scores} is a DataColumn of C{self.scoreType}; rows that no segment filled are masked MISSING.
    @raise defs.PmmlValidationError: If a sub-model's score has dataType "string", "boolean" or "object".
    """

    performanceTable.begin("Segmentation max")

    numRows = len(dataTable)
    scoresData = NP("empty", numRows, dtype=NP.dtype(object))
    # complementary bookkeeping: rows that already hold a score / rows that do not yet
    filled = NP("zeros", numRows, dtype=NP.dtype(bool))
    unfilled = NP("ones", numRows, dtype=NP.dtype(bool))

    newOutputData = []
    for segment in segmentation.childrenOfTag("Segment", iterator=True):
        # the timer is paused around nested work (presumably so it is accounted for separately)
        performanceTable.pause("Segmentation max")
        selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
        performanceTable.unpause("Segmentation max")
        if not selection.any():
            continue

        subTable = dataTable.subTable(selection)
        subModel = segment.childOfClass(PmmlModel)
        performanceTable.pause("Segmentation max")
        subModel.calculate(subTable, functionTable, performanceTable)
        performanceTable.unpause("Segmentation max")

        if subTable.score.fieldType.dataType in ("string", "boolean", "object"):
            raise defs.PmmlValidationError("Segmentation with multipleModelMethod=\"max\" cannot be applied to models that produce dataType \"%s\"" % subTable.score.fieldType.dataType)

        # ignore invalid in matches (like the built-in "min" Apply function)
        # NOTE(review): this narrows the full-length selection using the sub-table's mask;
        # the lengths only agree if subTable keeps one row per dataTable row -- confirm
        # against DataTable.subTable before relying on masked sub-model scores.
        if subTable.score.mask is not None:
            NP("logical_and", selection, NP(subTable.score.mask == defs.VALID), selection)

        # full-length selectors into dataTable rows ...
        selectionFilled = NP("logical_and", selection, filled)
        selectionUnfilled = NP("logical_and", selection, unfilled)
        # ... and the matching selectors into the sub-table's rows
        filled_selection = filled[selection]
        unfilled_selection = unfilled[selection]

        # rows that already hold a score keep the larger of old and new
        left, right = subTable.score.data[filled_selection], scoresData[selectionFilled]
        condition = NP(left > right)
        scoresData[selectionFilled] = NP("where", condition, left, right)
        # rows without a score simply take the new one
        scoresData[selectionUnfilled] = subTable.score.data[unfilled_selection]

        for fieldName, dataColumn in subTable.output.items():
            # Restrict the sub-table column to its not-yet-filled rows so that the
            # number of values always matches the number of selectionUnfilled rows.
            if dataColumn.mask is None:
                unfilledMask = defs.VALID
            else:
                unfilledMask = dataColumn.mask[unfilled_selection]

            if fieldName not in dataTable.output:
                # first segment to produce this field: start from an all-MISSING column
                data = NP("empty", numRows, dtype=dataColumn.fieldType.dtype)
                data[selectionUnfilled] = dataColumn.data[unfilled_selection]

                mask = NP(NP("ones", numRows, dtype=defs.maskType) * defs.MISSING)
                mask[selectionUnfilled] = unfilledMask

                newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                newDataColumn._unlock()
                dataTable.output[fieldName] = newDataColumn
                newOutputData.append(newDataColumn)

            else:
                newDataColumn = dataTable.output[fieldName]

                # follow the winning score on rows that were already filled
                newDataColumn.data[selectionFilled] = NP("where", condition, dataColumn.data[filled_selection], newDataColumn.data[selectionFilled])
                newDataColumn.data[selectionUnfilled] = dataColumn.data[unfilled_selection]
                newDataColumn.mask[selectionUnfilled] = unfilledMask

        # Move the newly scored rows from "unfilled" to "filled".  Explicit logical
        # operations are used because NumPy does not support "-" on boolean arrays.
        NP("logical_or", filled, selectionUnfilled, filled)
        NP("logical_and", unfilled, NP("logical_not", selectionUnfilled), unfilled)

    for newDataColumn in newOutputData:
        # a mask with no nonzero entries is dropped altogether
        if not newDataColumn.mask.any():
            newDataColumn._mask = None
        newDataColumn._lock()

    if filled.all():
        scoresMask = None
    else:
        scoresMask = NP(NP("logical_not", filled) * defs.MISSING)

    scores = DataColumn(self.scoreType, scoresData, scoresMask)

    performanceTable.end("Segmentation max")
    return {None: scores}