Beispiel #1
0
    def _selectMax(self, dataTable, functionTable, performanceTable,
                   segmentation):
        """Give each row the largest valid score among its matching segments.

        Used by C{calculateScore}.

        Every C{Segment} whose predicate selects at least one row has
        its model calculated on that subset of C{dataTable}.  A row
        that has no score yet takes the segment's score; a row that
        already has one keeps the larger of the two.  Rows whose
        sub-model score is not C{defs.VALID} are ignored for that
        segment.  Output fields produced by the sub-models are merged
        into C{dataTable.output}.

        @param dataTable: The table being scored; its C{output} mapping is modified in place.
        @param functionTable: Passed through to the predicates and sub-models.
        @param performanceTable: Timing record; the "Segmentation max" entry is paused while predicates and sub-models run.
        @param segmentation: The element whose C{Segment} children are evaluated, in the order C{childrenOfTag} yields them.
        @rtype: dict
        @return: C{{None: scores}}, where C{scores} is a C{DataColumn} of C{self.scoreType}; rows that never received a score are masked with C{defs.MISSING}.
        @raise defs.PmmlValidationError: If a sub-model produces a score whose dataType is "string", "boolean", or "object".
        """

        performanceTable.begin("Segmentation max")

        scoresData = NP("empty", len(dataTable), dtype=NP.dtype(object))
        # filled/unfilled are complementary full-length flags: has this
        # row received a score from an earlier segment or not?
        filled = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
        unfilled = NP("ones", len(dataTable), dtype=NP.dtype(bool))

        newOutputData = []
        for segment in segmentation.childrenOfTag("Segment", iterator=True):
            performanceTable.pause("Segmentation max")
            selection = segment.childOfClass(PmmlPredicate).evaluate(
                dataTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation max")
            if not selection.any():
                continue

            subTable = dataTable.subTable(selection)
            subModel = segment.childOfClass(PmmlModel)
            performanceTable.pause("Segmentation max")
            subModel.calculate(subTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation max")

            if subTable.score.fieldType.dataType in ("string", "boolean",
                                                     "object"):
                raise defs.PmmlValidationError(
                    "Segmentation with multipleModelMethod=\"max\" cannot be applied to models that produce dataType \"%s\""
                    % subTable.score.fieldType.dataType)

            # Sub-table-length flags (one entry per selected row); boolean
            # indexing returns fresh arrays, so they can be narrowed in place.
            filled_selection = filled[selection]
            unfilled_selection = unfilled[selection]

            # ignore invalid in matches (like the built-in "min" Apply function)
            if subTable.score.mask is not None:
                # The score mask has one entry per sub-table row, so it is
                # applied to the sub-table-length flags and then projected
                # back onto a full-length copy of the selection.  The
                # predicate's own array is left untouched.
                validScores = NP(subTable.score.mask == defs.VALID)
                NP("logical_and", filled_selection, validScores,
                   filled_selection)
                NP("logical_and", unfilled_selection, validScores,
                   unfilled_selection)
                validSelection = selection.copy()
                validSelection[selection] = validScores
                selection = validSelection

            # Full-length counterparts of the two sub-table-length flags.
            selectionFilled = NP("logical_and", selection, filled)
            selectionUnfilled = NP("logical_and", selection, unfilled)

            # Already-scored rows keep the larger value; unscored rows
            # simply take this segment's score.
            left, right = subTable.score.data[filled_selection], scoresData[
                selectionFilled]
            condition = NP(left > right)
            scoresData[selectionFilled] = NP("where", condition, left, right)
            scoresData[selectionUnfilled] = subTable.score.data[
                unfilled_selection]

            for fieldName, dataColumn in subTable.output.items():
                if fieldName not in dataTable.output:
                    # NOTE(review): rows that were already filled and for
                    # which this segment wins the comparison do not receive
                    # this new field's value here -- confirm whether they
                    # should.
                    data = NP("empty",
                              len(dataTable),
                              dtype=dataColumn.fieldType.dtype)
                    # Slice the sub-table column so its length matches the
                    # number of rows being written.
                    data[selectionUnfilled] = dataColumn.data[
                        unfilled_selection]

                    mask = NP(
                        NP("ones", len(dataTable), dtype=defs.maskType) *
                        defs.MISSING)
                    if dataColumn.mask is None:
                        mask[selectionUnfilled] = defs.VALID
                    else:
                        mask[selectionUnfilled] = dataColumn.mask[
                            unfilled_selection]

                    newDataColumn = DataColumn(dataColumn.fieldType, data,
                                               mask)
                    # Left unlocked so later segments can write into it;
                    # locked again after the loop.
                    newDataColumn._unlock()
                    dataTable.output[fieldName] = newDataColumn
                    newOutputData.append(newDataColumn)

                else:
                    newDataColumn = dataTable.output[fieldName]

                    newDataColumn.data[selectionFilled] = NP(
                        "where", condition, dataColumn.data[filled_selection],
                        newDataColumn.data[selectionFilled])
                    newDataColumn.data[selectionUnfilled] = dataColumn.data[
                        unfilled_selection]

                    if dataColumn.mask is None:
                        newDataColumn.mask[selectionUnfilled] = defs.VALID
                    else:
                        newDataColumn.mask[selectionUnfilled] = \
                            dataColumn.mask[unfilled_selection]

            filled += selectionUnfilled
            # Boolean arrays do not support "-" in current NumPy; clear the
            # newly filled rows with an explicit logical operation instead.
            NP("logical_and", unfilled, NP("logical_not", selectionUnfilled),
               unfilled)

        for newDataColumn in newOutputData:
            # An all-zero mask is replaced by None.
            if not newDataColumn.mask.any():
                newDataColumn._mask = None
            newDataColumn._lock()

        if filled.all():
            scoresMask = None
        else:
            scoresMask = NP(NP("logical_not", filled) * defs.MISSING)

        scores = DataColumn(self.scoreType, scoresData, scoresMask)

        performanceTable.end("Segmentation max")
        return {None: scores}
Beispiel #2
0
    def _selectMax(self, dataTable, functionTable, performanceTable, segmentation):
        """Give each row the largest valid score among its matching segments.

        Used by C{calculateScore}.

        Every C{Segment} whose predicate selects at least one row has
        its model calculated on that subset of C{dataTable}.  A row
        that has no score yet takes the segment's score; a row that
        already has one keeps the larger of the two.  Rows whose
        sub-model score is not C{defs.VALID} are ignored for that
        segment.  Output fields produced by the sub-models are merged
        into C{dataTable.output}.

        @param dataTable: The table being scored; its C{output} mapping is modified in place.
        @param functionTable: Passed through to the predicates and sub-models.
        @param performanceTable: Timing record; the "Segmentation max" entry is paused while predicates and sub-models run.
        @param segmentation: The element whose C{Segment} children are evaluated, in the order C{childrenOfTag} yields them.
        @rtype: dict
        @return: C{{None: scores}}, where C{scores} is a C{DataColumn} of C{self.scoreType}; rows that never received a score are masked with C{defs.MISSING}.
        @raise defs.PmmlValidationError: If a sub-model produces a score whose dataType is "string", "boolean", or "object".
        """

        performanceTable.begin("Segmentation max")

        scoresData = NP("empty", len(dataTable), dtype=NP.dtype(object))
        # filled/unfilled are complementary full-length flags: has this
        # row received a score from an earlier segment or not?
        filled = NP("zeros", len(dataTable), dtype=NP.dtype(bool))
        unfilled = NP("ones", len(dataTable), dtype=NP.dtype(bool))

        newOutputData = []
        for segment in segmentation.childrenOfTag("Segment", iterator=True):
            performanceTable.pause("Segmentation max")
            selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation max")
            if not selection.any():
                continue

            subTable = dataTable.subTable(selection)
            subModel = segment.childOfClass(PmmlModel)
            performanceTable.pause("Segmentation max")
            subModel.calculate(subTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation max")

            if subTable.score.fieldType.dataType in ("string", "boolean", "object"):
                raise defs.PmmlValidationError("Segmentation with multipleModelMethod=\"max\" cannot be applied to models that produce dataType \"%s\"" % subTable.score.fieldType.dataType)

            # Sub-table-length flags (one entry per selected row); boolean
            # indexing returns fresh arrays, so they can be narrowed in place.
            filled_selection = filled[selection]
            unfilled_selection = unfilled[selection]

            # ignore invalid in matches (like the built-in "min" Apply function)
            if subTable.score.mask is not None:
                # The score mask has one entry per sub-table row, so it is
                # applied to the sub-table-length flags and then projected
                # back onto a full-length copy of the selection.  The
                # predicate's own array is left untouched.
                validScores = NP(subTable.score.mask == defs.VALID)
                NP("logical_and", filled_selection, validScores, filled_selection)
                NP("logical_and", unfilled_selection, validScores, unfilled_selection)
                validSelection = selection.copy()
                validSelection[selection] = validScores
                selection = validSelection

            # Full-length counterparts of the two sub-table-length flags.
            selectionFilled = NP("logical_and", selection, filled)
            selectionUnfilled = NP("logical_and", selection, unfilled)

            # Already-scored rows keep the larger value; unscored rows
            # simply take this segment's score.
            left, right = subTable.score.data[filled_selection], scoresData[selectionFilled]
            condition = NP(left > right)
            scoresData[selectionFilled] = NP("where", condition, left, right)
            scoresData[selectionUnfilled] = subTable.score.data[unfilled_selection]

            for fieldName, dataColumn in subTable.output.items():
                if fieldName not in dataTable.output:
                    # NOTE(review): rows that were already filled and for
                    # which this segment wins the comparison do not receive
                    # this new field's value here -- confirm whether they
                    # should.
                    data = NP("empty", len(dataTable), dtype=dataColumn.fieldType.dtype)
                    # Slice the sub-table column so its length matches the
                    # number of rows being written.
                    data[selectionUnfilled] = dataColumn.data[unfilled_selection]

                    mask = NP(NP("ones", len(dataTable), dtype=defs.maskType) * defs.MISSING)
                    if dataColumn.mask is None:
                        mask[selectionUnfilled] = defs.VALID
                    else:
                        mask[selectionUnfilled] = dataColumn.mask[unfilled_selection]

                    newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                    # Left unlocked so later segments can write into it;
                    # locked again after the loop.
                    newDataColumn._unlock()
                    dataTable.output[fieldName] = newDataColumn
                    newOutputData.append(newDataColumn)

                else:
                    newDataColumn = dataTable.output[fieldName]

                    newDataColumn.data[selectionFilled] = NP("where", condition, dataColumn.data[filled_selection], newDataColumn.data[selectionFilled])
                    newDataColumn.data[selectionUnfilled] = dataColumn.data[unfilled_selection]

                    if dataColumn.mask is None:
                        newDataColumn.mask[selectionUnfilled] = defs.VALID
                    else:
                        newDataColumn.mask[selectionUnfilled] = dataColumn.mask[unfilled_selection]

            filled += selectionUnfilled
            # Boolean arrays do not support "-" in current NumPy; clear the
            # newly filled rows with an explicit logical operation instead.
            NP("logical_and", unfilled, NP("logical_not", selectionUnfilled), unfilled)

        for newDataColumn in newOutputData:
            # An all-zero mask is replaced by None.
            if not newDataColumn.mask.any():
                newDataColumn._mask = None
            newDataColumn._lock()

        if filled.all():
            scoresMask = None
        else:
            scoresMask = NP(NP("logical_not", filled) * defs.MISSING)

        scores = DataColumn(self.scoreType, scoresData, scoresMask)

        performanceTable.end("Segmentation max")
        return {None: scores}
Beispiel #3
0
    def _selectFirst(self, dataTable, functionTable, performanceTable,
                     segmentation):
        """Score each row with the first segment whose predicate selects it.

        Used by C{calculateScore}.

        Segments are visited in the order C{childrenOfTag} yields them.
        Each segment's predicate is restricted to rows that no earlier
        segment has scored; the segment's model is calculated on those
        rows and its score, mask and output fields are copied into the
        full-length results.  The loop stops early once every row has
        been scored.

        @param dataTable: The table being scored; its C{output} mapping is modified in place.
        @param functionTable: Passed through to the predicates and sub-models.
        @param performanceTable: Timing record; the "Segmentation selectFirst" entry is paused while predicates and sub-models run.
        @param segmentation: The element whose C{Segment} children are evaluated.
        @rtype: dict
        @return: C{{None: scores}} when C{self.name} is None; otherwise the dictionary also has a C{"segment"} column holding, per row, the C{id} attribute of the segment that scored it.
        """

        performanceTable.begin("Segmentation selectFirst")

        scoresData = NP("empty", len(dataTable), dtype=NP.dtype(object))
        # NOTE(review): the mask starts as zeros and is only written for
        # matched rows, so rows matched by no segment keep mask 0 rather
        # than defs.MISSING -- confirm this is intended.
        scoresMask = NP("zeros", len(dataTable), dtype=defs.maskType)
        unfilled = NP("ones", len(dataTable), dtype=NP.dtype(bool))
        segments = NP("empty", len(dataTable), dtype=NP.dtype(object))

        newOutputData = []
        for segment in segmentation.childrenOfTag("Segment", iterator=True):
            performanceTable.pause("Segmentation selectFirst")
            selection = segment.childOfClass(PmmlPredicate).evaluate(
                dataTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation selectFirst")
            # Only rows not claimed by an earlier segment are eligible.
            NP("logical_and", selection, unfilled, selection)
            if not selection.any():
                continue

            subTable = dataTable.subTable(selection)
            subModel = segment.childOfClass(PmmlModel)
            performanceTable.pause("Segmentation selectFirst")

            subModel.calculate(subTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation selectFirst")

            scoresData[selection] = subTable.score.data
            if subTable.score.mask is not None:
                scoresMask[selection] = subTable.score.mask
            else:
                scoresMask[selection] = defs.VALID

            segmentName = segment.get("id")
            if segmentName is not None:
                segments[selection] = segmentName

            for fieldName, dataColumn in subTable.output.items():
                if fieldName not in dataTable.output:
                    # First appearance of this output field: build a
                    # full-length column that is MISSING everywhere except
                    # in the rows this segment scored.
                    data = NP("empty",
                              len(dataTable),
                              dtype=dataColumn.fieldType.dtype)
                    data[selection] = dataColumn.data

                    mask = NP(
                        NP("ones", len(dataTable), dtype=defs.maskType) *
                        defs.MISSING)
                    if dataColumn.mask is None:
                        mask[selection] = defs.VALID
                    else:
                        mask[selection] = dataColumn.mask

                    newDataColumn = DataColumn(dataColumn.fieldType, data,
                                               mask)
                    # Left unlocked so later segments can write into it;
                    # locked again after the loop.
                    newDataColumn._unlock()
                    dataTable.output[fieldName] = newDataColumn
                    newOutputData.append(newDataColumn)

                else:
                    newDataColumn = dataTable.output[fieldName]

                    newDataColumn.data[selection] = dataColumn.data
                    if dataColumn.mask is None:
                        newDataColumn.mask[selection] = defs.VALID
                    else:
                        newDataColumn.mask[selection] = dataColumn.mask

            # Boolean arrays do not support "-" in current NumPy; clear the
            # newly scored rows with an explicit logical operation instead.
            NP("logical_and", unfilled, NP("logical_not", selection),
               unfilled)
            if not unfilled.any():
                break

        for newDataColumn in newOutputData:
            # An all-zero mask is replaced by None.
            if not newDataColumn.mask.any():
                newDataColumn._mask = None
            newDataColumn._lock()

        if not scoresMask.any():
            scoresMask = None

        scores = DataColumn(self.scoreType, scoresData, scoresMask)

        performanceTable.end("Segmentation selectFirst")
        if self.name is None:
            return {None: scores}
        return {
            None: scores,
            "segment": DataColumn(self.scoreTypeSegment, segments, None)
        }
Beispiel #4
0
    def _selectFirst(self, dataTable, functionTable, performanceTable, segmentation):
        """Score each row with the first segment whose predicate selects it.

        Used by C{calculateScore}.

        Segments are visited in the order C{childrenOfTag} yields them.
        Each segment's predicate is restricted to rows that no earlier
        segment has scored; the segment's model is calculated on those
        rows and its score, mask and output fields are copied into the
        full-length results.  The loop stops early once every row has
        been scored.

        @param dataTable: The table being scored; its C{output} mapping is modified in place.
        @param functionTable: Passed through to the predicates and sub-models.
        @param performanceTable: Timing record; the "Segmentation selectFirst" entry is paused while predicates and sub-models run.
        @param segmentation: The element whose C{Segment} children are evaluated.
        @rtype: dict
        @return: C{{None: scores}} when C{self.name} is None; otherwise the dictionary also has a C{"segment"} column holding, per row, the C{id} attribute of the segment that scored it.
        """

        performanceTable.begin("Segmentation selectFirst")

        scoresData = NP("empty", len(dataTable), dtype=NP.dtype(object))
        # NOTE(review): the mask starts as zeros and is only written for
        # matched rows, so rows matched by no segment keep mask 0 rather
        # than defs.MISSING -- confirm this is intended.
        scoresMask = NP("zeros", len(dataTable), dtype=defs.maskType)
        unfilled = NP("ones", len(dataTable), dtype=NP.dtype(bool))
        segments = NP("empty", len(dataTable), dtype=NP.dtype(object))

        newOutputData = []
        for segment in segmentation.childrenOfTag("Segment", iterator=True):
            performanceTable.pause("Segmentation selectFirst")
            selection = segment.childOfClass(PmmlPredicate).evaluate(dataTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation selectFirst")
            # Only rows not claimed by an earlier segment are eligible.
            NP("logical_and", selection, unfilled, selection)
            if not selection.any():
                continue

            subTable = dataTable.subTable(selection)
            subModel = segment.childOfClass(PmmlModel)
            performanceTable.pause("Segmentation selectFirst")

            subModel.calculate(subTable, functionTable, performanceTable)
            performanceTable.unpause("Segmentation selectFirst")

            scoresData[selection] = subTable.score.data
            if subTable.score.mask is not None:
                scoresMask[selection] = subTable.score.mask
            else:
                scoresMask[selection] = defs.VALID

            segmentName = segment.get("id")
            if segmentName is not None:
                segments[selection] = segmentName

            for fieldName, dataColumn in subTable.output.items():
                if fieldName not in dataTable.output:
                    # First appearance of this output field: build a
                    # full-length column that is MISSING everywhere except
                    # in the rows this segment scored.
                    data = NP("empty", len(dataTable), dtype=dataColumn.fieldType.dtype)
                    data[selection] = dataColumn.data

                    mask = NP(NP("ones", len(dataTable), dtype=defs.maskType) * defs.MISSING)
                    if dataColumn.mask is None:
                        mask[selection] = defs.VALID
                    else:
                        mask[selection] = dataColumn.mask

                    newDataColumn = DataColumn(dataColumn.fieldType, data, mask)
                    # Left unlocked so later segments can write into it;
                    # locked again after the loop.
                    newDataColumn._unlock()
                    dataTable.output[fieldName] = newDataColumn
                    newOutputData.append(newDataColumn)

                else:
                    newDataColumn = dataTable.output[fieldName]

                    newDataColumn.data[selection] = dataColumn.data
                    if dataColumn.mask is None:
                        newDataColumn.mask[selection] = defs.VALID
                    else:
                        newDataColumn.mask[selection] = dataColumn.mask

            # Boolean arrays do not support "-" in current NumPy; clear the
            # newly scored rows with an explicit logical operation instead.
            NP("logical_and", unfilled, NP("logical_not", selection), unfilled)
            if not unfilled.any():
                break

        for newDataColumn in newOutputData:
            # An all-zero mask is replaced by None.
            if not newDataColumn.mask.any():
                newDataColumn._mask = None
            newDataColumn._lock()

        if not scoresMask.any():
            scoresMask = None

        scores = DataColumn(self.scoreType, scoresData, scoresMask)

        performanceTable.end("Segmentation selectFirst")
        if self.name is None:
            return {None: scores}
        return {None: scores, "segment": DataColumn(self.scoreTypeSegment, segments, None)}