Esempio n. 1
0
    def updateChiSquareIndependence(self, syncNumber, get):
        """Fold one event into the chiSquareIndependence CountTable.

        ``get`` is called once for every entry of ``self.fields``.  The
        resulting values pick out a path of nodes under ``self.countTable``;
        any missing tail of that path is created, ending in a
        FieldValueCount whose ``count`` attribute is refreshed from the
        updator registered for this combination of values.

        Arguments:

            syncNumber: passed through unchanged to every updator's
            ``increment`` call.

            get: callable taking a field name and returning its value
            for the current event.

        Returns False if the first-call setup reports failure or if any
        field value is INVALID or MISSING; returns True once the counts
        and the table's ``sample`` attribute have been updated.
        """

        self.resetLoggerLevels()

        # one-time setup; stays pending until it succeeds
        if self.first:
            initialized = self._updateChiSquareIndependence_first(get)
            if not initialized:
                return False
            self.first = False

        # read every field, giving up on the first unusable value
        got = {}
        values = []
        for name in self.fields:
            datum = get(name)
            if datum is INVALID or datum is MISSING:
                self.logger.debug(
                    "updateChiSquareIndependence: returning False (INVALID or MISSING data)"
                )
                return False
            got[name] = datum
            values.append(datum)
        key = tuple(values)

        depth, node = self._updateChiSquareIndependence_findNode(
            0, got, self.countTable)

        # extend the table from the deepest existing node down to a leaf;
        # only the last field gets a FieldValueCount (starting at zero)
        fieldCount = len(self.fields)
        leafIndex = fieldCount - 1
        for level in xrange(depth, fieldCount):
            name = self.fields[level]
            if level != leafIndex:
                created = pmml.FieldValue(field=name, value=got[name])
            else:
                created = pmml.FieldValueCount(field=name, value=got[name], count=0.)

            node.children.append(created)
            node = created

        # one updator per distinct combination of values, seeded with the
        # count currently stored in the leaf node
        updator = self.updators.get(key)
        if updator is None:
            updator = self.engine.producerUpdateScheme.updator(SUMX)
            updator.initialize({SUMX: node.attrib["count"]})
            self.updators[key] = updator

        # as with histograms, every bin sees the event, but all bins other
        # than the matching one are incremented by zero
        for other in self.updators.values():
            if other is updator:
                continue
            other.increment(syncNumber, 0.)

        updator.increment(syncNumber, 1.)
        node.attrib["count"] = updator.sum()

        # keep the table-wide sample size in step with the bins
        self.total_updator.increment(syncNumber, 1.)
        self.countTable.attrib["sample"] = self.total_updator.sum()

        return True
Esempio n. 2
0
    def _updateChiSquareIndependence_newTableFromFields(self, got):
        """Build a fresh CountTable for ``got`` and install it in the baseline.

        ``got`` maps each name in ``self.fields`` to a value.  Starting from
        a new CountTable with ``sample`` 0, one child is appended per field:
        a FieldValue for every field except the last, and a FieldValueCount
        with ``count`` 0 for the last.  The new table is stored in
        ``self.countTable`` and becomes the only child of ``self.baseline``.
        """
        table = pmml.CountTable(sample=0.)
        self.countTable = table

        leafIndex = len(self.fields) - 1
        parent = table
        for position, name in enumerate(self.fields):
            if position == leafIndex:
                entry = pmml.FieldValueCount(field=name, value=got[name], count=0.)
            else:
                entry = pmml.FieldValue(field=name, value=got[name])
            parent.children.append(entry)

            # step down for the next field; presumably this returns the
            # entry just appended, since the parent was created empty
            parent = parent.child(pmml.nonExtension)

        self.baseline.children = [table]