Esempio n. 1
0
    def serialize(self):
        """Flatten ``self.n_to_v`` into a single delimited text child.

        Stores the number of entries in ``attrib["n"]``, replaces
        ``self.children`` with one ``xmlbase.XMLText`` holding the values
        (ordered by key) joined with ``attrib["delimiter"]``, and finally
        deletes ``self.n_to_v``.

        Raises:
            TypeError: if the keys of ``n_to_v`` are not exactly
                ``0, 1, ..., len(n_to_v) - 1``.
        """
        self.attrib["n"] = len(self.n_to_v)

        keys = sorted(self.n_to_v)
        # Compare against the whole expected sequence: checking only the
        # smallest and largest key would let non-integer keys in between
        # (e.g. 0, 0.5, 2) pass validation.
        if keys != list(range(len(keys))):
            raise TypeError(
                "LookupTable.n_to_v keys must be range(len(n_to_v))")

        self.children = [
            xmlbase.XMLText(self.attrib["delimiter"].join(
                [self.n_to_v[k] for k in keys]))
        ]
        del self.n_to_v
Esempio n. 2
0
    def construct(self, pattern, variables, namedGroups):
        """Recursively build output XML from a replacement ``pattern``.

        Args:
            pattern: the pattern node to instantiate.  Handled cases are
                ``xmlbase.XMLText``, ``xmlbase.XMLSpecial``,
                ``NamedGroupMatch``, and any other node exposing ``tag``,
                ``attrib`` and ``children``.
            variables: namespace handed to ``eval`` as its globals when a
                text node or attribute value is wrapped in ``{...}``.
            namedGroups: mapping from a group name to the items that are
                copied in place of a ``NamedGroupMatch`` with that name.

        Returns:
            A single constructed node, or a list of copied nodes when
            ``pattern`` is a ``NamedGroupMatch``.

        WARNING: ``{...}`` contents are evaluated as Python code with
        ``eval``; patterns must come from a trusted source.
        """
        if isinstance(pattern, xmlbase.XMLText):
            text = "".join(pattern.text).strip(string.whitespace)
            # Slices rather than text[0] / text[-1]: empty or whitespace-only
            # text must pass through unchanged instead of raising IndexError.
            if text[:1] == "{" and text[-1:] == "}":
                ## CAREFUL: another string evaluation as Python code!
                text = str(eval(text[1:-1], variables))
            return xmlbase.XMLText(text)

        elif isinstance(pattern, xmlbase.XMLSpecial):
            return pattern.copy()

        elif isinstance(pattern, NamedGroupMatch):
            return [i.copy() for i in namedGroups[pattern["name"]]]

        else:
            # Use the registered PMML class for this tag when there is one,
            # otherwise fall back to a generic XML element.  new.instance
            # creates the object without running its __init__.
            pmmlClass = pmml.X_ODG_PMML.classMap.get(pattern.tag, None)
            if pmmlClass is None:
                output = new.instance(xmlbase.XML)
            else:
                output = new.instance(pmmlClass)

            output.tag = pattern.tag
            output.attrib = {}
            output.children = []

            for key, value in pattern.attrib.items():
                # Slices so that an empty attribute value is copied verbatim
                # instead of raising IndexError.
                if value[:1] == "{" and value[-1:] == "}":
                    ## CAREFUL: another string evaluation as Python code!
                    output[key] = str(eval(value[1:-1], variables))
                else:
                    output[key] = value

            for child in pattern.children:
                construction = self.construct(child, variables, namedGroups)
                # A named group expands to several nodes; splice them in flat.
                if isinstance(construction, list):
                    output.children.extend(construction)
                else:
                    output.children.append(construction)

            return output
Esempio n. 3
0
    def initialize(self):
        """Interpret PMML file, set up SegmentRecords list, and
        initialize all algorithms.

        Steps, in order: refresh the header stamps (random seed, event
        stamp, timestamp), select the model to run, attach the data stream,
        reset the model's data context, build ``self.segmentRecords``, and
        finally call ``self.reinitialize()``.

        Raises:
            RuntimeError: if ``self.modelName`` is set but no top-level
                model carries that ``modelName`` attribute.
            NotImplementedError: for Aggregate / X-ODG-AggregateReduce
                transformations in unsupported locations, or for a
                ``multipleModelMethod`` other than ``selectFirst`` /
                ``selectAll``.
        """

        self.firstSegment = True

        # set up the header, so that our models can be stamped with time and event number
        header = self.pmmlFile.child(pmml.Header)
        if header.exists(pmml.Extension):
            headerExtension = header.child(pmml.Extension)
        else:
            headerExtension = pmml.Extension()
            header.children.insert(0, headerExtension)

        # replace any existing random-seed entry with the current one
        if headerExtension.exists(pmml.X_ODG_RandomSeed):
            del headerExtension[headerExtension.index(pmml.X_ODG_RandomSeed)]
        augustusRandomSeed = pmml.X_ODG_RandomSeed(
            value=self.augustusRandomSeed)
        headerExtension.children.append(augustusRandomSeed)

        # replace any existing event stamp with a fresh one starting at 0
        if headerExtension.exists(pmml.X_ODG_Eventstamp):
            del headerExtension[headerExtension.index(pmml.X_ODG_Eventstamp)]
        self.eventStamp = pmml.X_ODG_Eventstamp(number=0)
        headerExtension.children.append(self.eventStamp)

        # replace any existing timestamp with the current local time
        if header.exists(pmml.Timestamp):
            del header[header.index(pmml.Timestamp)]
        self.timeStamp = pmml.Timestamp(
            xmlbase.XMLText(datetime.datetime.today().isoformat()))
        header.children.append(self.timeStamp)

        # select the first model or select a model by name
        if self.modelName is None:
            self.pmmlModel = self.pmmlFile.topModels[0]
        else:
            self.pmmlModel = None
            for model in self.pmmlFile.topModels:
                if "modelName" in model.attrib and model.attrib[
                        "modelName"] == self.modelName:
                    self.pmmlModel = model
                    break
            if self.pmmlModel is None:
                raise RuntimeError(
                    "No model named \"%s\" was found in the PMML file" %
                    self.modelName)

        # connect the dataContext to the dataStream, so that events will flow from the input file into the transformations
        self.resetDataStream(self.dataStream)

        # clear the cached state of the model's DataContext (initializes some dictionaries)
        self.pmmlModel.dataContext.clear()
        if self.pmmlModel.dataContext.transformationDictionary:
            self.metadata.data["Transformation dictionary elements"] = len(
                self.pmmlModel.dataContext.transformationDictionary.cast)
        else:
            self.metadata.data["Transformation dictionary elements"] = 0

        self.segmentRecords = []
        self._lookup = NameSpace(tuples={}, fields={}, other=[])
        # thresholds are stored on the class, so they apply to every SegmentRecord
        SegmentRecord.maturityThreshold = self.maturityThreshold
        SegmentRecord.lockingThreshold = self.lockingThreshold

        if self.pmmlFile.exists(pmml.TransformationDictionary):
            if self.pmmlFile.child(pmml.TransformationDictionary).exists(
                    pmml.Aggregate, maxdepth=None):
                raise NotImplementedError(
                    "Aggregate transformations in the TransformationDictionary are not supported"
                )
            if self.pmmlFile.child(pmml.TransformationDictionary).exists(
                    pmml.X_ODG_AggregateReduce, maxdepth=None):
                raise NotImplementedError(
                    "X-ODG-AggregateReduce transformations in the TransformationDictionary are not supported"
                )

        # MiningModels are special because we handle segmentation at the Engine level
        # Currently no support for MiningModels nested within MiningModels
        if isinstance(self.pmmlModel, pmml.MiningModel):
            self.pmmlOutput = self.pmmlModel.child(pmml.Output,
                                                   exception=False)
            segmentation = self.pmmlModel.child(pmml.Segmentation,
                                                exception=False)
            # for now, assume a MiningModel without any segments will be populated through autosegmentation

            if self.pmmlModel.exists(pmml.LocalTransformations):
                if self.pmmlModel.child(pmml.LocalTransformations).exists(
                        pmml.Aggregate, maxdepth=None):
                    raise NotImplementedError(
                        "Aggregate transformations in the MiningModel's LocalTransformations are not supported"
                    )
                if self.pmmlModel.child(pmml.LocalTransformations).exists(
                        pmml.X_ODG_AggregateReduce, maxdepth=None):
                    raise NotImplementedError(
                        "X-ODG-AggregateReduce transformations in the MiningModel's LocalTransformations are not supported"
                    )

            # NOTE(review): segmentation was fetched with exception=False, so
            # it is presumably None when the element is missing; the
            # attribute access below would then fail -- confirm whether a
            # Segmentation element is guaranteed here.
            if segmentation.attrib["multipleModelMethod"] == "selectFirst":
                self.multipleModelMethod = SELECTFIRST
            elif segmentation.attrib["multipleModelMethod"] == "selectAll":
                self.multipleModelMethod = SELECTALL
            else:
                raise NotImplementedError(
                    "Only 'selectFirst', 'selectAll', and no segmentation have been implemented."
                )
            self.metadata.data[
                "Match all segments"] = self.multipleModelMethod != SELECTFIRST

            for pmmlSegment in segmentation.matches(pmml.Segment):
                self._makeSegmentRecord(pmmlSegment)

        else:
            # a single, unsegmented model: wrap it in exactly one SegmentRecord
            self.multipleModelMethod = SELECTONLY

            segmentRecord = SegmentRecord(self.pmmlModel, None, None, self)

            # consumer/producer algorithms are looked up by the model's class
            modelClass = self.pmmlModel.__class__
            algoName = self.producerAlgorithm[
                modelClass.__name__].attrib["algorithm"]
            segmentRecord.consumerAlgorithm = consumerAlgorithmMap[modelClass](
                self, segmentRecord)
            segmentRecord.producerAlgorithm = producerAlgorithmMap[
                modelClass, algoName](self, segmentRecord)
            segmentRecord.producerParameters = self.producerAlgorithm[
                modelClass.__name__].parameters
            self.setProvenance(self.pmmlModel, algoName,
                               segmentRecord.producerAlgorithm,
                               segmentRecord.producerParameters)

            # collect both kinds of aggregate found anywhere under LocalTransformations
            localTransformations = self.pmmlModel.child(
                pmml.LocalTransformations, exception=False)
            if localTransformations is not None:
                segmentRecord.aggregates = localTransformations.matches(
                    pmml.Aggregate, maxdepth=None)
                segmentRecord.aggregates.extend(
                    localTransformations.matches(pmml.X_ODG_AggregateReduce,
                                                 maxdepth=None))
            else:
                segmentRecord.aggregates = []
            for aggregate in segmentRecord.aggregates:
                aggregate.initialize(self.consumerUpdateScheme)

            self.segmentRecords.append(segmentRecord)
            self.metadata.data[
                "First segment model type"] = segmentRecord.pmmlModel.tag

        self.reinitialize()
Esempio n. 4
0
            try:
                last.value = contentCast(s)
            except ValueError, err:
                if self.exception:
                    raise ScoresError(
                        "Could not cast %s content (\"%s\") with %s: %s (line %d of %s)"
                        % (tag, s, contentCast, str(err), self.lineNumber,
                           self.fileName))
                else:
                    last.value = None
                    last.goodCast = False

        else:
            del last.value
            if s != "":
                last.children.append(xmlbase.XMLText(s))

        if len(self.stack) == 1:
            self.output = self.stack.pop()
        else:
            self.stack.pop()

    def processingInstruction(self, target, data):
        """Deliberate no-op: ``target`` and ``data`` are discarded."""

    def comment(self, comment):
        """Deliberate no-op: the ``comment`` argument is discarded."""

    def startCDATA(self):
        self.CDATA = True