def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write the producer configuration for the Automaker example.

    The data file ``inFile`` is loaded as a UniTable so the distinct values
    of the "Color" column can be listed as explicit segments.  The resulting
    XML tree is written to ``outFile``; ``inPMML`` and ``outPMML`` are stored
    (as strings) in the root element's "input" and "output" attributes.
    """
    table = uni.UniTable().fromfile(inFile)

    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    test = ET.SubElement(model, "test")
    test_settings = (
        ("field", "Automaker"),
        ("weightField", "Count"),
        ("testStatistic", "dDist"),
        ("testType", "threshold"),
        ("windowSize", "200"),
        # For the dDist test the threshold is really a ceiling.
        ("threshold", "0.15"),
    )
    for attribute, value in test_settings:
        test.set(attribute, value)

    # The baseline is a discrete distribution built from the same data file.
    baseline = ET.SubElement(test, "baseline")
    for attribute, value in (("dist", "discrete"),
                             ("file", str(inFile)),
                             ("type", "UniTable")):
        baseline.set(attribute, value)

    # One explicit segment per distinct Color value.
    segmentation = ET.SubElement(test, "segmentation")
    makeSegment(table, segmentation, "Color")

    ET.ElementTree(model).write(outFile)
def getInstructions(configFile):
    """Collect the pre/post-processing instructions from a config file.

    Returns a dict with the keys 'preprocessing' and 'postprocessing', each
    holding the text of the ``Instruction`` children of the ``Preprocess`` /
    ``Postprocess`` elements found under ``Processing`` (empty lists when
    there are none).  The key 'consumer' is set to True when ``Processing``
    contains a ``Consumer`` element, or when there is no ``Processing``
    element at all.
    """
    result = {'preprocessing': [], 'postprocessing': []}
    processing = ET.parse(configFile).getroot().find('Processing')

    # Without a Processing section only the consumer is flagged.
    if processing is None:
        result['consumer'] = True
        return result

    if processing.find('Consumer') is not None:
        result['consumer'] = True

    for key, tag in (('postprocessing', 'Postprocess'),
                     ('preprocessing', 'Preprocess')):
        stage = processing.find(tag)
        if stage is None:
            continue
        steps = [step.text for step in stage.findall('Instruction')]
        if steps:
            result[key] = steps
    return result
def getInstructions(configFile):
    """Read the processing instructions declared in ``configFile``.

    The returned dict always has 'preprocessing' and 'postprocessing' lists
    (the ``Instruction`` texts under ``Processing/Preprocess`` and
    ``Processing/Postprocess``).  'consumer' is added with the value True
    if a ``Consumer`` element is present under ``Processing`` or if the
    file has no ``Processing`` element.
    """
    def instruction_texts(stage):
        # A missing stage contributes no instructions.
        if stage is None:
            return []
        return [node.text for node in stage.findall('Instruction')]

    root = ET.parse(configFile).getroot()
    processing = root.find('Processing')

    if processing is None:
        pre_stage = post_stage = None
        consumer_enabled = True
    else:
        pre_stage = processing.find('Preprocess')
        post_stage = processing.find('Postprocess')
        consumer_enabled = processing.find('Consumer') is not None

    instructions = {
        'preprocessing': instruction_texts(pre_stage),
        'postprocessing': instruction_texts(post_stage),
    }
    if consumer_enabled:
        instructions['consumer'] = True
    return instructions
def inputConfigs(self, file): """TODO: Very much refactor this.""" if self.__timer: self.__timer.output("Inputting configurations") #input generic configurations tree = ET.parse(file) root = tree.getroot() self.__mode = root.get("mode") self.__input = root.get("input") self.__output = root.get("output") self.__batch = root.find("batch") if self.__batch is not None: self.__batch = True else: self.__batch = False self.__debugFile = root.find("debug") if self.__debugFile is not None: self.__debugFile = self.__debugFile.get("file") self.__skip = root.find("skip") if self.__skip is not None: self.__skip = long(self.__skip.get("number")) test = root.find("test") #input baseline and alternate distributions self._build = test[0] validation = root.find("validation") if validation is not None: #Decide on method of validating tests. validmethod = validation.get('method') validthreshold = validation.get('threshold') self.testValidation = Producer.testValidatingFunctions( validmethod, validthreshold) else: self.testValidation = Producer.testValidatingFunctions(None) start = 1 if len(test) > 1: next = test[1] start = 2 #TODO: Naive Bayes shouldn't have alternate. if next.tag != "alternate": start = 1 else: self._alternate = next #produce segmentation self._segments = [] for segmentDeclarations in test[start:]: self._makeSegments(segmentDeclarations) #prepare for statistics gathering self._baseDict = {(): None} if self._segments: for segment in self._segments: self._baseDict[Producer.tupelize(segment)] = None #remember the attributes of the test distribution self._attrs = {} for key in test.keys(): self._attrs[str(key)] = str(test.get(key))
def inputConfigs(self, file):
    """Read the producer configuration for a rule-set model.

    Parses the XML in ``file``; the first child of the root must be a
    ``rules`` element, otherwise ValueError is raised.  Stores the generic
    run settings, the model's ``data`` element, the rule-set parameters
    (``nboxes``, ``peel_alpha``, ``paste_alpha``, ``weight_field``), the
    segments, an empty statistics dictionary and the model's attributes.
    """
    if self.__timer:
        self.__timer.output("Inputting configurations")

    root = ET.parse(file).getroot()

    # Generic settings.
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    self.__batch = root.find("batch") is not None

    # Only the rule-set model is handled here.
    model = root.getchildren()[0]
    if model.tag != "rules":
        raise ValueError(
            "Unable to determine what type of model you want produced. Element %s is not recognized."
            % (model.tag))

    def alpha(attribute):
        # A missing attribute (None) falls back to 0.02.
        try:
            return float(model.get(attribute))
        except TypeError:
            return 0.02

    self._data = model.find('data')
    declarations = model.findall('segmentation')
    self.__nboxes = int(model.get('nboxes'))
    self.__peel_alpha = alpha('peel_alpha')
    self.__paste_alpha = alpha('paste_alpha')
    self.__weight_field = model.get('weight_field')
    self._modelType = pmmlRuleSetModel

    self._segments = []
    for declaration in declarations:
        self._makeSegments(declaration)

    # One statistics slot per segment, plus the unsegmented key ().
    self._baseDict = {(): None}
    if self._segments:
        for segment in self._segments:
            self._baseDict[Producer.tupelize(segment)] = None

    # The model attributes are kept so they can be included in the PMML.
    self._attrs = {}
    self._attrs.update((str(name), str(model.get(name))) for name in model.keys())
def inputConfigs(self, file):
    """Load baseline-model producer settings from the XML file ``file``.

    Sets the run options (mode, input, output, batch, debug file, skip),
    the baseline (``_build``) and optional ``_alternate`` elements of the
    ``test`` element, the validation function, the segment list, the
    per-segment statistics dictionary and the ``test`` attributes.
    """
    timer = self.__timer
    if timer:
        timer.output("Inputting configurations")

    tree = ET.parse(file)
    root = tree.getroot()

    # Run options.
    for_mode, for_input, for_output = (root.get(name)
                                       for name in ("mode", "input", "output"))
    self.__mode = for_mode
    self.__input = for_input
    self.__output = for_output
    self.__batch = not (root.find("batch") is None)

    self.__debugFile = root.find("debug")
    if self.__debugFile is not None:
        self.__debugFile = self.__debugFile.get("file")

    self.__skip = root.find("skip")
    if self.__skip is not None:
        self.__skip = long(self.__skip.get("number"))

    test = root.find("test")
    self._build = test[0]  # baseline distribution

    # Validation method for the tests.
    validation = root.find("validation")
    if validation is not None:
        method = validation.get('method')
        threshold = validation.get('threshold')
        validation_args = (method, threshold)
    else:
        validation_args = (None,)
    self.testValidation = Producer.testValidatingFunctions(*validation_args)

    # Segment declarations follow the baseline and, when present, the
    # alternate distribution.  TODO: Naive Bayes shouldn't have alternate.
    start = 1
    if len(test) > 1:
        candidate = test[1]
        if candidate.tag == "alternate":
            self._alternate = candidate
            start = 2

    self._segments = []
    for declaration in test[start:]:
        self._makeSegments(declaration)

    # Statistics are gathered per segment; () is the unsegmented key.
    self._baseDict = {(): None}
    if self._segments:
        for segment in self._segments:
            self._baseDict[Producer.tupelize(segment)] = None

    # Remember the attributes of the test distribution.
    self._attrs = {}
    for name in test.keys():
        self._attrs[str(name)] = str(test.get(name))
def inputConfigs(self, file):
    """Load rule-set producer settings from the XML file ``file``.

    The root's first child must be tagged "rules" (ValueError otherwise).
    ``nboxes`` is required; ``peel_alpha`` and ``paste_alpha`` default to
    0.02 when the attribute is absent.
    """
    timer = self.__timer
    if timer:
        timer.output("Inputting configurations")

    tree = ET.parse(file)
    root = tree.getroot()

    # Run options.
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    self.__batch = not (root.find("batch") is None)

    # Model-specific options.
    model = root.getchildren()[0]
    if model.tag == "rules":
        self._data = model.find('data')
        segmentations = model.findall('segmentation')
        self.__nboxes = int(model.get('nboxes'))

        peel = model.get('peel_alpha')
        try:
            self.__peel_alpha = float(peel)
        except TypeError:
            # Attribute not given.
            self.__peel_alpha = 0.02

        paste = model.get('paste_alpha')
        try:
            self.__paste_alpha = float(paste)
        except TypeError:
            # Attribute not given.
            self.__paste_alpha = 0.02

        self.__weight_field = model.get('weight_field')
        self._modelType = pmmlRuleSetModel
    else:
        message = "Unable to determine what type of model you want produced. Element %s is not recognized."
        raise ValueError(message % (model.tag))

    self._segments = []
    for declaration in segmentations:
        self._makeSegments(declaration)

    # Statistics are gathered per segment; () is the unsegmented key.
    self._baseDict = {(): None}
    if self._segments:
        for segment in self._segments:
            self._baseDict[Producer.tupelize(segment)] = None

    # These attributes will be included in the PMML.
    self._attrs = {}
    for name in model.keys():
        self._attrs[str(name)] = str(model.get(name))
def parse(configFile=None):
    """Resolve the directories and files named in a consumer configuration.

    All relative entries are resolved against the directory that contains
    ``configFile``, except the model data, which is resolved against the
    producer home.

    Returns the tuple ``(consumer, producer, modelData, postprocessing,
    reports, temparea, scoresdir, pmmlfile)``.  ``modelData`` is None when
    the producer ``Config`` entry names an existing file (the model data is
    then not looked up).  If neither a usable producer config nor a
    ``ModelData`` element is present, a string starting with 'FAIL' is
    returned instead of the tuple.  If no output file is configured, a
    message is printed and the process exits with status 1.
    """
    consumer = os.path.dirname(os.path.abspath(configFile))
    root = ET.parse(configFile).getroot()
    structure = root.find('DirectoryStructure')
    producerstructure = structure.find('Producer')
    producer = os.path.join(consumer, producerstructure.find('Home').text)

    # find() returns None for a missing element, so .text raises
    # AttributeError; that is the only failure treated as "not specified".
    try:
        producerConfig = producerstructure.find('Config').text
    except AttributeError:
        producerConfig = None
    if producerConfig is not None and not os.path.isfile(producerConfig):
        producerConfig = None

    # Bind modelData up front: it is only filled in when no producer config
    # is usable, but it is always part of the returned tuple.
    modelData = None
    if producerConfig is None:
        try:
            modelData = producerstructure.find('ModelData').text
        except AttributeError:
            return 'FAIL : Neither the producer config nor the model data are specified!'
        # Model data is assumed to be relative to the producer home.
        modelData = os.path.join(producer, modelData)

    postprocessing = os.path.join(consumer, structure.find('Postprocessing').text)
    reports = os.path.join(consumer, structure.find('Reports').text)

    # Temp is tried relative to the consumer directory first; if that path
    # does not exist the configured value is used as given.
    temp = structure.find('Temp').text
    temparea = os.path.join(consumer, temp)
    if not os.path.exists(temparea):
        temparea = temp

    inputModel = root.find('inputModel')
    pmmlfile = os.path.join(consumer, inputModel.find('fromFile').attrib['name'])

    report = root.find('output').find('report')
    try:
        outputfile = report.find('toFile').attrib['name']
    except (AttributeError, KeyError):
        print('No output file in use!')
        sys.exit(1)
    scoresdir = os.path.dirname(os.path.join(consumer, outputfile))

    return consumer, producer, modelData, postprocessing, reports, temparea, scoresdir, pmmlfile
def inputConfigs(self, file):
    """Read the producer configuration from the XML file ``file``.

    Stores the generic run settings (mode, input, output, batch flag, skip
    count), then the model-specific settings taken from the first child of
    the root element:

    * ``tree``: the ``data`` element, the ``segmentation`` declarations,
      ``maxdepth`` and the model type.
    * ``test``: nothing model-specific is read yet (baseline handling is a
      stub), so no segments are produced.

    Any other tag raises ValueError.  Finally the segments, the statistics
    dictionary and the model's attributes are stored.
    """
    if self.__timer:
        self.__timer.output("Inputting configurations")

    # Generic configuration.
    root = ET.parse(file).getroot()
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    self.__batch = root.find("batch") is not None
    self.__skip = root.find("skip")
    if self.__skip is not None:
        self.__skip = long(self.__skip.get("number"))

    # Model-specific configuration.  segmentations is bound before the
    # branches so that a model without declarations still reaches the
    # segment loop below instead of failing on an unbound name.
    model = root[0]
    segmentations = []
    if model.tag == "tree":
        self._data = model.find('data')
        segmentations = model.findall('segmentation')
        self.__maxdepth = int(model.get('maxdepth'))
        self._modelType = pmmlTreeModel
    elif model.tag == "test":
        # Baseline model handling is not implemented here.
        pass
    else:
        raise ValueError(
            "Unable to determine what type of model you want produced. Element %s is not recognized."
            % (model.tag))

    # Produce segmentation.
    self._segments = []
    for segmentDeclarations in segmentations:
        self._makeSegments(segmentDeclarations)

    # Prepare for statistics gathering; () is the unsegmented key.
    self._baseDict = {(): None}
    if self._segments:
        for segment in self._segments:
            self._baseDict[Producer.tupelize(segment)] = [None]

    # Remember the attributes of the model; they will be included in the PMML.
    self._attrs = {}
    for key in model.keys():
        self._attrs[str(key)] = str(model.get(key))
def inputConfigs(self, file):
    """Load producer settings from the XML file ``file``.

    The generic settings (mode, input, output, batch, skip) come from the
    root element.  The first child of the root selects the model:

    * ``tree`` -- reads ``data``, the ``segmentation`` declarations and
      ``maxdepth``, and selects the tree model type.
    * ``test`` -- baseline handling is still a stub; no declarations are
      read, so the segment list stays empty.

    Raises ValueError for any other model tag.
    """
    timer = self.__timer
    if timer:
        timer.output("Inputting configurations")

    tree = ET.parse(file)
    root = tree.getroot()

    # Generic settings.
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    self.__batch = root.find("batch") is not None
    self.__skip = root.find("skip")
    if self.__skip is not None:
        self.__skip = long(self.__skip.get("number"))

    # Default to "no declarations" so the stub branch below cannot leave
    # the name unbound when the segment loop runs.
    segmentations = []
    model = root[0]
    if model.tag not in ("tree", "test"):
        raise ValueError(
            "Unable to determine what type of model you want produced. Element %s is not recognized."
            % (model.tag))
    if model.tag == "tree":
        self._data = model.find('data')
        segmentations = model.findall('segmentation')
        self.__maxdepth = int(model.get('maxdepth'))
        self._modelType = pmmlTreeModel
    # model.tag == "test": baseline model handling is not implemented here.

    self._segments = []
    for declaration in segmentations:
        self._makeSegments(declaration)

    # Statistics are gathered per segment; () is the unsegmented key.
    self._baseDict = {(): None}
    if self._segments:
        for segment in self._segments:
            self._baseDict[Producer.tupelize(segment)] = [None]

    # The model attributes will be included in the PMML.
    self._attrs = {}
    for name in model.keys():
        self._attrs[str(name)] = str(model.get(name))
def parse(configFile=None):
    """Extract the working locations from a consumer configuration file.

    Paths are joined onto the directory holding ``configFile`` (model data
    is joined onto the producer home instead).

    Returns ``(consumer, producer, modelData, postprocessing, reports,
    temparea, scoresdir, pmmlfile)``.  ``modelData`` is None when the
    producer ``Config`` entry points at an existing file, because the
    ``ModelData`` entry is only consulted when no such config is available.
    Returns a 'FAIL ...' message string when neither is specified.  Prints
    a message and exits with status 1 when the report has no output file.
    """
    consumer = os.path.dirname(os.path.abspath(configFile))
    config = ET.parse(configFile)
    root = config.getroot()

    structure = root.find('DirectoryStructure')
    producerstructure = structure.find('Producer')
    producer = os.path.join(consumer, producerstructure.find('Home').text)

    # A missing <Config> makes find() return None, hence AttributeError.
    configNode = producerstructure.find('Config')
    try:
        producerConfig = configNode.text
    except AttributeError:
        producerConfig = None
    # A config path that is not an existing file counts as "no config".
    if producerConfig is not None:
        if not os.path.isfile(producerConfig):
            producerConfig = None

    # Always bind modelData: it is returned even when a producer config
    # exists and the model data is therefore never read.
    modelData = None
    if producerConfig is None:
        dataNode = producerstructure.find('ModelData')
        try:
            modelData = dataNode.text
        except AttributeError:
            return 'FAIL : Neither the producer config nor the model data are specified!'
        # Assume model data is relative to the producer home.
        modelData = os.path.join(producer, modelData)

    postprocessing = os.path.join(consumer, structure.find('Postprocessing').text)
    reports = os.path.join(consumer, structure.find('Reports').text)

    # In the particular case of temp, if it doesn't work as a relative
    # directory, use the value as given.
    temp = structure.find('Temp').text
    if os.path.exists(os.path.join(consumer, temp)):
        temparea = os.path.join(consumer, temp)
    else:
        temparea = temp

    inputModel = root.find('inputModel')
    pmmlfile = os.path.join(consumer, inputModel.find('fromFile').attrib['name'])

    output = root.find('output')
    report = output.find('report')
    try:
        outputfile = report.find('toFile').attrib['name']
    except (AttributeError, KeyError):
        print('No output file in use!')
        sys.exit(1)
    scoresdir = os.path.dirname(os.path.join(consumer, outputfile))

    return consumer, producer, modelData, postprocessing, reports, temparea, scoresdir, pmmlfile
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write the producer configuration for the "volume" example.

    ``inFile`` is loaded as a UniTable so that the distinct values of the
    "fielddeviceid" and "locationtimestamp" columns can be declared as
    explicit segments.  The XML tree is written to ``outFile``; ``inPMML``
    and ``outPMML`` go into the root's "input" and "output" attributes.
    """
    table = uni.UniTable().fromfile(inFile)

    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    # Threshold test on the z-value of the "volume" field.
    test = ET.SubElement(model, "test")
    for attribute, value in (("field", "volume"),
                             ("testStatistic", "zValue"),
                             ("testType", "threshold"),
                             ("threshold", "1.5")):
        test.set(attribute, value)

    # Gaussian baseline built from the same data file.
    baseline = ET.SubElement(test, "baseline")
    for attribute, value in (("dist", "gaussian"),
                             ("file", str(inFile)),
                             ("type", "UniTable")):
        baseline.set(attribute, value)

    # Segment declarations for the two segmenting fields.
    segmentation = ET.SubElement(test, "segmentation")
    for column in ("fielddeviceid", "locationtimestamp"):
        makeSegment(table, segmentation, column)

    ET.ElementTree(model).write(outFile)
def makePMML(outFile):
    """Write a skeleton PMML 3.1 baseline model to ``outFile``.

    The document declares two categorical string fields -- "Automaker" (the
    test field) and "Color" (the field that defines segments) -- in the data
    dictionary and lists both in the mining schema of an otherwise empty
    ``BaselineModel``.
    """
    def child(parent, tag, *attributes):
        # Append a sub-element and set its attributes in the given order.
        element = ET.SubElement(parent, tag)
        for name, value in attributes:
            element.set(name, value)
        return element

    # The Date column of the data is unused in this example.
    field_names = ("Automaker", "Color")

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")
    child(pmml, "Header", ("copyright", " "))

    dictionary = child(pmml, "DataDictionary")
    for field_name in field_names:
        child(dictionary, "DataField",
              ("name", field_name),
              ("optype", "categorical"),
              ("dataType", "string"))

    # Two fields are mined: the test field and the segmentation field.
    model = child(pmml, "BaselineModel", ("functionName", "baseline"))
    schema = child(model, "MiningSchema")
    for field_name in field_names:
        child(schema, "MiningField", ("name", field_name))

    ET.ElementTree(pmml).write(outFile)
def inputConfigs(self, file):
    """Read the producer configuration from the XML file ``file``.

    Stores the basic run settings (batch mode is always switched on), the
    first child of the ``test`` element as ``_build``, the segments built
    from the remaining children, an empty statistics dictionary keyed by
    segment, the attributes of ``test`` and the test-validation function.
    """
    if self._timer:
        self._timer.output("Inputting configurations")

    root = ET.parse(file).getroot()

    # Basic settings.
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self._output = root.get("output")
    self._batch = True

    debug = root.find("debug")
    self.__debugFile = None if debug is None else debug.get("file")

    self._skip = root.find("skip")
    if self._skip is not None:
        self._skip = long(self._skip.get("number"))

    test = root.find("test")
    # The first sub-element is the 'build' element, specifying data; every
    # later sub-element is a segment declaration.
    self._build = test[0]
    self._segments = []
    for declaration in test[1:]:
        self._makeSegments(declaration)

    # One statistics slot per segment, or a single unsegmented slot.
    self._baseDict = {}
    if not self._segments:
        self._baseDict[()] = None
    else:
        for segment in self._segments:
            self._baseDict[CommonProducer.tupelize(segment)] = None

    # Keep the attributes of the test distribution as plain strings.
    self._attrs = {}
    self._attrs.update((str(name), str(test.get(name))) for name in test.keys())

    # Validation methodology.
    validation = root.find("validation")
    if validation is None:
        self.testValidation = CommonProducer.testValidatingFunctions(None)
    else:
        self.testValidation = CommonProducer.testValidatingFunctions(
            validation.get('method'), validation.get('threshold'))
def inputConfigs(self, file):
    """Load producer settings from the XML file ``file``.

    Sets the run options (``_batch`` is always True), the ``_build``
    element, the segment list, the statistics dictionary, the ``test``
    attributes and the validation function on the instance.
    """
    timer = self._timer
    if timer:
        timer.output("Inputting configurations")

    tree = ET.parse(file)
    root = tree.getroot()

    # Run options.
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self._output = root.get("output")
    self._batch = True

    self.__debugFile = root.find("debug")
    if self.__debugFile is not None:
        self.__debugFile = self.__debugFile.get("file")

    self._skip = root.find("skip")
    if self._skip is not None:
        self._skip = long(self._skip.get("number"))

    test = root.find("test")
    declarations = test[1:]  # everything after the 'build' element
    self._build = test[0]

    self._segments = []
    for declaration in declarations:
        self._makeSegments(declaration)

    # Statistics are kept per segment; () stands for "no segmentation".
    base = {}
    self._baseDict = base
    if self._segments:
        for segment in self._segments:
            base[CommonProducer.tupelize(segment)] = None
    else:
        base[()] = None

    # Remember the attributes of the test distribution.
    self._attrs = {}
    for name in test.keys():
        self._attrs[str(name)] = str(test.get(name))

    # Validation methodology.
    validation = root.find("validation")
    if validation is not None:
        validation_args = (validation.get('method'), validation.get('threshold'))
    else:
        validation_args = (None,)
    self.testValidation = CommonProducer.testValidatingFunctions(*validation_args)
def _to_html_elementtree(self, tblattr=None, method=str):
    """Build an ElementTree holding this table as an HTML ``<table>``.

    ``tblattr`` gives the attributes of the table element (default
    ``{'border': '1'}``).  ``method`` converts each key and each cell value
    to the text placed in the ``<th>`` / ``<td>`` elements.  The first row
    holds the keys; one further row is emitted per index in
    ``range(len(self))``.
    """
    if tblattr is None:
        tblattr = {'border': '1'}
    from augustus.external.etree import Element, SubElement, ElementTree

    table = Element('table', **tblattr)
    table.text = table.tail = '\n'

    # Header row: one <th> per key.
    header_row = SubElement(table, 'tr')
    header_row.tail = '\n'
    for key in self.keys():
        SubElement(header_row, 'th').text = method(key)

    # A column exported as None is expanded to a column of None values so
    # it can be indexed by row like the others.
    exported = [export_string(value) for value in self.values()]
    columns = [[column] * len(self) if column is None else column
               for column in exported]

    for index in range(len(self)):
        row = SubElement(table, 'tr')
        row.tail = '\n'
        for column in columns:
            SubElement(row, 'td').text = method(column[index])
    return ElementTree(table)
def _to_xml_elementtree(self, **kwargs):
    """Build an ElementTree holding this table as generic XML.

    Keyword arguments override the element names: ``tbl_element`` (default
    'table') for the root and ``row_element`` (default 'row') for each row.
    Every row contains one child per key, tagged with the key, whose text
    is ``str()`` of the cell value.
    """
    settings = dict(tbl_element='table', row_element='row')
    settings.update(kwargs)
    from augustus.external.etree import Element, SubElement, ElementTree

    table = Element(settings['tbl_element'])
    table.text = table.tail = '\n'

    # A column exported as None is expanded to a column of None values so
    # it can be indexed by row like the others.
    exported = [export_string(value) for value in self.values()]
    columns = [[column] * len(self) if column is None else column
               for column in exported]

    row_tag = settings['row_element']
    names = self.keys()
    for index in range(len(self)):
        row = SubElement(table, row_tag)
        row.tail = '\n'
        for name, column in zip(names, columns):
            SubElement(row, name).text = str(column[index])
    return ElementTree(table)
def __init__(self, filename):
    """Parse the XML file ``filename`` and keep its root element."""
    self.__root = ET.parse(filename).getroot()
# Name of the attribute that carries the test statistic.
testStatisticAttrib = 'testStatistic'
from augustus.kernel.unitable import *
from augustus.external.etree import ElementTree as ET
import sys
import os
import os.path
import datetime
import logging
from math import *
if __name__ == "__main__":
    # Log to standard output.
    s = logging.StreamHandler(sys.stdout)
    log = logging.Logger('root')
    log.addHandler(s)
    # The first command-line argument is the consumer configuration file.
    consumer_config = ET.parse(sys.argv[1])
    config_root = consumer_config.getroot()
    context = config_root.getchildren()
    # Walk the top-level elements to pick out the model file name
    # (inputModel/fromFile) and the scores file name (output/report/toFile).
    for config_element in context:
        if config_element.tag == 'inputModel':
            for _m in config_element.getchildren():
                if (_m.tag == 'fromFile'):
                    model = _m.attrib['name']
        if config_element.tag == 'output':
            for _m in config_element.getchildren():
                if (_m.tag == 'report'):
                    for _r in _m.getchildren():
                        if (_r.tag == 'toFile'):
                            outputScoresFile = _r.attrib['name']
    # NOTE(review): outputScoresFile is only bound if the configuration
    # contains output/report/toFile -- confirm that is always the case.
    consumer_output = ET.parse(outputScoresFile)
    # process pmml for expectations
from augustus.kernel.unitable import *
from augustus.external.etree import ElementTree as ET
import sys
import os
import os.path
import datetime
import logging
from math import *
if __name__ == "__main__":
    # Log to standard output.
    s = logging.StreamHandler(sys.stdout)
    log = logging.Logger('root')
    log.addHandler(s)
    # Determine files which were used for scoring.  The first command-line
    # argument is the consumer configuration file.
    consumer_config = ET.parse(sys.argv[1])
    config_root = consumer_config.getroot()
    context = config_root.getchildren()
    # The model file name comes from inputModel/fromFile and the scores
    # file name from output/report/toFile.
    for config_element in context:
        if config_element.tag == 'inputModel':
            for _m in config_element.getchildren():
                if (_m.tag == 'fromFile'):
                    model = _m.attrib['name']
        if config_element.tag == 'output':
            for _m in config_element.getchildren():
                if (_m.tag == 'report'):
                    for _r in _m.getchildren():
                        if (_r.tag == 'toFile'):
                            outputScoresFile = _r.attrib['name']
    # NOTE(review): outputScoresFile is only bound if the configuration
    # contains output/report/toFile -- confirm that is always the case.
    consumer_output = ET.parse(outputScoresFile)
def inputConfigs(self, file):
    """Read the producer configuration for a clustering model.

    Parses the XML in ``file``.  The first child of the root must be a
    ``clustering`` element carrying:

    * ``seed`` -- one of "explicit", "randomPoints" or "random";
    * ``numberSeed`` -- an integer, or "random" to draw 10 bytes from
      ``os.urandom``;
    * a ``haltConditions`` child containing any of ``convergence``,
      ``smallStep`` (float attribute ``epsilon``) and ``maxIterations``
      (integer attribute ``max``).  ``convergence`` or ``maxIterations``
      must be present.

    Raises ValueError for an unrecognized model tag and for every missing
    or malformed setting listed above.  On success the generic settings,
    the clustering settings, the segments, the statistics dictionary and
    the model attributes are stored on the instance.
    """
    if self.__timer:
        self.__timer.output("Inputting configurations")

    # Generic configuration.
    root = ET.parse(file).getroot()
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    self.__batch = root.find("batch") is not None

    # Model-specific configuration.
    model = root[0]
    if model.tag != "clustering":
        raise ValueError("Unable to determine what type of model you want produced. Element %s is not recognized." % (model.tag))

    self._data = model.find('data')
    segmentations = model.findall('segmentation')

    self.seed = model.get('seed')
    if self.seed not in ("explicit", "randomPoints", "random"):
        raise ValueError('The seed must be one of ["explicit", "randomPoints", "random"], not "%s".' % self.seed)

    try:
        self.numberSeed = int(model.get('numberSeed'))
    except (TypeError, ValueError):
        if model.get('numberSeed') != "random":
            raise ValueError("The random numberSeed must be specified as an integer or 'random' for a seed from os.urandom().")
        # Interpret 10 random bytes as a little-endian integer.  bytearray
        # yields integers on both Python 2 and Python 3.
        self.numberSeed = sum(
            byte << (8 * position)
            for position, byte in enumerate(bytearray(os.urandom(10))))

    haltConditions = model.find('haltConditions')
    if haltConditions is None:
        raise ValueError("The haltConditions must be specified.")

    self.halt_convergence = False
    self.halt_smallStep = None
    self.halt_maxIterations = None
    for condition in haltConditions:
        if condition.tag == 'convergence':
            self.halt_convergence = True
        elif condition.tag == 'smallStep':
            # A missing attribute gives None (TypeError) and a malformed
            # one gives ValueError; both get the same descriptive error.
            try:
                self.halt_smallStep = float(condition.get('epsilon'))
            except (TypeError, ValueError):
                raise ValueError("The minimum step size in smallStep must be specified in 'epsilon' as a floating-point number.")
        elif condition.tag == 'maxIterations':
            try:
                self.halt_maxIterations = int(condition.get('max'))
            except (TypeError, ValueError):
                raise ValueError("The maxIterations must be specified in 'max' as an integer.")
        else:
            raise ValueError('Unrecognized haltCondition "%s".' % condition.tag)

    # smallStep alone is not accepted as a halt condition.
    if not self.halt_convergence and self.halt_maxIterations is None:
        raise ValueError("At least one haltCondition must be specified.")

    self._modelType = pmmlClusteringModel

    # Produce segmentation.
    self._segments = []
    for segmentDeclarations in segmentations:
        self._makeSegments(segmentDeclarations)

    # Prepare for statistics gathering; () is the unsegmented key.
    self._baseDict = {(): None}
    if self._segments:
        for segment in self._segments:
            self._baseDict[Producer.tupelize(segment)] = None

    # Remember the attributes of the model; they will be included in the PMML.
    self._attrs = {}
    for key in model.keys():
        self._attrs[str(key)] = str(model.get(key))
def makeConfigs(config, dataFile, pmml, scores):
    """Write a consumer ("pmmlDeployment") configuration to ``config``.

    The configuration reads ``dataFile`` once as a UniTable, loads the model
    from ``pmml``, writes a report named "report" to ``scores`` with one
    "event" row per record (score, alert, distribution and segments
    columns), and logs to standard error.
    """
    deployment = ET.Element("pmmlDeployment")

    # Input data: read the file once.
    input_data = ET.SubElement(deployment, "inputData")
    ET.SubElement(input_data, "readOnce")
    data_source = ET.SubElement(input_data, "fromFile")
    data_source.set("name", str(dataFile))
    data_source.set("type", "UniTable")

    # Input model.
    input_model = ET.SubElement(deployment, "inputModel")
    ET.SubElement(input_model, "fromFile").set("name", str(pmml))

    # Output report and the layout of each output row.
    report = ET.SubElement(ET.SubElement(deployment, "output"), "report")
    report.set("name", "report")
    ET.SubElement(report, "toFile").set("name", str(scores))

    event_row = ET.SubElement(report, "outputRow")
    event_row.set("name", "event")
    for tag, label in (("score", "score"),
                       ("alert", "alert"),
                       ("ancillary", "distribution"),
                       ("segments", "segments")):
        ET.SubElement(event_row, tag).set("name", label)

    # Logging goes to standard error.
    log_section = ET.SubElement(deployment, "logging")
    ET.SubElement(log_section, "toStandardError")

    ET.ElementTree(deployment).write(config)
def makePMML(outFile):
    """Write a skeleton PMML 3.1 baseline model to ``outFile``.

    The data dictionary declares the continuous float field "volume" and
    the categorical string fields "fielddeviceid" and "locationtimestamp";
    all three are listed in the mining schema of an otherwise empty
    ``BaselineModel``.
    """
    # (name, optype, dataType) for every field, in document order.
    columns = (
        ("volume", "continuous", "float"),
        ("fielddeviceid", "categorical", "string"),
        ("locationtimestamp", "categorical", "string"),
    )

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")
    ET.SubElement(pmml, "Header").set("copyright", " ")

    dictionary = ET.SubElement(pmml, "DataDictionary")
    for name, optype, data_type in columns:
        entry = ET.SubElement(dictionary, "DataField")
        entry.set("name", name)
        entry.set("optype", optype)
        entry.set("dataType", data_type)

    baseline_model = ET.SubElement(pmml, "BaselineModel")
    baseline_model.set("functionName", "baseline")
    schema = ET.SubElement(baseline_model, "MiningSchema")
    for column in columns:
        ET.SubElement(schema, "MiningField").set("name", column[0])

    ET.ElementTree(pmml).write(outFile)
def makeSegment(inf, segmentation, field):
    """Append an ``explicitSegments`` declaration for ``field``.

    A new ``explicitSegments`` element (attribute "field" set to ``field``)
    is added under ``segmentation``, with one ``segment`` child per distinct
    value in ``inf[field]``; each child's "value" attribute is ``str()`` of
    that value.  The children follow the iteration order of the set of
    distinct values.
    """
    declaration = ET.SubElement(segmentation, "explicitSegments")
    declaration.set("field", field)
    for distinct in set(inf[field]):
        ET.SubElement(declaration, "segment").set("value", str(distinct))