def makePMML(outFile):
    """Write a skeleton PMML (version 3.1) document to outFile.

    The document contains a Header, a DataDictionary declaring the
    fields volume, fielddeviceid and locationtimestamp, and a
    BaselineModel whose MiningSchema lists those same three fields.
    outFile is passed unchanged to ElementTree.write().
    """
    # (name, optype, dataType) of every declared field, in document order.
    fieldSpecs = (
        ("volume", "continuous", "float"),
        ("fielddeviceid", "categorical", "string"),
        ("locationtimestamp", "categorical", "string"),
    )

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")
    ET.SubElement(pmml, "Header").set("copyright", " ")

    # Declare each field in the data dictionary.
    dictionary = ET.SubElement(pmml, "DataDictionary")
    for fieldName, optype, dataType in fieldSpecs:
        entry = ET.SubElement(dictionary, "DataField")
        entry.set("name", fieldName)
        entry.set("optype", optype)
        entry.set("dataType", dataType)

    # The model mines every declared field.
    model = ET.SubElement(pmml, "BaselineModel")
    model.set("functionName", "baseline")
    schema = ET.SubElement(model, "MiningSchema")
    for fieldName, _optype, _dataType in fieldSpecs:
        ET.SubElement(schema, "MiningField").set("name", fieldName)

    # Serialize the finished tree.
    ET.ElementTree(pmml).write(outFile)
def makeConfigs(config, dataFile, pmml, scores):
    """Write a pmmlDeployment configuration document to config.

    dataFile, pmml and scores are converted with str() and stored as
    the "name" attribute of, respectively, the input-data source, the
    input-model source and the report destination.  config is passed
    unchanged to ElementTree.write().
    """
    deployment = ET.Element("pmmlDeployment")

    # Input data: read once from a UniTable file.
    inputData = ET.SubElement(deployment, "inputData")
    ET.SubElement(inputData, "readOnce")
    dataSource = ET.SubElement(inputData, "fromFile")
    dataSource.set("name", str(dataFile))
    dataSource.set("type", "UniTable")

    # Input model: the PMML file.
    inputModel = ET.SubElement(deployment, "inputModel")
    ET.SubElement(inputModel, "fromFile").set("name", str(pmml))

    # Output: one report written to the scores file.
    outputNode = ET.SubElement(deployment, "output")
    reportNode = ET.SubElement(outputNode, "report")
    reportNode.set("name", "report")
    ET.SubElement(reportNode, "toFile").set("name", str(scores))

    # Each output row is named "event" and carries four columns,
    # given here as (element tag, name attribute).
    eventRow = ET.SubElement(reportNode, "outputRow")
    eventRow.set("name", "event")
    rowColumns = (
        ("score", "score"),
        ("alert", "alert"),
        ("ancillary", "distribution"),
        ("segments", "segments"),
    )
    for tag, columnName in rowColumns:
        ET.SubElement(eventRow, tag).set("name", columnName)

    # Logging goes to standard error.
    loggingNode = ET.SubElement(deployment, "logging")
    ET.SubElement(loggingNode, "toStandardError")

    # Serialize the finished tree.
    ET.ElementTree(deployment).write(config)
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write a model configuration for a weighted dDist threshold test.

    inFile is loaded as a UniTable and is also recorded as the baseline
    file.  inPMML and outPMML are stored (via str()) as the "input" and
    "output" attributes of the root "model" element.  outFile is passed
    unchanged to ElementTree.write().
    """
    # Load the data file; the table is handed to makeSegment below.
    table = uni.UniTable().fromfile(inFile)

    # Root of the configuration document.
    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    # Test definition.  Note that for the dDist test the threshold is
    # really a 'ceiling'.
    testNode = ET.SubElement(model, "test")
    testSettings = (
        ("field", "Automaker"),
        ("weightField", "Count"),
        ("testStatistic", "dDist"),
        ("testType", "threshold"),
        ("windowSize", "200"),
        ("threshold", "0.15"),
    )
    for attribute, value in testSettings:
        testNode.set(attribute, value)

    # Use a discrete distribution model as the baseline for the test.
    baselineNode = ET.SubElement(testNode, "baseline")
    baselineNode.set("dist", "discrete")
    baselineNode.set("file", str(inFile))
    baselineNode.set("type", "UniTable")

    # Segmentation declaration for the single segmenting field, Color.
    segmentationNode = ET.SubElement(testNode, "segmentation")
    makeSegment(table, segmentationNode, "Color")

    # Serialize the finished tree.
    ET.ElementTree(model).write(outFile)
def makePMML(outFile):
    """Write a skeleton PMML (version 3.1) document to outFile.

    The Header carries an Application element identifying the example.
    The DataDictionary declares Automaker, Color and Count, and the
    BaselineModel's MiningSchema lists the same three fields.  outFile
    is passed unchanged to ElementTree.write().
    """
    # (name, optype, dataType) of every declared field, in document order:
    #   Automaker - the test field
    #   Color     - the field that defines segments
    #   Count     - the field used for weighting
    # Date is unused in this example and is therefore not declared.
    fieldSpecs = (
        ("Automaker", "categorical", "string"),
        ("Color", "categorical", "string"),
        ("Count", "continuous", "float"),
    )

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")

    headerNode = ET.SubElement(pmml, "Header")
    headerNode.set("copyright", " ")
    application = ET.SubElement(headerNode, "Application")
    application.set("name", "auto weighted non-batch example")
    application.set("version", "v0.0.0")

    # Declare each field in the data dictionary.
    dictionary = ET.SubElement(pmml, "DataDictionary")
    for fieldName, optype, dataType in fieldSpecs:
        entry = ET.SubElement(dictionary, "DataField")
        entry.set("name", fieldName)
        entry.set("optype", optype)
        entry.set("dataType", dataType)

    # Mining 3 fields: test field, segmentation and weighting.
    model = ET.SubElement(pmml, "BaselineModel")
    model.set("functionName", "baseline")
    schema = ET.SubElement(model, "MiningSchema")
    for fieldName, _optype, _dataType in fieldSpecs:
        ET.SubElement(schema, "MiningField").set("name", fieldName)

    # Serialize the finished tree.
    ET.ElementTree(pmml).write(outFile)
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write a model configuration for a zValue threshold test on volume.

    inFile is loaded as a UniTable and is also recorded as the baseline
    file.  inPMML and outPMML are stored (via str()) as the "input" and
    "output" attributes of the root "model" element.  outFile is passed
    unchanged to ElementTree.write().
    """
    # Load the data file; the table is handed to makeSegment below.
    table = uni.UniTable().fromfile(inFile)

    # Root of the configuration document.
    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    # Test definition: threshold test on the zValue of "volume".
    testNode = ET.SubElement(model, "test")
    testSettings = (
        ("field", "volume"),
        ("testStatistic", "zValue"),
        ("testType", "threshold"),
        ("threshold", "1.5"),
    )
    for attribute, value in testSettings:
        testNode.set(attribute, value)

    # Gaussian baseline taken from the same data file.
    baselineNode = ET.SubElement(testNode, "baseline")
    baselineNode.set("dist", "gaussian")
    baselineNode.set("file", str(inFile))
    baselineNode.set("type", "UniTable")

    # Segmentation declarations for the two segmenting fields.
    segmentationNode = ET.SubElement(testNode, "segmentation")
    for segmentField in ("fielddeviceid", "locationtimestamp"):
        makeSegment(table, segmentationNode, segmentField)

    # Serialize the finished tree.
    ET.ElementTree(model).write(outFile)
def _to_xml_elementtree(self, **kwargs):
    """Return an ElementTree holding this table as XML.

    The root element has one child per row; each row has one child per
    key, tagged with that key, whose text is str() of the cell value.
    Keyword arguments 'tbl_element' and 'row_element' override the tag
    names of the root and row elements (defaults 'table' and 'row').
    """
    options = dict(tbl_element='table', row_element='row')
    options.update(kwargs)
    from augustus.external.etree import Element, SubElement, ElementTree

    table = Element(options['tbl_element'])
    table.text = table.tail = '\n'

    # Export every column first; a column that exports as None is then
    # expanded to a list of None, one entry per row.
    exported = [export_string(column) for column in self.values()]
    columns = [
        [None] * len(self) if column is None else column
        for column in exported
    ]

    rowTag = options['row_element']
    tags = self.keys()
    for index in range(len(self)):
        row = SubElement(table, rowTag)
        row.tail = '\n'
        for tag, column in zip(tags, columns):
            cell = SubElement(row, tag)
            cell.text = str(column[index])
    return ElementTree(table)
def _to_html_elementtree(self, tblattr=None, method=str):
    """Return an ElementTree holding this table as an HTML <table>.

    tblattr supplies the attributes of the <table> element and defaults
    to {'border': '1'}.  method is applied to every key and every cell
    value to produce the text of the <th> and <td> elements.
    """
    if tblattr is None:
        tblattr = {'border': '1'}
    from augustus.external.etree import Element, SubElement, ElementTree

    table = Element('table', **tblattr)
    table.text = table.tail = '\n'

    # Heading row: one <th> per key.
    headerRow = SubElement(table, 'tr')
    headerRow.tail = '\n'
    for key in self.keys():
        headerCell = SubElement(headerRow, 'th')
        headerCell.text = method(key)

    # Export every column first; a column that exports as None is then
    # expanded to a list of None, one entry per row.
    exported = [export_string(column) for column in self.values()]
    columns = [
        [None] * len(self) if column is None else column
        for column in exported
    ]

    # Data rows: one <tr> per row, one <td> per column.
    for index in range(len(self)):
        row = SubElement(table, 'tr')
        row.tail = '\n'
        for column in columns:
            cell = SubElement(row, 'td')
            cell.text = method(column[index])
    return ElementTree(table)
# in the data was noted. continue event = ET.SubElement(events, 'Event') event.attrib['score'] = _score _stamp = ET.SubElement(event, "EventTime") _stamp.text = datetime.datetime.now().__str__() _segment = ET.SubElement(event, "Segment") i = 0 for f in order[segSchema]: _segment.attrib[f] = s[i] i += 1 try: target = tests[tuple(s)] except: msg = 'Could not find segment for target' log.warning(msg + os.linesep) target = "" break _expected = ET.SubElement(event, "Expected") for d in target.getchildren()[0].getchildren(): if (score_type == 'zTest'): _expected.attrib['mean'] = d.attrib['mean'] _observed = ET.SubElement(event, "Observed") try: _observed.attrib['Count'] = observed[s] except: _observed.attrib['Count'] = 'Unknown' # Tie it all back together. tree = ET.ElementTree(output) tree.write(outputScoresFile + ".Report")