def makeConfigs(config, dataFile, pmml, scores):
    """Build the ``pmmlDeployment`` configuration and write it out as XML.

    Args:
        config: Destination handed to ``ElementTree.write``.
        dataFile: Recorded (via ``str``) as the ``name`` of the input data
            source, which is declared with type ``UniTable``.
        pmml: Recorded (via ``str``) as the ``name`` of the input model file.
        scores: Recorded (via ``str``) as the ``name`` of the report's
            output file.
    """
    deployment = ET.Element("pmmlDeployment")

    # <inputData>: a read-once source loaded from the data file.
    input_data = ET.SubElement(deployment, "inputData")
    ET.SubElement(input_data, "readOnce")
    data_source = ET.SubElement(input_data, "fromFile")
    data_source.set("name", str(dataFile))
    data_source.set("type", "UniTable")

    # <inputModel>: the PMML model file.
    input_model = ET.SubElement(deployment, "inputModel")
    ET.SubElement(input_model, "fromFile").set("name", str(pmml))

    # <output>: a single report written to the scores file.
    output_node = ET.SubElement(deployment, "output")
    report_node = ET.SubElement(output_node, "report")
    report_node.set("name", "report")
    ET.SubElement(report_node, "toFile").set("name", str(scores))

    # Each output row is named "event" and carries these columns, in order.
    event_row = ET.SubElement(report_node, "outputRow")
    event_row.set("name", "event")
    row_columns = (
        ("score", "score"),
        ("alert", "alert"),
        ("ancillary", "distribution"),
        ("segments", "segments"),
    )
    for column_tag, column_name in row_columns:
        ET.SubElement(event_row, column_tag).set("name", column_name)

    # <logging>: a single toStandardError child.
    log_node = ET.SubElement(deployment, "logging")
    ET.SubElement(log_node, "toStandardError")

    # Serialize the finished document.
    ET.ElementTree(deployment).write(config)
def makePMML(outFile):
    """Write a skeleton PMML 3.1 baseline model for the volume data.

    The document declares three fields (``volume``, ``fielddeviceid`` and
    ``locationtimestamp``) in the data dictionary and lists the same three
    in the mining schema of an otherwise empty ``BaselineModel``.

    Args:
        outFile: Destination handed to ``ElementTree.write``.
    """
    # (name, optype, dataType) for every declared field, in document order.
    field_specs = (
        ("volume", "continuous", "float"),
        ("fielddeviceid", "categorical", "string"),
        ("locationtimestamp", "categorical", "string"),
    )

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")
    ET.SubElement(pmml, "Header").set("copyright", " ")

    # Data dictionary: one DataField per declared field.
    dictionary = ET.SubElement(pmml, "DataDictionary")
    for field_name, optype, data_type in field_specs:
        entry = ET.SubElement(dictionary, "DataField")
        entry.set("name", field_name)
        entry.set("optype", optype)
        entry.set("dataType", data_type)

    # Baseline model whose mining schema references each declared field.
    model = ET.SubElement(pmml, "BaselineModel")
    model.set("functionName", "baseline")
    schema = ET.SubElement(model, "MiningSchema")
    for field_name, _optype, _data_type in field_specs:
        ET.SubElement(schema, "MiningField").set("name", field_name)

    # Serialize the finished document.
    ET.ElementTree(pmml).write(outFile)
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write the model configuration for the weighted ``dDist`` test.

    Args:
        inFile: Data file; loaded with ``uni.UniTable().fromfile`` and also
            recorded (via ``str``) as the baseline ``file`` attribute.
        outFile: Destination handed to ``ElementTree.write``.
        inPMML: Recorded (via ``str``) as the ``input`` attribute of the
            root ``model`` element.
        outPMML: Recorded (via ``str``) as the ``output`` attribute of the
            root ``model`` element.
    """
    # Load the data up front; it is passed to makeSegment below.
    table = uni.UniTable().fromfile(inFile)

    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    # Test declaration. Note that for the dDist test, "threshold" is really
    # a 'ceiling'.
    test_settings = (
        ("field", "Automaker"),
        ("weightField", "Count"),
        ("testStatistic", "dDist"),
        ("testType", "threshold"),
        ("windowSize", "200"),
        ("threshold", "0.15"),
    )
    test_node = ET.SubElement(model, "test")
    for attr_name, attr_value in test_settings:
        test_node.set(attr_name, attr_value)

    # The baseline uses a discrete distribution read from the same data file.
    baseline_node = ET.SubElement(test_node, "baseline")
    baseline_node.set("dist", "discrete")
    baseline_node.set("file", str(inFile))
    baseline_node.set("type", "UniTable")

    # Segmentation declaration for the single segmenting field, "Color".
    segmentation_node = ET.SubElement(test_node, "segmentation")
    makeSegment(table, segmentation_node, "Color")

    # Serialize the finished configuration.
    ET.ElementTree(model).write(outFile)
def makePMML(outFile):
    """Write a skeleton PMML 3.1 baseline model for the automaker example.

    Three fields are declared in the data dictionary and listed in the
    mining schema: ``Automaker`` (the test field), ``Color`` (the field that
    defines segments) and ``Count`` (the field used for weighting). ``Date``
    is unused in this example and is deliberately not declared.

    Args:
        outFile: Destination handed to ``ElementTree.write``.
    """
    # (name, optype, dataType) for every declared field, in document order.
    field_specs = (
        ("Automaker", "categorical", "string"),  # test field
        ("Color", "categorical", "string"),      # defines segments
        ("Count", "continuous", "float"),        # weighting field
    )

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")

    # Header with the application name and version.
    header = ET.SubElement(pmml, "Header")
    header.set("copyright", " ")
    application = ET.SubElement(header, "Application")
    application.set("name", "auto weighted non-batch example")
    application.set("version", "v0.0.0")

    # Data dictionary: one DataField per declared field.
    dictionary = ET.SubElement(pmml, "DataDictionary")
    for field_name, optype, data_type in field_specs:
        entry = ET.SubElement(dictionary, "DataField")
        entry.set("name", field_name)
        entry.set("optype", optype)
        entry.set("dataType", data_type)

    # Baseline model mining the same three fields.
    model = ET.SubElement(pmml, "BaselineModel")
    model.set("functionName", "baseline")
    schema = ET.SubElement(model, "MiningSchema")
    for field_name, _optype, _data_type in field_specs:
        ET.SubElement(schema, "MiningField").set("name", field_name)

    # Serialize the finished document.
    ET.ElementTree(pmml).write(outFile)
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write the model configuration for the ``zValue`` threshold test.

    Args:
        inFile: Data file; loaded with ``uni.UniTable().fromfile`` and also
            recorded (via ``str``) as the baseline ``file`` attribute.
        outFile: Destination handed to ``ElementTree.write``.
        inPMML: Recorded (via ``str``) as the ``input`` attribute of the
            root ``model`` element.
        outPMML: Recorded (via ``str``) as the ``output`` attribute of the
            root ``model`` element.
    """
    # Load the data up front; it is passed to makeSegment below.
    table = uni.UniTable().fromfile(inFile)

    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    # Test declaration: zValue statistic on "volume", thresholded at 1.5.
    test_settings = (
        ("field", "volume"),
        ("testStatistic", "zValue"),
        ("testType", "threshold"),
        ("threshold", "1.5"),
    )
    test_node = ET.SubElement(model, "test")
    for attr_name, attr_value in test_settings:
        test_node.set(attr_name, attr_value)

    # The baseline uses a gaussian distribution read from the same data file.
    baseline_node = ET.SubElement(test_node, "baseline")
    baseline_node.set("dist", "gaussian")
    baseline_node.set("file", str(inFile))
    baseline_node.set("type", "UniTable")

    # Segmentation declarations for the two segmenting fields, in order.
    segmentation_node = ET.SubElement(test_node, "segmentation")
    for segment_field in ("fielddeviceid", "locationtimestamp"):
        makeSegment(table, segmentation_node, segment_field)

    # Serialize the finished configuration.
    ET.ElementTree(model).write(outFile)
_next = [] foundsegment = {} if (_sg.tag == modelSegmentTag): score_type = _sg.attrib[testStatisticAttrib] for seg in _sg.getchildren()[1].getchildren(): foundsegment[seg.attrib['field']] = seg.attrib['value'] fs = foundsegment.keys() fs.sort() if (len(fs) > 0): for _o in order[tuple(fs)]: _next.append(foundsegment[_o]) if (len(_next) > 0): tests[tuple(_next)] = _sg # Fill out xml structure for report. output = ET.Element("Report") head = ET.SubElement(output, "head") _stamp = ET.SubElement(head, "ProcessingTime") _stamp.text = datetime.datetime.now().__str__() _model = ET.SubElement(head, "Model") _model.text = model _config = ET.SubElement(head, "Config") _config.text = sys.argv[1] _version = ET.SubElement(head, "Version") _version.text = version events = ET.SubElement(output, "events") for segSchema in segments.keys(): if len(segSchema) == 0: log.info( "INFO: No evident segmentation scheme. Assume unsegmented scoring" )