Exemple #1
0
def makePMML(outFile):
    """Write a skeleton PMML 3.1 document containing an empty BaselineModel.

    The document declares three data fields (``volume``, ``fielddeviceid``
    and ``locationtimestamp``) in its DataDictionary and lists the same three
    names as MiningFields of the BaselineModel's MiningSchema.

    outFile -- destination handed unchanged to ``ElementTree.write``.
    """
    # (name, optype, dataType) of every DataField, in document order.
    field_specs = (
        ("volume", "continuous", "float"),
        ("fielddeviceid", "categorical", "string"),
        ("locationtimestamp", "categorical", "string"),
    )

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")
    ET.SubElement(pmml, "Header").set("copyright", " ")

    # Data dictionary: one DataField per spec.
    dictionary = ET.SubElement(pmml, "DataDictionary")
    for name, optype, data_type in field_specs:
        entry = ET.SubElement(dictionary, "DataField")
        entry.set("name", name)
        entry.set("optype", optype)
        entry.set("dataType", data_type)

    # Baseline model whose mining schema references every declared field.
    model = ET.SubElement(pmml, "BaselineModel")
    model.set("functionName", "baseline")
    schema = ET.SubElement(model, "MiningSchema")
    for name, _optype, _data_type in field_specs:
        ET.SubElement(schema, "MiningField").set("name", name)

    # Serialize the finished tree.
    ET.ElementTree(pmml).write(outFile)
Exemple #2
0
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write a configuration document describing a dDist threshold test.

    inFile  -- data file; loaded through ``uni.UniTable().fromfile`` and also
               recorded as the baseline file.
    outFile -- destination handed unchanged to ``ElementTree.write``.
    inPMML  -- recorded (stringified) as the ``input`` attribute of <model>.
    outPMML -- recorded (stringified) as the ``output`` attribute of <model>.
    """
    # Load the data first; its values drive the segment declarations below.
    table = uni.UniTable().fromfile(inFile)

    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    # Test definition. For the dDist test the threshold is really a 'ceiling'.
    test_settings = (
        ("field", "Automaker"),
        ("weightField", "Count"),
        ("testStatistic", "dDist"),
        ("testType", "threshold"),
        ("windowSize", "200"),
        ("threshold", "0.15"),
    )
    test_node = ET.SubElement(model, "test")
    for key, value in test_settings:
        test_node.set(key, value)

    # The baseline is a discrete distribution taken from the same data file.
    baseline_node = ET.SubElement(test_node, "baseline")
    baseline_node.set("dist", "discrete")
    baseline_node.set("file", str(inFile))
    baseline_node.set("type", "UniTable")

    # Segment declarations for the single segmenting field, "Color".
    segmentation_node = ET.SubElement(test_node, "segmentation")
    makeSegment(table, segmentation_node, "Color")

    # Serialize the finished configuration.
    ET.ElementTree(model).write(outFile)
Exemple #3
0
def makePMML(outFile):
    """Write a skeleton PMML 3.1 document containing an empty BaselineModel.

    Three fields are declared in the DataDictionary and listed again as
    MiningFields: ``Automaker`` (the test field), ``Color`` (the field that
    defines segments) and ``Count`` (the field used for weighting).  A
    ``Date`` field is unused in this example and is not emitted.

    outFile -- destination handed unchanged to ``ElementTree.write``.
    """
    # (name, optype, dataType) of every DataField, in document order.
    field_specs = (
        ("Automaker", "categorical", "string"),  # test field
        ("Color", "categorical", "string"),      # defines segments
        ("Count", "continuous", "float"),        # weighting
    )

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")
    ET.SubElement(pmml, "Header").set("copyright", " ")

    # Data dictionary: one DataField per spec.
    dictionary = ET.SubElement(pmml, "DataDictionary")
    for name, optype, data_type in field_specs:
        entry = ET.SubElement(dictionary, "DataField")
        entry.set("name", name)
        entry.set("optype", optype)
        entry.set("dataType", data_type)

    # Baseline model mining the test, segmentation and weighting fields.
    model = ET.SubElement(pmml, "BaselineModel")
    model.set("functionName", "baseline")
    schema = ET.SubElement(model, "MiningSchema")
    for name, _optype, _data_type in field_specs:
        ET.SubElement(schema, "MiningField").set("name", name)

    # Serialize the finished tree.
    ET.ElementTree(pmml).write(outFile)
Exemple #4
0
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write a configuration document describing a zValue threshold test.

    inFile  -- data file; loaded through ``uni.UniTable().fromfile`` and also
               recorded as the baseline file.
    outFile -- destination handed unchanged to ``ElementTree.write``.
    inPMML  -- recorded (stringified) as the ``input`` attribute of <model>.
    outPMML -- recorded (stringified) as the ``output`` attribute of <model>.
    """
    # Load the data first; its values drive the segment declarations below.
    table = uni.UniTable().fromfile(inFile)

    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    # Test definition: zValue statistic on "volume" with a 1.5 threshold.
    test_settings = (
        ("field", "volume"),
        ("testStatistic", "zValue"),
        ("testType", "threshold"),
        ("threshold", "1.5"),
    )
    test_node = ET.SubElement(model, "test")
    for key, value in test_settings:
        test_node.set(key, value)

    # The baseline is a gaussian distribution taken from the same data file.
    baseline_node = ET.SubElement(test_node, "baseline")
    baseline_node.set("dist", "gaussian")
    baseline_node.set("file", str(inFile))
    baseline_node.set("type", "UniTable")

    # Segment declarations for the two segmenting fields, in this order.
    segmentation_node = ET.SubElement(test_node, "segmentation")
    for segment_field in ("fielddeviceid", "locationtimestamp"):
        makeSegment(table, segmentation_node, segment_field)

    # Serialize the finished configuration.
    ET.ElementTree(model).write(outFile)
Exemple #5
0
                foundsegment = {}
                if (_sg.tag == modelSegmentTag):
                    score_type = _sg.attrib[testStatisticAttrib]
                    for seg in _sg.getchildren()[1].getchildren():
                        foundsegment[seg.attrib['field']] = seg.attrib['value']
                    fs = foundsegment.keys()
                    fs.sort()
                    if (len(fs) > 0):
                        for _o in order[tuple(fs)]:
                            _next.append(foundsegment[_o])
                    if (len(_next) > 0):
                        tests[tuple(_next)] = _sg

    # Fill out xml structure for report.
    output = ET.Element("Report")
    head = ET.SubElement(output, "head")
    _stamp = ET.SubElement(head, "ProcessingTime")
    _stamp.text = datetime.datetime.now().__str__()
    _model = ET.SubElement(head, "Model")
    _model.text = model
    _config = ET.SubElement(head, "Config")
    _config.text = sys.argv[1]
    _version = ET.SubElement(head, "Version")
    _version.text = version
    events = ET.SubElement(output, "events")
    for segSchema in segments.keys():
        if len(segSchema) == 0:
            log.info(
                "INFO: No evident segmentation scheme. Assume unsegmented scoring"
            )
Exemple #6
0
def makeConfigs(config, dataFile, pmml, scores):
    """Write a ``pmmlDeployment`` configuration document.

    config   -- destination handed unchanged to ``ElementTree.write``.
    dataFile -- recorded (stringified) as the UniTable input data file.
    pmml     -- recorded (stringified) as the input model file.
    scores   -- recorded (stringified) as the file the report is written to.
    """
    deployment = ET.Element("pmmlDeployment")

    # Input data: read once from a UniTable file.
    input_data = ET.SubElement(deployment, "inputData")
    ET.SubElement(input_data, "readOnce")
    data_source = ET.SubElement(input_data, "fromFile")
    data_source.set("name", str(dataFile))
    data_source.set("type", "UniTable")

    # Input model: read from the PMML file.
    input_model = ET.SubElement(deployment, "inputModel")
    ET.SubElement(input_model, "fromFile").set("name", str(pmml))

    # Output: a single report written to the scores file.
    output_node = ET.SubElement(deployment, "output")
    report_node = ET.SubElement(output_node, "report")
    report_node.set("name", "report")
    ET.SubElement(report_node, "toFile").set("name", str(scores))

    # Each output row is named "event"; (tag, name) of its columns in order.
    row_node = ET.SubElement(report_node, "outputRow")
    row_node.set("name", "event")
    column_specs = (
        ("score", "score"),
        ("alert", "alert"),
        ("ancillary", "distribution"),
        ("segments", "segments"),
    )
    for tag, column_name in column_specs:
        ET.SubElement(row_node, tag).set("name", column_name)

    # Logging is directed to standard error.
    logging_node = ET.SubElement(deployment, "logging")
    ET.SubElement(logging_node, "toStandardError")

    # Serialize the finished configuration.
    ET.ElementTree(deployment).write(config)
Exemple #7
0
def makeSegment(inf, segmentation, field):
    """Append an ``explicitSegments`` declaration for ``field``.

    One ``<segment value="...">`` child is created for every distinct value
    found in ``inf[field]``.  Segments are emitted in order of first
    appearance in the data, so the generated document is reproducible from
    run to run (iterating a plain ``set`` of strings is not, because string
    hashing may be randomized per process).

    inf          -- table-like object; ``inf[field]`` must be iterable and
                    yield hashable values.
    segmentation -- parent XML element that receives the new
                    ``explicitSegments`` child.
    field        -- name of the column to segment on; also stored as the
                    ``field`` attribute of the new element.
    """
    segments = ET.SubElement(segmentation, "explicitSegments")
    segments.set("field", field)

    # De-duplicate while keeping first-seen order.
    seen = set()
    for value in inf[field]:
        if value in seen:
            continue
        seen.add(value)
        segment = ET.SubElement(segments, "segment")
        segment.set("value", str(value))
Exemple #8
0
                if (_sg.tag == modelSegmentTag):
                    score_type = _sg.attrib[testStatisticAttrib]
                    for seg in _sg.getchildren()[1].getchildren():
                        foundsegment[seg.attrib['field']] = seg.attrib['value']
                    fs = foundsegment.keys()
                    fs.sort()
                    if (len(fs) > 0):
                        for _o in order[tuple(fs)]:
                            _next.append(foundsegment[_o])
                    if (len(_next) > 0):
                        tests[tuple(_next)] = _sg

    # Create output XML structure

    output = ET.Element("Report")
    head = ET.SubElement(output, "head")
    _stamp = ET.SubElement(head, "ProcessingTime")
    _stamp.text = datetime.datetime.now().__str__()
    _model = ET.SubElement(head, "Model")
    _model.text = model
    _config = ET.SubElement(head, "Config")
    _config.text = sys.argv[1]
    _version = ET.SubElement(head, "Version")
    _version.text = version

    events = ET.SubElement(output, "AlertingSegments")
    for segSchema in segments.keys():
        for segcounts in segments[segSchema]:
            s = tuple(segcounts)
            try:
                _score = scores[s]