Exemple #1
0
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write a producer configuration for a discrete-distribution test.

    The data file is loaded so that its distinct "Color" values can be
    listed as explicit segments; the resulting XML tree is written to
    outFile.

    inFile  -- data file, read as a UniTable and named as the baseline file
    outFile -- destination handed to ElementTree.write
    inPMML  -- value stored in the root "input" attribute
    outPMML -- value stored in the root "output" attribute
    """
    # Load the data up front; it drives the segment declarations below.
    table = uni.UniTable().fromfile(inFile)

    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    testSettings = (
        ("field", "Automaker"),
        ("weightField", "Count"),
        ("testStatistic", "dDist"),
        ("testType", "threshold"),
        ("windowSize", "200"),
        # note that for dDist test, threshold is really a 'ceiling'
        ("threshold", "0.15"),
    )
    testNode = ET.SubElement(model, "test")
    for attribute, value in testSettings:
        testNode.set(attribute, value)

    # use a discrete distribution model for test
    baselineNode = ET.SubElement(testNode, "baseline")
    for attribute, value in (("dist", "discrete"),
                             ("file", str(inFile)),
                             ("type", "UniTable")):
        baselineNode.set(attribute, value)

    # Segment declarations are generated from the values seen in the data.
    segmentationNode = ET.SubElement(testNode, "segmentation")
    makeSegment(table, segmentationNode, "Color")

    ET.ElementTree(model).write(outFile)
def getInstructions(configFile):
  """Collect the pre/post-processing instructions listed in a config file.

  Returns a dict that always holds 'preprocessing' and 'postprocessing'
  lists (the text of each <Instruction> element, empty when none are
  given).  The key 'consumer' is set to True when the <Processing>
  element is absent altogether or when it contains a <Consumer> child.
  """
  result = {'preprocessing':[],'postprocessing':[]}
  processing = ET.parse(configFile).getroot().find('Processing')

  # Without a <Processing> section only the consumer is flagged.
  if processing is None:
    result['consumer'] = True
    return result

  if processing.find('Consumer') is not None:
    result['consumer'] = True

  for key, tag in (('postprocessing','Postprocess'),('preprocessing','Preprocess')):
    section = processing.find(tag)
    if section is None:
      continue
    steps = [step.text for step in section.findall('Instruction')]
    if steps:
      result[key] = steps

  return result
Exemple #3
0
def getInstructions(configFile):
    """Collect the pre/post-processing instructions listed in a config file.

    Returns a dict that always holds 'preprocessing' and 'postprocessing'
    lists (the text of each <Instruction> element, empty when none are
    given).  The key 'consumer' is set to True when the <Processing>
    element is absent altogether or when it contains a <Consumer> child.
    """
    result = {'preprocessing': [], 'postprocessing': []}
    processing = ET.parse(configFile).getroot().find('Processing')

    # Without a <Processing> section only the consumer is flagged.
    if processing is None:
        result['consumer'] = True
        return result

    if processing.find('Consumer') is not None:
        result['consumer'] = True

    sections = (('postprocessing', 'Postprocess'),
                ('preprocessing', 'Preprocess'))
    for key, tag in sections:
        section = processing.find(tag)
        if section is None:
            continue
        steps = [step.text for step in section.findall('Instruction')]
        if steps:
            result[key] = steps

    return result
Exemple #4
0
    def inputConfigs(self, file):
        """TODO: Very much refactor this."""
        if self.__timer:
            self.__timer.output("Inputting configurations")
        #input generic configurations
        tree = ET.parse(file)
        root = tree.getroot()
        self.__mode = root.get("mode")
        self.__input = root.get("input")
        self.__output = root.get("output")
        self.__batch = root.find("batch")
        if self.__batch is not None:
            self.__batch = True
        else:
            self.__batch = False
        self.__debugFile = root.find("debug")
        if self.__debugFile is not None:
            self.__debugFile = self.__debugFile.get("file")
        self.__skip = root.find("skip")
        if self.__skip is not None:
            self.__skip = long(self.__skip.get("number"))
        test = root.find("test")

        #input baseline and alternate distributions
        self._build = test[0]
        validation = root.find("validation")
        if validation is not None:
            #Decide on method of validating tests.
            validmethod = validation.get('method')
            validthreshold = validation.get('threshold')
            self.testValidation = Producer.testValidatingFunctions(
                validmethod, validthreshold)
        else:
            self.testValidation = Producer.testValidatingFunctions(None)
        start = 1
        if len(test) > 1:
            next = test[1]
            start = 2
            #TODO: Naive Bayes shouldn't have alternate.
            if next.tag != "alternate":
                start = 1
            else:
                self._alternate = next

        #produce segmentation
        self._segments = []
        for segmentDeclarations in test[start:]:
            self._makeSegments(segmentDeclarations)

        #prepare for statistics gathering
        self._baseDict = {(): None}
        if self._segments:
            for segment in self._segments:
                self._baseDict[Producer.tupelize(segment)] = None

        #remember the attributes of the test distribution
        self._attrs = {}
        for key in test.keys():
            self._attrs[str(key)] = str(test.get(key))
Exemple #5
0
    def inputConfigs(self, file):
        """Load generic and ruleset-specific settings from an XML file.

        The first child of the root element selects the model; only a
        <rules> element is accepted, anything else raises ValueError.
        peel_alpha and paste_alpha fall back to 0.02 when the attribute
        is absent.

        file -- passed straight to ET.parse.

        TODO: Very much refactor this. (Mostly done.)  Only handle the data
        and pmml input here and handle the model specific stuff later in
        makeTests or whatever I rename that to.
        """
        if self.__timer:
            self.__timer.output("Inputting configurations")

        # Generic settings from the root element.
        root = ET.parse(file).getroot()
        self.__mode = root.get("mode")
        self.__input = root.get("input")
        self.__output = root.get("output")
        self.__batch = root.find("batch") is not None

        # Model specific stuff
        model = root.getchildren()[0]
        if model.tag != "rules":
            raise ValueError(
                "Unable to determine what type of model you want produced. Element %s is not recognized."
                % (model.tag))

        # Get ruleset model config values from the xml
        self._data = model.find('data')
        segmentations = model.findall('segmentation')
        self.__nboxes = int(model.get('nboxes'))

        peelAlpha = model.get('peel_alpha')
        self.__peel_alpha = 0.02 if peelAlpha is None else float(peelAlpha)

        pasteAlpha = model.get('paste_alpha')
        self.__paste_alpha = 0.02 if pasteAlpha is None else float(pasteAlpha)

        self.__weight_field = model.get('weight_field')
        self._modelType = pmmlRuleSetModel

        # Expand every segmentation declaration into self._segments.
        self._segments = []
        for declaration in segmentations:
            self._makeSegments(declaration)

        # One placeholder per segment plus the unsegmented () entry.
        self._baseDict = {(): None}
        for segment in (self._segments or ()):
            self._baseDict[Producer.tupelize(segment)] = None

        # The model attributes are remembered so they can go into the PMML.
        self._attrs = {}
        for name, value in model.items():
            self._attrs[str(name)] = str(value)
Exemple #6
0
 def inputConfigs(self, file):
   """TODO: Very much refactor this."""
   if self.__timer:
     self.__timer.output("Inputting configurations")
   #input generic configurations
   tree = ET.parse(file)
   root = tree.getroot()
   self.__mode = root.get("mode")
   self.__input = root.get("input")
   self.__output = root.get("output")
   self.__batch = root.find("batch")
   if self.__batch is not None:
     self.__batch = True
   else:
     self.__batch = False
   self.__debugFile = root.find("debug")
   if self.__debugFile is not None:
     self.__debugFile = self.__debugFile.get("file")
   self.__skip = root.find("skip")
   if self.__skip is not None:
     self.__skip = long(self.__skip.get("number"))
   test = root.find("test")
   
   #input baseline and alternate distributions
   self._build = test[0]
   validation = root.find("validation")
   if validation is not None:
     #Decide on method of validating tests.
     validmethod = validation.get('method')
     validthreshold = validation.get('threshold')
     self.testValidation = Producer.testValidatingFunctions(validmethod,validthreshold)
   else:
     self.testValidation = Producer.testValidatingFunctions(None)
   start = 1
   if len(test) > 1:
     next = test[1]
     start = 2
     #TODO: Naive Bayes shouldn't have alternate.
     if next.tag != "alternate":
       start = 1
     else:
       self._alternate = next
   
   #produce segmentation
   self._segments = []
   for segmentDeclarations in test[start:]:
     self._makeSegments(segmentDeclarations)
   
   #prepare for statistics gathering
   self._baseDict = {():None}
   if self._segments:
     for segment in self._segments:
       self._baseDict[Producer.tupelize(segment)] = None
   
   #remember the attributes of the test distribution
   self._attrs = {}
   for key in test.keys():
     self._attrs[str(key)] = str(test.get(key))
Exemple #7
0
 def inputConfigs(self, file):
   """Load generic and ruleset-specific settings from an XML file.

   The first child of the root element selects the model; only a
   <rules> element is accepted, anything else raises ValueError.
   peel_alpha and paste_alpha fall back to 0.02 when the attribute
   is absent.

   file -- passed straight to ET.parse.

   TODO: Very much refactor this. (Mostly done.)  Only handle the data
   and pmml input here and handle the model specific stuff later in
   makeTests or whatever I rename that to.
   """
   if self.__timer:
     self.__timer.output("Inputting configurations")

   # Generic settings from the root element.
   root = ET.parse(file).getroot()
   self.__mode = root.get("mode")
   self.__input = root.get("input")
   self.__output = root.get("output")
   self.__batch = root.find("batch") is not None

   # Model specific stuff
   model = root.getchildren()[0]
   if model.tag != "rules":
     raise ValueError("Unable to determine what type of model you want produced. Element %s is not recognized." % (model.tag))

   # Get ruleset model config values from the xml
   self._data = model.find('data')
   segmentations = model.findall('segmentation')
   self.__nboxes = int(model.get('nboxes'))

   peelAlpha = model.get('peel_alpha')
   self.__peel_alpha = 0.02 if peelAlpha is None else float(peelAlpha)

   pasteAlpha = model.get('paste_alpha')
   self.__paste_alpha = 0.02 if pasteAlpha is None else float(pasteAlpha)

   self.__weight_field = model.get('weight_field')
   self._modelType = pmmlRuleSetModel

   # Expand every segmentation declaration into self._segments.
   self._segments = []
   for declaration in segmentations:
     self._makeSegments(declaration)

   # One placeholder per segment plus the unsegmented () entry.
   self._baseDict = {():None}
   for segment in (self._segments or ()):
     self._baseDict[Producer.tupelize(segment)] = None

   # The model attributes are remembered so they can go into the PMML.
   self._attrs = {}
   for name, value in model.items():
     self._attrs[str(name)] = str(value)
Exemple #8
0
def parse(configFile=None):
    """Resolve the directory layout described by a consumer configuration.

    Paths in the configuration are interpreted relative to the directory
    holding configFile (the "consumer" directory), except that model data
    is relative to the producer home and the temp area is used verbatim
    when it does not exist relative to the consumer directory.

    configFile -- path of the XML configuration.  A real path is required
                  even though the parameter defaults to None, because it is
                  handed to os.path.abspath and ET.parse.

    Returns the tuple
        (consumer, producer, modelData, postprocessing, reports,
         temparea, scoresdir, pmmlfile)
    where modelData is None when the producer <Config> names an existing
    file.  Returns the string 'FAIL : ...' when neither a usable producer
    config nor <ModelData> is given.  Prints a message and exits with
    status 1 when the report has no <toFile name=...>.
    """
    consumer = os.path.dirname(os.path.abspath(configFile))
    root = ET.parse(configFile).getroot()
    structure = root.find('DirectoryStructure')
    producerstructure = structure.find('Producer')
    producer = os.path.join(consumer, producerstructure.find('Home').text)

    # A producer config only counts when it names an existing file.
    configNode = producerstructure.find('Config')
    producerConfig = None if configNode is None else configNode.text
    if producerConfig is not None and not os.path.isfile(producerConfig):
        producerConfig = None

    # Bind modelData on every path: it used to be left undefined when a
    # usable producer config existed, so the final return raised
    # UnboundLocalError.
    modelData = None
    if producerConfig is None:
        dataNode = producerstructure.find('ModelData')
        if dataNode is None:
            return 'FAIL : Neither the producer config nor the model data are specified!'
        # assume model data is relative to producer home
        modelData = os.path.join(producer, dataNode.text)

    postprocessing = os.path.join(consumer,
                                  structure.find('Postprocessing').text)
    reports = os.path.join(consumer, structure.find('Reports').text)

    # In the particular case of temp, if it doesn't work as
    # relative directory, try absolute case.
    temp = structure.find('Temp').text
    relativeTemp = os.path.join(consumer, temp)
    if os.path.exists(relativeTemp):
        temparea = relativeTemp
    else:
        temparea = temp

    inputModel = root.find('inputModel')
    pmmlfile = os.path.join(consumer,
                            inputModel.find('fromFile').attrib['name'])

    report = root.find('output').find('report')
    try:
        outputfile = report.find('toFile').attrib['name']
    except (AttributeError, KeyError):
        # Missing <report>/<toFile> (AttributeError on None) or missing
        # 'name' attribute (KeyError).
        print('No output file in use!')
        sys.exit(1)

    scoresdir = os.path.dirname(os.path.join(consumer, outputfile))
    return consumer, producer, modelData, postprocessing, reports, temparea, scoresdir, pmmlfile
Exemple #9
0
    def inputConfigs(self, file):
        """TODO: Very much refactor this. (Mostly done.)
    Only handle the data and pmml input here and handle the model
    specific stuff later in makeTests or whatever I rename that too."""
        if self.__timer:
            self.__timer.output("Inputting configurations")
        #input generic configurations
        tree = ET.parse(file)
        root = tree.getroot()
        self.__mode = root.get("mode")
        self.__input = root.get("input")
        self.__output = root.get("output")
        self.__batch = root.find("batch")
        if not self.__batch is None:
            self.__batch = True
        else:
            self.__batch = False
        self.__skip = root.find("skip")
        if not self.__skip is None:
            self.__skip = long(self.__skip.get("number"))

        #Model specific stuff
        model = root.getchildren()[0]
        if model.tag == "tree":
            #Do tree model stuff
            self._data = model.find('data')
            segmentations = model.findall('segmentation')
            self.__maxdepth = int(model.get('maxdepth'))
            self._modelType = pmmlTreeModel
        elif model.tag == "test":
            #Do baseline model stuff
            pass

            #input baseline and alternate distributions

        #produce segmentation
        self._segments = []
        for segmentDeclarations in segmentations:
            self._makeSegments(segmentDeclarations)

        #prepare for statistics gathering
        self._baseDict = {(): None}
        if self._segments:
            for segment in self._segments:
                self._baseDict[Producer.tupelize(segment)] = [None]

        #remember the attributes of the model, they will be included in the PMML
        self._attrs = {}
        for key in model.keys():
            self._attrs[str(key)] = str(model.get(key))
Exemple #10
0
 def inputConfigs(self, file):
   """TODO: Very much refactor this. (Mostly done.)
   Only handle the data and pmml input here and handle the model
   specific stuff later in makeTests or whatever I rename that too."""
   if self.__timer:
     self.__timer.output("Inputting configurations")
   #input generic configurations
   tree = ET.parse(file)
   root = tree.getroot()
   self.__mode = root.get("mode")
   self.__input = root.get("input")
   self.__output = root.get("output")
   self.__batch = root.find("batch")
   if not self.__batch is None:
     self.__batch = True
   else:
     self.__batch = False
   self.__skip = root.find("skip")
   if not self.__skip is None:
     self.__skip = long(self.__skip.get("number"))
   
   #Model specific stuff
   model = root.getchildren()[0]
   if model.tag == "tree":
     #Do tree model stuff
     self._data = model.find('data')
     segmentations = model.findall('segmentation')
     self.__maxdepth = int(model.get('maxdepth'))
     self._modelType = pmmlTreeModel
   elif model.tag == "test":
     #Do baseline model stuff
     pass
   
     #input baseline and alternate distributions
   
   #produce segmentation
   self._segments = []
   for segmentDeclarations in segmentations:
     self._makeSegments(segmentDeclarations)
   
   #prepare for statistics gathering
   self._baseDict = {():None}
   if self._segments:
     for segment in self._segments:
       self._baseDict[Producer.tupelize(segment)] = [None]
   
   #remember the attributes of the model, they will be included in the PMML
   self._attrs = {}
   for key in model.keys():
     self._attrs[str(key)] = str(model.get(key))
Exemple #11
0
def parse(configFile=None):
  """Resolve the directory layout described by a consumer configuration.

  Paths in the configuration are interpreted relative to the directory
  holding configFile (the "consumer" directory), except that model data
  is relative to the producer home and the temp area is used verbatim
  when it does not exist relative to the consumer directory.

  configFile -- path of the XML configuration.  A real path is required
                even though the parameter defaults to None, because it is
                handed to os.path.abspath and ET.parse.

  Returns the tuple
      (consumer, producer, modelData, postprocessing, reports,
       temparea, scoresdir, pmmlfile)
  where modelData is None when the producer <Config> names an existing
  file.  Returns the string 'FAIL : ...' when neither a usable producer
  config nor <ModelData> is given.  Prints a message and exits with
  status 1 when the report has no <toFile name=...>.
  """
  consumer = os.path.dirname(os.path.abspath(configFile))
  root = ET.parse(configFile).getroot()
  structure = root.find('DirectoryStructure')
  producerstructure = structure.find('Producer')
  producer = os.path.join(consumer,producerstructure.find('Home').text)

  # A producer config only counts when it names an existing file.
  configNode = producerstructure.find('Config')
  producerConfig = None if configNode is None else configNode.text
  if producerConfig is not None and not os.path.isfile(producerConfig):
    producerConfig = None

  # Bind modelData on every path: it used to be left undefined when a
  # usable producer config existed, so the final return raised
  # UnboundLocalError.
  modelData = None
  if producerConfig is None:
    dataNode = producerstructure.find('ModelData')
    if dataNode is None:
      return 'FAIL : Neither the producer config nor the model data are specified!'
    # assume model data is relative to producer home
    modelData = os.path.join(producer,dataNode.text)

  postprocessing = os.path.join(consumer,structure.find('Postprocessing').text)
  reports = os.path.join(consumer,structure.find('Reports').text)

  # In the particular case of temp, if it doesn't work as
  # relative directory, try absolute case.
  temp = structure.find('Temp').text
  relativeTemp = os.path.join(consumer,temp)
  if os.path.exists(relativeTemp):
    temparea = relativeTemp
  else:
    temparea = temp

  inputModel = root.find('inputModel')
  pmmlfile = os.path.join(consumer,inputModel.find('fromFile').attrib['name'])

  report = root.find('output').find('report')
  try:
    outputfile = report.find('toFile').attrib['name']
  except (AttributeError, KeyError):
    # Missing <report>/<toFile> (AttributeError on None) or missing
    # 'name' attribute (KeyError).
    print('No output file in use!')
    sys.exit(1)

  scoresdir = os.path.dirname(os.path.join(consumer,outputfile))
  return consumer, producer, modelData, postprocessing, reports, temparea, scoresdir, pmmlfile
Exemple #12
0
def makeConfigs(inFile, outFile, inPMML, outPMML):
    """Write a producer configuration for a gaussian z-value test.

    The data file is loaded so that the distinct values of the
    "fielddeviceid" and "locationtimestamp" fields can be listed as
    explicit segments; the resulting XML tree is written to outFile.

    inFile  -- data file, read as a UniTable and named as the baseline file
    outFile -- destination handed to ElementTree.write
    inPMML  -- value stored in the root "input" attribute
    outPMML -- value stored in the root "output" attribute
    """
    # Load the data up front; it drives the segment declarations below.
    table = uni.UniTable().fromfile(inFile)

    model = ET.Element("model")
    model.set("input", str(inPMML))
    model.set("output", str(outPMML))

    testNode = ET.SubElement(model, "test")
    for attribute, value in (("field", "volume"),
                             ("testStatistic", "zValue"),
                             ("testType", "threshold"),
                             ("threshold", "1.5")):
        testNode.set(attribute, value)

    baselineNode = ET.SubElement(testNode, "baseline")
    for attribute, value in (("dist", "gaussian"),
                             ("file", str(inFile)),
                             ("type", "UniTable")):
        baselineNode.set(attribute, value)

    # One explicit-segment declaration per segmenting field.
    segmentationNode = ET.SubElement(testNode, "segmentation")
    for fieldName in ("fielddeviceid", "locationtimestamp"):
        makeSegment(table, segmentationNode, fieldName)

    ET.ElementTree(model).write(outFile)
Exemple #13
0
def makePMML(outFile):
    """Write a skeleton PMML 3.1 document with an empty BaselineModel.

    The data dictionary and the mining schema both list two categorical
    string fields: "Automaker" (the test field) and "Color" (the field
    that defines segments).  A "Date" field is unused in this example and
    is therefore not declared.

    outFile -- destination handed to ElementTree.write
    """
    fieldNames = ("Automaker", "Color")

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")
    ET.SubElement(pmml, "Header").set("copyright", " ")

    dictionary = ET.SubElement(pmml, "DataDictionary")
    for fieldName in fieldNames:
        declaration = ET.SubElement(dictionary, "DataField")
        for attribute, value in (("name", fieldName),
                                 ("optype", "categorical"),
                                 ("dataType", "string")):
            declaration.set(attribute, value)

    model = ET.SubElement(pmml, "BaselineModel")
    model.set("functionName", "baseline")

    # mining 2 fields: segmentation and test field
    schema = ET.SubElement(model, "MiningSchema")
    for fieldName in fieldNames:
        ET.SubElement(schema, "MiningField").set("name", fieldName)

    ET.ElementTree(pmml).write(outFile)
Exemple #14
0
    def inputConfigs(self, file):
        if self._timer:
            self._timer.output("Inputting configurations")
        #input basic configurations
        tree = ET.parse(file)
        root = tree.getroot()
        self.__mode = root.get("mode")
        self.__input = root.get("input")
        self._output = root.get("output")
        self._batch = True
        self.__debugFile = root.find("debug")
        if not self.__debugFile is None:
            self.__debugFile = self.__debugFile.get("file")
        self._skip = root.find("skip")
        if not self._skip is None:
            self._skip = long(self._skip.get("number"))
        test = root.find("test")

        #First sub-element is the 'build' element, specifying data.
        self._build = test[0]

        #produce segmentation
        start = 1
        self._segments = []
        for segmentDeclarations in test[start:]:
            self._makeSegments(segmentDeclarations)
        self._baseDict = {}
        if self._segments:
            for segment in self._segments:
                self._baseDict[CommonProducer.tupelize(segment)] = None
        else:
            self._baseDict[()] = None
        #remember the attributes of the test distribution
        self._attrs = {}
        for key in test.keys():
            self._attrs[str(key)] = str(test.get(key))

        #Validation methodology.
        validation = root.find("validation")
        if (validation is not None):
            validmethod = validation.get('method')
            validthreshold = validation.get('threshold')
            self.testValidation = CommonProducer.testValidatingFunctions(
                validmethod, validthreshold)
        else:
            self.testValidation = CommonProducer.testValidatingFunctions(None)
Exemple #15
0
  def inputConfigs(self, file):
    if self._timer:
      self._timer.output("Inputting configurations")
    #input basic configurations
    tree = ET.parse(file)
    root = tree.getroot()    
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self._output = root.get("output")
    self._batch = True
    self.__debugFile = root.find("debug")
    if not self.__debugFile is None:
      self.__debugFile = self.__debugFile.get("file")
    self._skip = root.find("skip")
    if not self._skip is None:
      self._skip = long(self._skip.get("number"))
    test = root.find("test")

    #First sub-element is the 'build' element, specifying data.
    self._build = test[0]

    #produce segmentation
    start = 1
    self._segments = []
    for segmentDeclarations in test[start:]:
      self._makeSegments(segmentDeclarations)
    self._baseDict = {}
    if self._segments:
      for segment in self._segments:
        self._baseDict[CommonProducer.tupelize(segment)] = None
    else:
      self._baseDict[()] = None
    #remember the attributes of the test distribution
    self._attrs = {}
    for key in test.keys():
      self._attrs[str(key)] = str(test.get(key))

    #Validation methodology.
    validation = root.find("validation")
    if (validation is not None):
      validmethod = validation.get('method')
      validthreshold = validation.get('threshold')
      self.testValidation = CommonProducer.testValidatingFunctions(validmethod,validthreshold)
    else:
      self.testValidation = CommonProducer.testValidatingFunctions(None)
Exemple #16
0
 def _to_html_elementtree(self,tblattr=None,method=str):
   """Build an ElementTree holding this table as an HTML <table>.

   The first row carries one <th> per key; each following row carries
   one <td> per column.  Keys and cell values are converted to text by
   method.  A column whose exported value is None is rendered as a
   column of None values.

   tblattr -- attributes for the <table> element; defaults to
              {'border':'1'}
   method  -- callable turning a key or cell value into text
   """
   if tblattr is None:
     tblattr = {'border':'1'}
   from augustus.external.etree import Element, SubElement, ElementTree

   table = Element('table',**tblattr)
   table.text = table.tail = '\n'

   # Heading row: one <th> per key.
   headerRow = SubElement(table,'tr')
   headerRow.tail = '\n'
   for key in self.keys():
     headerCell = SubElement(headerRow,'th')
     headerCell.text = method(key)

   # Export every column once; missing columns become all-None columns.
   exported = [export_string(value) for value in self.values()]
   columns = [[None]*len(self) if column is None else column
              for column in exported]

   for rownum in range(len(self)):
     row = SubElement(table,'tr')
     row.tail = '\n'
     for column in columns:
       cell = SubElement(row,'td')
       cell.text = method(column[rownum])
   return ElementTree(table)
Exemple #17
0
 def _to_xml_elementtree(self,**kwargs):
   """Build an ElementTree holding this table as generic XML.

   Each row becomes one row element whose children are named after the
   keys and hold the str() of the corresponding cell.  A column whose
   exported value is None is rendered as a column of None values.

   Keyword options:
   tbl_element -- tag of the enclosing element (default 'table')
   row_element -- tag of each row element (default 'row')
   """
   tableTag = kwargs.get('tbl_element','table')
   rowTag = kwargs.get('row_element','row')
   from augustus.external.etree import Element, SubElement, ElementTree

   table = Element(tableTag)
   table.text = table.tail = '\n'

   # Export every column once; missing columns become all-None columns.
   exported = [export_string(value) for value in self.values()]
   columns = [[None]*len(self) if column is None else column
              for column in exported]

   named = list(zip(self.keys(),columns))
   for rownum in range(len(self)):
     row = SubElement(table,rowTag)
     row.tail = '\n'
     for key,column in named:
       cell = SubElement(row,key)
       cell.text = str(column[rownum])
   return ElementTree(table)
Exemple #18
0
 def __init__(self, filename):
     """Parse filename as XML and keep the root element of the document."""
     self.__root = ET.parse(filename).getroot()
Exemple #19
0
 def __init__(self, filename):
   """Parse filename as XML and keep the root element of the document."""
   self.__root = ET.parse(filename).getroot()
Exemple #20
0
# Module-level constant holding the literal 'testStatistic'.
# NOTE(review): presumably the name of the XML attribute that selects the
# test statistic; its users are not visible here -- confirm.
testStatisticAttrib = 'testStatistic'

from augustus.kernel.unitable import *
from augustus.external.etree import ElementTree as ET
import sys
import os
import os.path
import datetime
import logging
from math import *

# Script entry point: takes the path of a consumer configuration file as
# the first command-line argument and locates the files it refers to.
if __name__ == "__main__":
    # Logger that writes to standard output.
    s = logging.StreamHandler(sys.stdout)
    log = logging.Logger('root')
    log.addHandler(s)
    # Parse the consumer configuration and walk its top-level elements.
    consumer_config = ET.parse(sys.argv[1])
    config_root = consumer_config.getroot()
    context = config_root.getchildren()
    for config_element in context:
        # <inputModel><fromFile name=...> gives the model file name.
        if config_element.tag == 'inputModel':
            for _m in config_element.getchildren():
                if (_m.tag == 'fromFile'):
                    model = _m.attrib['name']
        # <output><report><toFile name=...> gives the scores file, which
        # is parsed immediately as XML.
        if config_element.tag == 'output':
            for _m in config_element.getchildren():
                if (_m.tag == 'report'):
                    for _r in _m.getchildren():
                        if (_r.tag == 'toFile'):
                            outputScoresFile = _r.attrib['name']
                            consumer_output = ET.parse(outputScoresFile)
    # NOTE: model, outputScoresFile and consumer_output stay undefined when
    # the corresponding elements are missing from the configuration.
    # process pmml for expectations
Exemple #21
0
from augustus.kernel.unitable import *
from augustus.external.etree import ElementTree as ET
import sys
import os
import os.path
import datetime
import logging
from math import *

# Script entry point: takes the path of a consumer configuration file as
# the first command-line argument and locates the files it refers to.
if __name__ == "__main__":
  # Logger that writes to standard output.
  s = logging.StreamHandler(sys.stdout)
  log = logging.Logger('root')
  log.addHandler(s)
  #Determine files which were used for scoring.
  consumer_config = ET.parse(sys.argv[1])
  config_root = consumer_config.getroot()
  context = config_root.getchildren()
  for config_element in context:
    # <inputModel><fromFile name=...> gives the model file name.
    if config_element.tag == 'inputModel':
      for _m in config_element.getchildren():
        if (_m.tag == 'fromFile'):
          model = _m.attrib['name']
    # <output><report><toFile name=...> gives the scores file, which
    # is parsed immediately as XML.
    # NOTE: model, outputScoresFile and consumer_output stay undefined
    # when the corresponding elements are missing from the configuration.
    if config_element.tag == 'output':
      for _m in config_element.getchildren():
        if (_m.tag == 'report'):
          for _r in _m.getchildren():
            if (_r.tag == 'toFile'):
              outputScoresFile = _r.attrib['name']
              consumer_output = ET.parse(outputScoresFile)
Exemple #22
0
  def inputConfigs(self, file):
    """Load generic and clustering-specific settings from an XML file.

    The first child of the root element selects the model; only a
    <clustering> element is accepted.  From it are read the data element,
    the segmentation declarations, the seeding mode ('seed'), the random
    seed ('numberSeed', an integer or the word "random" for a seed drawn
    from os.urandom) and the <haltConditions>: <convergence/>,
    <smallStep epsilon=.../> and <maxIterations max=.../>.

    file -- passed straight to ET.parse.

    Raises ValueError when the model element is not <clustering>, when
    seed or numberSeed is invalid, when <haltConditions> is missing or
    holds an unknown element, when epsilon/max is missing or malformed,
    or when neither convergence nor maxIterations is given.

    TODO: Very much refactor this. (Mostly done.)  Only handle the data
    and pmml input here and handle the model specific stuff later in
    makeTests or whatever I rename that to.
    """
    if self.__timer:
      self.__timer.output("Inputting configurations")
    #input generic configurations
    root = ET.parse(file).getroot()
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    self.__batch = root.find("batch") is not None

    #Model specific stuff
    # Indexing the element is equivalent to getchildren()[0].
    model = root[0]
    if model.tag != "clustering":
      raise ValueError("Unable to determine what type of model you want produced. Element %s is not recognized." % (model.tag))

    #Get clustering model config values from the xml
    self._data = model.find('data')
    segmentations = model.findall('segmentation')

    self.seed = model.get('seed')
    if self.seed not in ("explicit", "randomPoints", "random"):
      raise ValueError('The seed must be one of ["explicit", "randomPoints", "random"], not "%s".' % self.seed)

    try:
      self.numberSeed = int(model.get('numberSeed'))
    except (TypeError, ValueError):
      if model.get('numberSeed') == "random":
        # Combine `digits` random bytes into one little-endian integer.
        def urandom_seed(digits):
          return sum([a*b for a, b in zip(map(ord, os.urandom(digits)), [2**(8*i) for i in range(digits)])])
        self.numberSeed = urandom_seed(10)
      else:
        raise ValueError("The random numberSeed must be specified as an integer or 'random' for a seed from os.urandom().")

    haltConditions = model.find('haltConditions')
    if haltConditions is None:
      raise ValueError("The haltConditions must be specified.")

    self.halt_convergence = False
    self.halt_smallStep = None
    self.halt_maxIterations = None

    for i in haltConditions:
      if i.tag == 'convergence':
        self.halt_convergence = True

      elif i.tag == 'smallStep':
        # TypeError covers a missing attribute (float(None)), ValueError a
        # malformed one; both get the same explanatory message.
        try:
          self.halt_smallStep = float(i.get('epsilon'))
        except (TypeError, ValueError):
          raise ValueError("The minimum step size in smallStep must be specified in 'epsilon' as a floating-point number.")

      elif i.tag == 'maxIterations':
        try:
          self.halt_maxIterations = int(i.get('max'))
        except (TypeError, ValueError):
          raise ValueError("The maxIterations must be specified in 'max' as an integer.")

      else:
        raise ValueError('Unrecognized haltCondition "%s".' % i.tag)

    # smallStep on its own is not accepted as a halting condition here.
    if not self.halt_convergence and self.halt_maxIterations is None:
      raise ValueError("At least one haltCondition must be specified.")

    self._modelType = pmmlClusteringModel

    #produce segmentation
    self._segments = []
    for segmentDeclarations in segmentations:
      self._makeSegments(segmentDeclarations)

    #prepare for statistics gathering
    self._baseDict = {():None}
    if self._segments:
      for segment in self._segments:
        self._baseDict[Producer.tupelize(segment)] = None

    #remember the attributes of the model, they will be included in the PMML
    self._attrs = {}
    for key in model.keys():
      self._attrs[str(key)] = str(model.get(key))
Exemple #23
0
def makeConfigs(config, dataFile, pmml, scores):
    """Write a consumer ("pmmlDeployment") configuration file.

    The configuration reads dataFile once as a UniTable, loads the model
    from pmml, and writes a report named "report" to scores with one
    "event" row holding score, alert, ancillary and segments columns.
    Logging goes to standard error.

    config   -- destination handed to ElementTree.write
    dataFile -- value stored as the input data file name
    pmml     -- value stored as the input model file name
    scores   -- value stored as the report output file name
    """
    deployment = ET.Element("pmmlDeployment")

    # Input data: read once from a UniTable file.
    inputData = ET.SubElement(deployment, "inputData")
    ET.SubElement(inputData, "readOnce")
    dataSource = ET.SubElement(inputData, "fromFile")
    dataSource.set("name", str(dataFile))
    dataSource.set("type", "UniTable")

    # Input model.
    inputModel = ET.SubElement(deployment, "inputModel")
    ET.SubElement(inputModel, "fromFile").set("name", str(pmml))

    # Output report and the layout of each reported row.
    outputNode = ET.SubElement(deployment, "output")
    reportNode = ET.SubElement(outputNode, "report")
    reportNode.set("name", "report")
    ET.SubElement(reportNode, "toFile").set("name", str(scores))
    rowNode = ET.SubElement(reportNode, "outputRow")
    rowNode.set("name", "event")
    for tag, label in (("score", "score"),
                       ("alert", "alert"),
                       ("ancillary", "distribution"),
                       ("segments", "segments")):
        ET.SubElement(rowNode, tag).set("name", label)

    loggingNode = ET.SubElement(deployment, "logging")
    ET.SubElement(loggingNode, "toStandardError")

    ET.ElementTree(deployment).write(config)
Exemple #24
0
def makePMML(outFile):
    """Write a skeleton PMML 3.1 document with an empty BaselineModel.

    The data dictionary declares "volume" as a continuous float and
    "fielddeviceid" and "locationtimestamp" as categorical strings; the
    mining schema lists the same three fields in the same order.

    outFile -- destination handed to ElementTree.write
    """
    # (name, optype, dataType) for every declared field, in output order.
    fields = (
        ("volume", "continuous", "float"),
        ("fielddeviceid", "categorical", "string"),
        ("locationtimestamp", "categorical", "string"),
    )

    pmml = ET.Element("PMML")
    pmml.set("version", "3.1")
    ET.SubElement(pmml, "Header").set("copyright", " ")

    dictionary = ET.SubElement(pmml, "DataDictionary")
    for fieldName, optype, dataType in fields:
        declaration = ET.SubElement(dictionary, "DataField")
        declaration.set("name", fieldName)
        declaration.set("optype", optype)
        declaration.set("dataType", dataType)

    model = ET.SubElement(pmml, "BaselineModel")
    model.set("functionName", "baseline")
    schema = ET.SubElement(model, "MiningSchema")
    for fieldName, _optype, _dataType in fields:
        ET.SubElement(schema, "MiningField").set("name", fieldName)

    ET.ElementTree(pmml).write(outFile)
Exemple #25
0
def makeSegment(inf, segmentation, field):
    """Declare one explicit segment per distinct value of a field.

    Appends an <explicitSegments field=...> element to segmentation and
    gives it one <segment value=...> child for each distinct value found
    in inf[field].  The children follow set iteration order.

    inf          -- table-like object indexable by field name
    segmentation -- parent element receiving the declaration
    field        -- name of the segmenting field
    """
    explicit = ET.SubElement(segmentation, "explicitSegments")
    explicit.set("field", field)
    distinctValues = set(inf[field])
    for item in distinctValues:
        ET.SubElement(explicit, "segment").set("value", str(item))