def getInstructions(configFile):
    """Collect the processing instructions declared in a configuration file.

    The <Processing> element of the document is inspected for
    <Preprocess> and <Postprocess> children; the text of each of their
    <Instruction> children is collected in document order.

    configFile -- handed unchanged to ET.parse.

    Returns a dict with the keys 'preprocessing' and 'postprocessing'
    (lists of instruction texts, possibly empty).  The key 'consumer' is
    set to True when <Processing> is absent or contains a <Consumer>
    element; otherwise the key is not present at all.
    """
    instructions = {'preprocessing': [], 'postprocessing': []}
    root = ET.parse(configFile).getroot()
    structure = root.find('Processing')

    if structure is None:
        # Without a <Processing> section only the consumer flag is set.
        instructions['consumer'] = True
        return instructions

    if structure.find('Consumer') is not None:
        instructions['consumer'] = True

    # Both sections share the same layout, so extract them the same way.
    for key, tag in (('preprocessing', 'Preprocess'),
                     ('postprocessing', 'Postprocess')):
        section = structure.find(tag)
        if section is not None:
            instructions[key] = [step.text
                                 for step in section.findall('Instruction')]
    return instructions
def getInstructions(configFile):
    """Collect the processing instructions declared in a configuration file.

    The <Processing> element of the document is inspected for
    <Preprocess> and <Postprocess> children; the text of each of their
    <Instruction> children is collected in document order.

    configFile -- handed unchanged to ET.parse.

    Returns a dict with the keys 'preprocessing' and 'postprocessing'
    (lists of instruction texts, possibly empty).  The key 'consumer' is
    set to True when <Processing> is absent or contains a <Consumer>
    element; otherwise the key is not present at all.
    """
    instructions = {'preprocessing': [], 'postprocessing': []}
    root = ET.parse(configFile).getroot()
    structure = root.find('Processing')

    if structure is None:
        # Without a <Processing> section only the consumer flag is set.
        instructions['consumer'] = True
        return instructions

    if structure.find('Consumer') is not None:
        instructions['consumer'] = True

    # Both sections share the same layout, so extract them the same way.
    for key, tag in (('preprocessing', 'Preprocess'),
                     ('postprocessing', 'Postprocess')):
        section = structure.find(tag)
        if section is not None:
            instructions[key] = [step.text
                                 for step in section.findall('Instruction')]
    return instructions
def inputConfigs(self, file): """TODO: Very much refactor this.""" if self.__timer: self.__timer.output("Inputting configurations") #input generic configurations tree = ET.parse(file) root = tree.getroot() self.__mode = root.get("mode") self.__input = root.get("input") self.__output = root.get("output") self.__batch = root.find("batch") if self.__batch is not None: self.__batch = True else: self.__batch = False self.__debugFile = root.find("debug") if self.__debugFile is not None: self.__debugFile = self.__debugFile.get("file") self.__skip = root.find("skip") if self.__skip is not None: self.__skip = long(self.__skip.get("number")) test = root.find("test") #input baseline and alternate distributions self._build = test[0] validation = root.find("validation") if validation is not None: #Decide on method of validating tests. validmethod = validation.get('method') validthreshold = validation.get('threshold') self.testValidation = Producer.testValidatingFunctions( validmethod, validthreshold) else: self.testValidation = Producer.testValidatingFunctions(None) start = 1 if len(test) > 1: next = test[1] start = 2 #TODO: Naive Bayes shouldn't have alternate. if next.tag != "alternate": start = 1 else: self._alternate = next #produce segmentation self._segments = [] for segmentDeclarations in test[start:]: self._makeSegments(segmentDeclarations) #prepare for statistics gathering self._baseDict = {(): None} if self._segments: for segment in self._segments: self._baseDict[Producer.tupelize(segment)] = None #remember the attributes of the test distribution self._attrs = {} for key in test.keys(): self._attrs[str(key)] = str(test.get(key))
def inputConfigs(self, file): """TODO: Very much refactor this.""" if self.__timer: self.__timer.output("Inputting configurations") #input generic configurations tree = ET.parse(file) root = tree.getroot() self.__mode = root.get("mode") self.__input = root.get("input") self.__output = root.get("output") self.__batch = root.find("batch") if self.__batch is not None: self.__batch = True else: self.__batch = False self.__debugFile = root.find("debug") if self.__debugFile is not None: self.__debugFile = self.__debugFile.get("file") self.__skip = root.find("skip") if self.__skip is not None: self.__skip = long(self.__skip.get("number")) test = root.find("test") #input baseline and alternate distributions self._build = test[0] validation = root.find("validation") if validation is not None: #Decide on method of validating tests. validmethod = validation.get('method') validthreshold = validation.get('threshold') self.testValidation = Producer.testValidatingFunctions(validmethod,validthreshold) else: self.testValidation = Producer.testValidatingFunctions(None) start = 1 if len(test) > 1: next = test[1] start = 2 #TODO: Naive Bayes shouldn't have alternate. if next.tag != "alternate": start = 1 else: self._alternate = next #produce segmentation self._segments = [] for segmentDeclarations in test[start:]: self._makeSegments(segmentDeclarations) #prepare for statistics gathering self._baseDict = {():None} if self._segments: for segment in self._segments: self._baseDict[Producer.tupelize(segment)] = None #remember the attributes of the test distribution self._attrs = {} for key in test.keys(): self._attrs[str(key)] = str(test.get(key))
def inputConfigs(self, file):
    """Load the rule-set producer settings from an XML configuration file.

    TODO: Very much refactor this.  (Mostly done.)  Only handle the data
    and pmml input here and handle the model specific stuff later in
    makeTests or whatever I rename that to.

    The root attributes mode/input/output and the optional <batch> child
    are read first.  The first child of the root must be a <rules>
    element; its nboxes, peel_alpha, paste_alpha and weight_field
    attributes are stored, and each of its <segmentation> children is
    passed to self._makeSegments.

    file -- handed unchanged to ET.parse.

    Raises ValueError when the first child of the root is not <rules>.
    """
    if self.__timer:
        self.__timer.output("Inputting configurations")

    # Generic configuration taken from the document root.
    root = ET.parse(file).getroot()
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    self.__batch = root.find("batch") is not None

    # Model specific configuration.
    model = root.getchildren()[0]
    if model.tag != "rules":
        raise ValueError("Unable to determine what type of model you want produced.  Element %s is not recognized." % (model.tag))

    self._data = model.find('data')
    segmentations = model.findall('segmentation')
    self.__nboxes = int(model.get('nboxes'))

    # Both alpha values fall back to 0.02 when the attribute is absent.
    peelAlpha = model.get('peel_alpha')
    if peelAlpha is None:
        self.__peel_alpha = 0.02
    else:
        self.__peel_alpha = float(peelAlpha)
    pasteAlpha = model.get('paste_alpha')
    if pasteAlpha is None:
        self.__paste_alpha = 0.02
    else:
        self.__paste_alpha = float(pasteAlpha)

    self.__weight_field = model.get('weight_field')
    self._modelType = pmmlRuleSetModel

    # Produce segmentation.
    self._segments = []
    for declaration in segmentations:
        self._makeSegments(declaration)

    # Prepare for statistics gathering: one slot per segment plus the
    # empty-tuple slot.
    self._baseDict = {(): None}
    for segment in self._segments or ():
        self._baseDict[Producer.tupelize(segment)] = None

    # Remember the attributes of the model; they will be included in the PMML.
    self._attrs = dict((str(key), str(model.get(key))) for key in model.keys())
def inputConfigs(self, file):
    """Load the rule-set producer settings from an XML configuration file.

    TODO: Very much refactor this.  (Mostly done.)  Only handle the data
    and pmml input here and handle the model specific stuff later in
    makeTests or whatever I rename that to.

    The root attributes mode/input/output and the optional <batch> child
    are read first.  The first child of the root must be a <rules>
    element; its nboxes, peel_alpha, paste_alpha and weight_field
    attributes are stored, and each of its <segmentation> children is
    passed to self._makeSegments.

    file -- handed unchanged to ET.parse.

    Raises ValueError when the first child of the root is not <rules>.
    """
    if self.__timer:
        self.__timer.output("Inputting configurations")

    # Generic configuration taken from the document root.
    root = ET.parse(file).getroot()
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    self.__batch = root.find("batch") is not None

    # Model specific configuration.
    model = root.getchildren()[0]
    if model.tag != "rules":
        raise ValueError("Unable to determine what type of model you want produced.  Element %s is not recognized." % (model.tag))

    self._data = model.find('data')
    segmentations = model.findall('segmentation')
    self.__nboxes = int(model.get('nboxes'))

    # Both alpha values fall back to 0.02 when the attribute is absent.
    peelAlpha = model.get('peel_alpha')
    if peelAlpha is None:
        self.__peel_alpha = 0.02
    else:
        self.__peel_alpha = float(peelAlpha)
    pasteAlpha = model.get('paste_alpha')
    if pasteAlpha is None:
        self.__paste_alpha = 0.02
    else:
        self.__paste_alpha = float(pasteAlpha)

    self.__weight_field = model.get('weight_field')
    self._modelType = pmmlRuleSetModel

    # Produce segmentation.
    self._segments = []
    for declaration in segmentations:
        self._makeSegments(declaration)

    # Prepare for statistics gathering: one slot per segment plus the
    # empty-tuple slot.
    self._baseDict = {(): None}
    for segment in self._segments or ():
        self._baseDict[Producer.tupelize(segment)] = None

    # Remember the attributes of the model; they will be included in the PMML.
    self._attrs = dict((str(key), str(model.get(key))) for key in model.keys())
def parse(configFile=None):
    """Resolve the directory layout described by a consumer configuration.

    All relative locations are resolved against the directory that
    contains configFile (the "consumer" directory), except the model
    data, which is resolved against the producer home.

    configFile -- path of the XML configuration file.

    Returns the tuple
        (consumer, producer, modelData, postprocessing, reports,
         temparea, scoresdir, pmmlfile)
    where modelData is None when <Producer>/<Config> names an existing
    file (the model data is only looked up when no usable producer
    configuration is given).

    Returns the string
    'FAIL : Neither the producer config nor the model data are specified!'
    when neither a usable <Config> nor a <ModelData> text is present.

    Prints a message and exits with status 1 when the output report
    does not name a file.
    """
    consumer = os.path.dirname(os.path.abspath(configFile))
    root = ET.parse(configFile).getroot()
    structure = root.find('DirectoryStructure')
    producerstructure = structure.find('Producer')
    producer = os.path.join(consumer, producerstructure.find('Home').text)

    # A producer config only counts when it names an existing file.
    producerConfig = None
    configElement = producerstructure.find('Config')
    if configElement is not None:
        producerConfig = configElement.text
    if producerConfig is not None and not os.path.isfile(producerConfig):
        producerConfig = None

    # Bound up front so the return below is valid on every path.
    modelData = None
    if producerConfig is None:
        dataElement = producerstructure.find('ModelData')
        if dataElement is None or dataElement.text is None:
            return 'FAIL : Neither the producer config nor the model data are specified!'
        # Assume model data is relative to producer home.
        modelData = os.path.join(producer, dataElement.text)

    postprocessing = os.path.join(consumer,
                                  structure.find('Postprocessing').text)
    reports = os.path.join(consumer, structure.find('Reports').text)

    # In the particular case of temp, if it doesn't work as a
    # relative directory, try the absolute case.
    temp = structure.find('Temp').text
    temparea = os.path.join(consumer, temp)
    if not os.path.exists(temparea):
        temparea = temp

    inputModel = root.find('inputModel')
    pmmlfile = os.path.join(consumer,
                            inputModel.find('fromFile').attrib['name'])

    report = root.find('output').find('report')
    try:
        outputfile = report.find('toFile').attrib['name']
    except (AttributeError, KeyError):
        # Missing <report>, <toFile> or its 'name' attribute.
        print('No output file in use!')
        sys.exit(1)
    scoresdir = os.path.dirname(os.path.join(consumer, outputfile))

    return (consumer, producer, modelData, postprocessing, reports,
            temparea, scoresdir, pmmlfile)
def inputConfigs(self, file): """TODO: Very much refactor this. (Mostly done.) Only handle the data and pmml input here and handle the model specific stuff later in makeTests or whatever I rename that too.""" if self.__timer: self.__timer.output("Inputting configurations") #input generic configurations tree = ET.parse(file) root = tree.getroot() self.__mode = root.get("mode") self.__input = root.get("input") self.__output = root.get("output") self.__batch = root.find("batch") if not self.__batch is None: self.__batch = True else: self.__batch = False self.__skip = root.find("skip") if not self.__skip is None: self.__skip = long(self.__skip.get("number")) #Model specific stuff model = root.getchildren()[0] if model.tag == "tree": #Do tree model stuff self._data = model.find('data') segmentations = model.findall('segmentation') self.__maxdepth = int(model.get('maxdepth')) self._modelType = pmmlTreeModel elif model.tag == "test": #Do baseline model stuff pass #input baseline and alternate distributions #produce segmentation self._segments = [] for segmentDeclarations in segmentations: self._makeSegments(segmentDeclarations) #prepare for statistics gathering self._baseDict = {():None} if self._segments: for segment in self._segments: self._baseDict[Producer.tupelize(segment)] = [None] #remember the attributes of the model, they will be included in the PMML self._attrs = {} for key in model.keys(): self._attrs[str(key)] = str(model.get(key))
def parse(configFile=None):
    """Resolve the directory layout described by a consumer configuration.

    All relative locations are resolved against the directory that
    contains configFile (the "consumer" directory), except the model
    data, which is resolved against the producer home.

    configFile -- path of the XML configuration file.

    Returns the tuple
        (consumer, producer, modelData, postprocessing, reports,
         temparea, scoresdir, pmmlfile)
    where modelData is None when <Producer>/<Config> names an existing
    file (the model data is only looked up when no usable producer
    configuration is given).

    Returns the string
    'FAIL : Neither the producer config nor the model data are specified!'
    when neither a usable <Config> nor a <ModelData> text is present.

    Prints a message and exits with status 1 when the output report
    does not name a file.
    """
    consumer = os.path.dirname(os.path.abspath(configFile))
    root = ET.parse(configFile).getroot()
    structure = root.find('DirectoryStructure')
    producerstructure = structure.find('Producer')
    producer = os.path.join(consumer, producerstructure.find('Home').text)

    # A producer config only counts when it names an existing file.
    producerConfig = None
    configElement = producerstructure.find('Config')
    if configElement is not None:
        producerConfig = configElement.text
    if producerConfig is not None and not os.path.isfile(producerConfig):
        producerConfig = None

    # Bound up front so the return below is valid on every path.
    modelData = None
    if producerConfig is None:
        dataElement = producerstructure.find('ModelData')
        if dataElement is None or dataElement.text is None:
            return 'FAIL : Neither the producer config nor the model data are specified!'
        # Assume model data is relative to producer home.
        modelData = os.path.join(producer, dataElement.text)

    postprocessing = os.path.join(consumer,
                                  structure.find('Postprocessing').text)
    reports = os.path.join(consumer, structure.find('Reports').text)

    # In the particular case of temp, if it doesn't work as a
    # relative directory, try the absolute case.
    temp = structure.find('Temp').text
    temparea = os.path.join(consumer, temp)
    if not os.path.exists(temparea):
        temparea = temp

    inputModel = root.find('inputModel')
    pmmlfile = os.path.join(consumer,
                            inputModel.find('fromFile').attrib['name'])

    report = root.find('output').find('report')
    try:
        outputfile = report.find('toFile').attrib['name']
    except (AttributeError, KeyError):
        # Missing <report>, <toFile> or its 'name' attribute.
        print('No output file in use!')
        sys.exit(1)
    scoresdir = os.path.dirname(os.path.join(consumer, outputfile))

    return (consumer, producer, modelData, postprocessing, reports,
            temparea, scoresdir, pmmlfile)
def inputConfigs(self, file): """TODO: Very much refactor this. (Mostly done.) Only handle the data and pmml input here and handle the model specific stuff later in makeTests or whatever I rename that too.""" if self.__timer: self.__timer.output("Inputting configurations") #input generic configurations tree = ET.parse(file) root = tree.getroot() self.__mode = root.get("mode") self.__input = root.get("input") self.__output = root.get("output") self.__batch = root.find("batch") if not self.__batch is None: self.__batch = True else: self.__batch = False self.__skip = root.find("skip") if not self.__skip is None: self.__skip = long(self.__skip.get("number")) #Model specific stuff model = root.getchildren()[0] if model.tag == "tree": #Do tree model stuff self._data = model.find('data') segmentations = model.findall('segmentation') self.__maxdepth = int(model.get('maxdepth')) self._modelType = pmmlTreeModel elif model.tag == "test": #Do baseline model stuff pass #input baseline and alternate distributions #produce segmentation self._segments = [] for segmentDeclarations in segmentations: self._makeSegments(segmentDeclarations) #prepare for statistics gathering self._baseDict = {(): None} if self._segments: for segment in self._segments: self._baseDict[Producer.tupelize(segment)] = [None] #remember the attributes of the model, they will be included in the PMML self._attrs = {} for key in model.keys(): self._attrs[str(key)] = str(model.get(key))
def inputConfigs(self, file): if self._timer: self._timer.output("Inputting configurations") #input basic configurations tree = ET.parse(file) root = tree.getroot() self.__mode = root.get("mode") self.__input = root.get("input") self._output = root.get("output") self._batch = True self.__debugFile = root.find("debug") if not self.__debugFile is None: self.__debugFile = self.__debugFile.get("file") self._skip = root.find("skip") if not self._skip is None: self._skip = long(self._skip.get("number")) test = root.find("test") #First sub-element is the 'build' element, specifying data. self._build = test[0] #produce segmentation start = 1 self._segments = [] for segmentDeclarations in test[start:]: self._makeSegments(segmentDeclarations) self._baseDict = {} if self._segments: for segment in self._segments: self._baseDict[CommonProducer.tupelize(segment)] = None else: self._baseDict[()] = None #remember the attributes of the test distribution self._attrs = {} for key in test.keys(): self._attrs[str(key)] = str(test.get(key)) #Validation methodology. validation = root.find("validation") if (validation is not None): validmethod = validation.get('method') validthreshold = validation.get('threshold') self.testValidation = CommonProducer.testValidatingFunctions( validmethod, validthreshold) else: self.testValidation = CommonProducer.testValidatingFunctions(None)
def inputConfigs(self, file): if self._timer: self._timer.output("Inputting configurations") #input basic configurations tree = ET.parse(file) root = tree.getroot() self.__mode = root.get("mode") self.__input = root.get("input") self._output = root.get("output") self._batch = True self.__debugFile = root.find("debug") if not self.__debugFile is None: self.__debugFile = self.__debugFile.get("file") self._skip = root.find("skip") if not self._skip is None: self._skip = long(self._skip.get("number")) test = root.find("test") #First sub-element is the 'build' element, specifying data. self._build = test[0] #produce segmentation start = 1 self._segments = [] for segmentDeclarations in test[start:]: self._makeSegments(segmentDeclarations) self._baseDict = {} if self._segments: for segment in self._segments: self._baseDict[CommonProducer.tupelize(segment)] = None else: self._baseDict[()] = None #remember the attributes of the test distribution self._attrs = {} for key in test.keys(): self._attrs[str(key)] = str(test.get(key)) #Validation methodology. validation = root.find("validation") if (validation is not None): validmethod = validation.get('method') validthreshold = validation.get('threshold') self.testValidation = CommonProducer.testValidatingFunctions(validmethod,validthreshold) else: self.testValidation = CommonProducer.testValidatingFunctions(None)
def __init__(self, filename):
    """Parse the XML document and keep its root element.

    filename -- handed unchanged to ET.parse.
    """
    self.__root = ET.parse(filename).getroot()
# Attribute name constant; defined before the star-imports below, so
# their order relative to this line is significant.
testStatisticAttrib = 'testStatistic'
from augustus.kernel.unitable import *
from augustus.external.etree import ElementTree as ET
import sys
import os
import os.path
import datetime
import logging
from math import *
# Script entry point.  sys.argv[1] is the consumer configuration file.
if __name__ == "__main__":
    # Logger writing to standard output.
    s = logging.StreamHandler(sys.stdout)
    log = logging.Logger('root')
    log.addHandler(s)
    consumer_config = ET.parse(sys.argv[1])
    config_root = consumer_config.getroot()
    context = config_root.getchildren()
    # Walk the direct children of the configuration root looking for
    #   <inputModel><fromFile name=.../>            -> model
    #   <output><report><toFile name=.../>          -> outputScoresFile
    # When several elements match, the last one wins.
    for config_element in context:
        if config_element.tag == 'inputModel':
            for _m in config_element.getchildren():
                if (_m.tag == 'fromFile'):
                    model = _m.attrib['name']
        if config_element.tag == 'output':
            for _m in config_element.getchildren():
                if (_m.tag == 'report'):
                    for _r in _m.getchildren():
                        if (_r.tag == 'toFile'):
                            outputScoresFile = _r.attrib['name']
    # NOTE(review): outputScoresFile is unbound here (NameError) when the
    # configuration has no <output>/<report>/<toFile> element.
    consumer_output = ET.parse(outputScoresFile)
    # process pmml for expectations
from augustus.kernel.unitable import *
from augustus.external.etree import ElementTree as ET
import sys
import os
import os.path
import datetime
import logging
from math import *
# Script entry point.  sys.argv[1] is the consumer configuration file.
if __name__ == "__main__":
    # Logger writing to standard output.
    s = logging.StreamHandler(sys.stdout)
    log = logging.Logger('root')
    log.addHandler(s)
    #Determine files which were used for scoring.
    consumer_config = ET.parse(sys.argv[1])
    config_root = consumer_config.getroot()
    context = config_root.getchildren()
    # Walk the direct children of the configuration root looking for
    #   <inputModel><fromFile name=.../>            -> model
    #   <output><report><toFile name=.../>          -> outputScoresFile
    # When several elements match, the last one wins.
    for config_element in context:
        if config_element.tag == 'inputModel':
            for _m in config_element.getchildren():
                if (_m.tag == 'fromFile'):
                    model = _m.attrib['name']
        if config_element.tag == 'output':
            for _m in config_element.getchildren():
                if (_m.tag == 'report'):
                    for _r in _m.getchildren():
                        if (_r.tag == 'toFile'):
                            outputScoresFile = _r.attrib['name']
    # NOTE(review): outputScoresFile is unbound here (NameError) when the
    # configuration has no <output>/<report>/<toFile> element.
    consumer_output = ET.parse(outputScoresFile)
def inputConfigs(self, file):
    """Load the clustering producer settings from an XML configuration file.

    TODO: Very much refactor this.  (Mostly done.)  Only handle the data
    and pmml input here and handle the model specific stuff later in
    makeTests or whatever I rename that to.

    The root attributes mode/input/output and the optional <batch> child
    are read first.  The first child of the root must be a <clustering>
    element, from which are read:

      seed         -- one of "explicit", "randomPoints", "random"
      numberSeed   -- an integer, or "random" to draw one from
                      os.urandom()
      <haltConditions> with any of <convergence/>,
                      <smallStep epsilon=.../>, <maxIterations max=.../>

    Each <segmentation> child is passed to self._makeSegments.

    file -- handed unchanged to ET.parse.

    Raises ValueError for an unrecognised model element, an invalid
    seed or numberSeed, missing haltConditions, an unknown halt
    condition, a missing or malformed epsilon/max value, or when
    neither convergence nor maxIterations is given.
    """
    if self.__timer:
        self.__timer.output("Inputting configurations")

    # Generic configuration taken from the document root.
    root = ET.parse(file).getroot()
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    self.__batch = root.find("batch") is not None

    # Model specific configuration.
    model = root.getchildren()[0]
    if model.tag != "clustering":
        raise ValueError("Unable to determine what type of model you want produced.  Element %s is not recognized." % (model.tag))

    self._data = model.find('data')
    segmentations = model.findall('segmentation')

    self.seed = model.get('seed')
    if self.seed not in ("explicit", "randomPoints", "random"):
        raise ValueError('The seed must be one of ["explicit", "randomPoints", "random"], not "%s".' % self.seed)

    numberSeed = model.get('numberSeed')
    try:
        self.numberSeed = int(numberSeed)
    except (TypeError, ValueError):
        if numberSeed != "random":
            raise ValueError("The random numberSeed must be specified as an integer or 'random' for a seed from os.urandom().")
        # Assemble 10 random bytes into one integer, least significant
        # byte first.  bytearray yields ints on every Python version.
        randomBytes = bytearray(os.urandom(10))
        self.numberSeed = sum(byte * 2 ** (8 * position)
                              for position, byte in enumerate(randomBytes))

    haltConditions = model.find('haltConditions')
    if haltConditions is None:
        raise ValueError("The haltConditions must be specified.")

    self.halt_convergence = False
    self.halt_smallStep = None
    self.halt_maxIterations = None
    for condition in haltConditions:
        if condition.tag == 'convergence':
            self.halt_convergence = True
        elif condition.tag == 'smallStep':
            # TypeError covers a missing attribute (float(None)).
            try:
                self.halt_smallStep = float(condition.get('epsilon'))
            except (TypeError, ValueError):
                raise ValueError("The minimum step size in smallStep must be specified in 'epsilon' as a floating-point number.")
        elif condition.tag == 'maxIterations':
            # TypeError covers a missing attribute (int(None)).
            try:
                self.halt_maxIterations = int(condition.get('max'))
            except (TypeError, ValueError):
                raise ValueError("The maxIterations must be specified in 'max' as an integer.")
        else:
            raise ValueError('Unrecognized haltCondition "%s".' % condition.tag)

    # NOTE(review): smallStep alone does not satisfy this check --
    # confirm that is intended.
    if not self.halt_convergence and self.halt_maxIterations is None:
        raise ValueError("At least one haltCondition must be specified.")

    self._modelType = pmmlClusteringModel

    # Produce segmentation.
    self._segments = []
    for declaration in segmentations:
        self._makeSegments(declaration)

    # Prepare for statistics gathering: one slot per segment plus the
    # empty-tuple slot.
    self._baseDict = {(): None}
    for segment in self._segments or ():
        self._baseDict[Producer.tupelize(segment)] = None

    # Remember the attributes of the model; they will be included in the PMML.
    self._attrs = {}
    for key in model.keys():
        self._attrs[str(key)] = str(model.get(key))