Esempi in Python per ElementTree.parse, esempi in Python per augustus.external.etree.ElementTree.parse

Esempio n. 1

0

Mostra file

File: parseDeployment.py Progetto: Huskyeder/augustus

def getInstructions(configFile):
  """Collect the processing instructions listed in a deployment config.

  configFile is anything ET.parse accepts.  The <Processing> child of the
  root is inspected:

    * the text of each <Instruction> under <Preprocess> / <Postprocess>
      is returned under 'preprocessing' / 'postprocessing' (empty lists
      when the section or its instructions are missing);
    * 'consumer' is set to True when a <Consumer> child exists, or when
      there is no <Processing> element at all; otherwise the key is absent.
  """
  result = {'preprocessing': [], 'postprocessing': []}
  processing = ET.parse(configFile).getroot().find('Processing')

  if processing is None:
    # Nothing configured: only the consumer flag is reported.
    result['consumer'] = True
    return result

  if processing.find('Consumer') is not None:
    result['consumer'] = True

  for key, tag in (('postprocessing', 'Postprocess'),
                   ('preprocessing', 'Preprocess')):
    section = processing.find(tag)
    if section is None:
      continue
    texts = [step.text for step in section.findall('Instruction')]
    if texts:
      result[key] = texts

  return result

Esempio n. 2

0

Mostra file

File: parseDeployment.py Progetto: soedjais/augustus

def getInstructions(configFile):
    """Return the pre- and post-processing instructions of a deployment config.

    The XML in ``configFile`` (anything ``ET.parse`` accepts) is searched for
    a ``Processing`` child of the root.  The result always has the keys
    ``'preprocessing'`` and ``'postprocessing'``, each a list with the text
    of the ``Instruction`` children of ``Preprocess`` / ``Postprocess``.
    ``'consumer'`` is added (as True) when ``Processing`` is missing or when
    it contains a ``Consumer`` element.
    """
    def _texts(parent):
        # Text of every <Instruction> below parent; [] when parent is absent.
        if parent is None:
            return []
        return [node.text for node in parent.findall('Instruction')]

    processing = ET.parse(configFile).getroot().find('Processing')
    if processing is None:
        return {'preprocessing': [], 'postprocessing': [], 'consumer': True}

    instructions = {
        'preprocessing': _texts(processing.find('Preprocess')),
        'postprocessing': _texts(processing.find('Postprocess')),
    }
    if processing.find('Consumer') is not None:
        instructions['consumer'] = True
    return instructions

Esempio n. 3

0

Mostra file

    def inputConfigs(self, file):
        """TODO: Very much refactor this."""
        if self.__timer:
            self.__timer.output("Inputting configurations")
        #input generic configurations
        tree = ET.parse(file)
        root = tree.getroot()
        self.__mode = root.get("mode")
        self.__input = root.get("input")
        self.__output = root.get("output")
        self.__batch = root.find("batch")
        if self.__batch is not None:
            self.__batch = True
        else:
            self.__batch = False
        self.__debugFile = root.find("debug")
        if self.__debugFile is not None:
            self.__debugFile = self.__debugFile.get("file")
        self.__skip = root.find("skip")
        if self.__skip is not None:
            self.__skip = long(self.__skip.get("number"))
        test = root.find("test")

        #input baseline and alternate distributions
        self._build = test[0]
        validation = root.find("validation")
        if validation is not None:
            #Decide on method of validating tests.
            validmethod = validation.get('method')
            validthreshold = validation.get('threshold')
            self.testValidation = Producer.testValidatingFunctions(
                validmethod, validthreshold)
        else:
            self.testValidation = Producer.testValidatingFunctions(None)
        start = 1
        if len(test) > 1:
            next = test[1]
            start = 2
            #TODO: Naive Bayes shouldn't have alternate.
            if next.tag != "alternate":
                start = 1
            else:
                self._alternate = next

        #produce segmentation
        self._segments = []
        for segmentDeclarations in test[start:]:
            self._makeSegments(segmentDeclarations)

        #prepare for statistics gathering
        self._baseDict = {(): None}
        if self._segments:
            for segment in self._segments:
                self._baseDict[Producer.tupelize(segment)] = None

        #remember the attributes of the test distribution
        self._attrs = {}
        for key in test.keys():
            self._attrs[str(key)] = str(test.get(key))

Esempio n. 4

0

Mostra file

File: Producer.py Progetto: Huskyeder/augustus

 def inputConfigs(self, file):
   """TODO: Very much refactor this."""
   if self.__timer:
     self.__timer.output("Inputting configurations")
   #input generic configurations
   tree = ET.parse(file)
   root = tree.getroot()
   self.__mode = root.get("mode")
   self.__input = root.get("input")
   self.__output = root.get("output")
   self.__batch = root.find("batch")
   if self.__batch is not None:
     self.__batch = True
   else:
     self.__batch = False
   self.__debugFile = root.find("debug")
   if self.__debugFile is not None:
     self.__debugFile = self.__debugFile.get("file")
   self.__skip = root.find("skip")
   if self.__skip is not None:
     self.__skip = long(self.__skip.get("number"))
   test = root.find("test")
   
   #input baseline and alternate distributions
   self._build = test[0]
   validation = root.find("validation")
   if validation is not None:
     #Decide on method of validating tests.
     validmethod = validation.get('method')
     validthreshold = validation.get('threshold')
     self.testValidation = Producer.testValidatingFunctions(validmethod,validthreshold)
   else:
     self.testValidation = Producer.testValidatingFunctions(None)
   start = 1
   if len(test) > 1:
     next = test[1]
     start = 2
     #TODO: Naive Bayes shouldn't have alternate.
     if next.tag != "alternate":
       start = 1
     else:
       self._alternate = next
   
   #produce segmentation
   self._segments = []
   for segmentDeclarations in test[start:]:
     self._makeSegments(segmentDeclarations)
   
   #prepare for statistics gathering
   self._baseDict = {():None}
   if self._segments:
     for segment in self._segments:
       self._baseDict[Producer.tupelize(segment)] = None
   
   #remember the attributes of the test distribution
   self._attrs = {}
   for key in test.keys():
     self._attrs[str(key)] = str(test.get(key))

Esempio n. 5

0

Mostra file

File: Producer.py Progetto: soedjais/augustus

    def inputConfigs(self, file):
        """Load generic and rule-set model settings from an XML configuration.

        The root element supplies ``mode``, ``input``, ``output`` and the
        optional ``batch`` flag.  The first child of the root must be a
        ``rules`` element; its ``data`` child, ``segmentation`` children and
        the ``nboxes``, ``peel_alpha``, ``paste_alpha`` and ``weight_field``
        attributes are read from it.  ``peel_alpha`` and ``paste_alpha``
        default to 0.02 when the attribute is absent.

        Raises ValueError when the first child is not a ``rules`` element.
        """
        if self.__timer:
            self.__timer.output("Inputting configurations")

        # Generic configuration.
        root = ET.parse(file).getroot()
        self.__mode = root.get("mode")
        self.__input = root.get("input")
        self.__output = root.get("output")
        self.__batch = root.find("batch") is not None

        # Model specific configuration: only rule-set models are handled.
        model = root.getchildren()[0]
        if model.tag != "rules":
            raise ValueError(
                "Unable to determine what type of model you want produced. Element %s is not recognized."
                % (model.tag))

        self._data = model.find('data')
        segmentations = model.findall('segmentation')
        self.__nboxes = int(model.get('nboxes'))

        # A missing alpha attribute falls back to 0.02.
        peelAlpha = model.get('peel_alpha')
        if peelAlpha is None:
            self.__peel_alpha = 0.02
        else:
            self.__peel_alpha = float(peelAlpha)

        pasteAlpha = model.get('paste_alpha')
        if pasteAlpha is None:
            self.__paste_alpha = 0.02
        else:
            self.__paste_alpha = float(pasteAlpha)

        self.__weight_field = model.get('weight_field')
        self._modelType = pmmlRuleSetModel

        # Produce segmentation.
        self._segments = []
        for declaration in segmentations:
            self._makeSegments(declaration)

        # Prepare for statistics gathering; () is the unsegmented slot.
        self._baseDict = {(): None}
        for segment in (self._segments or []):
            self._baseDict[Producer.tupelize(segment)] = None

        # The model attributes are remembered as strings; they will be
        # included in the PMML.
        self._attrs = dict(
            (str(key), str(model.get(key))) for key in model.keys())

Esempio n. 6

0

Mostra file

File: Producer.py Progetto: Huskyeder/augustus

 def inputConfigs(self, file):
   """Read the generic and rule-set specific configuration from ``file``.

   Generic values (``mode``, ``input``, ``output``, ``batch``) come from
   the root element.  The first child of the root has to be ``rules``;
   otherwise ValueError is raised.  From that element the ``data`` child,
   the ``segmentation`` children and the ``nboxes``, ``peel_alpha``,
   ``paste_alpha`` and ``weight_field`` attributes are read.  Both alpha
   values default to 0.02 when their attribute is not given.
   """
   if self.__timer:
     self.__timer.output("Inputting configurations")

   # Input generic configurations.
   root = ET.parse(file).getroot()
   self.__mode = root.get("mode")
   self.__input = root.get("input")
   self.__output = root.get("output")
   # <batch> is a presence flag.
   self.__batch = (root.find("batch") is not None)

   # Model specific stuff.
   model = root.getchildren()[0]
   isRules = (model.tag == "rules")
   if not isRules:
     raise ValueError("Unable to determine what type of model you want produced. Element %s is not recognized." % (model.tag))

   # Ruleset model config values from the xml.
   self._data = model.find('data')
   segmentations = model.findall('segmentation')
   self.__nboxes = int(model.get('nboxes'))

   rawPeel = model.get('peel_alpha')
   if rawPeel is not None:
     self.__peel_alpha = float(rawPeel)
   else:
     # Attribute absent: use the default.
     self.__peel_alpha = 0.02

   rawPaste = model.get('paste_alpha')
   if rawPaste is not None:
     self.__paste_alpha = float(rawPaste)
   else:
     # Attribute absent: use the default.
     self.__paste_alpha = 0.02

   self.__weight_field = model.get('weight_field')
   self._modelType = pmmlRuleSetModel

   # Produce segmentation.
   self._segments = []
   for declaration in segmentations:
     self._makeSegments(declaration)

   # Prepare for statistics gathering.
   self._baseDict = {():None}
   if self._segments:
     for segment in self._segments:
       self._baseDict[Producer.tupelize(segment)] = None

   # Remember the attributes of the model, they will be included in the PMML.
   self._attrs = {}
   for name in model.keys():
     self._attrs[str(name)] = str(model.get(name))
Esempio n. 7

0

Mostra file

File: parseDeployment.py Progetto: soedjais/augustus

def parse(configFile=None):
    """Resolve the paths described by a deployment configuration file.

    The directory containing ``configFile`` is the "consumer" directory;
    the producer home, post-processing, reports, PMML and scores paths are
    joined onto it.  The model data path is joined onto the producer home.

    Args:
        configFile: path of the XML deployment configuration.

    Returns:
        The tuple ``(consumer, producer, modelData, postprocessing,
        reports, temparea, scoresdir, pmmlfile)``.  ``modelData`` is None
        when the producer ``Config`` entry names an existing file, because
        the ``ModelData`` entry is not consulted in that case.
        When neither a usable producer config nor a ``ModelData`` element
        is present, the string
        ``'FAIL : Neither the producer config nor the model data are
        specified!'`` is returned instead of a tuple.

    Side effects:
        Prints a message and calls ``sys.exit(1)`` when the output report
        has no ``toFile`` element with a ``name`` attribute.
    """
    consumer = os.path.dirname(os.path.abspath(configFile))
    config = ET.parse(configFile)
    root = config.getroot()
    structure = root.find('DirectoryStructure')
    producerstructure = structure.find('Producer')
    producer = os.path.join(consumer, producerstructure.find('Home').text)

    # find() returns None for a missing element, so reading .text raises
    # AttributeError; that is the only failure handled here.
    try:
        producerConfig = producerstructure.find('Config').text
    except AttributeError:
        producerConfig = None
    # A config entry that does not name an existing file is ignored.
    if producerConfig is not None and not os.path.isfile(producerConfig):
        producerConfig = None

    # Bound up front: previously this name was undefined at the return
    # statement whenever a producer config file existed.
    modelData = None
    if producerConfig is None:
        try:
            modelData = producerstructure.find('ModelData').text
        except AttributeError:
            return 'FAIL : Neither the producer config nor the model data are specified!'
        # assume model data is relative to producer home
        modelData = os.path.join(producer, modelData)

    postprocessing = os.path.join(consumer,
                                  structure.find('Postprocessing').text)
    reports = os.path.join(consumer, structure.find('Reports').text)

    # In the particular case of temp, if it doesn't work as
    # relative directory, try absolute case.
    temp = structure.find('Temp').text
    if not os.path.exists(os.path.join(consumer, temp)):
        temparea = temp
    else:
        temparea = os.path.join(consumer, temp)

    inputModel = root.find('inputModel')
    pmmlfile = os.path.join(consumer,
                            inputModel.find('fromFile').attrib['name'])
    output = root.find('output')
    report = output.find('report')
    # Missing <report>/<toFile> gives AttributeError, missing name KeyError.
    try:
        outputfile = report.find('toFile').attrib['name']
    except (AttributeError, KeyError):
        # Parenthesised form prints the same text on Python 2 and 3.
        print('No output file in use!')
        sys.exit(1)

    scoresdir = os.path.dirname(os.path.join(consumer, outputfile))
    return consumer, producer, modelData, postprocessing, reports, temparea, scoresdir, pmmlfile

Esempio n. 8

0

Mostra file

File: Producer.py Progetto: Huskyeder/augustus

 def inputConfigs(self, file):
   """Read the generic and model specific configuration from ``file``.

   Generic values (``mode``, ``input``, ``output``, the ``batch`` flag and
   the optional ``skip`` count) come from the root element.  The first
   child of the root selects the model:

     * ``tree``  -- the ``data`` child, the ``segmentation`` children and
       the ``maxdepth`` attribute are read and the model type is set;
     * ``test``  -- baseline model; nothing model specific is read yet and
       no segmentation is declared;
     * anything else raises ValueError.

   The attributes of the model element are copied into ``_attrs`` as
   strings.

   file -- anything accepted by ``ET.parse``.
   """
   if self.__timer:
     self.__timer.output("Inputting configurations")
   #input generic configurations
   tree = ET.parse(file)
   root = tree.getroot()
   self.__mode = root.get("mode")
   self.__input = root.get("input")
   self.__output = root.get("output")
   # <batch> is a presence flag.
   self.__batch = root.find("batch") is not None
   self.__skip = root.find("skip")
   if self.__skip is not None:
     self.__skip = long(self.__skip.get("number"))

   #Model specific stuff
   # Index the element directly: getchildren() is deprecated and no longer
   # exists in current ElementTree versions.
   model = root[0]
   # Default so the segmentation loop below is valid for models that
   # declare no segmentation (previously the name was unbound there).
   segmentations = []
   if model.tag == "tree":
     #Do tree model stuff
     self._data = model.find('data')
     segmentations = model.findall('segmentation')
     self.__maxdepth = int(model.get('maxdepth'))
     self._modelType = pmmlTreeModel
   elif model.tag == "test":
     #Baseline model: no model specific input is handled yet.
     pass
   else:
     raise ValueError("Unable to determine what type of model you want produced. Element %s is not recognized." % (model.tag))

   #produce segmentation
   self._segments = []
   for segmentDeclarations in segmentations:
     self._makeSegments(segmentDeclarations)

   #prepare for statistics gathering
   self._baseDict = {():None}
   if self._segments:
     for segment in self._segments:
       self._baseDict[Producer.tupelize(segment)] = [None]

   #remember the attributes of the model, they will be included in the PMML
   self._attrs = {}
   for key in model.keys():
     self._attrs[str(key)] = str(model.get(key))

Esempio n. 9

0

Mostra file

File: parseDeployment.py Progetto: Huskyeder/augustus

def parse(configFile=None):
  """Resolve the paths described by a deployment configuration file.

  The directory containing configFile is the "consumer" directory; the
  producer home, post-processing, reports, PMML and scores paths are
  joined onto it.  The model data path is joined onto the producer home.

  Returns the tuple (consumer, producer, modelData, postprocessing,
  reports, temparea, scoresdir, pmmlfile).  modelData is None when the
  producer Config entry names an existing file, because the ModelData
  entry is not consulted in that case.  When neither a usable producer
  config nor a ModelData element is present, the string
  'FAIL : Neither the producer config nor the model data are specified!'
  is returned instead of a tuple.

  Prints a message and calls sys.exit(1) when the output report has no
  toFile element with a name attribute.
  """
  consumer = os.path.dirname(os.path.abspath(configFile))
  config = ET.parse(configFile)
  root = config.getroot()
  structure = root.find('DirectoryStructure')
  producerstructure = structure.find('Producer')
  producer = os.path.join(consumer,producerstructure.find('Home').text)

  # find() returns None for a missing element, so reading .text raises
  # AttributeError; that is the only failure handled here.
  try:
    producerConfig = producerstructure.find('Config').text
  except AttributeError:
    producerConfig = None
  # A config entry that does not name an existing file is ignored.
  if producerConfig is not None and not os.path.isfile(producerConfig):
    producerConfig = None

  # Bound up front: previously this name was undefined at the return
  # statement whenever a producer config file existed.
  modelData = None
  if producerConfig is None:
    try:
      modelData = producerstructure.find('ModelData').text
    except AttributeError:
      return 'FAIL : Neither the producer config nor the model data are specified!'
    # assume model data is relative to producer home
    modelData = os.path.join(producer,modelData)

  postprocessing = os.path.join(consumer,structure.find('Postprocessing').text)
  reports = os.path.join(consumer,structure.find('Reports').text)

  # In the particular case of temp, if it doesn't work as
  # relative directory, try absolute case.
  temp = structure.find('Temp').text
  if not os.path.exists(os.path.join(consumer,temp)):
    temparea = temp
  else:
    temparea = os.path.join(consumer,temp)

  inputModel = root.find('inputModel')
  pmmlfile = os.path.join(consumer,inputModel.find('fromFile').attrib['name'])
  output = root.find('output')
  report = output.find('report')
  # Missing <report>/<toFile> gives AttributeError, missing name KeyError.
  try:
    outputfile = report.find('toFile').attrib['name']
  except (AttributeError, KeyError):
    # Parenthesised form prints the same text on Python 2 and 3.
    print('No output file in use!')
    sys.exit(1)

  scoresdir = os.path.dirname(os.path.join(consumer,outputfile))
  return consumer, producer, modelData, postprocessing, reports, temparea, scoresdir, pmmlfile

Esempio n. 10

0

Mostra file

File: Producer.py Progetto: soedjais/augustus

    def inputConfigs(self, file):
        """Read the generic and model specific configuration from ``file``.

        Generic values (``mode``, ``input``, ``output``, the ``batch`` flag
        and the optional ``skip`` count) come from the root element.  The
        first child of the root selects the model:

        * ``tree`` -- the ``data`` child, the ``segmentation`` children and
          the ``maxdepth`` attribute are read and the model type is set;
        * ``test`` -- baseline model; nothing model specific is read yet
          and no segmentation is declared;
        * anything else raises ValueError.

        The attributes of the model element are copied into ``_attrs`` as
        strings.

        file -- anything accepted by ``ET.parse``.
        """
        if self.__timer:
            self.__timer.output("Inputting configurations")
        #input generic configurations
        tree = ET.parse(file)
        root = tree.getroot()
        self.__mode = root.get("mode")
        self.__input = root.get("input")
        self.__output = root.get("output")
        # <batch> is a presence flag.
        self.__batch = root.find("batch") is not None
        self.__skip = root.find("skip")
        if self.__skip is not None:
            self.__skip = long(self.__skip.get("number"))

        #Model specific stuff
        # Index the element directly: getchildren() is deprecated and no
        # longer exists in current ElementTree versions.
        model = root[0]
        # Default so the segmentation loop below is valid for models that
        # declare no segmentation (previously the name was unbound there).
        segmentations = []
        if model.tag == "tree":
            #Do tree model stuff
            self._data = model.find('data')
            segmentations = model.findall('segmentation')
            self.__maxdepth = int(model.get('maxdepth'))
            self._modelType = pmmlTreeModel
        elif model.tag == "test":
            #Baseline model: no model specific input is handled yet.
            pass
        else:
            raise ValueError(
                "Unable to determine what type of model you want produced. Element %s is not recognized."
                % (model.tag))

        #produce segmentation
        self._segments = []
        for segmentDeclarations in segmentations:
            self._makeSegments(segmentDeclarations)

        #prepare for statistics gathering
        self._baseDict = {(): None}
        if self._segments:
            for segment in self._segments:
                self._baseDict[Producer.tupelize(segment)] = [None]

        #remember the attributes of the model, they will be included in the PMML
        self._attrs = {}
        for key in model.keys():
            self._attrs[str(key)] = str(model.get(key))

Esempio n. 11

0

Mostra file

    def inputConfigs(self, file):
        """Load the basic producer settings from an XML configuration file.

        Reads ``mode``, ``input`` and ``output`` from the root element and
        the optional ``debug``, ``skip`` and ``validation`` children; batch
        mode is always switched on.  The first sub-element of the ``test``
        child is stored as the build element and every following
        sub-element is passed to ``_makeSegments``.  The attributes of
        ``test`` are copied into ``_attrs`` as strings.

        file -- anything accepted by ``ET.parse``.
        """
        if self._timer:
            self._timer.output("Inputting configurations")

        # Basic configuration.
        root = ET.parse(file).getroot()
        self.__mode = root.get("mode")
        self.__input = root.get("input")
        self._output = root.get("output")
        self._batch = True

        debugElement = root.find("debug")
        if debugElement is None:
            self.__debugFile = None
        else:
            self.__debugFile = debugElement.get("file")

        self._skip = root.find("skip")
        if self._skip is not None:
            self._skip = long(self._skip.get("number"))

        test = root.find("test")

        # First sub-element is the 'build' element, specifying data.
        self._build = test[0]

        # Everything after the build element declares segments.
        self._segments = []
        for declaration in test[1:]:
            self._makeSegments(declaration)

        # One slot per segment, or a single () slot when there are none.
        self._baseDict = {}
        if not self._segments:
            self._baseDict[()] = None
        else:
            for segment in self._segments:
                self._baseDict[CommonProducer.tupelize(segment)] = None

        # Remember the attributes of the test distribution.
        self._attrs = dict(
            (str(key), str(test.get(key))) for key in test.keys())

        # Validation methodology.
        validation = root.find("validation")
        if validation is None:
            self.testValidation = CommonProducer.testValidatingFunctions(None)
        else:
            method = validation.get('method')
            threshold = validation.get('threshold')
            self.testValidation = CommonProducer.testValidatingFunctions(
                method, threshold)

Esempio n. 12

0

Mostra file

File: CommonProducer.py Progetto: Huskyeder/augustus

  def inputConfigs(self, file):
    """Read the basic producer configuration from the XML in ``file``.

    ``mode``, ``input`` and ``output`` are taken from the root element;
    ``debug``, ``skip`` and ``validation`` are optional children.  Batch
    mode is always enabled.  Inside ``test`` the first sub-element is the
    build element and the remaining ones are segment declarations handed
    to ``_makeSegments``.
    """
    if self._timer:
      self._timer.output("Inputting configurations")

    # Input basic configurations.
    root = ET.parse(file).getroot()
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self._output = root.get("output")
    self._batch = True

    debugNode = root.find("debug")
    if debugNode is not None:
      debugNode = debugNode.get("file")
    self.__debugFile = debugNode

    self._skip = root.find("skip")
    if self._skip is not None:
      self._skip = long(self._skip.get("number"))

    test = root.find("test")

    # First sub-element is the 'build' element, specifying data.
    self._build = test[0]

    # Produce segmentation from the sub-elements after the build element.
    self._segments = []
    for declaration in test[1:]:
      self._makeSegments(declaration)

    # The () key is only used when no segment was produced.
    self._baseDict = {}
    if self._segments:
      for segment in self._segments:
        self._baseDict[CommonProducer.tupelize(segment)] = None
    else:
      self._baseDict[()] = None

    # Remember the attributes of the test distribution.
    self._attrs = {}
    for name in test.keys():
      self._attrs[str(name)] = str(test.get(name))

    # Validation methodology.
    validation = root.find("validation")
    if validation is None:
      self.testValidation = CommonProducer.testValidatingFunctions(None)
    else:
      validmethod = validation.get('method')
      validthreshold = validation.get('threshold')
      self.testValidation = CommonProducer.testValidatingFunctions(validmethod,validthreshold)

Esempio n. 13

0

Mostra file

 def __init__(self, filename):
     """Parse the XML in ``filename`` and keep its root element."""
     self.__root = ET.parse(filename).getroot()

Esempio n. 14

0

Mostra file

File: CommonProducer.py Progetto: Huskyeder/augustus

 def __init__(self, filename):
   """Parse the XML document ``filename`` and store its root element."""
   document = ET.parse(filename)
   self.__root = document.getroot()

Esempio n. 15

0

Mostra file

File: postprocess.py Progetto: soedjais/augustus

testStatisticAttrib = 'testStatistic'

from augustus.kernel.unitable import *
from augustus.external.etree import ElementTree as ET
import sys
import os
import os.path
import datetime
import logging
from math import *

# Script entry point: read the consumer configuration named by the first
# command-line argument and locate the model file and the scores output.
if __name__ == "__main__":
    # Logger that writes to standard output.
    s = logging.StreamHandler(sys.stdout)
    log = logging.Logger('root')
    log.addHandler(s)
    consumer_config = ET.parse(sys.argv[1])
    config_root = consumer_config.getroot()
    context = config_root.getchildren()
    # Walk the direct children of the root looking for the two sections
    # of interest.
    for config_element in context:
        if config_element.tag == 'inputModel':
            # <inputModel><fromFile name="..."/>: name of the model file.
            for _m in config_element.getchildren():
                if (_m.tag == 'fromFile'):
                    model = _m.attrib['name']
        if config_element.tag == 'output':
            # <output><report><toFile name="..."/>: the scores file, which
            # is parsed as XML right away.
            for _m in config_element.getchildren():
                if (_m.tag == 'report'):
                    for _r in _m.getchildren():
                        if (_r.tag == 'toFile'):
                            outputScoresFile = _r.attrib['name']
                            consumer_output = ET.parse(outputScoresFile)
    # process pmml for expectations

Esempio n. 16

0

Mostra file

File: postprocess.py Progetto: Huskyeder/augustus

from augustus.kernel.unitable import *
from augustus.external.etree import ElementTree as ET
import sys
import os
import os.path
import datetime
import logging
from math import *

# Script entry point: read the consumer configuration named by the first
# command-line argument and locate the model file and the scores output.
if __name__ == "__main__":
  # Logger that writes to standard output.
  s = logging.StreamHandler(sys.stdout)
  log = logging.Logger('root')
  log.addHandler(s)
  #Determine files which were used for scoring.
  consumer_config = ET.parse(sys.argv[1])
  config_root = consumer_config.getroot()
  context = config_root.getchildren()
  # Walk the direct children of the root looking for the two sections of
  # interest.
  for config_element in context:
    if config_element.tag == 'inputModel':
      # <inputModel><fromFile name="..."/>: name of the model file.
      for _m in config_element.getchildren():
        if (_m.tag == 'fromFile'):
          model = _m.attrib['name']
    if config_element.tag == 'output':
      # <output><report><toFile name="..."/>: the scores file, which is
      # parsed as XML right away.
      for _m in config_element.getchildren():
        if (_m.tag == 'report'):
          for _r in _m.getchildren():
            if (_r.tag == 'toFile'):
              outputScoresFile = _r.attrib['name']
              consumer_output = ET.parse(outputScoresFile)

Esempio n. 17

0

Mostra file

  def inputConfigs(self, file):
    """Read the generic and clustering-model configuration from ``file``.

    Generic values (``mode``, ``input``, ``output`` and the ``batch``
    flag) come from the root element.  The first child of the root must
    be a ``clustering`` element, from which are read:

      * the ``data`` child and the ``segmentation`` children;
      * ``seed``: one of "explicit", "randomPoints", "random";
      * ``numberSeed``: an integer, or "random" to draw one from
        os.urandom();
      * ``haltConditions``: children ``convergence``, ``smallStep``
        (float attribute ``epsilon``) and ``maxIterations`` (integer
        attribute ``max``).

    The attributes of the model element are copied into ``_attrs`` as
    strings.

    Raises ValueError when the first child is not ``clustering``, when
    ``seed`` or ``numberSeed`` is invalid, when ``haltConditions`` is
    missing or contains an unrecognized element, when ``epsilon`` or
    ``max`` is missing or malformed, or when neither ``convergence`` nor
    ``maxIterations`` is given.

    file -- anything accepted by ``ET.parse``.
    """
    if self.__timer:
      self.__timer.output("Inputting configurations")
    #input generic configurations
    tree = ET.parse(file)
    root = tree.getroot()
    self.__mode = root.get("mode")
    self.__input = root.get("input")
    self.__output = root.get("output")
    # <batch> is a presence flag.
    self.__batch = root.find("batch") is not None

    #Model specific stuff
    # Index the element directly: getchildren() is deprecated and no
    # longer exists in current ElementTree versions.
    model = root[0]

    if model.tag == "clustering":
      #Get clustering model config values from the xml
      self._data = model.find('data')
      segmentations = model.findall('segmentation')

      self.seed = model.get('seed')

      if self.seed not in ("explicit", "randomPoints", "random"):
        raise ValueError('The seed must be one of ["explicit", "randomPoints", "random"], not "%s".' % self.seed)

      try:
        self.numberSeed = int(model.get('numberSeed'))
      except (TypeError, ValueError):
        if model.get('numberSeed') == "random":
          def urandom_seed(digits):
            # Combine `digits` random bytes into one integer, least
            # significant byte first.
            return sum([a*b for a, b in zip(map(ord, os.urandom(digits)), [2**(8*i) for i in range(digits)])])
          self.numberSeed = urandom_seed(10)
        else:
          raise ValueError("The random numberSeed must be specified as an integer or 'random' for a seed from os.urandom().")

      haltConditions = model.find('haltConditions')
      if haltConditions is None:
        raise ValueError("The haltConditions must be specified.")

      self.halt_convergence = False
      self.halt_smallStep = None
      self.halt_maxIterations = None

      for i in haltConditions:
        if i.tag == 'convergence':
          self.halt_convergence = True

        elif i.tag == 'smallStep':
          # A missing attribute makes float(None) raise TypeError, a
          # malformed one ValueError; both get the same explanation.
          try:
            self.halt_smallStep = float(i.get('epsilon'))
          except (TypeError, ValueError):
            raise ValueError("The minimum step size in smallStep must be specified in 'epsilon' as a floating-point number.")

        elif i.tag == 'maxIterations':
          # Same as above: cover both the missing and the malformed case.
          try:
            self.halt_maxIterations = int(i.get('max'))
          except (TypeError, ValueError):
            raise ValueError("The maxIterations must be specified in 'max' as an integer.")

        else:
          raise ValueError('Unrecognized haltCondition "%s".' % i.tag)

      # Note: smallStep on its own does not satisfy this check.
      if not self.halt_convergence and self.halt_maxIterations is None:
        raise ValueError("At least one haltCondition must be specified.")

      self._modelType = pmmlClusteringModel
    else:
      raise ValueError("Unable to determine what type of model you want produced. Element %s is not recognized." % (model.tag))

    #produce segmentation
    self._segments = []
    for segmentDeclarations in segmentations:
      self._makeSegments(segmentDeclarations)

    #prepare for statistics gathering
    self._baseDict = {():None}
    if self._segments:
      for segment in self._segments:
        self._baseDict[Producer.tupelize(segment)] = None

    #remember the attributes of the model, they will be included in the PMML
    self._attrs = {}
    for key in model.keys():
      self._attrs[str(key)] = str(model.get(key))