Example #1
0
 def initialize_model(self):
   """Read the configured PMML model and prepare it for scoring.

   The first entry of self.model_input_info selects the model source.
   Only "fromFile" and "fromFifo" are handled; the entry's 'name'
   attribute is passed to a pmmlReader for parsing.  The parsed root is
   stored as self.myPMML and initialized with self.get_data and
   self.needed_cols, and the model used for scoring is stored as
   self.model.  When self.metadata is not None, parsing and
   initialization times plus the values reported by ptools are recorded
   in it.

   Raises ConfigurationError when no model input is configured or when
   the configured source is not one this method can read.
   """
   self.logger.debug("Initializing")
   if not self.model_input_info:
     raise ConfigurationError("inputModel tag missing from configuration XML.")
   input_type = self.model_input_info[0]
   if input_type.name == "fromFile" or input_type.name == "fromFifo":
     model_source = input_type.attr['name']
   elif input_type.name == "fromHTTP":
     # Loading the model over HTTP was never implemented.  Fail with a
     # configuration error here rather than falling through with
     # model_source unbound and hitting a NameError at parse time.
     raise ConfigurationError("Reading the model from HTTP is not supported.")
   else:
     raise ConfigurationError("Unable to determine model input source.")
   self.logger.debug("Create reader for PMML")
   # Create pmmlReader to read in the pmml file.
   model_reader = pmmlReader()
   self.logger.debug("Parse PMML")
   if self.metadata is not None:
     parseStart = datetime.datetime.now()
   model_reader.parse(model_source, self.logger)
   if self.metadata is not None:
     self.logger.debug("Save PMML Parsing Time")
     self.metadata['Time Parsing PMML'] += datetime.datetime.now() - parseStart
     self.logger.debug("Calculate Resources")
     self.logger.debug("Save stacksize")
     self.metadata['Stacksize after Parsing PMML'] = ptools.stacksize()
     self.logger.debug("Save Resident Memory")
     self.metadata['Resident Memory after Parsing PMML'] = ptools.resident()/1e+9
     self.logger.debug("Save Memory after parsing PMML")
     self.metadata['Memory after Parsing PMML'] = ptools.memory()/1e+9
     self.logger.debug("Save User Time after Parsing PMML")
     # The resource module is only imported on non-Windows platforms.
     if platform.system() not in ('Windows', 'Win', 'Microsoft'):
       import resource
       resources = resource.getrusage(resource.RUSAGE_SELF)
       self.metadata['User Time after Parsing PMML'] = resources.ru_utime
   otherStart = datetime.datetime.now()
   self.myPMML = model_reader.root
   self.logger.debug("Initialize Model")
   self.myPMML.initialize(self.get_data, self.needed_cols)
   self.logger.debug("find model")
   # Get the model.
   if os.path.exists('pickledModel'):
     # Experimental: a 'pickledModel' file in the working directory takes
     # the place of the model found in the parsed PMML.
     # NOTE(review): unpickling runs arbitrary code from the file; this is
     # only acceptable if the working directory is trusted.
     pickled = open('pickledModel')
     try:
       self.model = cPickle.load(pickled)
     finally:
       pickled.close()
   else:
     self.model = self.myPMML.getChildrenOfType(pmmlModels)[0]
     # Experimental and deliberately disabled: flip the condition to write
     # the (already initialized) PMML root out as 'pickledModel'.
     if False:
       savedModel = model_reader.root
       self.logger.debug(dir(savedModel))
       pickled = open('pickledModel', 'w')
       try:
         cPickle.dump(savedModel, pickled)
       finally:
         pickled.close()
   if self.metadata is not None:
     self.metadata['Consumer Initialization'] += datetime.datetime.now() - otherStart
     self.metadata['Stacksize after Consumer Initialization'] = ptools.stacksize()
     self.metadata['Resident Memory after Consumer Initialization'] = ptools.resident()/1e+9
     self.metadata['Memory after Consumer Initialization'] = ptools.memory()/1e+9
Example #2
0
def _optional_attr(item, key, default=None):
  """Return item.attr[key] if the attribute is present, otherwise default."""
  if key in item.attr:
    return item.attr[key]
  return default


def _fixed_record_layout(item):
  """Build the ffConfig for a fromFixedRecordFile element from its child fields."""
  names = []
  starts = []
  ends = []
  offset = 0
  for field in item:
    names.append(field.attr['name'])
    width = int(field.attr['length'])
    # Fields are laid out back to back: each starts where the previous ended.
    starts.append(offset)
    offset += width
    ends.append(offset)
  return ffConfig(names, starts, ends, _optional_attr(item, 'cr'))


def _open_data_input(consumer, item, port):
  """Create the data source described by one inputData child element.

  Returns a Reader or an HTTPInterfaceServer, or None when item.name is
  not one of the source elements handled here.
  """
  kind = item.name
  if kind == "fromFile" or kind == "fromFifo":
    # No special treatment needed other than UniTable vs XML.
    is_uni = _optional_attr(item, 'type') == "UniTable"
    return Reader(consumer.score, source = item.attr['name'], logger = consumer.logger, magicheader = False, unitable = is_uni, framing = 'EOF')
  if kind == "fromFixedRecordFile":
    layout = _fixed_record_layout(item)
    return Reader(consumer.score, source = item.attr['name'],
      types = None,
      logger = consumer.logger, magicheader = False, unitable = True, ffConvert = layout)
  if kind == "fromCSVFile":
    # A CSV file needs header/separator/type hints to be read correctly.
    header = _optional_attr(item, 'header')
    sep = _optional_attr(item, 'sep')
    types = _optional_attr(item, 'types')
    return Reader(consumer.score, source = item.attr['name'], logger = consumer.logger, magicheader = False, unitable = True, header = header, sep = sep, types = types, framing = 'EOF')
  if kind == "fromStandardInput":
    sep = _optional_attr(item, 'sep')
    types = _optional_attr(item, 'types')
    is_uni = _optional_attr(item, 'type') == "UniTable"
    framing = _optional_attr(item, 'framing', 'EOF')
    consumer.logger.debug('...Test')
    return Reader(consumer.score, source = "-", logger = consumer.logger, magicheader = False, unitable = is_uni, sep = sep, types = types, framing = framing)
  if kind == "fromHTTP":
    # Gather what is needed to set up the server; a port given by the
    # caller overrides the one in the configuration.
    input_url = item.attr['url']
    if port:
      input_port = int(port)
    else:
      input_port = int(item.attr['port'])
    if _optional_attr(item, 'type') == "UniTable":
      callback = consumer.score_http_uni
    else:
      callback = consumer.score_http_xml
    server = HTTPInterfaceServer(('', input_port), logger = consumer.logger)
    server.register_callback(input_url, callback)
    return server
  return None


def main(config, outfile=None, port=None):
  """Main function for controlling scoring.

  config is converted with str() and used as the source of the
  configuration, which is read through consumer.configure.
  outfile, if given, replaces the output filename taken from the
  configuration.  port, if given, replaces the port of a fromHTTP
  data source.

  Returns the pmmlConsumer when the configuration contains an
  eventBased element (the caller then drives scoring itself); otherwise
  scores the configured input and returns None.

  Raises ConfigurationError when the configuration has no data input
  information or none of its elements yields a data source.
  """
  # Read in a config file with a bunch of options describing where everything is.
  consumer = pmmlConsumer()
  # Do not log through consumer.logger before the configuration is read:
  # the logging handler is set up while reading it, so earlier calls only
  # print 'No handlers could be found for logger "consumer"'.
  config_reader = Reader(consumer.configure, source = str(config), magicheader = False, autoattr = False)
  config_reader.read_once()

  # Overwrite the out file from the config file with the command line option if it was present.
  if outfile:
    consumer.output_filename = outfile

  data_input = None
  run_forever = True
  run_daemon = False
  script_input = False

  # Check to make sure that we don't try to iterate over None.
  if consumer.data_input_info is None:
    raise ConfigurationError("Data input source missing from configuration.")

  for item in consumer.data_input_info:
    if item.name == "readOnce":
      run_forever = False
    elif item.name == "batchScoring":
      consumer.batch_scoring = True
    elif item.name == "daemon":
      run_daemon = True
    elif data_input is not None:
      continue # Only process the first way that we are told to get the data.
    elif item.name == "eventBased":
      script_input = True
      data_input = False # Dummy value to get past the check for None below.
    else:
      data_input = _open_data_input(consumer, item, port)
      if data_input is None:
        # Not recognized
        consumer.logger.debug("Element %s is not a recognized child element of inputData, ignoring." % (item.name))

  #TODO: ??? What does the following comment refer to?
  #If summary data is being requested, set it up

  if data_input is None:
    # We made it through the config information without finding a data input source.
    raise ConfigurationError("Unable to determine data input source.")

  consumer.logger.debug("Initialize model")
  #TODO: ??? What does the following comment refer to?
  #this is after the data information is input so that batch scoring may be faster
  consumer.initialize_model()

  if script_input:
    # Another script has called main, return the consumer so it can handle how score is called.
    return consumer

  consumer.logger.warning("Ready to score")
  # Metadata collection is tested with "is not None" throughout, so every
  # check in this function agrees on whether collection is enabled.
  if consumer.metadata is not None:
    # By default, for now, enable collection of metadata by data reader
    # and model (consumer general metadata is enabled earlier).
    data_input.enableMetaDataCollection()
    consumer.model.enableMetaDataCollection()
  if consumer.batch_scoring:
    if consumer.metadata is not None:
      consumer.metadata.log.info('Batch Scoring -One Score Per Segment\n')
    consumer.logger.debug("Batch Scoring")
    if isinstance(data_input, Reader):
      data_input.read_once()
      report = consumer.format_results(consumer.model.batchScore())
      if consumer.output_filename:
        consumer.output_report_header(file_handle = consumer.out)
        consumer.out.write(report)
        consumer.output_report_footer(file_handle = consumer.out)
        consumer.out.close()
  elif run_forever:
    if consumer.metadata is not None:
      consumer.metadata.log.info('Run Forever - One Score Per Event')
    consumer.logger.debug("Run Forever")
    if isinstance(data_input, Reader):
      consumer.output_report_header()
      data_input.read_forever()
      consumer.output_report_footer(consumer.out)
    elif isinstance(data_input, HTTPServer):
      data_input.serve_forever()
    else:
      consumer.logger.critical("Reading data failed.")
  else:
    # Read once; in daemon mode, wait for a signal and then read again.
    while True:
      if consumer.metadata is not None:
        consumer.metadata.log.info('Run Once - One Score Per Event')
        consumer.metadata.log.info('Start at %s'%datetime.datetime.now().isoformat())
      consumer.logger.debug("Run Once")
      if isinstance(data_input, Reader):
        consumer.output_report_header()
        data_input.read_once()
        consumer.output_report_footer()
      elif isinstance(data_input, HTTPServer):
        data_input.handle_request()
      else:
        consumer.logger.critical("Reading data failed.")
      if consumer.metadata is not None:
        consumer.metadata.log.info('End at %s'%datetime.datetime.now().isoformat())
      if not run_daemon:
        break
      signal.signal(signal.SIGALRM, daemonRestartHandler)
      signal.signal(signal.SIGUSR1, daemonRestartHandler)
      signal.pause() # unix only
  if consumer.metadata is not None:
    consumer.metadata['Stacksize after Scoring'] = ptools.stacksize()
    consumer.metadata['Resident Memory after Scoring'] = ptools.resident()/1e+9 #Gb
    consumer.metadata['Memory after Scoring'] = ptools.memory()/1e+9 #Gb
    consumer.metadata.collected['DataInput'] = data_input.getMetaData()
    consumer.metadata.collected['Scoring'] = consumer.getMetaData()
    consumer.metadata.collected[''] = consumer.model.getMetaData()
    consumer.metadata.report()