Beispiel #1
0
def main(config=None):
    """Configure a PMML consumer, attach its data source and start scoring.

    Args:
        config: Optional name of the configuration file.  When it is falsy
            the name is taken from the ``-c``/``--config`` command line
            option, which defaults to ``config.xml``.

    Raises:
        ConfigurationError: If the consumer ends up with no data input
            description after the configuration is read, or if none of the
            listed elements is a recognized data source.
    """
    logging.basicConfig(level=logging.DEBUG)

    if not config:
        # The command line is only consulted when the caller did not name a
        # configuration file, so the parser is only built in that case.
        from optparse import OptionParser, make_option
        option_list = [
            make_option("-c",
                        "--config",
                        metavar="config",
                        default="config.xml",
                        help="The configuration file name")
        ]
        parser = OptionParser(usage="usage: %prog [options]",
                              version="%prog 0.3.3",
                              option_list=option_list)
        config = parser.parse_args()[0].config

    def optional_attr(element, key):
        """Return element.attr[key], or None when the attribute is absent."""
        if key in element.attr:
            return element.attr[key]
        return None

    # Read the configuration file; consumer.configure is the Reader callback.
    consumer = pmmlConsumer()
    consumer.logger.debug("Create Reader to get Configuration")
    config_reader = Reader(consumer.configure,
                           source=str(config),
                           magicheader=False,
                           autoattr=False)
    consumer.logger.debug("Read Config File")
    config_reader.read_once()

    # Guard against iterating over None below.
    if consumer.data_input_info is None:
        raise ConfigurationError(
            "Data input source missing from configuration.")

    # Build the Reader or HTTP server that will deliver the data.
    data_input = None
    run_forever = True
    for item in consumer.data_input_info:
        if item.name == "readOnce":
            run_forever = False
        elif item.name == "batchScoring":
            consumer.batch_scoring = True
        elif data_input is not None:
            # Only the first data source named in the configuration is used.
            continue
        elif item.name == "fromFile" or item.name == "fromFifo":
            # Nothing special needed beyond the UniTable flag.
            data_input = Reader(
                consumer.score,
                source=item.attr['name'],
                logger=consumer.logger,
                magicheader=False,
                unitable=(optional_attr(item, 'type') == "UniTable"))
        elif item.name == "fromFixedRecordFile":
            # Each child element describes one field; fields are laid out
            # back to back, so a field starts where the previous one ended.
            ffnames = []
            ffstarts = []
            ffends = []
            start = 0
            for field in item:
                ffnames.append(field.attr['name'])
                ffstarts.append(start)
                start += int(field.attr['length'])
                ffends.append(start)
            data_input = Reader(consumer.score,
                                source=item.attr['name'],
                                types=None,
                                logger=consumer.logger,
                                magicheader=False,
                                unitable=True,
                                ffConvert=ffConfig(ffnames, ffstarts, ffends,
                                                   optional_attr(item, 'cr')))
        elif item.name == "fromCSVFile":
            # CSV input takes optional header, separator and type settings.
            data_input = Reader(consumer.score,
                                source=item.attr['name'],
                                logger=consumer.logger,
                                magicheader=False,
                                unitable=True,
                                header=optional_attr(item, 'header'),
                                sep=optional_attr(item, 'sep'),
                                types=optional_attr(item, 'types'))
        elif item.name == "fromStandardInput":
            # "-" is the source name passed to Reader for standard input.
            data_input = Reader(
                consumer.score,
                source="-",
                logger=consumer.logger,
                magicheader=False,
                unitable=(optional_attr(item, 'type') == "UniTable"))
        elif item.name == "fromHTTP":
            # Gather what is needed to set up the server.
            input_url = item.attr['url']
            input_port = int(item.attr['port'])
            if optional_attr(item, 'type') == "UniTable":
                callback = consumer.score_http_uni
            else:
                callback = consumer.score_http_xml
            data_input = HTTPInterfaceServer(('', input_port),
                                             logger=consumer.logger)
            data_input.register_callback(input_url, callback)
        else:
            consumer.logger.warning(
                "Element %s is not a recognized child element of inputData, ignoring."
                % (item.name))

    if data_input is None:
        raise ConfigurationError("Unable to determine data input source.")

    # The model is initialized after the input description has been read so
    # that batch scoring may be faster.
    # NOTE(review): "initalize" is the spelling this code calls; confirm
    # against pmmlConsumer before renaming.
    consumer.logger.debug("Initialize model")
    consumer.initalize_model()
    consumer.logger.warning("Ready to score")

    if consumer.batch_scoring:
        consumer.logger.debug("Batch Scoring")
        # NOTE(review): a non-Reader input is silently ignored in batch
        # mode, and the report is discarded when no output file is set.
        if isinstance(data_input, Reader):
            data_input.read_once()
            report = consumer.format_results(consumer.model.batchScore())
            if consumer.output_filename:
                out = open(consumer.output_filename, 'w')
                try:
                    consumer.output_report_header(file_handle=out)
                    out.write(report)
                    consumer.output_report_footer(file_handle=out)
                finally:
                    # Close the report file even if writing it fails.
                    out.close()
    elif run_forever:
        consumer.logger.debug("Run Forever")
        if isinstance(data_input, Reader):
            consumer.output_report_header()
            data_input.read_forever()
            consumer.output_report_footer()
        elif isinstance(data_input, HTTPServer):
            data_input.serve_forever()
        else:
            # Parenthesized so the line parses on Python 2 and Python 3.
            print("Reading data failed.")
    else:
        # "readOnce" was requested: make a single pass over the input.
        consumer.logger.debug("Run Once")
        if isinstance(data_input, Reader):
            consumer.output_report_header()
            data_input.read_once()
            consumer.output_report_footer()
        elif isinstance(data_input, HTTPServer):
            data_input.handle_request()
        else:
            print("Reading data failed.")
Beispiel #2
0
def main(config=None):
  """Configure a PMML consumer, attach its data source and start scoring.

  Args:
    config: Optional name of the configuration file.  When it is falsy the
      name is taken from the ``-c``/``--config`` command line option, which
      defaults to ``config.xml``.

  Raises:
    ConfigurationError: If the consumer ends up with no data input
      description after the configuration is read, or if none of the listed
      elements is a recognized data source.
  """
  logging.basicConfig(level=logging.DEBUG)

  if not config:
    # The command line is only consulted when the caller did not name a
    # configuration file, so the parser is only built in that case.
    from optparse import OptionParser, make_option
    option_list = [
      make_option("-c", "--config", metavar="config", default="config.xml", help="The configuration file name")]
    parser = OptionParser(usage="usage: %prog [options]", version="%prog 0.3.3", option_list=option_list)
    config = parser.parse_args()[0].config

  def optional_attr(element, key):
    """Return element.attr[key], or None when the attribute is absent."""
    if key in element.attr:
      return element.attr[key]
    return None

  #Read the configuration file; consumer.configure is the Reader callback.
  consumer = pmmlConsumer()
  consumer.logger.debug("Create Reader to get Configuration")
  config_reader = Reader(consumer.configure, source = str(config), magicheader = False, autoattr = False)
  consumer.logger.debug("Read Config File")
  config_reader.read_once()

  #Guard against iterating over None below.
  if consumer.data_input_info is None:
    raise ConfigurationError("Data input source missing from configuration.")

  #Build the Reader or HTTP server that will deliver the data.
  data_input = None
  run_forever = True
  for item in consumer.data_input_info:
    if item.name == "readOnce":
      run_forever = False
    elif item.name == "batchScoring":
      consumer.batch_scoring = True
    elif data_input is not None:
      #Only the first data source named in the configuration is used.
      continue
    elif item.name == "fromFile" or item.name == "fromFifo":
      #Nothing special needed beyond the UniTable flag.
      is_uni = optional_attr(item, 'type') == "UniTable"
      data_input = Reader(consumer.score, source = item.attr['name'], logger = consumer.logger, magicheader = False, unitable = is_uni)
    elif item.name == "fromFixedRecordFile":
      #Each child element describes one field; fields are laid out back to
      #back, so a field starts where the previous one ended.
      ffnames = []
      ffstarts = []
      ffends = []
      start = 0
      for field in item:
        ffnames.append(field.attr['name'])
        ffstarts.append(start)
        start += int(field.attr['length'])
        ffends.append(start)
      ffCR = optional_attr(item, 'cr')
      data_input = Reader(consumer.score, source = item.attr['name'],
        types = None,
        logger = consumer.logger, magicheader = False, unitable = True, ffConvert = ffConfig(ffnames, ffstarts, ffends, ffCR))
    elif item.name == "fromCSVFile":
      #CSV input takes optional header, separator and type settings.
      header = optional_attr(item, 'header')
      sep = optional_attr(item, 'sep')
      types = optional_attr(item, 'types')
      data_input = Reader(consumer.score, source = item.attr['name'], logger = consumer.logger, magicheader = False, unitable = True, header = header, sep = sep, types = types)
    elif item.name == "fromStandardInput":
      #"-" is the source name passed to Reader for standard input.
      is_uni = optional_attr(item, 'type') == "UniTable"
      data_input = Reader(consumer.score, source = "-", logger = consumer.logger, magicheader = False, unitable = is_uni)
    elif item.name == "fromHTTP":
      #Gather what is needed to set up the server.
      input_url = item.attr['url']
      input_port = int(item.attr['port'])
      if optional_attr(item, 'type') == "UniTable":
        callback = consumer.score_http_uni
      else:
        callback = consumer.score_http_xml
      data_input = HTTPInterfaceServer(('',input_port), logger = consumer.logger)
      data_input.register_callback(input_url, callback)
    else:
      consumer.logger.warning("Element %s is not a recognized child element of inputData, ignoring." % (item.name))

  if data_input is None:
    raise ConfigurationError("Unable to determine data input source.")

  #The model is initialized after the input description has been read so
  #that batch scoring may be faster.
  #NOTE(review): "initalize" is the spelling this code calls; confirm
  #against pmmlConsumer before renaming.
  consumer.logger.debug("Initialize model")
  consumer.initalize_model()
  consumer.logger.warning("Ready to score")

  if consumer.batch_scoring:
    consumer.logger.debug("Batch Scoring")
    #NOTE(review): a non-Reader input is silently ignored in batch mode,
    #and the report is discarded when no output file is set.
    if isinstance(data_input, Reader):
      data_input.read_once()
      report = consumer.format_results(consumer.model.batchScore())
      if consumer.output_filename:
        out = open(consumer.output_filename, 'w')
        try:
          consumer.output_report_header(file_handle = out)
          out.write(report)
          consumer.output_report_footer(file_handle = out)
        finally:
          #Close the report file even if writing it fails.
          out.close()
  elif run_forever:
    consumer.logger.debug("Run Forever")
    if isinstance(data_input, Reader):
      consumer.output_report_header()
      data_input.read_forever()
      consumer.output_report_footer()
    elif isinstance(data_input, HTTPServer):
      data_input.serve_forever()
    else:
      #Parenthesized so the line parses on Python 2 and Python 3.
      print("Reading data failed.")
  else:
    #"readOnce" was requested: make a single pass over the input.
    consumer.logger.debug("Run Once")
    if isinstance(data_input, Reader):
      consumer.output_report_header()
      data_input.read_once()
      consumer.output_report_footer()
    elif isinstance(data_input, HTTPServer):
      data_input.handle_request()
    else:
      print("Reading data failed.")
Beispiel #3
0
def main(config, outfile=None, port=None):
  """Configure a PMML consumer, attach its data source and start scoring.

  Args:
    config: Name of the configuration file; it is passed through str().
    outfile: When truthy, replaces the output file name taken from the
      configuration file.
    port: When truthy, replaces the ``port`` attribute of a ``fromHTTP``
      element in the configuration.

  Returns:
    The configured consumer when the selected data source is an
    ``eventBased`` element, so that the calling script can decide how score
    is called.  Otherwise None, once scoring has finished.

  Raises:
    ConfigurationError: If the consumer ends up with no data input
      description after the configuration is read, or if none of the listed
      elements is a recognized data source.
  """
  def optional_attr(element, key, default=None):
    """Return element.attr[key], or default when the attribute is absent."""
    if key in element.attr:
      return element.attr[key]
    return default

  #Read in a config file with a bunch of options describing where everything is.
  #Nothing is logged before the file has been read: the logging handler is
  #set up while reading it, so an earlier call would only print
  #'No handlers could be found for logger "consumer"'.
  consumer = pmmlConsumer()
  config_reader = Reader(consumer.configure, source = str(config), magicheader = False, autoattr = False)
  config_reader.read_once()

  #Overwrite the out file from the config file with the command line option if it was present.
  if outfile:
    consumer.output_filename = outfile

  #Check to make sure that we don't try to iterate over None
  if consumer.data_input_info is None:
    raise ConfigurationError("Data input source missing from configuration.")

  #Create any reader or http server to read in data
  data_input = None
  run_forever = True
  run_daemon = False
  script_input = False
  for item in consumer.data_input_info:
    if item.name == "readOnce":
      run_forever = False
    elif item.name == "batchScoring":
      consumer.batch_scoring = True
    elif item.name == "daemon":
      run_daemon = True
    elif data_input is not None:
      #Only process the first way that we are told to get the data.
      continue
    elif item.name == "fromFile" or item.name == "fromFifo":
      #No special treatment needed other than the UniTable flag.
      is_uni = optional_attr(item, 'type') == "UniTable"
      data_input = Reader(consumer.score, source = item.attr['name'], logger = consumer.logger, magicheader = False, unitable = is_uni, framing='EOF')
    elif item.name == "fromFixedRecordFile":
      #Each child element describes one field; fields are laid out back to
      #back, so a field starts where the previous one ended.
      ffnames = []
      ffstarts = []
      ffends = []
      start = 0
      for field in item:
        ffnames.append(field.attr['name'])
        ffstarts.append(start)
        start += int(field.attr['length'])
        ffends.append(start)
      ffCR = optional_attr(item, 'cr')
      data_input = Reader(consumer.score, source = item.attr['name'],
        types = None,
        logger = consumer.logger, magicheader = False, unitable = True, ffConvert = ffConfig(ffnames, ffstarts, ffends, ffCR))
    elif item.name == "fromCSVFile":
      #We have a CSV file that needs special treatment to read in correctly
      header = optional_attr(item, 'header')
      sep = optional_attr(item, 'sep')
      types = optional_attr(item, 'types')
      data_input = Reader(consumer.score, source = item.attr['name'], logger = consumer.logger, magicheader = False, unitable = True, header = header, sep = sep, types = types, framing = 'EOF')
    elif item.name == "fromStandardInput":
      sep = optional_attr(item, 'sep')
      types = optional_attr(item, 'types')
      is_uni = optional_attr(item, 'type') == "UniTable"
      framing = optional_attr(item, 'framing', 'EOF')
      #NOTE(review): looks like a leftover trace message; kept so the log
      #output is unchanged.
      consumer.logger.debug('...Test')
      data_input = Reader(consumer.score, source = "-", logger = consumer.logger, magicheader = False, unitable = is_uni, sep = sep, types = types, framing = framing)
    elif item.name == "fromHTTP":
      #get the stuff we need to setup the server
      input_url = item.attr['url']
      if port:
        #The port given by the caller wins over the configured one.
        input_port = int(port)
      else:
        input_port = int(item.attr['port'])
      if optional_attr(item, 'type') == "UniTable":
        callback = consumer.score_http_uni
      else:
        callback = consumer.score_http_xml
      #Create the server and add the callback
      data_input = HTTPInterfaceServer(('',input_port), logger = consumer.logger)
      data_input.register_callback(input_url, callback)
    elif item.name == "eventBased":
      script_input = True
      #Dummy non-None value: it passes the "no data source" check below and
      #makes the loop skip any later data source elements.
      data_input = False
    else:
      #Not recognized
      consumer.logger.debug("Element %s is not a recognized child element of inputData, ignoring." % (item.name))

  if data_input is None:
    #We made it through the config information without finding a data input source.
    raise ConfigurationError("Unable to determine data input source.")

  #Initialize the model.  This happens after the data information is input
  #so that batch scoring may be faster.
  consumer.logger.debug("Initialize model")
  consumer.initialize_model()

  if script_input:
    #Another script has called main, return the consumer so it can handle how score is called.
    return consumer

  consumer.logger.warning("Ready to score")
  #Start scoring data
  if consumer.metadata:
    # By default, for now, enable collection of
    # metadata by data reader and model (consumer general metadata
    # is enabled earlier).
    data_input.enableMetaDataCollection()
    consumer.model.enableMetaDataCollection()

  if consumer.batch_scoring:
    if consumer.metadata:
      consumer.metadata.log.info('Batch Scoring -One Score Per Segment\n')
    consumer.logger.debug("Batch Scoring")
    #NOTE(review): a non-Reader input is silently ignored in batch mode,
    #and the report is discarded when no output file name is set.
    if isinstance(data_input, Reader):
      data_input.read_once()
      report = consumer.format_results(consumer.model.batchScore())
      if consumer.output_filename:
        try:
          consumer.output_report_header(file_handle = consumer.out)
          consumer.out.write(report)
          consumer.output_report_footer(file_handle = consumer.out)
        finally:
          #Close the report file even if writing it fails.
          consumer.out.close()
  elif run_forever:
    if consumer.metadata:
      consumer.metadata.log.info('Run Forever - One Score Per Event')
    consumer.logger.debug("Run Forever")
    if isinstance(data_input, Reader):
      consumer.output_report_header()
      data_input.read_forever()
      #NOTE(review): the footer gets consumer.out positionally while the
      #header above gets no file handle; confirm this is intended.
      consumer.output_report_footer(consumer.out)
    elif isinstance(data_input, HTTPServer):
      data_input.serve_forever()
    else:
      consumer.logger.critical("Reading data failed.")
  else:
    #Just read once; in daemon mode, wait for a signal and read once again.
    while True:
      #NOTE(review): this check uses "is not None" while the others test
      #truthiness; confirm whether the difference matters.
      if consumer.metadata is not None:
        consumer.metadata.log.info('Run Once - One Score Per Event')
        consumer.metadata.log.info('Start at %s'%datetime.datetime.now().isoformat())
      consumer.logger.debug("Run Once")
      if isinstance(data_input, Reader):
        consumer.output_report_header()
        data_input.read_once()
        consumer.output_report_footer()
      elif isinstance(data_input, HTTPServer):
        data_input.handle_request()
      else:
        consumer.logger.critical("Reading data failed.")
      if consumer.metadata:
        consumer.metadata.log.info('End at %s'%datetime.datetime.now().isoformat())
      if not run_daemon:
        break
      #The handlers are installed only after a pass has completed, then the
      #process sleeps until a signal arrives.
      signal.signal(signal.SIGALRM, daemonRestartHandler)
      signal.signal(signal.SIGUSR1, daemonRestartHandler)
      signal.pause() # unix only

  if consumer.metadata:
    consumer.metadata['Stacksize after Scoring'] = ptools.stacksize()
    consumer.metadata['Resident Memory after Scoring'] = ptools.resident()/1e+9 #Gb
    consumer.metadata['Memory after Scoring'] = ptools.memory()/1e+9 #Gb
    consumer.metadata.collected['DataInput'] = data_input.getMetaData()
    consumer.metadata.collected['Scoring'] = consumer.getMetaData()
    #NOTE(review): the model metadata is stored under an empty key; confirm
    #this is intended.
    consumer.metadata.collected[''] = consumer.model.getMetaData()
    consumer.metadata.report()