Example #1
0
def main():
  """Entry point: read the three property files named on the command line,
  build the DAO, and run a FeatureGenerator on top of it."""
  arg_parser = argparse.ArgumentParser(description='Learner')
  # All three property files are mandatory.
  for short_flag, long_flag, description in (
      ('-m', '--module_properties', 'module properties file'),
      ('-d', '--db_properties', 'database connection properties file'),
      ('-s', '--sql_statements', 'sql statements file')):
    arg_parser.add_argument(short_flag, long_flag, help=description, required=True)
  opts = arg_parser.parse_args()

  module_props = Utils.read_properties(opts.module_properties)
  db_props = Utils.read_properties(opts.db_properties)
  sql_stmts = Utils.read_properties(opts.sql_statements)

  dao = DB(db_props, sql_stmts)

  FeatureGenerator(dao, module_props).run()
Example #2
0
def main():
  """Entry point: parse CLI property files, set up file logging, and run a
  learner produced by LearnerFactory."""
  cli = argparse.ArgumentParser(description='Learner')
  cli.add_argument('-m', '--module_properties', help='module properties file', required=True)
  cli.add_argument('-d', '--db_properties', help='database connection properties file', required=True)
  cli.add_argument('-s', '--sql_statements', help='sql statements file', required=True)
  opts = cli.parse_args()

  module_props = Utils.read_properties(opts.module_properties)
  db_props = Utils.read_properties(opts.db_properties)
  sql_stmts = Utils.read_properties(opts.sql_statements)

  # Log raw messages to the file named in the module properties, truncating it.
  logging.basicConfig(filename=module_props['log_file'], filemode='w', format='%(message)s', level=logging.DEBUG)

  dao = DB(db_props, sql_stmts)

  LearnerFactory.new_learner(module_props, dao).run()
Example #3
0
def main():
  """Entry point: start a Registry configured from the properties file
  given on the command line."""
  cli = argparse.ArgumentParser(description='Registry')
  cli.add_argument('-m', '--module_properties', help='module properties file', required=True)
  opts = cli.parse_args()

  props = Utils.read_properties(opts.module_properties)

  Registry(props).run()
Example #4
0
def main():
  """Entry point: configure file logging from the module properties and
  run a Deployer."""
  cli = argparse.ArgumentParser(description='Deployer')
  cli.add_argument('-m', '--module_properties', help='module properties file', required=True)
  opts = cli.parse_args()

  props = Utils.read_properties(opts.module_properties)

  # Log raw messages to the configured file, truncating any previous run.
  logging.basicConfig(filename=props['log_file'], filemode='w', format='%(message)s', level=logging.DEBUG)

  Deployer(props).run()
Example #5
0
  def parse_line(self, line):
    """Parse one raw click-log line into a libsvm-like feature string.

    line -- 'timestamp<TAB>rest', where rest is '|'-separated:
        clicked|depth|position|userid|gender|age|comma-separated token ids

    Returns 'timestamp label 0:depth 1:position 2:gender 3:age' followed by
    the hashed (sparse) token features, feature ids offset past the regular
    dense features.
    """
    # remove \n (keep only the first physical line)
    line = line.splitlines()[0]
    timestamp, rest = line.split('\t')
    fields = rest.split("|")
    clicked = int(fields[0])
    # depth of the session
    depth = int(fields[1])
    position = int(fields[2])
    userid = int(fields[3])
    # user gender indicator (-1 for male, 1 for female); nonzero raw values
    # are remapped symmetrically around 1.5 (1 -> -1, 2 -> +1), 0 stays 0
    gender = int(fields[4])
    if gender != 0:
      gender = int((gender - 1.5) * 2)
    # user age indicator:
    #   '1' for (0, 12],
    #   '2' for (12, 18],
    #   '3' for (18, 24],
    #   '4' for (24, 30],
    #   '5' for (30, 40], and
    #   '6' for greater than 40.
    age = int(fields[5])
    # list of token ids
    tokens = [int(xx) for xx in fields[6].split(",")]

    # hash tokens into self._m buckets; for known users (userid > 0) also
    # hash a user-token cross feature
    hashed_tokens = {}
    for token in tokens:  # fix: dropped unused enumerate() index
      Utils.update_feature(token, 1, hashed_tokens, self._m)
      if userid > 0:
        user_token = str(token) + "_" + str(userid)
        Utils.update_feature(user_token, 1, hashed_tokens, self._m)

    # hashed feature ids are emitted after the regular (dense) features
    hashed_tokens_arr = []
    for key in sorted(hashed_tokens):
      hashed_tokens_arr.append('%s:%s' % (key + self._regular_features, hashed_tokens[key]))

    # fix: join the list directly instead of a redundant generator expression
    tokens_as_str = ' '.join(hashed_tokens_arr)
    # timestamp, label, depth, position, gender, age, categorical_features
    parsed_line = '%s %s 0:%s 1:%s 2:%s 3:%s %s' % (timestamp, clicked, depth, position, gender, age, tokens_as_str)
    return parsed_line
Example #6
0
  def __init__(self, module_properties, dao, clf):
    """Build a learner: restore or initialize the classifier, catch it up on
    stored historical examples, register with the registry, and construct the
    stream server that will feed it new examples.

    module_properties -- configuration dict; keys read here: id,
        warmup_examples, checkpoint, eval_mode, classes, server_port,
        registry, multi_threading, and offline_test (only when eval_mode
        is not 'prequential')
    dao -- data-access object used for model checkpoints and stored examples
    clf -- fresh classifier to use when no checkpoint exists for this id
    """
    self._dao = dao
    self._id = module_properties['id']
    self._warmup_examples = module_properties['warmup_examples']
    self._checkpoint = module_properties['checkpoint']
    self._is_prequential = True if module_properties['eval_mode'] == 'prequential' else False
    self._parser = ParserFactory.new_parser(module_properties)
    self._evaluator = EvaluatorFactory.new_evaluator(module_properties)
    self._offline_test = None
    # Non-prequential evaluation needs a held-out test set parsed up front.
    if self._is_prequential == False:
      self._offline_test = self._parser.parse(module_properties['offline_test'])
    # NOTE(review): Python 2 assumed (print statements below) -- map()
    # returns a list here, so np.array gets a proper 1-D int array.
    self._classes = np.array(map(int, module_properties['classes'].split(',')))
    self._server_port = module_properties['server_port']

    ##### Recovery and adding learners on the fly #########
    # Try to restore a checkpointed model for this learner id.
    self._clf, timestamp = self._dao.get_model(self._id)
    self._checkpointed = None
    # if there was a checkpoint, then see if there are some historical points beyond that and train
    if self._clf:
      self._checkpointed = True
      examples = self._dao.get_examples_greater_than(timestamp)
      if examples:
        print 'catching up checkpointed model with historical points...'
        X, y, timestamps = self._parser.parse_feature(examples)
        self._clf.partial_fit(X, y)
      else:
        print 'no historical points to catch up the checkpointed model'
        # will use the last metric saved
    else:
      # No checkpoint: start from the supplied classifier and train on all
      # stored examples; classes are passed on the first partial_fit call.
      self._checkpointed = False
      self._clf = clf
      examples = self._dao.get_examples()
      if examples:
        print 'catching up new model with historical points'
        X, y, timestamps = self._parser.parse_feature(examples)
        self._clf.partial_fit(X, y, self._classes)
      else:
        print 'no historical points to catch up the new model'

    #######################################        

    # Announce this learner to the registry under our own host:port and
    # take the first returned stream client address as our downstream peer.
    self._registry = RegistryClient(module_properties['registry'])
    hostname = socket.gethostname()
    address = Utils.get_address(hostname, self._server_port)
    self._stream_client_address = self._registry.reg(ComponentType.LEARNER, address)[0]    

    # Wire the handler into a Thrift-style stream service and server.
    self._handler = LearnerHandler(self._stream_client_address, self._registry, self._parser, self._evaluator, self._dao, self._clf, self._classes, 
                              self._warmup_examples, self._id, self._checkpoint, self._is_prequential, 
                              self._checkpointed, self._offline_test)
    self._processor = StreamService.Processor(self._handler)
    self._stream_server = Server(self._processor, self._server_port, module_properties['multi_threading'])
Example #7
0
 def __init__(self, dao, module_properties):
     """Register this feature generator with the registry and prepare the
     stream server that will serve generated features."""
     self._dao = dao
     self._parser = ParserFactory.new_parser(module_properties)
     self._historical_batches = module_properties["historical_batches"]
     self._registry = RegistryClient(module_properties["registry"])
     self._server_port = module_properties["server_port"]
     # Advertise our own endpoint; the registry answers with the addresses
     # of the downstream stream clients to feed.
     own_address = Utils.get_address(socket.gethostname(), self._server_port)
     self._stream_clients_addresses = self._registry.reg(ComponentType.FEATGEN, own_address)
     handler = FeatureGeneratorHandler(
         self._registry, self._parser, self._dao, self._historical_batches, self._stream_clients_addresses
     )
     self._processor = StreamService.Processor(handler)
     self._stream_server = Server(self._processor, self._server_port, module_properties["multi_threading"])
Example #8
0
 def get_model(self, id):
   """Fetch the checkpointed model for *id* from the database.

   Returns a (model, timestamp) tuple; both are None when no row exists.
   """
   # fix: print as a function so this runs under both Python 2 and 3
   print('retrieving model %s' % id)
   cur = self._conn.cursor()
   try:
     cur.execute(self._sql['get_model'], {'id': id})
     tupl = cur.fetchone()
     model = None
     timestamp = None
     if tupl:
       timestamp = tupl[0]
       model = Utils.deserialize(tupl[1])
       print('retrieved model %s' % id)
     else:
       print('not retrieved model %s' % id)
   finally:
     # fix: close the cursor even if execute/deserialize raises
     cur.close()
   return model, timestamp
Example #9
0
 def run(self):
   """Register this deployer with the registry, then start serving."""
   # Resolve our own endpoint and announce it before accepting traffic.
   own_address = Utils.get_address(socket.gethostname(), self._server_port)
   self._registry.reg(ComponentType.DEPLOYER, own_address)
   self._stream_server.start()
Example #10
0
 def __init__(self, address):
   """Hold a client endpoint, keeping both the raw address string and its
   host/port components."""
   self._address = address
   self._host, self._port = Utils.get_address_components(address)
Example #11
0
 def update_model(self, id, timestamp, model):
   """Persist the serialized *model* for *id*, stamped with *timestamp*.

   Commits the transaction before returning.
   """
   # fix: print as a function so this runs under both Python 2 and 3
   print('updating model %s' % id)
   cur = self._conn.cursor()
   try:
     cur.execute(self._sql['update_model'],
                 {'id': id, 'timestamp': timestamp, 'data': Utils.serialize(model), 'cond': id})
     self._conn.commit()
   finally:
     # fix: close the cursor even if execute/commit raises
     cur.close()