Example #1
 def get(self):
   results = []
   for jt in JOB_TYPES:
     for e in ENGINES:
       for js in JOB_SIZE:
         q = Record.all()
         q.filter('benchmark =', jt)          
         q.filter('engine_type =', e)
         q.filter('num_entities =', js)
         # fetch() caps at 1000 results, so jobs beyond that are silently dropped
         ents = q.fetch(1000)
         if len(ents) == 0:
           continue
         n = 0
         sum_x = 0
         mean = 0
         stdev = 0
         maximum = 0
         minimum = 99999999  # sentinel; any real total will be smaller
         points = []
         for ii in ents:
           if ii.total: # some jobs may have failed
             n += 1
             points.append(ii.total)
             if ii.total > maximum:
               maximum = ii.total
             if ii.total < minimum:
               minimum = ii.total
         if n != 0:
           sum_x = getTotal(points)
           mean = getAverage(points, sum_x)
           stdev = getStDev(points, mean)
           results.append(Job(n, sum_x, mean, maximum, minimum, stdev, jt, e, js))
   self.response.out.write(template.render('templates/jobs.html',
                                           {'jobs': results,
                                            'jobs_len': len(results)}))
   return
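Example #1 calls three statistics helpers, getTotal, getAverage, and getStDev, that are defined elsewhere. A minimal sketch consistent with the call sites, assuming a population (not sample) standard deviation:

import math

# Sketches of the helpers Example #1 relies on; only the signatures are
# known from the call sites, the bodies here are assumptions.
def getTotal(points):
  return sum(points)

def getAverage(points, sum_x):
  return sum_x / float(len(points))  # float() avoids Python 2 integer division

def getStDev(points, mean):
  variance = sum((p - mean) ** 2 for p in points) / float(len(points))
  return math.sqrt(variance)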
Example #2
 def get(self):
   user = users.get_current_user()
   if not user:
     self.redirect(users.create_login_url(dest_url="/"))
     return
   q = SubSetDataSet.all()
   q.order('-start')
   datasets = q.fetch(1000)  # fetch() already returns a list
   datasets_len = len(datasets)
   q = Record.all()
   q.filter('benchmark =', "subset")
   q.order('-start')
   records = q.fetch(1000)
   records_len = len(records)
   fsm_count = get_fsm_count()
   pl_count = get_pl_count()
   mr_count = get_mr_count()
   self.response.out.write(template.render("templates/subset.html",
                                           {"user": user.email(),
                                            "datasets_len" : datasets_len,
                                            "datasets" : datasets,
                                            "fsm_count" : fsm_count,
                                            "pl_count" : pl_count,
                                            "mr_count" : mr_count,
                                            "records": records,
                                            "records_len" : records_len}))
Example #3
  def post(self):
    name = self.request.headers.get("mapreduce-id")  # avoid KeyError if the header is absent
    if not name:
      name = "NAME NOT FOUND"
    logging.info("MR CALLBACK " + name)
    q = Record.all()
    q.filter('mr_id =', name)
    results = q.fetch(1)  # fetch() returns a list; the Query object itself is always truthy
    if results:
      for ii in results:
        t = memcache.get('fsm_mapper_cleanup')
        if not t:
          logging.error("Unable to get datetime from memcache")
          return
        # t looks like "YYYY-MM-DD HH:MM:SS.ffffff" (str() of a datetime)
        dt, msec = t.split(".")
        dt = datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')
        msec = datetime.timedelta(microseconds=int(msec))
        fullDatetime = dt + msec
        ii.end = fullDatetime

        delta = (ii.end - ii.start)
        ii.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
        ii.state = "Done"
        ii.put()

        logging.info("updated: record for MR job id %s"%name)
    else:
      logging.info("Unable to find record for MR job id %s"%name)
Example #4
 def finalized(self):
   plid = self.pipeline_id
   q = Record.all()
   q.filter('pipeline_id =', plid)
   items = q.fetch(1)  # at most one record per pipeline id
   for ii in items:
     ii.end = datetime.datetime.now() 
     delta = (ii.end - ii.start)
     ii.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
     ii.state = "Done"
     ii.put() 
     logging.info("Updated aggregate pipeline record")
   logging.info("Done with aggregate pipeline")
Example #5
 def post(self):
   name = self.request.headers.get("mapreduce-id")  # avoid KeyError if the header is absent
   if not name:
     name = "NAME NOT FOUND"
   logging.info("MR CALLBACK " + name)
   q = Record.all()
   q.filter('mr_id =', name)
   results = q.fetch(1)  # fetch() returns a list; the Query object itself is always truthy
   if results:
     for ii in results:
       ii.end = datetime.datetime.now()
       delta = (ii.end - ii.start)
       ii.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
       ii.state = "Done"
       ii.put()
       logging.info("updated: record for MR job id %s"%name)
   else:
     logging.info("Unable to find record for MR job id %s"%name)
Example #6
def fsm_calculate_run_time():
  """ Fantasm does not give call backs when its done. Must figure it out
      with another job using the last modified date on output entities
  """
  # Get the last job which was run for subset /fsm
  q = Record.all()
  q.filter('engine_type =','fsm')
  q.filter('benchmark =','subset')
  q.order('-start')
  results = q.fetch(1)
  if len(results) == 0:
    logging.error("Unable to find a record for fsm/subset")
    return False

  q = None
  record = None
  for ii in results:
    if ii.state == "Done":
      logging.error("Last FSM end time has already been calculated")
    logging.info(str(ii.num_entities))
    q = SSFSMSimpleCounterShard.all()
    if not q:
      logging.error("No query returned for SubSet results")
      return False
    record = ii

  max_date = None
  while True:
    results = q.fetch(1000)
    for ii in results:
      date = ii.modified
      if max_date is None or max_date < date:
        max_date = date
    if len(results) < 1000:
      break
    q.with_cursor(q.cursor())  # advance past this batch, else fetch() repeats the same 1000
  if not max_date:
    logging.error("Unable to calculate the max date for FSM/subset")
    return False
  record.state = "Done"
  record.end = max_date
  delta = (record.end - record.start)
  record.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
  record.put()
  return True
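The loop above only reads the modified property off SSFSMSimpleCounterShard. A plausible model definition, assuming the usual sharded-counter shape, where auto_now=True refreshes the timestamp on every put():

from google.appengine.ext import db

class SSFSMSimpleCounterShard(db.Model):
  count = db.IntegerProperty(required=True, default=0)  # assumed field
  modified = db.DateTimeProperty(auto_now=True)  # what fsm_calculate_run_time scans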
Example #7
def fsm_calculate_run_time():
  """ Fantasm does not give call backs when its done. Must figure it out
      with another job using the last modified date on output entities
  """
  # Get the last job which was run for grep/fsm
  q = Record.all() 
  q.filter('engine_type =','fsm')
  q.filter('benchmark =','grep')
  q.order('-start')
  results = q.fetch(1)
  if len(results) == 0:
    logging.error("Unable to find a record for fsm/grep")
    return False

  for ii in results:
    ii.state = "Calculating time"
    ii.put()
    shards = ii.num_entities/1000
    if shards < 1:
      shards = 1
    if shards > 256:
      shards = 256  # maximum number of shards allowed

    kind  = "GrepResults" #get_output_class(ii.num_entities)
    mapreduce_id = control.start_map(
            name="FSM Grep cleanup",
            handler_spec="grep.fsm_mapper",
            reader_spec="mapreduce.input_readers.DatastoreInputReader",
            mapper_parameters={
                "entity_kind": "data.grep."+kind,
                "processing_rate": 500
            },
            mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK:
                       '/grep/fsm/callback'},
            shard_count=shards,
            queue_name="default",
          )
    ii.mr_id = mapreduce_id
    ii.put()
  return True
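The done-callback URL passed to control.start_map has to be routed to the POST handler from Example #5. A sketch of the wiring, with the handler class name assumed:

from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app

# Hypothetical routing; GrepFSMCallbackHandler stands in for whatever class
# holds Example #5's post().
application = webapp.WSGIApplication([
  ('/grep/fsm/callback', GrepFSMCallbackHandler),
], debug=True)

def main():
  run_wsgi_app(application)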
Example #8
 def get(self):
   user = users.get_current_user()
   if not user:
     self.redirect(users.create_login_url(dest_url="/"))
     return
   q = GrepDataSet.all()
   q.order('-start')
   datasets = q.fetch(1000)  # fetch() already returns a list
   datasets_len = len(datasets)
   q = Record.all()
   q.filter('benchmark =', "grep")
   q.order('-start')
   records = q.fetch(1000)
   records_len = len(records)
   self.response.out.write(template.render("templates/grep.html",
                                           {"user": user.email(),
                                            "datasets_len" : datasets_len,
                                            "datasets" : datasets,
                                            "records": records,
                                            "records_len" : records_len}))
Example #9
def fsm_calculate_run_time():
  """ Fantasm does not give call backs when its done. Must figure it out
      with another job using the last modified date on output entities
  """
  # Get the last job which was run for aggregate /fsm
  q = Record.all()
  q.filter('engine_type =','fsm')
  q.filter('benchmark =','aggregate')
  q.order('-start')
  results = q.fetch(1)

  # There is a second type of fsm job that has a fan in state
  q2 = Record.all()
  q2.filter('engine_type =','fsm_fan_in')
  q2.filter('benchmark =','aggregate')
  q2.order('-start')
  results2 = q2.fetch(1)
   
  if len(results) == 0 and len(results2) == 0:
    logging.error("Unable to find a record for fsm/aggregate")
    return False

  # Keep only the record from whichever fsm variant ran last
  if len(results) == 0:
    results = results2  # only the fan-in variant ran
  elif len(results2) == 0:
    pass  # only the plain fsm variant ran
  elif results[0].start > results2[0].start:
    pass  # plain fsm ran last
  else:
    results = results2  # fan-in variant ran last

  q = None
  record = None
  # There should only be one result
  for ii in results:
    if ii.state == "Done":
      logging.error("Last FSM end time has already been calculated")
    logging.info(str(ii.num_entities))
    q = FSMSimpleCounterShard.all()
    if not q:
      logging.error("No query returned for Aggregate results")
      return False
    record = ii

  max_date = None
  while True:
    results = q.fetch(1000)
    for ii in results:
      date = ii.modified
      if max_date is None or max_date < date:
        max_date = date
    if len(results) < 1000:
      break
    q.with_cursor(q.cursor())  # advance past this batch, else fetch() repeats the same 1000
  if not max_date:
    logging.error("Unable to calculate the max date for FSM/aggregate")
    return False
  record.state = "Done"
  record.end = max_date
  delta = (record.end - record.start)
  record.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
  record.put()
  return True
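The elapsed-time arithmetic repeated throughout these examples is the hand-rolled equivalent of timedelta.total_seconds(), which only arrived in Python 2.7 (the early App Engine runtime was 2.5):

import datetime

delta = datetime.timedelta(days=1, seconds=5, microseconds=250000)
manual = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds) / 1000000
assert manual == delta.total_seconds()  # both give 86405.25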