Ejemplo n.º 1
0
 def txn(): 
   index = random.randint(0, NUM_SHARDS - 1) 
   shard_name = "shard" + str(index) 
   counter = FSMSimpleCounterShard.get_by_key_name(shard_name) 
   if counter is None: 
     counter = FSMSimpleCounterShard(key_name=shard_name) 
   counter.count += value
   counter.put() 
Ejemplo n.º 2
0
  def post(self):
    if self.request.get("fsm_cleanup"):
      if fsm_calculate_run_time():
        self.redirect("/agg") 
      else:
        self.response.out.write("Error calculating run time of FSM/aggregate") 

    if self.request.get("reset_fsm_count"):
      for c in FSMSimpleCounterShard.all():
        c.delete()
      self.redirect('/agg')
      return
    if self.request.get("reset_mr_count"):
      for c in MRSimpleCounterShard.all():
        c.delete()
      self.redirect('/agg')
      return

    if self.request.get("reset_pl_count"):
      for c in PLSimpleCounterShard.all():
        c.delete()
      self.redirect('/agg')
      return

    if self.request.get("compute"):
      engine = self.request.get("engine")
      dataset = self.request.get("dataset")
      user = self.request.get('user')
      data = AggDataSet.get_by_key_name(dataset)
      
      record = Record(engine_type=engine, 
                      dataset=dataset,
                      benchmark="aggregate",
                      num_entities=data.num_entries,
                      entries_per_pipe=data.entries_per_pipe,
                      user=user,
                      state="Running")
      if engine == "fsm":
        record.put()
        # reset count
        for c in FSMSimpleCounterShard.all():
          c.delete()

        context = {}
        context['user'] = str(user)
        context['num_entries'] = int(data.num_entries)
        fsm.startStateMachine('Aggregate', [context])
        self.redirect('/agg')
      elif engine == "fsm_fan_in":
        """ This one uses fan in """
        record.put()
        # reset count
        for c in FSMSimpleCounterShard.all():
          c.delete()

        context = {}
        context['user'] = str(user)
        context['num_entries'] = int(data.num_entries)
        fsm.startStateMachine('Aggregate2', [context])
        self.redirect('/agg')
      elif engine == "pipeline":
        for c in PLSimpleCounterShard.all():
          c.delete()
        mypipeline = AggregatePipeline(data.num_entries)
        mypipeline.start()
        record.pipeline_id = mypipeline.pipeline_id
        record.put()
        self.redirect('/agg') 
        #self.redirect(mypipeline.base_path + "/status?root=" + mypipeline.pipeline_id)
      elif engine == "mr":
        for c in MRSimpleCounterShard.all():
          c.delete()
        # Why 1k each per shard or less? is this ideal?
        if data.num_entries > 1000: shards = data.num_entries/1000
        else: shards = 1

        kind = get_class(data.num_entries)
        mapreduce_id = control.start_map(
          name="Wordcount with just mappers",
          handler_spec="aggregate.mr.aggregate_mapper",
          reader_spec="mapreduce.input_readers.DatastoreInputReader",
          mapper_parameters={
              "entity_kind": "data.aggregate."+kind,
              "processing_rate": 500
          },
          mapreduce_parameters={model.MapreduceSpec.PARAM_DONE_CALLBACK:
                     '/agg/mr/callback'},
          shard_count=shards,
          queue_name="default",
        )

        record.mr_id = mapreduce_id
        record.put()
        self.redirect('/agg')
Ejemplo n.º 3
0
def fsm_calculate_run_time():
  """ Fantasm does not give call backs when its done. Must figure it out
      with another job using the last modified date on output entities
  """
  # Get the last job which was run for aggregate /fsm
  q = Record.all()
  q.filter('engine_type =','fsm')
  q.filter('benchmark =','aggregate')
  q.order('-start')
  results = q.fetch(1)

  # There is a second type of fsm job that has a fan in state
  q2 = Record.all()
  q2.filter('engine_type =','fsm_fan_in')
  q2.filter('benchmark =','aggregate')
  q2.order('-start')
  results2 = q2.fetch(1)
   
  if len(results) == 0 and len(results2) == 0:
    logging.error("Unable to find a record for fsm/aggregate")
    return False

  # Take only the one which ran last
  if len(results) == 0:
    results = results2 #fsm with fan in ran last
  elif len(results2) == 0:
    pass
  elif results[0].start > results2[0].start:
    pass
  else:
    results = results2 #fsm with fan in ran last

  q = None
  record = None
  # There should only be one result
  for ii in results:
    if ii.state == "Done":
      logging.error("Last FSM end time has already been calculated")
    logging.info(str(ii.num_entities))
    q = FSMSimpleCounterShard.all()
    if not q:
      logging.error("No query returned for Aggregate results")
      return False
    record = ii

  max_date = None
  while True:
    results = q.fetch(1000)
    for ii in results:
      date = ii.modified
      if max_date == None or max_date < date:
        max_date = date
    if len(results) < 1000:
      break;
  if not max_date:
    logging.error("Unable to calculate the max date for FSM/aggregate")
    return False
  record.state = "Done"
  record.end = max_date
  delta = (record.end - record.start)
  record.total = float(delta.days * 86400 + delta.seconds) + float(delta.microseconds)/1000000
  record.put()
  return True