예제 #1
0
def run_maptask(mapper, key_value_iterator, out_shards, markerfn, task_id):
  """Run one map task, storing every emitted pair in its output shard.

  Pairs come from three sources, in this order: ``mapper.start_map(task_id)``,
  ``mapper.process(key, value)`` for each input record, and
  ``mapper.flush(task_id)``. Each pair is stored in the shard whose index is
  returned by ``base.shard_for_key(key, len(out_shards))``. Afterwards every
  non-None shard is closed and the text ``DONE`` is written to ``markerfn``.

  Args:
    mapper: object providing ``start_map(task_id)``, ``process(key, value)``
      and ``flush(task_id)``. ``start_map`` and ``flush`` yield ``(k, v)``
      pairs; ``process`` yields ``(k, v)`` pairs or falsy items, which are
      skipped.
    key_value_iterator: iterable of ``(key, value)`` input records.
    out_shards: sequence of shard objects supporting ``shard[k] = v`` and
      ``close()``. None entries are skipped when closing.
    markerfn: path of the marker file written once the task has finished.
    task_id: identifier handed to ``mapper.start_map`` and ``mapper.flush``.
  """
  out_shard_num = len(out_shards)
  num_recs = 0

  def store(k, v, counter_name):
    # Route the pair to its shard first, then bump the matching counter.
    out_shards[base.shard_for_key(k, out_shard_num)][k] = v
    count(counter_name)

  for k, v in mapper.start_map(task_id):
    store(k, v, 'start-out')

  for key, value in key_value_iterator:
    if flags.grep_debug_key:
      # Debug mode: only records whose key contains the debug key are handled.
      if flags.grep_debug_key not in key:
        continue
      print("Processing: %s" % key)
    if flags.log_every and num_recs % flags.log_every == 0:
      print(count_line())
    count('map-in')
    num_recs += 1
    # NOTE(review): the record that reaches the limit is counted as 'map-in'
    # but is not passed to mapper.process -- confirm this is intended.
    if flags.record_limit == num_recs:
      break
    for kv in mapper.process(key, value):
      if kv:
        k, v = kv
        store(k, v, 'map-out')

  for k, v in mapper.flush(task_id):
    store(k, v, 'flush-out')

  for out_shard in out_shards:
    if out_shard is not None:
      out_shard.close()

  # Close the marker file explicitly so 'DONE' is flushed before returning
  # instead of whenever the file object happens to be garbage collected.
  with open(markerfn, 'w') as marker:
    marker.write('DONE')
예제 #2
0
 def __contains__(self, key):
   """Return whether ``key`` is in the shard selected for it.

   The shard index comes from ``base.shard_for_key`` applied to the key and
   the number of shards.
   """
   shards = self._shards
   shard = shards[base.shard_for_key(key, len(shards))]
   return key in shard
예제 #3
0
 def get(self, key, default=None):
   """Look up ``key`` in the shard selected for it via that shard's ``get``.

   ``default`` is forwarded unchanged to the shard's ``get`` call.
   """
   shards = self._shards
   shard = shards[base.shard_for_key(key, len(shards))]
   return shard.get(key, default)
예제 #4
0
 def __getitem__(self, key):
   """Return the value stored under ``key`` in the shard selected for it.

   Any error raised by the shard's own item lookup propagates to the caller.
   """
   shards = self._shards
   shard = shards[base.shard_for_key(key, len(shards))]
   return shard[key]
예제 #5
0
 def __setitem__(self, key, value):
   """Store ``value`` under ``key`` in the shard selected for that key."""
   shards = self._shards
   shard = shards[base.shard_for_key(key, len(shards))]
   shard[key] = value