import logging
import pickle

# Protocal, MapOutBuffer, DfsClient and BlockReader are defined in other
# modules of this project.


class Mapper(Protocal):

    def __init__(self, id, task_info):
        Protocal.__init__(self)
        self.id = id
        self._parse_task_info(task_info)
        self.outBuffer = MapOutBuffer(self.id, self.reduce_num, self.combinefn)

    def sayhello(self):
        # Register with the master and receive the assigned input source.
        cmd, res = self.send_id_command('map_start')
        self.source = pickle.loads(res)

    def _mem_mode_map(self):
        # The input is already in memory; map it in a single pass.
        res = self.mapfn(self.id, self.source)
        for kv in res:
            self.outBuffer.write(kv)
        self.outBuffer.merge_all()

    def _disk_mode_map(self):
        # Read the assigned block from the DFS line by line and map each line.
        fobj = DfsClient().open(self.dfsfile)
        block = self.source
        reader = BlockReader(block, fobj)
        while True:
            data = reader.read_line()
            if data is None:
                break
            res = self.mapfn(self.id, data)
            for kv in res:
                self.outBuffer.write(kv)
        self.outBuffer.merge_all()

    def start(self):
        print 'ready, map %d starting' % self.id
        logging.info('ready, map %d starting' % self.id)
        self.sayhello()
        if self.mode == 0x01:
            self._disk_mode_map()
        else:
            self._mem_mode_map()
        print 'map %d work down' % self.id
        logging.info('map %d work down' % self.id)
        cmd, msg = self.send_id_command('map_down')
        if msg == 'map_exit':
            logging.info('map %d exit' % self.id)
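For context, a minimal driver sketch of how a worker process might run this mapper. The shape of task_info and the helper that receives it are assumptions for illustration only; in the real code the task description comes over the Protocal connection and is parsed by _parse_task_info.

    # Hypothetical usage sketch (not part of the original source).
    # receive_task_from_master() is a made-up placeholder for however the
    # worker obtains its task description; task_info must carry whatever
    # _parse_task_info expects (reduce_num, combinefn, mode, mapfn, ...).
    if __name__ == '__main__':
        task_info = receive_task_from_master()   # hypothetical helper
        mapper = Mapper(0, task_info)            # map task with id 0
        mapper.start()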