def __create_unary_process(self, table: _DTable, func):
    """Build a UnaryProcess request for running *func* against *table*.

    The request carries the serialized task info, the table's storage
    locator, and a ProcessConf holding the session's naming policy.
    """
    conf = processor_pb2.ProcessConf(
        namingPolicy=self.eggroll_context.get_naming_policy().name)
    info = self.__create_task_info(
        func=func,
        is_in_place_computing=table.get_in_place_computing())
    locator = self.__create_storage_locator_from_dtable(table)
    return processor_pb2.UnaryProcess(info=info, operand=locator, conf=conf)
def reduce(self, _table, func):
    """Reduce all key/value pairs of *_table* to a single value with *func*.

    Sends a reduce task to every partition's processor, deserializes the
    per-partition partial results, then folds them together locally with
    *func*. Returns ``None`` when no partition produced a usable result.
    """
    func_id, func_bytes = self.serialize_and_hash_func(func)
    results = []
    for partition in range(_table.partition):
        operand = EggRoll.__get_storage_locator(_table, partition)
        proc_id = partition % len(self.proc_list)
        channel, stub = self.proc_list[proc_id]
        unary_p = processor_pb2.UnaryProcess(
            operand=operand,
            info=processor_pb2.TaskInfo(task_id=self.job_id,
                                        function_id=func_id,
                                        function_bytes=func_bytes))
        # extend() instead of repeated list concatenation (avoids
        # rebuilding the accumulator list on every partition).
        results.extend(stub.reduce(unary_p))
    # Deserialize non-empty payloads and drop None partial results.
    rs = [self._serdes.deserialize(val.value)
          for val in results if len(val.value) > 0]
    rs = [r for r in rs if r is not None]
    # BUG FIX: guard on the filtered results, not the raw responses.
    # Previously the check was ``len(results) <= 0`` while the first
    # element was taken from ``rs`` — so when every partition returned
    # an empty or None partial result, ``rs[0]`` raised IndexError.
    if not rs:
        return None
    rtn = rs[0]
    for r in rs[1:]:
        rtn = func(rtn, r)
    return rtn
def __create_unary_process(self, table: _DTable, func):
    """Assemble a UnaryProcess request: task info for *func*, the
    storage locator of *table*, and the current session's protobuf."""
    storage = self.__create_storage_locator_from_dtable(table)
    info = self.__create_task_info(
        func=func,
        is_in_place_computing=table.get_in_place_computing())
    session_pb = self.eggroll_session.to_protobuf()
    return processor_pb2.UnaryProcess(info=info,
                                      operand=storage,
                                      session=session_pb)
def process_wrapper(req_type, func, result, req):
    """Parse a serialized request protobuf and run *func* on it.

    Args:
        req_type: "UnaryProcess" selects a UnaryProcess message; anything
            else selects BinaryProcess.
        func: callable invoked as ``func(req_pb, None)``.
        result: queue-like object; receives "ok" on success or
            "error:<traceback>" on failure.
        req: serialized protobuf bytes.
    """
    try:
        if req_type == "UnaryProcess":
            req_pb = processor_pb2.UnaryProcess()
        else:
            req_pb = processor_pb2.BinaryProcess()
        req_pb.ParseFromString(req)
        # TODO context serialize?
        func(req_pb, None)
        result.put("ok")
    except Exception:
        # BUG FIX: was a bare ``except:`` which also swallowed
        # SystemExit / KeyboardInterrupt; narrow to Exception so
        # interpreter-shutdown signals still propagate.
        err_str = traceback.format_exc()
        LOGGER.error(err_str)
        result.put("error:" + err_str)
def glom(self, _table):
    """Send a glom task to every partition of *_table* and return a
    handle to the resulting table (all futures are awaited; the last
    response describes the destination table)."""
    func_id = str(uuid.uuid1())
    futures = []
    for p in range(_table.partition):
        locator = EggRoll.__get_storage_locator(_table, p)
        request = processor_pb2.UnaryProcess(
            operand=locator,
            info=processor_pb2.TaskInfo(task_id=self.job_id,
                                        function_id=func_id))
        channel, stub = self.proc_list[p % len(self.proc_list)]
        futures.append(stub.glom.future(request))
    for fut in futures:
        result = fut.result()
    return _DTable(self, result.type, result.namespace, result.name,
                   _table.partition)
def mapValues(self, _table, func):
    """Apply *func* to every value of *_table* on the processors and
    return a handle to the resulting table (keys are unchanged)."""
    func_id, func_bytes = self.serialize_and_hash_func(func)
    pending = []
    for idx in range(_table.partition):
        locator = EggRoll.__get_storage_locator(_table, idx)
        task = processor_pb2.TaskInfo(task_id=self.job_id,
                                      function_id=func_id,
                                      function_bytes=func_bytes)
        request = processor_pb2.UnaryProcess(operand=locator, info=task)
        channel, stub = self.proc_list[idx % len(self.proc_list)]
        pending.append(stub.mapValues.future(request))
    # Wait for every partition; the last response names the new table.
    for fut in pending:
        result = fut.result()
    return _DTable(self, result.type, result.namespace, result.name,
                   _table.partition)
def sample(self, _table, fraction, seed):
    """Randomly sample roughly ``fraction`` of *_table* using *seed*
    and return a handle to the sampled table.

    Raises:
        ValueError: if *fraction* is outside [0, 1].
    """
    if not 0 <= fraction <= 1:
        raise ValueError("fraction must be in [0, 1]")
    func_bytes = self._serdes.serialize((fraction, seed))
    func_id = str(uuid.uuid1())
    pending = []
    for idx in range(_table.partition):
        locator = EggRoll.__get_storage_locator(_table, idx)
        request = processor_pb2.UnaryProcess(
            operand=locator,
            info=processor_pb2.TaskInfo(task_id=self.job_id,
                                        function_id=func_id,
                                        function_bytes=func_bytes))
        channel, stub = self.proc_list[idx % len(self.proc_list)]
        pending.append(stub.sample.future(request))
    for fut in pending:
        result = fut.result()
    return _DTable(self, result.type, result.namespace, result.name,
                   _table.partition)