def reduce(self, request, context):
    """Fold all values of the operand's database into one Operand and yield it.

    The reducer callable and the serdes are obtained from
    ``self.get_function_and_serdes(request.info)``. Entries are read through
    a cursor on the default database of the ``MDBEnv`` opened at
    ``Processor.get_path(request.operand)``.

    Exactly one ``kv_pb2.Operand`` is yielded. Its key is the key of the last
    entry visited (``None`` when nothing was read) and its value is the
    serialized accumulator.

    ``None`` doubles as the "no accumulator yet" marker, so whenever the
    accumulator is ``None`` the next deserialized value replaces it instead
    of being passed to the reducer.

    ``context`` is not used by this method.
    """
    LOGGER.debug(gen_log(LogStage.START, request.session, 'reduce', request))

    combine, codec = self.get_function_and_serdes(request.info)
    operand = request.operand
    accumulated = None
    db_path = Processor.get_path(operand)

    # NOTE(review): nesting was reconstructed from whitespace-collapsed
    # source; confirm that the result is meant to be built after the
    # transaction closes but while the environment is still open.
    with MDBEnv(db_path, create_if_missing=True) as env:
        last_key = None
        with env.begin(db=Processor.get_default_db(env)) as txn:
            # The loop target keeps the most recent key around for the result.
            for last_key, raw_value in txn.cursor():
                item = codec.deserialize(raw_value)
                accumulated = item if accumulated is None else combine(accumulated, item)
        yield kv_pb2.Operand(key=last_key, value=codec.serialize(accumulated))

    LOGGER.debug(gen_log(LogStage.END, request.session, 'reduce', accumulated))
def get(self, _table, k_list):
    """Look up every key of ``k_list`` and return the results as a list.

    For each key, ``self.__get_index`` supplies a partition value and an
    index into ``self.egg_list``; the selected stub's ``get`` is called with
    the serialized key and the metadata built by ``self.__get_meta``. Each
    reply is converted with ``self.__get_pair``.

    Returns:
        A list with one converted reply per key, in ``k_list`` order. An
        empty ``k_list`` gives an empty list.
    """
    pairs = []
    for key in k_list:
        partition, egg_index = self.__get_index(key, _table.partition)
        stub = self.egg_list[egg_index]
        request = kv_pb2.Operand(key=self._serdes.serialize(key))
        meta = self.__get_meta(_table, str(partition))
        reply = stub.get(request, metadata=meta)
        pairs.append(self.__get_pair(reply))
    return pairs
def put_if_absent(self, _table, k, v):
    """Call ``putIfAbsent`` on the stub selected for ``k``.

    The key and value are serialized with ``self._serdes`` and sent to the
    stub at the index returned by ``self.__get_index``, with the metadata
    built by ``self.__get_meta``.

    Returns:
        The deserialized ``value`` field of the reply, or ``None`` when that
        field is empty (presumably meaning no prior value existed; confirm
        against the service contract).
    """
    partition, egg_index = self.__get_index(k, _table.partition)
    stub = self.egg_list[egg_index]
    meta = self.__get_meta(_table, str(partition))

    request = kv_pb2.Operand(
        key=self._serdes.serialize(k),
        value=self._serdes.serialize(v),
    )
    reply_value = stub.putIfAbsent(request, metadata=meta).value

    # An empty payload is reported as None rather than being deserialized.
    if len(reply_value) > 0:
        return self._serdes.deserialize(reply_value)
    return None
def reduce_wrapper(src_it, src_serde, dst_serde, functor, is_in_place_computing):
    """Fold the values of ``src_it`` with ``functor`` into a single Operand.

    Args:
        src_it: iterable of ``(key_bytes, value_bytes)`` pairs.
        src_serde: object whose ``deserialize`` decodes each value.
        dst_serde: object whose ``serialize`` encodes the final result.
        functor: two-argument callable ``(accumulator, value)``.
        is_in_place_computing: must be falsy; in-place mode is unsupported.

    Returns:
        A ``kv_pb2.Operand`` whose key is the last key seen (``None`` for an
        empty iterable) and whose value is the serialized accumulator.

    Raises:
        NotImplementedError: if ``is_in_place_computing`` is truthy.

    ``None`` doubles as the "no accumulator yet" marker: whenever the
    accumulator is ``None`` the next value replaces it instead of being
    passed to ``functor``.
    """
    if is_in_place_computing:
        raise NotImplementedError()

    accumulated = None
    last_key = None
    # The loop target itself tracks the most recent key for the result.
    for last_key, raw_value in src_it:
        item = src_serde.deserialize(raw_value)
        accumulated = item if accumulated is None else functor(accumulated, item)

    return kv_pb2.Operand(key=last_key, value=dst_serde.serialize(accumulated))
def put(self, key, value):
    """Wrap ``key`` and ``value`` in an Operand and pass it to ``self.kv_stub.put``.

    The stub's return value is discarded; this method returns ``None``.
    """
    self.kv_stub.put(kv_pb2.Operand(key=key, value=value))
def get(self, key):
    """Return the ``value`` field of the reply to ``self.kv_stub.get`` for ``key``."""
    request = kv_pb2.Operand(key=key)
    return self.kv_stub.get(request).value
def put(self, k, v):
    """Queue an Operand for ``(k, v)`` in ``self.cache``.

    ``self.write()`` is called once the cache holds more than 100000
    entries. Returns ``None`` either way.
    """
    self.cache.append(kv_pb2.Operand(key=k, value=v))
    if len(self.cache) <= 100000:
        return
    self.write()
def get(self, _table, k, use_serialize=True):
    """Fetch the entry for ``k`` through ``self.kv_stub.get``.

    The key is converted with ``self.kv_to_bytes`` and the call carries the
    metadata produced by ``_get_meta(_table)``. ``use_serialize`` is passed
    on to both the key conversion and the reply decoding.

    Returns:
        Whatever ``self._deserialize_operand`` produces for the reply.
    """
    key_bytes = self.kv_to_bytes(k=k, use_serialize=use_serialize)
    request = kv_pb2.Operand(key=key_bytes)
    reply = self.kv_stub.get(request, metadata=_get_meta(_table))
    return self._deserialize_operand(reply, use_serialize=use_serialize)
def put_if_absent(self, _table, k, v, use_serialize=True):
    """Call ``self.kv_stub.putIfAbsent`` for ``(k, v)``.

    Key and value are converted with ``self.kv_to_bytes`` and the call
    carries the metadata produced by ``_get_meta(_table)``. ``use_serialize``
    is passed on to both the conversion and the reply decoding.

    Returns:
        Whatever ``self._deserialize_operand`` produces for the reply.
    """
    key_bytes, value_bytes = self.kv_to_bytes(k=k, v=v, use_serialize=use_serialize)
    request = kv_pb2.Operand(key=key_bytes, value=value_bytes)
    reply = self.kv_stub.putIfAbsent(request, metadata=_get_meta(_table))
    return self._deserialize_operand(reply, use_serialize=use_serialize)
def put(self, _table, k, v, use_serialize=True):
    """Send ``(k, v)`` to ``self.kv_stub.put``.

    Key and value are converted with ``self.kv_to_bytes`` (honouring
    ``use_serialize``) and the call carries the metadata produced by
    ``_get_meta(_table)``. The stub's reply is discarded; returns ``None``.
    """
    key_bytes, value_bytes = self.kv_to_bytes(k=k, v=v, use_serialize=use_serialize)
    request = kv_pb2.Operand(key=key_bytes, value=value_bytes)
    self.kv_stub.put(request, metadata=_get_meta(_table))
def __generate_operand(kvs: Iterable, use_serialize=True):
    """Lazily turn ``(key, value)`` pairs into ``kv_pb2.Operand`` messages.

    With ``use_serialize`` truthy, both key and value go through
    ``_EggRoll.value_serdes.serialize``. Otherwise the key is passed through
    ``bytes_to_string`` and the value is used unchanged.
    """
    for k, v in kvs:
        if use_serialize:
            operand_key = _EggRoll.value_serdes.serialize(k)
            operand_value = _EggRoll.value_serdes.serialize(v)
        else:
            operand_key = bytes_to_string(k)
            operand_value = v
        yield kv_pb2.Operand(key=operand_key, value=operand_value)
def delete(self, _table, k):
    """Call ``delOne`` for ``k`` on the stub selected by ``self.__get_index``.

    The request carries the serialized key and the metadata built by
    ``self.__get_meta``.

    Returns:
        The reply converted by ``self.__get_pair``.
    """
    partition, egg_index = self.__get_index(k, _table.partition)
    stub = self.egg_list[egg_index]
    request = kv_pb2.Operand(key=self._serdes.serialize(k))
    meta = self.__get_meta(_table, str(partition))
    reply = stub.delOne(request, metadata=meta)
    return self.__get_pair(reply)
def dispatch_gen(self, _iter: Iterable, p, total):
    """Yield Operands for the pairs of ``_iter`` whose computed partition equals ``p``.

    For each ``(key, value)`` pair, ``self.__get_index(key, total)`` is
    called and only its first result is compared with ``p``. Matching pairs
    are serialized with ``self._serdes`` and yielded; the rest are skipped.
    """
    for k, v in _iter:
        # Only the partition part of the index result matters here.
        owner, _ = self.__get_index(k, total)
        if owner == p:
            serdes = self._serdes
            yield kv_pb2.Operand(key=serdes.serialize(k), value=serdes.serialize(v))