def __get_locator(self, obj, name=None):
    """Return the ``StorageLocator`` that describes ``obj``.

    A ``_DTable`` is described by its own ``_type``, ``_namespace``,
    ``_name`` and ``_partitions`` attributes.  Any other object yields an
    LMDB-typed locator in this instance's ``job_id`` namespace under the
    supplied ``name``.

    :param obj: a ``_DTable`` instance, or any other object.
    :param name: locator name; only used when ``obj`` is not a ``_DTable``.
    :return: a ``storage_basic_pb2.StorageLocator``.
    """
    # Guard clause: everything that is not a table gets the LMDB fallback.
    if not isinstance(obj, _DTable):
        return storage_basic_pb2.StorageLocator(
            type=storage_basic_pb2.LMDB,
            namespace=self.job_id,
            name=name,
        )

    table = obj
    return storage_basic_pb2.StorageLocator(
        type=table._type,
        namespace=table._namespace,
        name=table._name,
        fragment=table._partitions,
    )
def parallelize(self, data: Iterable, include_key=False, name=None, partition=1, namespace=None,
                create_if_missing=True, error_if_exist=False, persistent=False, chunk_size=100000,
                in_place_computing=False):
    """Create a table and fill it with the contents of ``data``.

    :param data: iterable to load; consumed as-is when ``include_key`` is
        true, otherwise wrapped in ``enumerate`` so each item is paired
        with its position.
    :param include_key: whether ``data`` is already passed through
        unchanged to ``put_all`` instead of being enumerated.
    :param name: table name; a ``uuid.uuid1()`` string when ``None``.
    :param partition: passed as ``fragmentCount`` of the create request.
    :param namespace: table namespace; the ``job_id`` of the current
        ``_EggRoll`` instance when ``None``.
    :param create_if_missing: accepted but not used by this body.
    :param error_if_exist: accepted but not used by this body.
    :param persistent: selects the LMDB store type when true, IN_MEMORY
        otherwise.
    :param chunk_size: forwarded to ``put_all``.
    :param in_place_computing: forwarded to ``set_in_place_computing``.
    :return: the table returned by ``self._create_table``.
    """
    if namespace is None:
        namespace = _EggRoll.get_instance().job_id
    if name is None:
        name = str(uuid.uuid1())

    # Only the store type depends on ``persistent``; the rest of the
    # locator is the same in both cases.
    if persistent:
        store_type = storage_basic_pb2.LMDB
    else:
        store_type = storage_basic_pb2.IN_MEMORY
    locator = storage_basic_pb2.StorageLocator(
        type=store_type, namespace=namespace, name=name)

    request = kv_pb2.CreateTableInfo(
        storageLocator=locator, fragmentCount=partition)
    new_table = self._create_table(request)
    new_table.set_in_place_computing(in_place_computing)

    if include_key:
        entries = data
    else:
        entries = enumerate(data)
    new_table.put_all(entries, chunk_size=chunk_size)

    LOGGER.debug("created table: %s", new_table)
    return new_table
def parallelize(self, data: Iterable, include_key=False, name=None, partition=1, namespace=None,
                create_if_missing=True, error_if_exist=False, persistent=False, chunk_size=100000,
                in_place_computing=False, persistent_engine=StoreType.LMDB):
    """Create a table and fill it with the contents of ``data``.

    :param data: iterable handed to ``put_all``.
    :param include_key: forwarded to ``put_all``.
    :param name: table name; a ``uuid.uuid1()`` string when ``None``.
    :param partition: passed as ``fragmentCount`` of the create request.
    :param namespace: table namespace; the ``session_id`` of the current
        ``_EggRoll`` instance when ``None``.
    :param create_if_missing: accepted but not used by this body.
    :param error_if_exist: accepted but not used by this body.
    :param persistent: when false, the table name is registered in the
        session's ``_gc_table``; also passed to ``to_pb_store_type``.
    :param chunk_size: forwarded to ``put_all``.
    :param in_place_computing: forwarded to ``set_in_place_computing``.
    :param persistent_engine: store engine passed to ``to_pb_store_type``.
    :return: the table returned by ``self._create_table``.
    """
    if namespace is None:
        namespace = _EggRoll.get_instance().session_id
    if name is None:
        name = str(uuid.uuid1())

    # Non-persistent tables are recorded in the session's gc table
    # (presumably so they can be cleaned up later -- confirm with the
    # session implementation).
    if not persistent:
        self.eggroll_session._gc_table.put(name, 1)

    store_type = to_pb_store_type(persistent_engine, persistent)
    locator = storage_basic_pb2.StorageLocator(
        type=store_type, namespace=namespace, name=name)
    request = kv_pb2.CreateTableInfo(
        storageLocator=locator, fragmentCount=partition)

    new_table = self._create_table(request)
    new_table.set_in_place_computing(in_place_computing)
    new_table.put_all(data, chunk_size=chunk_size, include_key=include_key)

    LOGGER.debug("created table: %s", new_table)
    return new_table
def __create_output_storage_locator(self, src_op, task_info, session, is_in_place_computing_effective):
    """Choose the ``StorageLocator`` that a task's output is written to.

    ``src_op`` itself is returned when ``is_in_place_computing_effective``
    is truthy and ``self.__get_in_place_computing_from_task_info(task_info)``
    is truthy.  Otherwise a new IN_MEMORY locator is built in the
    ``task_info.task_id`` namespace with ``src_op``'s fragment.  Its name
    depends on ``session.namingPolicy``:

    * ``'ITER_AWARE'``: the MD5 hex digest of the source locator's
      namespace, name and storage-type name joined by ``DELIMETER``,
      followed by ``DELIMETER_ENCODED`` and ``task_info.function_bytes``.
    * anything else: ``task_info.function_id``.

    :param src_op: locator of the task's input.
    :param task_info: task description (``task_id``, ``function_id``,
        ``function_bytes`` are read).
    :param session: object exposing ``namingPolicy``.
    :param is_in_place_computing_effective: whether in-place output may be
        considered at all.
    :return: ``src_op`` or a new ``storage_basic_pb2.StorageLocator``.
    """
    # Short-circuit keeps the task-info lookup from running unless
    # in-place computing is effective.
    if is_in_place_computing_effective and self.__get_in_place_computing_from_task_info(task_info):
        return src_op

    naming_policy = session.namingPolicy
    LOGGER.info('naming policy in processor: {}'.format(naming_policy))

    if naming_policy == 'ITER_AWARE':
        source_parts = (
            src_op.namespace,
            src_op.name,
            storage_basic_pb2.StorageType.Name(src_op.type),
        )
        source_key = DELIMETER.join(source_parts)

        # Hash input: encoded source key, delimiter, then the function bytes.
        digest_input = bytearray(source_key.encode())
        for tail in (DELIMETER_ENCODED, task_info.function_bytes):
            digest_input.extend(tail)
        name = hashlib.md5(digest_input).hexdigest()
    else:
        name = task_info.function_id

    return storage_basic_pb2.StorageLocator(
        namespace=task_info.task_id,
        name=name,
        fragment=src_op.fragment,
        type=storage_basic_pb2.IN_MEMORY,
    )
def _create_output_storage_locator(self, src_op, task_info, process_conf, support_inplace):
    """Choose the ``StorageLocator`` that a task's output is written to.

    :param src_op: locator of the task's input.
    :param task_info: task description (``isInPlaceComputing``,
        ``function_id`` and ``task_id`` are read).
    :param process_conf: accepted but not used by this body.
    :param support_inplace: whether the caller allows in-place output.
    :return: ``src_op`` when ``support_inplace`` and
        ``task_info.isInPlaceComputing`` are both truthy; otherwise a new
        IN_MEMORY locator named ``task_info.function_id`` in the
        ``task_info.task_id`` namespace with ``src_op``'s fragment.
    """
    if support_inplace:
        if task_info.isInPlaceComputing:
            # In-place: the output overwrites the input location.
            return src_op

    output_name = task_info.function_id
    locator_fields = dict(
        namespace=task_info.task_id,
        name=output_name,
        fragment=src_op.fragment,
        type=storage_basic_pb2.IN_MEMORY,
    )
    return storage_basic_pb2.StorageLocator(**locator_fields)
def table(self, name, namespace, partition=1, create_if_missing=True, error_if_exist=False,
          persistent=True, in_place_computing=False):
    """Create a table handle for ``namespace``/``name`` and return it.

    :param name: table name.
    :param namespace: table namespace.
    :param partition: passed as ``fragmentCount`` of the create request.
    :param create_if_missing: accepted but not used by this body.
    :param error_if_exist: accepted but not used by this body.
    :param persistent: selects the LMDB store type when true, IN_MEMORY
        otherwise.
    :param in_place_computing: forwarded to ``set_in_place_computing``.
    :return: the table returned by ``self._create_table``.
    """
    if persistent:
        store_type = storage_basic_pb2.LMDB
    else:
        store_type = storage_basic_pb2.IN_MEMORY

    locator = storage_basic_pb2.StorageLocator(
        type=store_type, namespace=namespace, name=name)
    request = kv_pb2.CreateTableInfo(
        storageLocator=locator, fragmentCount=partition)

    result = self._create_table(request)
    result.set_in_place_computing(in_place_computing)
    LOGGER.debug("created table: %s", result)
    return result
def cleanup(self, name, namespace, persistent):
    """Destroy everything matching ``namespace``/``name`` via ``destroy_all``.

    :param name: table name (must not be ``None``).
    :param namespace: table namespace (must not be ``None``).
    :param persistent: selects the LMDB store type when true, IN_MEMORY
        otherwise.
    :raises ValueError: if ``name`` or ``namespace`` is ``None``.
    """
    if name is None or namespace is None:
        raise ValueError("neither name nor namespace can be None")

    if persistent:
        store_type = storage_basic_pb2.LMDB
    else:
        store_type = storage_basic_pb2.IN_MEMORY

    # A throwaway table handle is built just to address the data to remove.
    target = _DTable(
        storage_locator=storage_basic_pb2.StorageLocator(
            type=store_type, namespace=namespace, name=name))
    self.destroy_all(target)
    LOGGER.debug("cleaned up: %s", target)
def cleanup(self, name, namespace, persistent, persistent_engine=StoreType.LMDB):
    """Destroy everything matching ``namespace``/``name`` via ``destroy_all``.

    :param name: table name (must not be ``None``).
    :param namespace: table namespace (must not be ``None``).
    :param persistent: passed to ``to_pb_store_type`` together with
        ``persistent_engine`` to pick the store type.
    :param persistent_engine: store engine passed to ``to_pb_store_type``.
    :raises ValueError: if ``name`` or ``namespace`` is ``None``.
    """
    if name is None or namespace is None:
        raise ValueError("neither name nor namespace can be None")

    store_type = to_pb_store_type(persistent_engine, persistent)

    # A throwaway table handle is built just to address the data to remove.
    target = _DTable(
        storage_locator=storage_basic_pb2.StorageLocator(
            type=store_type, namespace=namespace, name=name))
    self.destroy_all(target)
    LOGGER.debug("cleaned up: %s", target)
def __create_storage_locator(self, namespace, name, _type):
    """Wrap ``namespace``, ``name`` and ``_type`` in a ``StorageLocator``.

    :param namespace: value for the locator's ``namespace`` field.
    :param name: value for the locator's ``name`` field.
    :param _type: value for the locator's ``type`` field.
    :return: a ``storage_basic_pb2.StorageLocator``.
    """
    locator = storage_basic_pb2.StorageLocator(
        type=_type,
        namespace=namespace,
        name=name,
    )
    return locator
def _get_storage_locator(table):
    """Build a ``StorageLocator`` from a table's private attributes.

    :param table: object exposing ``_type``, ``_namespace``, ``_name`` and
        ``_partitions``.
    :return: a ``storage_basic_pb2.StorageLocator`` whose ``fragment`` is
        the table's ``_partitions``.
    """
    locator_cls = storage_basic_pb2.StorageLocator
    locator_fields = dict(
        type=table._type,
        namespace=table._namespace,
        name=table._name,
        fragment=table._partitions,
    )
    return locator_cls(**locator_fields)