def parallelize(self, data: Iterable, include_key=False, name=None, partition=1, namespace=None,
                create_if_missing=True, error_if_exist=False, persistent=False, chunk_size=100000,
                in_place_computing=False, persistent_engine=StoreType.LMDB):
    """Create a table and load the contents of ``data`` into it.

    Args:
        data: Iterable of values, or of key/value items when
            ``include_key`` is true.
        include_key: When false, ``data`` is wrapped in ``enumerate`` so each
            element is paired with its running index before being stored.
        name: Table name; a ``uuid.uuid1()`` string is generated when None.
        partition: Passed as ``fragmentCount`` of the create-table request.
        namespace: Table namespace; defaults to the ``session_id`` of the
            current ``_EggRoll`` instance when None.
        create_if_missing: Accepted but not read by this method.
        error_if_exist: Accepted but not read by this method.
        persistent: Forwarded to ``to_pb_store_type`` together with
            ``persistent_engine`` to select the storage type.
        chunk_size: Forwarded to the table's ``put_all``.
        in_place_computing: Forwarded to the table's
            ``set_in_place_computing``.
        persistent_engine: Forwarded to ``to_pb_store_type``.

    Returns:
        The table object produced by ``self._create_table`` after the data
        has been written to it.
    """
    if namespace is None:
        namespace = _EggRoll.get_instance().session_id
    if name is None:
        name = str(uuid.uuid1())

    locator = storage_basic_pb2.StorageLocator(
        type=to_pb_store_type(persistent_engine, persistent),
        namespace=namespace,
        name=name,
    )
    request = kv_pb2.CreateTableInfo(storageLocator=locator, fragmentCount=partition)

    new_table = self._create_table(request)
    new_table.set_in_place_computing(in_place_computing)

    # Without explicit keys, each element is keyed by its position in `data`.
    if include_key:
        entries = data
    else:
        entries = enumerate(data)
    new_table.put_all(entries, chunk_size=chunk_size)

    LOGGER.debug("created table: %s", new_table)
    return new_table
def table(self, name, namespace, partition=1, create_if_missing=True, error_if_exist=False,
          persistent=True, in_place_computing=False):
    """Build a create-table request for ``namespace``/``name`` and return the table.

    Args:
        name: Table name placed in the storage locator.
        namespace: Table namespace placed in the storage locator.
        partition: Passed as ``fragmentCount`` of the create-table request.
        create_if_missing: Accepted but not read by this method.
        error_if_exist: Accepted but not read by this method.
        persistent: Selects ``storage_basic_pb2.LMDB`` when true, otherwise
            ``storage_basic_pb2.IN_MEMORY``.
        in_place_computing: Forwarded to the table's
            ``set_in_place_computing``.

    Returns:
        The table object produced by ``self._create_table``.
    """
    if persistent:
        store_type = storage_basic_pb2.LMDB
    else:
        store_type = storage_basic_pb2.IN_MEMORY

    locator = storage_basic_pb2.StorageLocator(
        type=store_type,
        namespace=namespace,
        name=name,
    )
    request = kv_pb2.CreateTableInfo(storageLocator=locator, fragmentCount=partition)

    new_table = self._create_table(request)
    new_table.set_in_place_computing(in_place_computing)
    LOGGER.debug("created table: %s", new_table)
    return new_table
def _create_table_from_locator(self, storage_locator, template: _DTable):
    """Create a table at ``storage_locator`` using settings copied from ``template``.

    The new table takes its fragment count from ``template._partitions`` and
    its in-place-computing flag from ``template.get_in_place_computing()``.

    Args:
        storage_locator: Storage locator placed in the create-table request.
        template: Existing table supplying the partition count and the
            in-place-computing flag.

    Returns:
        The table object produced by ``self._create_table``.
    """
    request = kv_pb2.CreateTableInfo(
        storageLocator=storage_locator,
        fragmentCount=template._partitions,
    )
    derived = self._create_table(request)
    derived.set_in_place_computing(template.get_in_place_computing())
    return derived