Esempio n. 1
0
 def __get_locator(self, obj, name=None):
     """Return a ``StorageLocator`` describing ``obj``.

     When ``obj`` is a ``_DTable`` the locator is built from the table's own
     ``_type``, ``_namespace`` and ``_name``, with ``_partitions`` passed as
     ``fragment``. For any other object an LMDB locator is returned in this
     instance's ``job_id`` namespace under the given ``name``.
     """
     # Handle the non-table case first so the table case reads straight through.
     if not isinstance(obj, _DTable):
         return storage_basic_pb2.StorageLocator(
             type=storage_basic_pb2.LMDB,
             namespace=self.job_id,
             name=name)

     return storage_basic_pb2.StorageLocator(
         type=obj._type,
         namespace=obj._namespace,
         name=obj._name,
         fragment=obj._partitions)
Esempio n. 2
0
 def parallelize(self,
                 data: Iterable,
                 include_key=False,
                 name=None,
                 partition=1,
                 namespace=None,
                 create_if_missing=True,
                 error_if_exist=False,
                 persistent=False,
                 chunk_size=100000,
                 in_place_computing=False):
     """Create a table and load ``data`` into it.

     Args:
         data: Items to store. They are passed through unchanged when
             ``include_key`` is true; otherwise they are wrapped in
             ``enumerate`` before being handed to ``put_all``.
         include_key: Whether ``data`` is passed to ``put_all`` as-is.
         name: Table name; a ``uuid.uuid1()`` string is generated when None.
         partition: Value passed as ``fragmentCount`` of the create request.
         namespace: Table namespace; defaults to the ``job_id`` of the
             ``_EggRoll`` singleton when None.
         create_if_missing: Accepted but not used by this method.
         error_if_exist: Accepted but not used by this method.
         persistent: Selects the LMDB store type when true, IN_MEMORY
             otherwise.
         chunk_size: Forwarded to ``put_all``.
         in_place_computing: Forwarded to ``set_in_place_computing``.

     Returns:
         The table returned by ``self._create_table``, after loading.
     """
     if namespace is None:
         namespace = _EggRoll.get_instance().job_id
     if name is None:
         name = str(uuid.uuid1())

     # Only the store type differs between persistent and transient tables.
     if persistent:
         store_type = storage_basic_pb2.LMDB
     else:
         store_type = storage_basic_pb2.IN_MEMORY
     locator = storage_basic_pb2.StorageLocator(
         type=store_type, namespace=namespace, name=name)

     table_info = kv_pb2.CreateTableInfo(
         storageLocator=locator, fragmentCount=partition)
     new_table = self._create_table(table_info)
     new_table.set_in_place_computing(in_place_computing)

     if include_key:
         entries = data
     else:
         entries = enumerate(data)
     new_table.put_all(entries, chunk_size=chunk_size)

     LOGGER.debug("created table: %s", new_table)
     return new_table
Esempio n. 3
0
    def parallelize(self,
                    data: Iterable,
                    include_key=False,
                    name=None,
                    partition=1,
                    namespace=None,
                    create_if_missing=True,
                    error_if_exist=False,
                    persistent=False,
                    chunk_size=100000,
                    in_place_computing=False,
                    persistent_engine=StoreType.LMDB):
        """Create a table and load ``data`` into it.

        Args:
            data: Items to store; handed to ``put_all`` together with
                ``include_key``.
            include_key: Forwarded to ``put_all``.
            name: Table name; a ``uuid.uuid1()`` string is generated when
                None.
            partition: Value passed as ``fragmentCount`` of the create request.
            namespace: Table namespace; defaults to the ``session_id`` of the
                ``_EggRoll`` singleton when None.
            create_if_missing: Accepted but not used by this method.
            error_if_exist: Accepted but not used by this method.
            persistent: When false, the table name is recorded in the
                session's ``_gc_table`` (presumably so it can be collected
                later -- confirm against the session implementation).
            chunk_size: Forwarded to ``put_all``.
            in_place_computing: Forwarded to ``set_in_place_computing``.
            persistent_engine: Passed with ``persistent`` to
                ``to_pb_store_type`` to obtain the locator's store type.

        Returns:
            The table returned by ``self._create_table``, after loading.
        """
        namespace = (_EggRoll.get_instance().session_id
                     if namespace is None else namespace)
        name = str(uuid.uuid1()) if name is None else name

        if not persistent:
            self.eggroll_session._gc_table.put(name, 1)

        store_type = to_pb_store_type(persistent_engine, persistent)
        locator = storage_basic_pb2.StorageLocator(
            type=store_type, namespace=namespace, name=name)
        table_info = kv_pb2.CreateTableInfo(
            storageLocator=locator, fragmentCount=partition)

        new_table = self._create_table(table_info)
        new_table.set_in_place_computing(in_place_computing)
        new_table.put_all(data, chunk_size=chunk_size, include_key=include_key)

        LOGGER.debug("created table: %s", new_table)
        return new_table
Esempio n. 4
0
    def __create_output_storage_locator(self, src_op, task_info, session,
                                        is_in_place_computing_effective):
        """Return the storage locator that a task's output is written to.

        Args:
            src_op: Locator of the source operand; its ``namespace``,
                ``name``, ``type`` and ``fragment`` are read.
            task_info: Task description; its ``task_id``, ``function_id``
                and ``function_bytes`` are read.
            session: Session object; its ``namingPolicy`` selects how the
                output name is derived.
            is_in_place_computing_effective: When true and the task itself
                requests in-place computing, ``src_op`` is reused as output.

        Returns:
            ``src_op`` itself for in-place computing; otherwise a new
            IN_MEMORY ``StorageLocator`` in namespace ``task_info.task_id``
            carrying ``src_op.fragment``.
        """
        if (is_in_place_computing_effective
                and self.__get_in_place_computing_from_task_info(task_info)):
            return src_op

        naming_policy = session.namingPolicy
        # Pass the value as a lazy logging argument so the message is only
        # formatted when the INFO level is actually enabled.
        LOGGER.info('naming policy in processor: %s', naming_policy)
        if naming_policy == 'ITER_AWARE':
            # The name is the md5 hex digest of the source identity
            # (namespace, name, store-type name) followed by the function
            # bytes.
            storage_name = DELIMETER.join([
                src_op.namespace, src_op.name,
                storage_basic_pb2.StorageType.Name(src_op.type)
            ])
            name_ba = bytearray(storage_name.encode())
            name_ba.extend(DELIMETER_ENCODED)
            name_ba.extend(task_info.function_bytes)

            name = hashlib.md5(name_ba).hexdigest()
        else:
            name = task_info.function_id

        return storage_basic_pb2.StorageLocator(
            namespace=task_info.task_id,
            name=name,
            fragment=src_op.fragment,
            type=storage_basic_pb2.IN_MEMORY)
Esempio n. 5
0
 def _create_output_storage_locator(self, src_op, task_info, process_conf,
                                    support_inplace):
     """Return the storage locator that a task's output is written to.

     ``src_op`` is returned unchanged when ``support_inplace`` is truthy and
     ``task_info.isInPlaceComputing`` is set. Otherwise a new IN_MEMORY
     locator is built in namespace ``task_info.task_id``, named after
     ``task_info.function_id`` and carrying ``src_op.fragment``.
     ``process_conf`` is accepted but not used here.
     """
     reuse_source = support_inplace and task_info.isInPlaceComputing
     if reuse_source:
         return src_op

     output_name = task_info.function_id
     output_locator = storage_basic_pb2.StorageLocator(
         namespace=task_info.task_id,
         name=output_name,
         fragment=src_op.fragment,
         type=storage_basic_pb2.IN_MEMORY)
     return output_locator
Esempio n. 6
0
 def table(self, name, namespace, partition=1,
           create_if_missing=True, error_if_exist=False,
           persistent=True, in_place_computing=False):
     """Create a table for ``name`` / ``namespace`` and return it.

     The store type is LMDB when ``persistent`` is truthy and IN_MEMORY
     otherwise; ``partition`` is passed as the ``fragmentCount`` of the
     create request. ``in_place_computing`` is forwarded to the new table.
     ``create_if_missing`` and ``error_if_exist`` are accepted but not used
     by this method.
     """
     if persistent:
         store_type = storage_basic_pb2.LMDB
     else:
         store_type = storage_basic_pb2.IN_MEMORY

     locator = storage_basic_pb2.StorageLocator(
         type=store_type, namespace=namespace, name=name)
     table_info = kv_pb2.CreateTableInfo(
         storageLocator=locator, fragmentCount=partition)

     result = self._create_table(table_info)
     result.set_in_place_computing(in_place_computing)
     LOGGER.debug("created table: %s", result)
     return result
Esempio n. 7
0
    def cleanup(self, name, namespace, persistent):
        """Destroy the table identified by ``name`` and ``namespace``.

        The store type is LMDB when ``persistent`` is truthy and IN_MEMORY
        otherwise. A ``_DTable`` handle is built for that locator and passed
        to ``self.destroy_all``.

        Raises:
            ValueError: If ``name`` or ``namespace`` is None.
        """
        if any(value is None for value in (namespace, name)):
            raise ValueError("neither name nor namespace can be None")

        if persistent:
            store_type = storage_basic_pb2.LMDB
        else:
            store_type = storage_basic_pb2.IN_MEMORY

        locator = storage_basic_pb2.StorageLocator(
            type=store_type, namespace=namespace, name=name)
        doomed = _DTable(storage_locator=locator)
        self.destroy_all(doomed)

        LOGGER.debug("cleaned up: %s", doomed)
Esempio n. 8
0
    def cleanup(self,
                name,
                namespace,
                persistent,
                persistent_engine=StoreType.LMDB):
        """Destroy the table identified by ``name`` and ``namespace``.

        The locator's store type comes from
        ``to_pb_store_type(persistent_engine, persistent)``. A ``_DTable``
        handle is built for that locator and passed to ``self.destroy_all``.

        Raises:
            ValueError: If ``name`` or ``namespace`` is None.
        """
        if any(value is None for value in (namespace, name)):
            raise ValueError("neither name nor namespace can be None")

        store_type = to_pb_store_type(persistent_engine, persistent)
        locator = storage_basic_pb2.StorageLocator(
            type=store_type, namespace=namespace, name=name)

        doomed = _DTable(storage_locator=locator)
        self.destroy_all(doomed)

        LOGGER.debug("cleaned up: %s", doomed)
Esempio n. 9
0
 def __create_storage_locator(self, namespace, name, _type):
     """Build a ``StorageLocator`` from the given namespace, name and type."""
     locator = storage_basic_pb2.StorageLocator(
         namespace=namespace, name=name, type=_type)
     return locator
Esempio n. 10
0
def _get_storage_locator(table):
    """Build a ``StorageLocator`` from a table's private attributes.

    ``table._type``, ``table._namespace`` and ``table._name`` map to the
    fields of the same name; ``table._partitions`` is passed as ``fragment``.
    """
    locator = storage_basic_pb2.StorageLocator(
        type=table._type,
        namespace=table._namespace,
        name=table._name,
        fragment=table._partitions,
    )
    return locator