Esempio n. 1
0
 def on_init(created: bool):
     """Initialization callback: on first creation, populate task state and enqueue it.

     Closes over `self`, `asyncable`, `args` and `kwargs` from the enclosing
     scope. All writes, the index entries, and the queue insertion happen in
     one write transaction so the new task becomes visible atomically.
     """
     if created:
         # A task cannot exist without the asyncable it will execute.
         if asyncable is None:
             raise ValueError()
         with transaction_context(self._env, write=True) as (txn, _, _):
             self._status = 'submitted'
             self._result = None
             self._error = None
             self._created_timestamp = time.time_ns()
             self._start_timestamp = None
             self._end_timestamp = None
             self._pid = None
             self._node_uid = None
             self._asyncable = asyncable
             self._asyncable_uuid = asyncable.uuid
             self._args = args if args is not None else ()
             self._kwargs = kwargs if kwargs is not None else {}
             # Two index keys allow lookup in either direction:
             # asyncable uuid -> task name, and task name -> asyncable uuid.
             key1 = ':'.join([asyncable.uuid, self.name])
             key2 = ':'.join([self.name, asyncable.uuid])
             assert txn.put(key=key1.encode('utf-8'),
                            value=b'',
                            append=False)
             assert txn.put(key=key2.encode('utf-8'),
                            value=b'',
                            append=False)
             # Hand the task to the worker pool via the shared submit queue.
             submit_queue = Queue(constants.SUBMIT_QUEUE_PATH,
                                  create=True)
             submit_queue.put(self)
Esempio n. 2
0
 def __setstate__(self, from_wire: Any):
     """Rebuild this entity from its wire form: (site_uuid, namespace, name).

     Reopens the LMDB environment for the entity's namespace, re-resolves the
     entity's uuid and descriptor by name, then rebinds its databases.

     Raises:
         ObjectNotFoundError: the named entity no longer exists at the site.
     """
     self._site_uuid, self._namespace, name = from_wire
     self._storage_path = get_storage_path(self._site_uuid)
     # Unpickling never creates storage; the entity must already exist.
     self._create = False
     self._encname = name.encode('utf-8')
     _, self._env, self._namedb, self._attrdb, self._versdb, self._descdb = \
     get_environment_threadsafe(
         self._storage_path,
         self._namespace,
         create = False
     )
     self._userdb = []
     with transaction_context(self._env, write=False) as (txn, _, _):
         self._uuid_bytes = txn.get(key=self._encname, db=self._namedb)
         # LMDB may return memoryviews; normalize to bytes before use.
         self._uuid_bytes = bytes(self._uuid_bytes) \
         if isinstance(self._uuid_bytes, memoryview) else self._uuid_bytes
         if self._uuid_bytes is None:
             raise ObjectNotFoundError()
         result = txn.get(key=self._uuid_bytes, db=self._descdb)
         result = bytes(result) if isinstance(result,
                                              memoryview) else result
         descriptor = orjson.loads(result)
         self._versioned = descriptor['versioned']
     self.__bind_databases(descriptor=descriptor)
     self.__class__.__initialize_class__()
Esempio n. 3
0
def wait(*args):
    """Block until a condition holds, polling entity versions for changes.

    The last positional argument may be a plain function used as the
    condition (defaults to an always-true condition); all remaining
    arguments must be Entity instances sharing one site and namespace.
    The condition is evaluated inside a read transaction whenever any
    entity's version changes (or on every poll when no entities are given).
    """
    args = list(args)
    if not args:
        raise ValueError()
    # NOTE(review): only plain functions (including lambdas) are accepted as
    # the condition; other callables (e.g. functools.partial, Asyncable) are
    # treated as entities and will fail the Entity check below — presumably
    # deliberate, since entities themselves may be callable.
    if isinstance(args[-1], types.FunctionType):
        condition = args.pop()
    else:
        condition = lambda: True
    if not all(isinstance(arg, Entity) for arg in args):
        raise ValueError()
    if len(args) > 0:
        # All entities must share one (site, namespace) so a single
        # environment/transaction can observe them consistently.
        if [(arg.site_uuid, arg.namespace) for arg in args].count(
            (args[0].site_uuid, args[0].namespace)) != len(args):
            raise ValueError()
    versions = []
    for _ in polling_loop(
            getenv(constants.ADAPTER_POLLING_INTERVAL_ENVNAME, float)):
        if len(args) > 0:
            _, env, _, _, _, _ = get_environment_threadsafe(
                args[0].storage_path, args[0].namespace, create=False)
            with transaction_context(env, write=False):
                if not versions:
                    # First iteration: snapshot versions and test once.
                    versions = [arg.version for arg in args]
                    if condition():
                        break
                else:
                    # Re-test the condition only when some version changed.
                    if any(versions[i] != arg.version
                           for i, arg in enumerate(args)):
                        if condition():
                            break
                        versions = [arg.version for arg in args]
        else:
            if condition():
                break
Esempio n. 4
0
 def set_content(self, value: Any):
     """Store *value* as this file's content, setting type metadata to match.

     Strings are encoded with the file's encoding; binary buffers are stored
     as-is; DataFrames and ndarrays use dedicated serializers; anything else
     is pickled. Runs in a single write transaction.

     Raises:
         ValueError: if the file is still open (not closed) for writing.
     """
     if not self._closed:
         raise ValueError()
     with transaction_context(self._env, write = True):
         metadata = self.metadata
         # Reserved keys are recomputed below; clear any stale values first.
         for key in reserved_metadata_keys:
             if key in metadata:
                 del metadata[key]
         if isinstance(value, str):
             # NOTE(review): for text, _size is the character count, not the
             # encoded byte length — confirm readers expect this.
             self._size = len(value)
             self._content_binary = memoryview(value.encode(self.encoding))
             metadata['content-type'] = 'text/plain'
             metadata['content-encoding'] = self.encoding
         elif isinstance(value, (memoryview, bytes, bytearray)):
             self._size = len(value)
             self._content_binary = memoryview(value)
             metadata['content-type'] = 'application/octet-stream'
         elif isinstance(value, pd.DataFrame):
             self.__set_pandas_dataframe(value, metadata)
         elif isinstance(value, np.ndarray):
             self.__set_numpy_ndarray(value, metadata)
         else:
             # Fallback: pickle arbitrary objects, recording the original
             # type so readers can interpret the payload.
             pickled = pickle.dumps(value)
             self._size = len(pickled)
             self._content_binary = memoryview(pickled)
             metadata['content-type'] = 'application/python-pickle'
             metadata['content-properties'] = dict(
                 type = get_qualified_class_name(value)
             )
         metadata['last-modified'] = str(datetime.datetime.now())
         super(File, self.__class__).metadata.fset(self, metadata) # type: ignore
Esempio n. 5
0
 def __call__(self, *args, **kwargs) -> Optional[Any]:
     """Invoke synchronously, or submit an asynchronous Task.

     A `sync` keyword (consumed, never forwarded) overrides the configured
     default. When running asynchronously with an async limit configured,
     the limit check and the submission happen inside one write transaction
     so they are atomic.

     Returns:
         The synchronous result, a new Task, or None when the async limit
         is already reached.
     """
     if 'sync' in kwargs:
         sync = bool(kwargs['sync'])
         del kwargs['sync']
     else:
         sync = self.__default_sync
     if self.__disable_sync or not sync:

         def submit_task() -> Task:
             # Single construction point for the asynchronous Task
             # (previously duplicated in both branches below).
             return Task(asyncable=self,
                         args=args,
                         kwargs=kwargs,
                         site_uuid=self.site_uuid,
                         create=True,
                         bind=False)

         if self.__async_limit is not None:
             _, env, _, _, _, _ = get_environment_threadsafe(
                 self.storage_path, constants.TASK_NAMESPACE, create=True)
             count = 0
             with transaction_context(env, write=True):
                 for task in self.tasks():
                     if task.status in ['submitted', 'running']:
                         count += 1
                         if count == self.__async_limit:
                             # Limit reached: refuse to submit.
                             return None
                 # Submit inside the same transaction as the count.
                 return submit_task()
         return submit_task()
     return self.invoke(args=args, kwargs=kwargs)
Esempio n. 6
0
 def get_content(
     self,
     *,
     zero_copy: bool = True
 ) -> Optional[Any]:
     """Decode and return this file's content according to its content-type.

     Args:
         zero_copy: when True, octet-stream and ndarray content may share
             the underlying buffer instead of copying it.

     Returns:
         The decoded content, or None when the content-type is missing or
         not one of the supported types.

     Raises:
         ValueError: if the file is still open (not closed) for writing.
     """
     if not self._closed:
         raise ValueError()
     with transaction_context(self._env, write = False):
         try:
             metadata = self.metadata
             # Dispatch on the recorded content-type; the lambdas defer any
             # decoding work until a matching type is found.
             return {
                 'application/octet-stream': lambda: self.__get_octet_stream(
                     zero_copy
                 ),
                 'application/python-pickle': lambda: pickle.loads(self._content_binary),
                 'application/python-pandas-dataframe': lambda: self.__get_pandas_dataframe(),
                 'application/python-numpy-ndarray': lambda: self.__get_numpy_ndarray(
                     self._content_binary,
                     metadata,
                     zero_copy
                 ),
                 'text/plain': lambda: codecs.decode(
                     self._content_binary, encoding = self.encoding
                 )
             }[metadata['content-type']]()
         except KeyError:
             # Missing or unsupported content-type.
             return None
     # Removed an unreachable trailing `raise ValueError()`: every path in
     # the with-block above already returns.
Esempio n. 7
0
 def _save_buffer(self):
     """Persist the in-memory write buffer as this file's binary content."""
     with transaction_context(self._env, write=True):
         if isinstance(self._buffer, mmap.mmap):
             # Binary path: only the written extent of the mmap is content.
             self._size = self._extent
             self._content_binary = memoryview(self._buffer)[0:self._extent]
         else:
             # Text path (StringIO-like buffer): _size is the character
             # count while the stored bytes are the encoded form.
             # NOTE(review): confirm readers expect character count rather
             # than encoded byte length here.
             content = self._buffer.getvalue()
             self._size = len(content)
             self._content_binary = content.encode(self._encoding)
Esempio n. 8
0
def get_concurrency(*, site_uuid: Optional[str] = None) -> int:
    """Return the cluster concurrency setting, seeding it on first access.

    The value lives in the shared cluster-state dict; if absent it is
    initialized (inside a write transaction) from the environment variable.
    """
    cluster_state = Dict(
        constants.CLUSTER_STATE_DICT_PATH, site_uuid = site_uuid,
        create = True, bind = True
    )
    environment = get_environment_threadsafe(
        cluster_state.storage_path, cluster_state.namespace, create = False
    )[1]
    with transaction_context(environment, write = True):
        if 'concurrency' not in cluster_state:
            default = getenv(constants.CLUSTER_CONCURRENCY_ENVNAME, int)
            cluster_state['concurrency'] = default
    return cluster_state['concurrency']
Esempio n. 9
0
 def __create_or_bind(self,
                      *,
                      db_properties: List[LMDBProperties],
                      versioned: bool,
                      metadata: Dict[str, Any],
                      on_init: Optional[Callable[[bool], None]],
                      bind: bool = True):
     """Create this entity's storage, or bind to it if it already exists.

     Runs in a single write transaction: re-checks the name entry, and if
     absent allocates a uuid, a version counter, a descriptor, and the
     per-entity user databases, then fires on_init(True).

     Raises:
         ObjectExistsError: entity already exists and bind is False.
     """
     with transaction_context(self._env, write=True) as (txn, _, _):
         obj_uuid = txn.get(key=self._encname, db=self._namedb)
         if obj_uuid:
             if bind:
                 # Lost the creation race: fall back to binding.
                 return self.__bind_or_create(db_properties=db_properties,
                                              versioned=versioned,
                                              metadata=metadata,
                                              on_init=on_init,
                                              create=False)
             raise ObjectExistsError()
         obj_uuid = uuid.uuid4().bytes
         assert txn.put(key=self._encname, value=obj_uuid, db=self._namedb)
         # Version counter starts at zero.
         assert txn.put(key=obj_uuid,
                        value=struct.pack('@N', 0),
                        db=self._versdb)
         basename = str(uuid.uuid4())
         # Each user database gets a digest-derived unique id.
         descriptor: Descriptor = dict(databases=list(
             zip([
                 create_string_digest(''.join([basename, str(i)]))
                 for i in range(len(db_properties))
             ], db_properties)),
                                       uuid=str(uuid.UUID(bytes=obj_uuid)),
                                       versioned=versioned,
                                       created=str(datetime.datetime.now()),
                                       type=get_qualified_class_name(self),
                                       metadata=metadata)
         assert txn.put(key=obj_uuid,
                        value=orjson.dumps(descriptor),
                        db=self._descdb)
         for dbuid, props in descriptor['databases']:
             self._userdb.append(
                 open_database_threadsafe(txn,
                                          self._env,
                                          dbuid,
                                          props,
                                          create=True))
         self._uuid_bytes = obj_uuid
         self._versioned = descriptor['versioned']
         if on_init:
             # _create flags that the callback runs in creation mode.
             self._create = True
             on_init(True)
             self._create = False
         return None
Esempio n. 10
0
 def attributes(self) -> Iterator[str]:
     """Yield the names of this entity's stored attributes.

     Attribute keys are stored uuid-prefixed in the attribute database;
     the prefix is stripped and the remainder decoded before yielding.
     """
     with transaction_context(self._env, write=False,
                              iterator=True) as (_, cursors, _):
         cursor = cursors[self._attrdb]
         if cursor.set_range(self._uuid_bytes):
             while True:
                 key = cursor.key()
                 # LMDB may return memoryviews; normalize to bytes.
                 key = bytes(key) if isinstance(key, memoryview) else key
                 if key.startswith(self._uuid_bytes):
                     key = key[len(self._uuid_bytes):]
                     yield self.decode_attr_key(key)
                     if cursor.next():
                         continue
                 # Past this entity's uuid prefix, or cursor exhausted.
                 return
Esempio n. 11
0
 def metadata(self, value: Dict[str, Any]):
     """Replace the user-visible metadata while preserving reserved keys.

     Reserved keys keep their current values; reserved keys in *value*
     are ignored. The merge and write happen in one write transaction.
     """
     with transaction_context(self._env, write = True):
         merged = {}
         for key, val in self.metadata.items():
             if key in reserved_metadata_keys:
                 merged[key] = val
         for key, val in value.items():
             if key not in reserved_metadata_keys:
                 merged[key] = val
         super(File, self.__class__).metadata.fset(self, merged) # type: ignore
Esempio n. 12
0
 def cancel(self):
     """Cancel this task; terminate its worker process if it was running.

     The status change happens inside a write transaction; process
     termination is deliberately deferred until after the transaction
     commits.
     """
     # Fast path: nothing to do for finished tasks. The status is
     # re-checked under the transaction below since it may change
     # concurrently.
     if self._status not in ['running', 'submitted']:
         return
     node_uid = pid = None
     with transaction_context(self._env, write=True):
         status = self._status
         if status in ['submitted', 'running']:
             self._status = 'cancelled'
         if status == 'running':
             node_uid = self._node_uid
             pid = self._pid
             assert node_uid is not None and pid is not None
     if node_uid is not None:
         terminate_node(node_uid, pid)
Esempio n. 13
0
 def status(self) -> str:
     """Return the task's effective status, detecting crashed workers.

     A 'running' task whose recorded node/pid is no longer alive is
     reported as 'crashed'.
     """
     with transaction_context(self._env, write=False):
         status = self._status
         if status == 'running':
             node_uid = self._node_uid
             pid = self._pid
             assert node_uid is not None and pid is not None
             # Liveness probes are cached to avoid checking on every call.
             status = self._running_cache.get(
                 (node_uid, pid),
                 default = \
                 lambda key: 'running' if is_running(key[0], key[1]) else 'crashed'
             )
             if status == 'crashed':
                 # Re-pin the crashed result in the cache.
                 # NOTE(review): assumes ttl=0 means "never expire" —
                 # confirm against the cache implementation.
                 self._running_cache.set((node_uid, pid), 'crashed', ttl=0)
         return status
Esempio n. 14
0
 def get(
     self,
     name: str
 ):
     """Load and return the named entity from this namespace.

     Raises:
         ObjectNotFoundError: no entity with that name exists.
     """
     environment = get_environment_threadsafe(
         self._storage_path, self._path, create = self._create
     )
     env, name_db, descriptor_db = environment[1], environment[2], environment[5]
     with transaction_context(env, write = False, iterator = True) as (_, cursors, _):
         entity = load_entity(
             name_db, descriptor_db, cursors, self._site_uuid,
             self._path, name = name
         )
         if entity is None:
             raise ObjectNotFoundError()
         return entity
Esempio n. 15
0
 def on_init(created: bool):
     """Initialization callback: apply sync/async options from enclosing scope.

     Closes over `self` and the option variables. On creation every option
     gets a default; on re-binding, only explicitly provided (non-None)
     options are updated, inside a write transaction, before the target
     callable is (re)loaded.
     """
     if created:
         self.__default_sync = default_sync if default_sync is not None else False
         self.__async_limit = async_limit if async_limit is not None else None
         self.__latest = None
         self.__disable_sync = disable_sync if disable_sync is not None else False
         load_target()
     else:
         with transaction_context(self._env, write=True):
             if default_sync is not None:
                 self.__default_sync = default_sync
             if disable_sync is not None:
                 self.__disable_sync = disable_sync
             if async_limit is not None:
                 self.__async_limit = async_limit
             load_target()
Esempio n. 16
0
 def get_snapshot(self) -> \
 typing.Dict[int, typing.Dict[str, Union[float, Optional[str]]]]:
     """Return a plain-dict snapshot of tracked processes, pruning dead entries.

     A recorded pid is dropped when no live process has it, or when the
     live process with that pid is newer than the recorded one (pid reuse).
     """
     with transaction_context(self._env, write=True):
         active_pids = []
         recorded_pids = list(self.keys())
         for proc in psutil.process_iter(['create_time', 'pid']):
             try:
                 pid = proc.info['pid']
                 active_pids.append(pid)
                 if pid in recorded_pids:
                     create_time = self[pid]['create_time']
                     # Same pid but a newer process: the recorded process
                     # died and its pid was recycled.
                     if create_time < proc.info['create_time']:
                         del self[pid]
             except (psutil.NoSuchProcess, psutil.AccessDenied):
                 # Process vanished or is inaccessible mid-iteration.
                 pass
         # Drop entries whose pid no longer exists at all.
         for pid in set(recorded_pids).difference(set(active_pids)):
             del self[pid]
         return dict(self)
Esempio n. 17
0
 def __bind_databases(self,
                      *,
                      descriptor: Descriptor,
                      on_init: Optional[Callable[[bool], None]] = None):
     """Attach handles for every user database listed in the descriptor.

     Handles already cached process-wide are reused; any missing ones are
     opened inside a write transaction. on_init(False) is always fired —
     inside the transaction only when databases had to be opened.
     """
     for dbuid, _ in descriptor['databases']:
         self._userdb.append(get_database_threadsafe(dbuid))
     if any(db is None for db in self._userdb):
         with transaction_context(self._env, write=True) as (txn, _, _):
             for index, (dbuid,
                         properties) in enumerate(descriptor['databases']):
                 if not self._userdb[index]:
                     self._userdb[index] = open_database_threadsafe(
                         txn, self._env, dbuid, properties, create=False)
             if on_init:
                 on_init(False)
     else:
         if on_init:
             on_init(False)
Esempio n. 18
0
    def is_scheduled(self) -> bool:
        """Atomically decide whether a run is due now, recording it if so.

        Returns True exactly when the caller should run immediately; on a
        True return the run count, last-run time, and next-run time are
        updated inside the same write transaction.
        """
        with transaction_context(self._env, write=True):

            now_ns = time.time_ns()

            # All permitted runs already consumed.
            if self.__count == self.__max_times:
                return False

            assert self.__frequency is not None and self.__period is not None
            interval_ns = get_interval(self.__frequency, self.__period)

            def get_next_ns(start_ns):
                # Next interval boundary strictly after now, aligned to start_ns.
                relative_ns = now_ns - start_ns
                offset_ns = relative_ns % interval_ns
                return now_ns + (interval_ns - offset_ns)

            if self.__last_run_ns is None:
                # First run: allowed once the start time (if any) has passed.
                if self.__start_ns is None or self.__start_ns <= now_ns:
                    self.__count += 1
                    self.__last_run_ns = now_ns
                    if self.__count < self.__max_times:
                        # Anchor the schedule at the first actual run when no
                        # explicit start time was configured. (Removed a no-op
                        # `else: self.__start_ns = self.__start_ns` branch.)
                        if self.__start_ns is None:
                            self.__start_ns = now_ns
                        self.__next_run_ns = get_next_ns(self.__start_ns)
                    return True
                return False

            # Subsequent runs: fire when the scheduled boundary has passed.
            assert isinstance(self.__next_run_ns, int)
            if self.__next_run_ns <= now_ns:
                self.__count += 1
                self.__last_run_ns = now_ns
                if self.__count < self.__max_times:
                    self.__next_run_ns = get_next_ns(self.__start_ns)
                else:
                    self.__next_run_ns = None
                return True
            return False
Esempio n. 19
0
 def drop(self):
     """Delete this task and its index entries; kill its worker if running.

     Index cleanup and the base-class drop run in one write transaction;
     process termination happens after the transaction commits.
     """
     node_uid = pid = None
     with transaction_context(self._env, write=True) as (txn, _, _):
         if self._status == 'running':
             node_uid = self._node_uid
             pid = self._pid
             assert node_uid is not None and pid is not None
         # Locate the 'name:uuid' index entry to recover the asyncable uuid.
         cursor = txn.cursor()
         assert cursor.set_range(self.name.encode('utf-8'))
         key_bytes = cursor.key()
         key_bytes = bytes(key_bytes) if isinstance(
             key_bytes, memoryview) else key_bytes
         key = key_bytes.decode('utf-8')
         assert key.startswith(self.name) and ':' in key
         # NOTE(review): assumes task names never contain ':' — otherwise
         # split(':')[1] would not be the uuid.
         asyncable_uuid = key.split(':')[1]
         # Remove both directions of the name<->uuid index.
         assert txn.delete(
             key=':'.join([asyncable_uuid, self.name]).encode('utf-8'))
         assert txn.delete(
             key=':'.join([self.name, asyncable_uuid]).encode('utf-8'))
         super().drop()
     if node_uid is not None:
         terminate_node(node_uid, pid)
Esempio n. 20
0
 def tasks(self) -> Iterator[Task]:
     """Yield every Task submitted for this asyncable.

     Scans the 'uuid:name' index keys prefixed with this asyncable's uuid
     and resolves each task name through the task namespace; index entries
     that no longer resolve are skipped.
     """
     _, env, _, _, _, _ = get_environment_threadsafe(
         self.storage_path, constants.TASK_NAMESPACE, create=True)
     with transaction_context(env, write=False) as (txn, _, _):
         cursor = txn.cursor()
         namespace = Namespace(constants.TASK_NAMESPACE,
                               site_uuid=self.site_uuid)
         if cursor.set_range(self.uuid.encode('utf-8')):
             while True:
                 key_bytes = cursor.key()
                 # LMDB may return memoryviews; normalize to bytes.
                 key_bytes = bytes(key_bytes) if isinstance(
                     key_bytes, memoryview) else key_bytes
                 key = key_bytes.decode('utf-8')
                 if key.startswith(self.uuid):
                     name = key.split(':')[1]
                     try:
                         entity = namespace.get(name)
                         if isinstance(entity, Task):
                             yield entity
                     except (KeyError, ObjectNotFoundError):
                         # Task vanished between index scan and lookup.
                         pass
                     if cursor.next():
                         continue
                 break
Esempio n. 21
0
 def __bind_or_create(self,
                      *,
                      db_properties: List[LMDBProperties],
                      versioned: bool,
                      metadata: Dict[str, Any],
                      on_init: Optional[Callable[[bool], None]] = None,
                      create: bool = True):
     """Bind to an existing entity by name, or create it when allowed.

     Under a read transaction, resolves the stored descriptor and checks
     that its type is compatible with this instance's class before binding.

     Raises:
         TypeError: stored type is incompatible with this class.
         ObjectNotFoundError: entity absent and create is False.
     """
     with transaction_context(self._env, write=False) as (txn, _, _):
         result = txn.get(key=self._encname, db=self._namedb)
         if result:
             # LMDB may return memoryviews; normalize to bytes.
             obj_uuid = bytes(result) if isinstance(result,
                                                    memoryview) else result
             result = txn.get(key=obj_uuid, db=self._descdb)
             result = bytes(result) if isinstance(result,
                                                  memoryview) else result
             descriptor = orjson.loads(result)
             my_class_name = get_qualified_class_name(self)
             if descriptor['type'] != my_class_name:
                 # Binding as a base class of the stored type is allowed.
                 try:
                     if my_class_name not in \
                     get_qualified_base_names(create_class(descriptor['type'])):
                         raise TypeError()
                 except AttributeError as exc:
                     # Stored type cannot be reconstructed; only the root
                     # Entity class may still bind.
                     if my_class_name != 'parkit.storage.entity.Entity':
                         raise TypeError() from exc
             self._uuid_bytes = obj_uuid
             self._versioned = descriptor['versioned']
             self.__bind_databases(descriptor=descriptor, on_init=on_init)
             return None
     if create:
         return self.__create_or_bind(db_properties=db_properties,
                                      versioned=versioned,
                                      metadata=metadata,
                                      on_init=on_init,
                                      bind=True)
     raise ObjectNotFoundError()
Esempio n. 22
0
            if thread.local.default_site is not None:
                storage_path, _ = thread.local.default_site
            else:
                raise SiteNotSpecifiedError()
    elif isinstance(obj, Namespace):
        namespace = obj.path
        storage_path = obj.storage_path
    elif isinstance(obj, Entity):
        namespace = obj.namespace
        storage_path = obj.storage_path
    else:
        raise ValueError()
    _, env, _, _, _, _ = get_environment_threadsafe(storage_path,
                                                    namespace,
                                                    create=False)
    return transaction_context(env, write=True)


def snapshot(obj: Optional[Union[str, Namespace, Entity]] = None,
             /,
             *,
             site_uuid: Optional[str] = None) -> ContextManager:
    if obj is None or isinstance(obj, str):
        namespace = resolve_namespace(obj)
        if site_uuid is not None:
            storage_path = get_storage_path(site_uuid)
        else:
            if thread.local.default_site is not None:
                storage_path, _ = thread.local.default_site
            else:
                raise SiteNotSpecifiedError()
Esempio n. 23
0
        include_hidden: Optional[bool] = None
    ) -> Iterator[Tuple[str, Dict[str, Any]]]:
        for name, descriptor in self.descriptors(
            include_hidden = include_hidden if include_hidden is not None else self._include_hidden
        ):
            yield (name, descriptor['metadata'])

    def descriptors(
        self,
        /, *,
        include_hidden: Optional[bool] = None
    ) -> Iterator[Tuple[str, Descriptor]]:
        """Iterate (name, descriptor) pairs for entities in this namespace.

        Args:
            include_hidden: when not None, overrides the namespace's
                default hidden-entity filter.
        """
        _, env, name_db, _, _, descriptor_db = get_environment_threadsafe(
            self._storage_path, self._path, create = self._create
        )
        # NOTE(review): the iterator is returned from inside the context
        # manager — assumes transaction_context(iterator=True) keeps the
        # cursors valid after this frame exits; confirm.
        with transaction_context(env, write = False, iterator = True) as (_, cursors, _):
            return descriptor_iter(
                name_db,
                descriptor_db,
                cursors,
                include_hidden = include_hidden \
                if include_hidden is not None else self._include_hidden
            )

    def names(
        self,
        /, *,
        include_hidden: Optional[bool] = None
    ) -> Iterator[str]:
        _, env, name_db, _, _, _ = get_environment_threadsafe(
            self._storage_path, self._path, create = self._create
Esempio n. 24
0
 def next_counter_value(self):
     """Atomically return the current counter value and advance it by one."""
     with transaction_context(self._env, write=True):
         current = self.__counter
         self.__counter = current + 1
         return current
Esempio n. 25
0
        polling_interval = getenv(constants.WORKER_POLLING_INTERVAL_ENVNAME,
                                  float)

        for i in polling_loop(polling_interval):
            while True:
                try:
                    if len(termination_queue):
                        _ = termination_queue.get()
                        logger.info('worker (%s) terminating on request',
                                    node_uid)
                        sys.exit(0)
                except queue.Empty:
                    pass
                try:
                    if len(submit_queue):
                        with transaction_context(environment, write=True):
                            task = submit_queue.get()
                            if task._status == 'submitted':
                                task._status = 'running'
                                task._pid = os.getpid()
                                task._node_uid = node_uid
                                task._start_timestamp = time.time_ns()
                            else:
                                continue
                    else:
                        break
                except queue.Empty:
                    break
                try:
                    result = error = None
                    setenv(constants.SELF_ENVNAME,
Esempio n. 26
0
# pylint: disable = protected-access
import logging

from typing import (Any, Iterator)

from parkit.adapters.array import Array
from parkit.storage.context import transaction_context
from parkit.storage.wait import wait

logger = logging.getLogger(__name__)


def stream(source: Array, /, *, batch: bool = False) -> Iterator[Any]:

    with transaction_context(source._env, write=False):
        version = source.version
        index = len(source)

    while True:
        wait(source, lambda: source.version > version)
        with transaction_context(source._env, write=False):
            cache = []
            n_new_entries = source.version - version
            if source.maxsize is None:
                for _ in range(n_new_entries):
                    if batch:
                        cache.append(source[index])
                    else:
                        yield source[index]
                    index += 1
                if batch and cache:
Esempio n. 27
0
        assert isinstance(spec.loader, importlib.abc.Loader)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        logger.info('reloaded %s.%s on pid %i', module_name, function_name,
                    os.getpid())
        if isinstance(getattr(module, function_name), Asyncable):
            return getattr(module, function_name).function
        return getattr(module, function_name)

    def invoke(self,
               /,
               *,
               args: Optional[Tuple[Any, ...]] = None,
               kwargs: Optional[Dict[str, Any]] = None) -> Any:
        """Synchronously execute the latest loaded target with the arguments.

        The target callable is resolved from a cache under a read
        transaction, then called outside the transaction.
        """
        assert self.__latest is not None
        with transaction_context(self._env, write=False):
            target_digest, _ = self.__latest
            # The digest prefix selects which cache holds the callable.
            if target_digest.startswith('bytecode'):
                target = self.__bytecode_cache(target_digest)
            else:
                target = self.__module_cache(target_digest)
        args = () if args is None else args
        kwargs = {} if kwargs is None else kwargs
        return target(*args, **kwargs)

    @property
    def function(self) -> Optional[Callable[..., Any]]:
        """The underlying target callable, or None when not available."""
        return self._target_function

    def tasks(self) -> Iterator[Task]:
        _, env, _, _, _, _ = get_environment_threadsafe(