def on_init(created: bool):
    # Initialization hook for a Task: on first creation, record the full
    # task state inside one write transaction and submit the task to the
    # worker queue. When binding to an existing task, this is a no-op.
    if created:
        if asyncable is None:
            raise ValueError()
        with transaction_context(self._env, write=True) as (txn, _, _):
            # Lifecycle bookkeeping: a new task starts 'submitted', with no
            # result, error, worker identity, or execution timestamps yet.
            self._status = 'submitted'
            self._result = None
            self._error = None
            self._created_timestamp = time.time_ns()
            self._start_timestamp = None
            self._end_timestamp = None
            self._pid = None
            self._node_uid = None
            self._asyncable = asyncable
            self._asyncable_uuid = asyncable.uuid
            self._args = args if args is not None else ()
            self._kwargs = kwargs if kwargs is not None else {}
            # Maintain a bidirectional index between the asyncable's uuid
            # and the task name so either side can be looked up from the
            # other (see Task.drop and Asyncable.tasks).
            key1 = ':'.join([asyncable.uuid, self.name])
            key2 = ':'.join([self.name, asyncable.uuid])
            assert txn.put(key=key1.encode('utf-8'), value=b'', append=False)
            assert txn.put(key=key2.encode('utf-8'), value=b'', append=False)
        # Hand the task off to the worker pool via the shared submit queue.
        submit_queue = Queue(constants.SUBMIT_QUEUE_PATH, create=True)
        submit_queue.put(self)
def __setstate__(self, from_wire: Any):
    """Rehydrate an entity from its wire form (site_uuid, namespace, name).

    Reattaches to the LMDB environment for the entity's namespace, resolves
    the name back to its uuid and descriptor, and rebinds the user
    databases. Raises ObjectNotFoundError if the name no longer exists.
    """
    self._site_uuid, self._namespace, name = from_wire
    self._storage_path = get_storage_path(self._site_uuid)
    self._create = False
    self._encname = name.encode('utf-8')
    # The environment must already exist — never create it while unpickling.
    _, self._env, self._namedb, self._attrdb, self._versdb, self._descdb = \
        get_environment_threadsafe(
            self._storage_path,
            self._namespace,
            create = False
        )
    self._userdb = []
    with transaction_context(self._env, write=False) as (txn, _, _):
        # Resolve name -> uuid. LMDB may return a memoryview, which would
        # become invalid after the transaction closes, so copy to bytes.
        self._uuid_bytes = txn.get(key=self._encname, db=self._namedb)
        self._uuid_bytes = bytes(self._uuid_bytes) \
            if isinstance(self._uuid_bytes, memoryview) else self._uuid_bytes
        if self._uuid_bytes is None:
            raise ObjectNotFoundError()
        # Load the descriptor (JSON) keyed by uuid.
        result = txn.get(key=self._uuid_bytes, db=self._descdb)
        result = bytes(result) if isinstance(result, memoryview) else result
        descriptor = orjson.loads(result)
        self._versioned = descriptor['versioned']
        self.__bind_databases(descriptor=descriptor)
    self.__class__.__initialize_class__()
def wait(*args):
    """Block until a condition holds, re-testing when watched entities change.

    Accepts zero or more Entity positional arguments, optionally followed by
    a condition callable; without a callable the condition defaults to True.
    All entities must share the same (site_uuid, namespace) pair.
    """
    args = list(args)
    if not args:
        raise ValueError()
    # A trailing plain function is the condition; everything remaining must
    # be an Entity.
    if isinstance(args[-1], types.FunctionType):
        condition = args.pop()
    else:
        condition = lambda: True
    if not all(isinstance(arg, Entity) for arg in args):
        raise ValueError()
    if len(args) > 0:
        # Every entity must belong to the same site and namespace, since a
        # single environment snapshot is used to read their versions.
        if [(arg.site_uuid, arg.namespace) for arg in args].count(
                (args[0].site_uuid, args[0].namespace)) != len(args):
            raise ValueError()
    versions = []
    for _ in polling_loop(
            getenv(constants.ADAPTER_POLLING_INTERVAL_ENVNAME, float)):
        if len(args) > 0:
            _, env, _, _, _, _ = get_environment_threadsafe(
                args[0].storage_path, args[0].namespace, create=False)
            # Read all versions inside one read transaction so the set is
            # internally consistent.
            with transaction_context(env, write=False):
                if not versions:
                    # First pass: capture baseline versions, then test once.
                    versions = [arg.version for arg in args]
                    if condition():
                        break
                else:
                    # Only re-test the condition after at least one entity's
                    # version changed; then refresh the baseline.
                    if any(versions[i] != arg.version
                           for i, arg in enumerate(args)):
                        if condition():
                            break
                        versions = [arg.version for arg in args]
        else:
            # No entities to watch: just poll the condition.
            if condition():
                break
def set_content(self, value: Any):
    """Replace the file's stored content, inferring content-type metadata
    from the runtime type of ``value``. Requires the file to be closed."""
    if not self._closed:
        raise ValueError()
    with transaction_context(self._env, write=True):
        metadata = self.metadata
        # Clear any reserved keys before recording fresh content properties.
        for reserved in reserved_metadata_keys:
            metadata.pop(reserved, None)
        if isinstance(value, str):
            encoded = value.encode(self.encoding)
            self._size = len(value)
            self._content_binary = memoryview(encoded)
            metadata['content-type'] = 'text/plain'
            metadata['content-encoding'] = self.encoding
        elif isinstance(value, (memoryview, bytes, bytearray)):
            self._size = len(value)
            self._content_binary = memoryview(value)
            metadata['content-type'] = 'application/octet-stream'
        elif isinstance(value, pd.DataFrame):
            self.__set_pandas_dataframe(value, metadata)
        elif isinstance(value, np.ndarray):
            self.__set_numpy_ndarray(value, metadata)
        else:
            # Fallback: pickle arbitrary objects, remembering their type.
            serialized = pickle.dumps(value)
            self._size = len(serialized)
            self._content_binary = memoryview(serialized)
            metadata['content-type'] = 'application/python-pickle'
            metadata['content-properties'] = dict(
                type = get_qualified_class_name(value)
            )
        metadata['last-modified'] = str(datetime.datetime.now())
        super(File, self.__class__).metadata.fset(self, metadata) # type: ignore
def __call__(self, *args, **kwargs) -> Optional[Any]:
    """Run the target synchronously when sync is requested and permitted;
    otherwise submit a Task, returning None if the async limit is reached."""
    if 'sync' in kwargs:
        sync = bool(kwargs.pop('sync'))
    else:
        sync = self.__default_sync
    # Synchronous path: only when sync was requested and is not disabled.
    if sync and not self.__disable_sync:
        return self.invoke(args=args, kwargs=kwargs)
    if self.__async_limit is not None:
        _, env, _, _, _, _ = get_environment_threadsafe(
            self.storage_path, constants.TASK_NAMESPACE, create=True)
        active = 0
        # Count active tasks and submit inside one write transaction so the
        # limit check and the submission are atomic.
        with transaction_context(env, write=True):
            for task in self.tasks():
                if task.status in ['submitted', 'running']:
                    active += 1
                    if active == self.__async_limit:
                        return None
            return Task(asyncable=self, args=args, kwargs=kwargs,
                        site_uuid=self.site_uuid, create=True, bind=False)
    return Task(asyncable=self, args=args, kwargs=kwargs,
                site_uuid=self.site_uuid, create=True, bind=False)
def get_content(
    self,
    *,
    zero_copy: bool = True
) -> Optional[Any]:
    """Decode and return the file's content according to its content-type.

    Returns None when the content-type is missing or not recognized.
    Raises ValueError if the file is not closed.
    """
    if not self._closed:
        raise ValueError()
    with transaction_context(self._env, write = False):
        try:
            metadata = self.metadata
            # Dispatch on content-type; decoders are wrapped in lambdas so
            # only the selected one executes.
            return {
                'application/octet-stream':
                lambda: self.__get_octet_stream(
                    zero_copy
                ),
                'application/python-pickle':
                lambda: pickle.loads(self._content_binary),
                'application/python-pandas-dataframe':
                lambda: self.__get_pandas_dataframe(),
                'application/python-numpy-ndarray':
                lambda: self.__get_numpy_ndarray(
                    self._content_binary, metadata, zero_copy
                ),
                'text/plain':
                lambda: codecs.decode(
                    self._content_binary,
                    encoding = self.encoding
                )
            }[metadata['content-type']]()
        except KeyError:
            # Unknown or absent content-type: no decodable content.
            return None
    # FIX: removed the trailing `raise ValueError()` — it was unreachable,
    # since both the try and except paths above return.
def _save_buffer(self):
    """Flush the in-memory write buffer into the entity's stored content."""
    with transaction_context(self._env, write=True):
        if isinstance(self._buffer, mmap.mmap):
            # Binary-mode buffer: expose only the written extent, zero-copy.
            self._size = self._extent
            self._content_binary = memoryview(self._buffer)[0:self._extent]
        else:
            # Text-mode buffer (StringIO-like): encode before storing.
            content = self._buffer.getvalue()
            self._size = len(content)
            # NOTE(review): other writers (set_content) wrap stored content
            # in memoryview, while this stores raw bytes from encode() —
            # confirm downstream readers accept plain bytes here.
            self._content_binary = content.encode(self._encoding)
def get_concurrency(*, site_uuid: Optional[str] = None) -> int:
    """Return the cluster's concurrency setting, installing the default
    from the environment atomically on first access."""
    state = Dict(
        constants.CLUSTER_STATE_DICT_PATH,
        site_uuid = site_uuid,
        create = True,
        bind = True
    )
    _, env, _, _, _, _ = get_environment_threadsafe(
        state.storage_path,
        state.namespace,
        create = False
    )
    # Write transaction so two callers cannot both install a default.
    with transaction_context(env, write = True):
        if 'concurrency' not in state:
            state['concurrency'] = getenv(
                constants.CLUSTER_CONCURRENCY_ENVNAME, int
            )
        return state['concurrency']
def __create_or_bind(self, *, db_properties: List[LMDBProperties],
                     versioned: bool,
                     metadata: Dict[str, Any],
                     on_init: Optional[Callable[[bool], None]],
                     bind: bool = True):
    """Create the named object in storage; if the name is already taken,
    bind to it (when ``bind`` allows) or raise ObjectExistsError."""
    with transaction_context(self._env, write=True) as (txn, _, _):
        obj_uuid = txn.get(key=self._encname, db=self._namedb)
        if obj_uuid:
            # Lost the create race (or object pre-existed): fall back to
            # binding through the sibling method.
            if bind:
                return self.__bind_or_create(db_properties=db_properties,
                                             versioned=versioned,
                                             metadata=metadata,
                                             on_init=on_init,
                                             create=False)
            raise ObjectExistsError()
        # Fresh object: allocate a uuid, register the name and a zeroed
        # version counter.
        obj_uuid = uuid.uuid4().bytes
        assert txn.put(key=self._encname, value=obj_uuid, db=self._namedb)
        assert txn.put(key=obj_uuid, value=struct.pack('@N', 0),
                       db=self._versdb)
        # Each user database gets a digest-derived uid unique to this
        # object (basename + index).
        basename = str(uuid.uuid4())
        descriptor: Descriptor = dict(
            databases=list(
                zip([
                    create_string_digest(''.join([basename, str(i)]))
                    for i in range(len(db_properties))
                ], db_properties)),
            uuid=str(uuid.UUID(bytes=obj_uuid)),
            versioned=versioned,
            created=str(datetime.datetime.now()),
            type=get_qualified_class_name(self),
            metadata=metadata)
        assert txn.put(key=obj_uuid, value=orjson.dumps(descriptor),
                       db=self._descdb)
        # Open all user databases inside the same transaction.
        for dbuid, props in descriptor['databases']:
            self._userdb.append(
                open_database_threadsafe(txn, self._env, dbuid, props,
                                         create=True))
        self._uuid_bytes = obj_uuid
        self._versioned = descriptor['versioned']
        if on_init:
            # _create flags first-time initialization for the hook.
            self._create = True
            on_init(True)
            self._create = False
    return None
def attributes(self) -> Iterator[str]:
    """Yield the names of all attributes stored for this entity."""
    with transaction_context(self._env, write=False, iterator=True) \
            as (_, cursors, _):
        cursor = cursors[self._attrdb]
        # Attribute keys are uuid-prefixed byte strings; position the cursor
        # at the first key at or after this entity's uuid.
        if cursor.set_range(self._uuid_bytes):
            while True:
                key = cursor.key()
                key = bytes(key) if isinstance(key, memoryview) else key
                if key.startswith(self._uuid_bytes):
                    # Strip the uuid prefix, decode, and yield the name.
                    key = key[len(self._uuid_bytes):]
                    yield self.decode_attr_key(key)
                    if cursor.next():
                        continue
                # Either past this entity's key range or cursor exhausted.
                return
def metadata(self, value: Dict[str, Any]):
    """Replace user metadata while preserving reserved (system) keys."""
    with transaction_context(self._env, write = True):
        # Keep the reserved entries from the current metadata, then layer
        # on only the caller's non-reserved entries.
        merged = {
            key: val for key, val in self.metadata.items()
            if key in reserved_metadata_keys
        }
        for key, val in value.items():
            if key not in reserved_metadata_keys:
                merged[key] = val
        super(File, self.__class__).metadata.fset(self, merged) # type: ignore
def cancel(self):
    """Cancel this task; if it was running, terminate its worker process."""
    if self._status not in ['running', 'submitted']:
        return
    node_uid = None
    pid = None
    with transaction_context(self._env, write=True):
        # Re-read the status inside the transaction — it may have changed
        # since the cheap pre-check above.
        current = self._status
        if current in ['submitted', 'running']:
            self._status = 'cancelled'
            if current == 'running':
                node_uid = self._node_uid
                pid = self._pid
                assert node_uid is not None and pid is not None
    # Kill outside the transaction to keep the write section short.
    if node_uid is not None:
        terminate_node(node_uid, pid)
def status(self) -> str:
    """Return the task status, detecting crashed workers.

    A status recorded as 'running' is cross-checked against worker process
    liveness; if the process is gone the reported status is 'crashed'.
    """
    with transaction_context(self._env, write=False):
        status = self._status
        if status == 'running':
            node_uid = self._node_uid
            pid = self._pid
            assert node_uid is not None and pid is not None
            # Liveness probes are cached per (node, pid); on a cache miss
            # the default callable probes the actual process.
            status = self._running_cache.get(
                (node_uid, pid),
                default = \
                lambda key: 'running' if is_running(key[0], key[1]) else
                'crashed'
            )
            if status == 'crashed':
                # Record the crashed verdict so future reads skip the probe.
                # NOTE(review): confirm ttl=0 means "never expire" for
                # _running_cache rather than "expire immediately".
                self._running_cache.set((node_uid, pid), 'crashed', ttl=0)
        return status
def get(
    self,
    name: str
):
    """Load and return the named entity from this namespace.

    Raises ObjectNotFoundError if no entity with that name exists.
    """
    _, env, name_db, _, _, descriptor_db = get_environment_threadsafe(
        self._storage_path,
        self._path,
        create = self._create
    )
    with transaction_context(env, write = False,
                             iterator = True) as (_, cursors, _):
        entity = load_entity(
            name_db, descriptor_db, cursors,
            self._site_uuid, self._path,
            name = name
        )
        if entity is not None:
            return entity
        raise ObjectNotFoundError()
def on_init(created: bool):
    # On first creation, seed all async settings (a None argument means
    # "use the built-in default"); on rebind, transactionally update only
    # the settings the caller explicitly supplied. Either way, (re)load
    # the callable target.
    if created:
        self.__default_sync = False if default_sync is None else default_sync
        self.__async_limit = async_limit
        self.__latest = None
        self.__disable_sync = False if disable_sync is None else disable_sync
        load_target()
        return
    with transaction_context(self._env, write=True):
        if default_sync is not None:
            self.__default_sync = default_sync
        if disable_sync is not None:
            self.__disable_sync = disable_sync
        if async_limit is not None:
            self.__async_limit = async_limit
        load_target()
def get_snapshot(self) -> \
    typing.Dict[int, typing.Dict[str, Union[float, Optional[str]]]]:
    """Reconcile the recorded process table against the live process list
    and return the reconciled table as a plain dict."""
    with transaction_context(self._env, write=True):
        live_pids = []
        known_pids = list(self.keys())
        for proc in psutil.process_iter(['create_time', 'pid']):
            try:
                pid = proc.info['pid']
                live_pids.append(pid)
                if pid in known_pids:
                    # A newer create_time means the pid was recycled by a
                    # different process — the recorded entry is stale.
                    recorded_start = self[pid]['create_time']
                    if recorded_start < proc.info['create_time']:
                        del self[pid]
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # Process vanished or is inaccessible mid-scan; skip it.
                pass
        # Drop records for processes that no longer exist at all.
        for pid in set(known_pids).difference(set(live_pids)):
            del self[pid]
        return dict(self)
def __bind_databases(self, *, descriptor: Descriptor,
                     on_init: Optional[Callable[[bool], None]] = None):
    """Attach handles for every user database in the descriptor, opening
    any not already cached, then fire on_init(False)."""
    for dbuid, _ in descriptor['databases']:
        self._userdb.append(get_database_threadsafe(dbuid))
    if any(handle is None for handle in self._userdb):
        # Some handles missed the thread-safe cache: open them inside a
        # write transaction, then run the init hook within it.
        with transaction_context(self._env, write=True) as (txn, _, _):
            for index, (dbuid, properties) in \
                    enumerate(descriptor['databases']):
                if not self._userdb[index]:
                    self._userdb[index] = open_database_threadsafe(
                        txn, self._env, dbuid, properties, create=False)
            if on_init:
                on_init(False)
        return
    # All handles were cached — no transaction needed.
    if on_init:
        on_init(False)
def is_scheduled(self) -> bool:
    """Advance the schedule state and report whether a run is due now.

    Returns:
        True when a run is due (the run is recorded: count incremented,
        last/next run timestamps updated), False otherwise.
    """
    with transaction_context(self._env, write=True):
        now_ns = time.time_ns()
        if self.__count == self.__max_times:
            # Schedule exhausted.
            return False
        assert self.__frequency is not None and self.__period is not None
        interval_ns = get_interval(self.__frequency, self.__period)

        def get_next_ns(start_ns):
            # Next run time on the interval grid anchored at start_ns.
            relative_ns = now_ns - start_ns
            offset_ns = relative_ns % interval_ns
            return now_ns + (interval_ns - offset_ns)

        if self.__last_run_ns is None:
            # Never run before: run immediately once the start time passes.
            if self.__start_ns is None or self.__start_ns <= now_ns:
                self.__count += 1
                self.__last_run_ns = now_ns
                if self.__count < self.__max_times:
                    # Anchor the grid at the first run when no explicit
                    # start was configured.
                    # FIX: removed the original no-op branch
                    # `else: self.__start_ns = self.__start_ns`.
                    if self.__start_ns is None:
                        self.__start_ns = now_ns
                    self.__next_run_ns = get_next_ns(self.__start_ns)
                return True
            return False
        assert isinstance(self.__next_run_ns, int)
        if self.__next_run_ns <= now_ns:
            # A scheduled slot has arrived: record the run and compute the
            # next slot, or clear it after the final run.
            self.__count += 1
            self.__last_run_ns = now_ns
            if self.__count < self.__max_times:
                self.__next_run_ns = get_next_ns(self.__start_ns)
            else:
                self.__next_run_ns = None
            return True
        return False
def drop(self):
    """Delete this task, removing both asyncable<->task index entries, and
    terminate its worker process if it was running."""
    node_uid = pid = None
    with transaction_context(self._env, write=True) as (txn, _, _):
        if self._status == 'running':
            # Capture the worker identity before the task record disappears.
            node_uid = self._node_uid
            pid = self._pid
            assert node_uid is not None and pid is not None
        # The index holds both '<name>:<asyncable uuid>' and
        # '<asyncable uuid>:<name>' keys; seek the name-first key to recover
        # the asyncable uuid, then delete both directions.
        cursor = txn.cursor()
        assert cursor.set_range(self.name.encode('utf-8'))
        key_bytes = cursor.key()
        key_bytes = bytes(key_bytes) if isinstance(
            key_bytes, memoryview) else key_bytes
        key = key_bytes.decode('utf-8')
        assert key.startswith(self.name) and ':' in key
        asyncable_uuid = key.split(':')[1]
        assert txn.delete(
            key=':'.join([asyncable_uuid, self.name]).encode('utf-8'))
        assert txn.delete(
            key=':'.join([self.name, asyncable_uuid]).encode('utf-8'))
        # Remove the entity itself inside the same transaction.
        super().drop()
    # Kill the worker only after the delete has committed.
    if node_uid is not None:
        terminate_node(node_uid, pid)
def tasks(self) -> Iterator[Task]:
    """Yield every Task associated with this asyncable by scanning the
    '<asyncable uuid>:<task name>' index keys."""
    _, env, _, _, _, _ = get_environment_threadsafe(
        self.storage_path, constants.TASK_NAMESPACE, create=True)
    with transaction_context(env, write=False) as (txn, _, _):
        cursor = txn.cursor()
        namespace = Namespace(constants.TASK_NAMESPACE,
                              site_uuid=self.site_uuid)
        # Seek to the start of this asyncable's key range.
        if cursor.set_range(self.uuid.encode('utf-8')):
            while True:
                key_bytes = cursor.key()
                key_bytes = bytes(key_bytes) if isinstance(
                    key_bytes, memoryview) else key_bytes
                key = key_bytes.decode('utf-8')
                if key.startswith(self.uuid):
                    name = key.split(':')[1]
                    try:
                        entity = namespace.get(name)
                        # Only yield actual Task entities; the task may have
                        # been dropped or replaced between scan and load.
                        if isinstance(entity, Task):
                            yield entity
                    except (KeyError, ObjectNotFoundError):
                        pass
                    if cursor.next():
                        continue
                # Past this asyncable's range, or cursor exhausted.
                break
def __bind_or_create(self, *, db_properties: List[LMDBProperties],
                     versioned: bool,
                     metadata: Dict[str, Any],
                     on_init: Optional[Callable[[bool], None]] = None,
                     create: bool = True):
    """Bind to the named object if it exists (after verifying type
    compatibility); otherwise create it when ``create`` allows, or raise
    ObjectNotFoundError."""
    with transaction_context(self._env, write=False) as (txn, _, _):
        result = txn.get(key=self._encname, db=self._namedb)
        if result:
            # Copy LMDB memoryviews to bytes before leaving the txn.
            obj_uuid = bytes(result) \
                if isinstance(result, memoryview) else result
            result = txn.get(key=obj_uuid, db=self._descdb)
            result = bytes(result) \
                if isinstance(result, memoryview) else result
            descriptor = orjson.loads(result)
            my_class_name = get_qualified_class_name(self)
            if descriptor['type'] != my_class_name:
                # Binding through a different class is only allowed when
                # this class is a base of the stored type, or when binding
                # through the generic Entity class.
                try:
                    if my_class_name not in \
                            get_qualified_base_names(
                                create_class(descriptor['type'])):
                        raise TypeError()
                except AttributeError as exc:
                    if my_class_name != 'parkit.storage.entity.Entity':
                        raise TypeError() from exc
            self._uuid_bytes = obj_uuid
            self._versioned = descriptor['versioned']
            self.__bind_databases(descriptor=descriptor, on_init=on_init)
            return None
    # Name not found: optionally fall through to creation.
    if create:
        return self.__create_or_bind(db_properties=db_properties,
                                     versioned=versioned,
                                     metadata=metadata,
                                     on_init=on_init,
                                     bind=True)
    raise ObjectNotFoundError()
if thread.local.default_site is not None: storage_path, _ = thread.local.default_site else: raise SiteNotSpecifiedError() elif isinstance(obj, Namespace): namespace = obj.path storage_path = obj.storage_path elif isinstance(obj, Entity): namespace = obj.namespace storage_path = obj.storage_path else: raise ValueError() _, env, _, _, _, _ = get_environment_threadsafe(storage_path, namespace, create=False) return transaction_context(env, write=True) def snapshot(obj: Optional[Union[str, Namespace, Entity]] = None, /, *, site_uuid: Optional[str] = None) -> ContextManager: if obj is None or isinstance(obj, str): namespace = resolve_namespace(obj) if site_uuid is not None: storage_path = get_storage_path(site_uuid) else: if thread.local.default_site is not None: storage_path, _ = thread.local.default_site else: raise SiteNotSpecifiedError()
include_hidden: Optional[bool] = None ) -> Iterator[Tuple[str, Dict[str, Any]]]: for name, descriptor in self.descriptors( include_hidden = include_hidden if include_hidden is not None else self._include_hidden ): yield (name, descriptor['metadata']) def descriptors( self, /, *, include_hidden: Optional[bool] = None ) -> Iterator[Tuple[str, Descriptor]]: _, env, name_db, _, _, descriptor_db = get_environment_threadsafe( self._storage_path, self._path, create = self._create ) with transaction_context(env, write = False, iterator = True) as (_, cursors, _): return descriptor_iter( name_db, descriptor_db, cursors, include_hidden = include_hidden \ if include_hidden is not None else self._include_hidden ) def names( self, /, *, include_hidden: Optional[bool] = None ) -> Iterator[str]: _, env, name_db, _, _, _ = get_environment_threadsafe( self._storage_path, self._path, create = self._create
def next_counter_value(self):
    """Atomically return the current counter value and advance it by one."""
    with transaction_context(self._env, write=True):
        current = self.__counter
        self.__counter = current + 1
        return current
polling_interval = getenv(constants.WORKER_POLLING_INTERVAL_ENVNAME, float) for i in polling_loop(polling_interval): while True: try: if len(termination_queue): _ = termination_queue.get() logger.info('worker (%s) terminating on request', node_uid) sys.exit(0) except queue.Empty: pass try: if len(submit_queue): with transaction_context(environment, write=True): task = submit_queue.get() if task._status == 'submitted': task._status = 'running' task._pid = os.getpid() task._node_uid = node_uid task._start_timestamp = time.time_ns() else: continue else: break except queue.Empty: break try: result = error = None setenv(constants.SELF_ENVNAME,
# pylint: disable = protected-access import logging from typing import (Any, Iterator) from parkit.adapters.array import Array from parkit.storage.context import transaction_context from parkit.storage.wait import wait logger = logging.getLogger(__name__) def stream(source: Array, /, *, batch: bool = False) -> Iterator[Any]: with transaction_context(source._env, write=False): version = source.version index = len(source) while True: wait(source, lambda: source.version > version) with transaction_context(source._env, write=False): cache = [] n_new_entries = source.version - version if source.maxsize is None: for _ in range(n_new_entries): if batch: cache.append(source[index]) else: yield source[index] index += 1 if batch and cache:
assert isinstance(spec.loader, importlib.abc.Loader) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) logger.info('reloaded %s.%s on pid %i', module_name, function_name, os.getpid()) if isinstance(getattr(module, function_name), Asyncable): return getattr(module, function_name).function return getattr(module, function_name) def invoke(self, /, *, args: Optional[Tuple[Any, ...]] = None, kwargs: Optional[Dict[str, Any]] = None) -> Any: assert self.__latest is not None with transaction_context(self._env, write=False): target_digest, _ = self.__latest if target_digest.startswith('bytecode'): target = self.__bytecode_cache(target_digest) else: target = self.__module_cache(target_digest) args = () if args is None else args kwargs = {} if kwargs is None else kwargs return target(*args, **kwargs) @property def function(self) -> Optional[Callable[..., Any]]: return self._target_function def tasks(self) -> Iterator[Task]: _, env, _, _, _, _ = get_environment_threadsafe(