class MapImageExporter(MapExporter):
    """Exporter that draws OSM nodes and ways onto a map-like image."""

    def __init__(self, nodes, ways, min_lat, max_lat, min_lon, max_lon, *args,
                 node_color=(0, 0, 0), way_color="allrandom",
                 bg_color="white", enlargement=50000):
        """Export map data (nodes and ways) as a map like image.

        Params:
        nodes - The raw nodes as read by any OSM file reader
        ways - The raw ways as read by any OSM file reader
        min_lat - The southern border of the map
        max_lat - The northern border of the map
        min_lon - The western border of the map
        max_lon - The eastern border of the map
        node_color - The colour of the nodes in the image
        way_color - The colour of the ways in the image
        bg_color - The colour of the image background
        enlargement - Multiplication factor from map coordinate to pixel
                      coordinate. Determines image size.

        Extra positional arguments (*args) are accepted but not used here.
        """
        super(MapImageExporter, self).__init__(
            min_lat, max_lat, min_lon, max_lon, bg_color, enlargement)
        logger_name = '.'.join((__name__, type(self).__name__))
        self.logger = logging.getLogger(logger_name)
        # Only weak references are held: entries disappear once the caller
        # drops the underlying node / way objects.
        self.nodes = WeakValueDictionary(nodes)
        self.ways = WeakValueDictionary(ways)
        self.node_color = node_color
        self.way_color = way_color

    def export(self, filename="export.png"):
        """Export the information to an image file

        Params:
        filename - The filename to export to, must have a valid image
                   extension. Default: export.png
        """
        self.logger.info('Exporting a map image to %s', filename)

        def to_pixel(point):
            # Shift by the south-west corner, then scale to pixel space.
            return ((point.lon - self.min_lon) * self.enlargement,
                    (point.lat - self.min_lat) * self.enlargement)

        # Every way becomes a poly-line through its referenced nodes.
        self.logger.info('Drawing the ways')
        for way in self.ways.values():
            outline = [to_pixel(self.nodes[ref]) for ref in way.nodes]
            self.draw.line(outline, fill=self.way_color)

        # Every node becomes a single point.
        self.logger.info('Drawing the nodes')
        for point in self.nodes.values():
            self.draw.point(to_pixel(point), fill=self.node_color)

        self._save_image(filename)
class ParserManager(object):
    """Registry that pairs a company code with its company and parser class.

    Companies are held strongly; parser classes are held through a
    ``WeakValueDictionary``, so a parser entry vanishes once nothing else
    references the parser class.
    """

    def __init__(self):
        self._parsers = WeakValueDictionary()
        self._companies = {}

    def register_parser(self, company, new_parser):
        """
        Register the new parser

        :param Company company: The new company
        :param Parser new_parser: The new parser (a ``Parser`` subclass)
        :raises TypeError: if either argument has the wrong type
        """
        company_ok = isinstance(company, Company)
        if not company_ok:
            raise TypeError('The company must be Company!')
        parser_ok = issubclass(new_parser, Parser)
        if not parser_ok:
            raise TypeError('The new_parser must be Parser!')

        code = company.code
        self._companies[code] = company
        self._parsers[code] = new_parser

    def __getitem__(self, company_code):
        """Return the ``(company, parser)`` pair registered for a code."""
        found_company = self._companies[company_code]
        found_parser = self._parsers[company_code]
        return found_company, found_parser

    def __iter__(self):
        """
        Registered parsers and companies

        :return: ``(company, parser)`` pairs, one per registered parser
        :rtype: iterator of tuple
        """
        for code, parser in self._parsers.items():
            yield (self._companies[code], parser)

    def __len__(self):
        """Return the number of parsers that are still registered."""
        return len(self._parsers)
class Signal:
    """A signal and slots implementation.

    Only methods may be used as slots. Two equivalent methods with the same
    function and object can't be added at the same time. The order in which
    the methods will be called is undefined.

    Slots are stored as ``(function, id(instance)) -> instance`` in a
    ``WeakValueDictionary``, so a connection disappears automatically when
    the receiving instance is garbage collected.
    """

    def __init__(self):
        self._dict = WeakValueDictionary()

    def __call__(self, *args, **kwargs):
        """Call all connected methods with the given arguments.

        The set of slots is snapshotted first, so a slot may connect or
        disconnect methods (including itself) while the signal is being
        emitted without raising ``RuntimeError``. A slot that gets
        disconnected during the emission, before its turn, is not called.
        """
        # The snapshot also keeps the receivers alive for this emission.
        for key, obj in list(self._dict.items()):
            if key in self._dict:
                key[0](obj, *args, **kwargs)

    def __len__(self):
        """Return the number of connected methods."""
        return len(self._dict)

    def connect(self, method):
        """Connect a bound method to this signal.

        Connecting the same method twice is a no-op: the key is identical,
        so the existing entry is simply overwritten.
        """
        key = (method.__func__, id(method.__self__))
        self._dict[key] = method.__self__

    def disconnect(self, method):
        """Disconnect a bound method; unknown methods are ignored."""
        key = (method.__func__, id(method.__self__))
        # pop() with a default avoids the separate membership test.
        self._dict.pop(key, None)
class StorePool(object):
    """
    Store pool that maintains a single store per request context.

    Stores are cached per thread (``threading.local``) and additionally
    tracked in ``_all_stores`` so that ``disconnect`` can reach stores that
    were created by any thread. Both caches hold weak references only.
    """

    def __init__(self):
        self._local = threading.local()
        self._all_stores = WeakValueDictionary()
        self._databases = {}
        self.uris = {}

    def add(self, name, uri):
        """Register a database under ``name``.

        ``uri`` may be a ``URI`` instance or anything ``URI()`` accepts.
        The first registration for a given name wins; later calls with the
        same name leave the existing entries untouched.
        """
        if not isinstance(uri, URI):
            uri = URI(uri)
        self.uris.setdefault(name, uri)
        self._databases.setdefault(name, create_database(uri))

    def getstore(self, name, fresh=False):
        """Return the current thread's store for ``name``.

        With ``fresh=True`` a new store is always created and it is NOT
        placed in the per-thread cache.

        Raises ``KeyError`` if no database was registered under ``name``.
        """
        try:
            stores = self._local.stores
        except AttributeError:
            # First access from this thread (or first after disconnect()).
            stores = self._local.stores = WeakValueDictionary()

        if fresh:
            return self._getstore_fresh(name)

        try:
            return stores[name]
        except KeyError:
            return stores.setdefault(name, self._getstore_fresh(name))

    def _getstore_fresh(self, name):
        """
        Return a fresh store object
        """
        store = Store(self._databases[name])
        self._all_stores[id(store)] = store
        return store

    def disconnect(self):
        """
        Disconnect all stores. Any pending transactions will be rolled back
        and the stores' connections closed. Attempts to use objects bound to
        a store will raise an exception.

        Subsequent calls to ``getstore`` will return a fresh store object
        """
        self._local = threading.local()
        # Iterate over a snapshot: removing entries while iterating the live
        # mapping raises "dictionary changed size during iteration".
        for key, store in list(self._all_stores.items()):
            self._all_stores.pop(key, None)
            store.rollback()
            store.close()

    def __repr__(self):
        # ``dsn`` is not set anywhere in this class; honour it when a
        # subclass or caller provides it, otherwise show the registered URIs
        # instead of failing with AttributeError.
        described = getattr(self, "dsn", self.uris)
        return "<%s %r, active=%d>" % (self.__class__.__name__,
                                       described,
                                       len(self._all_stores))
class ObjectPool(object):
    """
    This class allows to fetch mvc model objects using their UUID.
    This requires to model to have a property called "uuid". All
    class inheriting from the base 'Model' class will have this.
    If implementing a custom model, the UUID property is responsible
    for the removal and addition to the pool when it changes values.
    Also see the UUIDProperty descriptor for an example implementation.
    We can use this to store complex relations between objects where
    references to each other can be replaced with the UUID.
    For a multi-threaded version see ThreadedObjectPool.

    Objects are held through weak references only: an entry disappears
    once nothing else references the object.
    """

    def __init__(self, *args, **kwargs):
        object.__init__(self)
        self._objects = WeakValueDictionary()

    def add_or_get_object(self, obj):
        """Add ``obj`` to the pool, or return the object already registered.

        Returns ``obj`` itself when its UUID was free, otherwise the object
        that currently owns that UUID.
        """
        try:
            # fail_on_duplicate turns a clash into a KeyError instead of
            # silently assigning a new UUID to ``obj``.
            self.add_object(obj, force=False, fail_on_duplicate=True)
        except KeyError:
            return self.get_object(obj.uuid)
        return obj

    def add_object(self, obj, force=False, fail_on_duplicate=False):
        """Register ``obj`` under its UUID.

        :param force: overwrite an existing entry with the same UUID
        :param fail_on_duplicate: raise ``KeyError`` on a UUID clash instead
            of giving ``obj`` a freshly generated UUID
        :raises KeyError: on a clash when ``fail_on_duplicate`` is set
        """
        if obj.uuid not in self._objects or force:
            self._objects[obj.uuid] = obj
        elif fail_on_duplicate:
            raise KeyError(
                "UUID %s is already taken by another object %s, cannot add object %s"
                % (obj.uuid, self._objects[obj.uuid], obj))
        else:
            # Just change the objects uuid, will break refs, but
            # it prevents issues with inherited properties etc.
            logger.warning(
                "A duplicate UUID was passed to an ObjectPool for a %s object."
                % obj)
            obj.uuid = get_new_uuid()

    def change_all_uuids(self):
        """Assign a newly generated UUID to every pooled object."""
        # Work on a copy: per the class docstring, setting ``uuid`` is
        # expected to re-register the object, which mutates the pool.
        for obj in list(self._objects.values()):
            obj.uuid = get_new_uuid()

    def remove_object(self, obj):
        """Remove ``obj`` if it is the object registered under its UUID."""
        if obj.uuid in self._objects and self._objects[obj.uuid] == obj:
            del self._objects[obj.uuid]

    def get_object(self, uuid):
        """Return the object registered under ``uuid``, or ``None``."""
        return self._objects.get(uuid, None)

    def clear(self):
        """Forget every pooled object."""
        self._objects.clear()
class ObjectPool(object):
    """
    This class allows to fetch mvc model objects using their UUID.
    This requires to model to have a property called "uuid". All
    class inheriting from the base 'Model' class will have this.
    If implementing a custom model, the UUID property is responsible
    for the removal and addition to the pool when it changes values.
    Also see the UUIDPropIntel class for an example implementation.
    We can use this to store complex relations between objects where
    references to each other can be replaced with the UUID.
    For a multi-threaded version see ThreadedObjectPool.

    Objects are held through weak references only: an entry disappears
    once nothing else references the object.
    """

    def __init__(self, *args, **kwargs):
        object.__init__(self)
        self._objects = WeakValueDictionary()

    def add_or_get_object(self, obj):
        """Add ``obj`` to the pool, or return the object already registered.

        Returns ``obj`` itself when its UUID was free, otherwise the object
        that currently owns that UUID.
        """
        try:
            # fail_on_duplicate turns a clash into a KeyError instead of
            # silently assigning a new UUID to ``obj``.
            self.add_object(obj, force=False, fail_on_duplicate=True)
        except KeyError:
            return self.get_object(obj.uuid)
        return obj

    def add_object(self, obj, force=False, fail_on_duplicate=False):
        """Register ``obj`` under its UUID.

        :param force: overwrite an existing entry with the same UUID
        :param fail_on_duplicate: raise ``KeyError`` on a UUID clash instead
            of giving ``obj`` a freshly generated UUID
        :raises KeyError: on a clash when ``fail_on_duplicate`` is set
        """
        if obj.uuid not in self._objects or force:
            self._objects[obj.uuid] = obj
        elif fail_on_duplicate:
            # Call form of ``raise``: valid on both Python 2 and Python 3.
            raise KeyError(
                "UUID %s is already taken by another object %s, cannot add object %s"
                % (obj.uuid, self._objects[obj.uuid], obj))
        else:
            # Just change the objects uuid, will break refs, but
            # it prevents issues with inherited properties etc.
            logger.warning("A duplicate UUID was passed to an ObjectPool for a %s object." % obj)
            obj.uuid = get_new_uuid()

    def change_all_uuids(self):
        """Assign a newly generated UUID to every pooled object."""
        # Work on a real copy: per the class docstring, setting ``uuid`` is
        # expected to re-register the object, which mutates the pool.
        for obj in list(self._objects.values()):
            obj.uuid = get_new_uuid()

    def remove_object(self, obj):
        """Remove ``obj`` if it is the object registered under its UUID."""
        if obj.uuid in self._objects and self._objects[obj.uuid] == obj:
            del self._objects[obj.uuid]

    def get_object(self, uuid):
        """Return the object registered under ``uuid``, or ``None``."""
        return self._objects.get(uuid, None)

    def clear(self):
        """Forget every pooled object."""
        self._objects.clear()
class User:
    """A user seen on a connection, identified by nick, user name and host."""

    def __init__(self, nick: str, user: str, host: str, connection: 'Connection'):
        self.nick = nick
        self.user = user
        self.host = host
        # Weakly-held memberships of this user.
        self.memberships = WeakValueDictionary()
        # A proxy is stored so the user does not keep the connection alive.
        self.connection = weakref.proxy(connection)
        self.storage: DefaultDict[str, Dict] = defaultdict(dict)

    def __eq__(self, other) -> bool:
        """Compare case-insensitively by nick against a User or a string."""
        candidate = other.nick if isinstance(other, User) else other
        if not isinstance(candidate, str):
            return NotImplemented
        return self.nick.lower() == candidate.lower()

    def __str__(self):
        return self.nick

    @property
    def mask(self) -> str:
        """The ``nick!user@host`` mask of this user."""
        return "".join((self.nick, "!", self.user, "@", self.host))

    def renick(self, newnick: str):
        """Re-key this user under ``newnick`` everywhere it is indexed.

        Updates the membership table of each channel reached through this
        user's memberships, then the connection's user table, and finally
        the ``nick`` attribute itself.
        """
        conn = self.connection
        previous = self.nick
        conn.log("Running renick")
        for membership in self.memberships.values():
            conn.log(f"Checking memberships: {membership}")
            roster = membership.channel.memberships
            roster[newnick] = roster.pop(previous)
        conn.users[newnick] = conn.users.pop(previous)
        self.nick = newnick

    def send_message(self, msg: str):
        """Write a PRIVMSG addressed to this user on the connection."""
        line = f"PRIVMSG {self.nick} :{msg}"
        self.connection.write(line)

    def send_notice(self, msg: str):
        """Write a NOTICE addressed to this user on the connection."""
        line = f"NOTICE {self.nick} :{msg}"
        self.connection.write(line)
class WorkerCollection(object):
    """Dispatch queued tasks to one ``Worker`` per node.

    Workers are tracked in a ``WeakValueDictionary`` keyed by node id, so
    this collection never keeps a worker alive by itself.
    """

    def __init__(self, capabilities, parallel_tasks=10,
                 parallel_tasks_per_worker=10, worker_max_idle=300):
        """
        :param capabilities: mapping of capability name to an object that
            provides ``action_class`` and ``params``
        :param parallel_tasks: maximum size of the task queue
        :param parallel_tasks_per_worker: forwarded to each new ``Worker``
        :param worker_max_idle: forwarded to each new ``Worker``
        """
        self.logger = logging.getLogger('root')
        self.capabilities = capabilities
        self.parallel_tasks_per_worker = parallel_tasks_per_worker
        self.worker_max_idle = worker_max_idle
        self.workers = WeakValueDictionary()
        self.task_queue = gevent.queue.JoinableQueue(maxsize=parallel_tasks)

    def register_response_queue(self, response_queue):
        """Set the queue that newly created workers are given.

        Must be called before the first ``get_worker`` call, because
        ``get_worker`` reads ``self.response_queue``.
        """
        self.response_queue = response_queue
        self.logger.info("Registered worker collection for {caps}".format(
            caps=", ".join(self.capabilities.keys())))

    def get_worker(self, NodeID):
        """Return the worker for ``NodeID``, creating one when needed.

        A new worker is created when none is registered or when the
        registered one is shutting down.
        """
        worker = self.workers.get(NodeID)
        if worker is None or worker.shutdown_in_progress:
            # Keep a strong local reference: the registry only holds weak
            # references, so re-reading a just-stored worker from it is not
            # guaranteed to succeed.
            worker = Worker(self, NodeID, self.response_queue,
                            self.parallel_tasks_per_worker,
                            self.worker_max_idle)
            self.workers[NodeID] = worker
        return worker

    def remove_worker(self, worker):
        """Drop every registry entry that refers to ``worker``.

        The registry is modified in place so it stays a
        ``WeakValueDictionary``; rebuilding it as a plain dict would turn
        all remaining entries into strong references.
        """
        for node_id, candidate in list(self.workers.items()):
            if candidate is worker:
                self.workers.pop(node_id, None)

    def shutdown_workers(self):
        """Wait for the task queue to drain, then shut down every worker."""
        self.task_queue.join()
        items = list(self.workers.values())
        for i in items:
            i.shutdown()
        # Release the strong references held by the snapshot.
        del items

    def handle_requests_per_worker(self):
        """Forward queued requests to their node's worker, forever.

        Each queue item is ``(anum, capability, timeout, params, zmq_info)``.
        If building the action fails, a ``FailedAction`` carrying the
        traceback is queued on the worker instead.
        """
        self.logger.info("Started forwarding requests")
        while True:
            anum, capability, timeout, params, zmq_info = self.task_queue.get()
            # Pre-bind so the ``finally`` clause can always delete the name,
            # even when the worker lookup itself raises.
            worker = None
            try:
                worker = self.get_worker(params['NodeID'])
                capability = self.capabilities[capability]
                try:
                    worker.add_action(
                        capability.action_class(anum, params['NodeID'],
                                                zmq_info, timeout, params,
                                                **capability.params))
                except Exception as e:
                    self.logger.debug(e)
                    dummy_action = FailedAction(anum, params['NodeID'],
                                                zmq_info, timeout, params)
                    dummy_action.statusmsg += "\n" + traceback.format_exc()
                    worker.add_action(dummy_action)
            except KeyError:
                self.logger.error(
                    "Unknown capability {cap}".format(cap=capability))
            finally:
                # Drop strong references before blocking on the queue again
                # so the weak registry can release idle workers.
                del worker, capability
class _WeakValueDictionary(object):
    # Maps from OID -> Persistent object, but
    # only weakly references the Persistent object. This is similar
    # to ``weakref.WeakValueDictionary``, but is customized depending on the
    # platform. On PyPy, all objects can cheaply use a WeakRef, so that's
    # what we actually use. On CPython, though, ``PersistentPy`` cannot be weakly
    # referenced, so we rely on the fact that the ``id()`` of an object is its
    # memory location, and we use ``ctypes`` to cast that integer back to
    # the object.
    #
    # To remove stale addresses, we rely on the ``ffi.gc()`` object with the exact
    # same lifetime as the ``PersistentPy`` object. It calls us, we get the ``id``
    # back out of the CData, and clean up.
    #
    # Both branches below provide the same three hooks used by the shared
    # mapping methods at the bottom of the class:
    #   _save_addr(oid, obj) -> the value actually stored in ``_data``
    #   _from_addr(stored)   -> the object for a stored value
    #   cleanup_hook         -> callable (or None) that drops a dead entry
    if PYPY: # pragma: no cover
        def __init__(self):
            self._data = WeakValueDictionary()

        def _from_addr(self, addr):
            # The stored value already is the object.
            return addr

        def _save_addr(self, oid, obj):
            # Store the object itself; ``_data`` references it weakly.
            return obj

        # No explicit cleanup hook in this branch.
        cleanup_hook = None
    else:
        def __init__(self):
            # careful not to require ctypes at import time; most likely the
            # C implementation is in use.
            import ctypes

            # oid -> id(obj)
            self._data = {}
            # id(obj) -> oid, the reverse index used by cleanup_hook
            self._addr_to_oid = {}
            self._cast = ctypes.cast
            self._py_object = ctypes.py_object

        def _save_addr(self, oid, obj):
            """Record ``id(obj) -> oid`` and return the id to be stored."""
            i = id(obj)
            self._addr_to_oid[i] = oid
            return i

        def _from_addr(self, addr):
            """Turn a stored ``id()`` back into the object via ctypes."""
            # NOTE(review): only valid while the object at ``addr`` is still
            # alive; presumably cleanup_hook guarantees that — confirm.
            return self._cast(addr, self._py_object).value

        def cleanup_hook(self, cdata):
            """Drop the entry whose object id is ``cdata.pobj_id``."""
            # This is called during GC, possibly at interpreter shutdown
            # when the __dict__ of this object may have already been cleared.
            try:
                addr_to_oid = self._addr_to_oid
            except AttributeError:
                return
            oid = addr_to_oid.pop(cdata.pobj_id, None)
            # If the id was unknown, ``oid`` is None and this pop uses None
            # as the key, with a default so it cannot raise.
            self._data.pop(oid, None)

    def __contains__(self, oid):
        return oid in self._data

    def __len__(self):
        return len(self._data)

    def __setitem__(self, key, value):
        # Store whatever the platform-specific hook returns for the object.
        addr = self._save_addr(key, value)
        self._data[key] = addr

    def pop(self, oid):
        """Remove ``oid`` and return its object; KeyError if missing."""
        # NOTE(review): in the ctypes branch this leaves the matching
        # ``_addr_to_oid`` entry in place — confirm that is intended.
        return self._from_addr(self._data.pop(oid))

    def items(self):
        """Yield ``(oid, object)`` pairs for every stored entry."""
        from_addr = self._from_addr
        for oid, addr in self._data.items():
            yield oid, from_addr(addr)

    def get(self, oid, default=None):
        """Return the object stored for ``oid``, or ``default``."""
        # ``self`` serves as a unique "missing" sentinel for the lookup.
        addr = self._data.get(oid, self)
        if addr is self:
            return default
        return self._from_addr(addr)

    def __getitem__(self, oid):
        addr = self._data[oid]
        return self._from_addr(addr)
class SilkArray(SilkObject):
    """Array of Silk elements with switchable internal storage.

    The storage mode is one of ``_storage_names``:

    * ``"json"``  - ``_data`` is a plain list and ``_children`` is a list of
      child Silk objects (empty for elementary arrays);
    * ``"numpy"`` - ``_data`` is a numpy array, ``_Len`` is a numpy length
      vector and ``_children`` is a ``WeakValueDictionary`` of child views
      that are created on demand;
    * ``"mixed"`` - json storage in which at least one child uses non-json
      storage (tracked by id in ``_storage_nonjson_children``).

    ``_Len[0]`` always holds the logical length (see the ``_len`` property).
    Subclasses are expected to fill in ``_element``, ``dtype``,
    ``_elementary`` and ``_arity``.
    """
    _element = None
    dtype = None
    _elementary = None
    _arity = None
    __slots__ = [
        "_parent", "_storage_enum", "_storage_nonjson_children", "_data",
        "_children", "_Len", "_is_none", "__weakref__"
    ]

    def __init__(self, *args, _mode="any", **kwargs):
        """Create an array.

        ``_mode`` selects how the array is initialised:

        * ``"parent"``     - child view; requires the keywords ``parent``,
          ``storage``, ``data_store`` and ``len_data_store``;
        * ``"from_numpy"`` - wrap ``data_store`` / ``len_data_store``;
        * ``"any"``        - json storage, filled through ``set(*args)``;
        * ``"empty"``      - json storage, left empty;
        * ``"from_json"``  - json storage, filled with the json prop setter.

        Raises ``ValueError`` for any other mode.
        """
        self._storage_enum = None
        self._storage_nonjson_children = set()
        self._children = None
        if _mode == "parent":
            self._init(
                kwargs["parent"],
                kwargs["storage"],
                kwargs["data_store"],
                kwargs["len_data_store"],
            )
        elif _mode == "from_numpy":
            assert "parent" not in kwargs
            self._init(
                None,
                "numpy",
                kwargs["data_store"],
                kwargs["len_data_store"],
            )
        else:
            assert "parent" not in kwargs
            assert "storage" not in kwargs
            assert "data_store" not in kwargs
            self._init(None, "json", None, None)
            if _mode == "any":
                self.set(*args)
            elif _mode == "empty":
                pass
            elif _mode == "from_json":
                self.set(*args, prop_setter=_prop_setter_json, **kwargs)
            else:
                raise ValueError(_mode)

    @property
    def _len(self):
        """Logical number of elements, stored in ``_Len[0]``."""
        return int(self._Len[0])

    @_len.setter
    def _len(self, value):
        self._Len[0] = value

    def _init(self, parent, storage, data_store, len_data_store):
        """(Re)initialise parent link, storage mode, data and children.

        Raises ``ValueError`` if ``storage`` is neither "json" nor "numpy".
        """
        if parent is not None:
            if storage == "numpy":
                self._parent = lambda: parent  # hard ref
            else:
                self._parent = weakref.ref(parent)
        else:
            self._parent = lambda: None
        self.storage = storage
        self._is_none = False
        self._storage_nonjson_children.clear()

        if self._children is not None:
            # Detach the children of the previous storage. ``_children`` is
            # a list for json/mixed storage but a WeakValueDictionary for
            # numpy storage; iterating the mapping directly would yield its
            # integer keys instead of the children.
            previous = self._children
            if isinstance(previous, WeakValueDictionary):
                previous = list(previous.values())
            for child in previous:
                child._parent = lambda: None
        if storage == "json":
            self._children = []
            if data_store is None:
                data_store = []
            self._data = data_store
            self._Len = [0]
        elif storage == "numpy":
            self._children = WeakValueDictionary()
            assert data_store is not None
            assert len_data_store is not None
            assert len(len_data_store), len_data_store
            dtype = np.dtype(self.dtype, align=True)
            assert data_store.dtype == dtype
            self._data = data_store
            self._Len = len_data_store
            return
        else:
            raise ValueError(storage)

        assert storage == "json"
        for n in range(len(self._data)):
            # NOTE(review): ``data_store`` is ``self._data`` at this point, so
            # this condition looks like it can never be true - confirm.
            if n > len(data_store):
                if issubclass(self._element, SilkArray):
                    self._data.append([])
                else:
                    self._data.append({})
            if not self._elementary:
                child = self._element(
                    _mode="parent",
                    storage="json",
                    parent=self,
                    data_store=self._data[n],
                    len_data_store=None,
                )
                self._children.append(child)
        self._len = len(self._data)

    def copy(self, storage="json"):
        """Returns a copy with the storage in the specified format"""
        cls = type(self)
        if storage == "json":
            json = self.json()
            return cls.from_json(json)
        elif storage == "numpy":
            numpydata = self.numpy()
            # NOTE(review): lengths() asserts numpy storage, so this branch
            # appears to require that ``self`` already uses numpy storage.
            lengths = self.lengths()
            return cls.from_numpy(numpydata, lengths, copy=False)
        else:
            raise ValueError(storage)

    @classmethod
    def from_json(cls, data):
        """Construct a json-storage array from (filtered) json data."""
        data = _filter_json(data)
        return cls(data, _mode="from_json")

    @classmethod
    def _check_numpy_args(cls, arr, lengths, length_can_be_none, self_data):
        """Validate ``arr`` / ``lengths`` against this class.

        Raises ``TypeError`` on a rank, dtype or length-vector shape
        mismatch. When ``self_data`` is given, ``arr`` must also match its
        rank and dtype.
        """
        if self_data is not None:
            d = self_data
            if len(arr.shape) != len(d.shape) or arr.dtype != d.dtype:
                err = TypeError(
                    (len(arr.shape), len(d.shape), arr.dtype, d.dtype))
                raise err
        if len(arr.shape) != cls._arity:
            raise TypeError("Array must be %d-dimensional" % cls._arity)
        if arr.dtype != np.dtype(cls.dtype, align=True):
            raise TypeError("Array has the wrong dtype")
        if lengths is None and length_can_be_none:
            return
        assert lengths.dtype == np.uint32
        lenarray_shape = (_get_lenarray_size(arr.shape), )
        if lengths.shape != lenarray_shape:
            err = TypeError((lengths.shape, lenarray_shape, arr.shape))
            raise err

    @classmethod
    def from_numpy(cls, arr, lengths=None, *, copy=True, validate=True):
        """Constructs from a numpy array "arr"

        "lengths": The lengths of the array elements
          If not specified, it is assumed that "arr" is unpadded,
          i.e. that all elements have a valid value

        "arr" may also be an ``(array, lengths)`` tuple of two ndarrays.
        """
        if isinstance(arr, tuple) and len(arr) == 2 and \
           isinstance(arr[0], np.ndarray) and isinstance(arr[1], np.ndarray):
            return cls.from_numpy(arr[0], arr[1],
                                  copy=copy, validate=validate)
        cls._check_numpy_args(arr, lengths,
                              length_can_be_none=True, self_data=None)

        if copy:
            arr = datacopy(arr)
        if lengths is None:
            lengths = _get_lenarray_full(arr.shape)
        ret = cls(_mode="from_numpy", data_store=arr, len_data_store=lengths)
        if validate:
            ret.validate()
        return ret

    @classmethod
    def empty(cls):
        """Return a new, empty json-storage array."""
        return cls(_mode="empty")

    def _get_child(self, childnr):
        """Return child ``childnr`` (negative indices count from the end).

        Raises ``TypeError`` for non-int indices and ``IndexError`` when the
        index is outside the logical length.
        """
        if not isinstance(childnr, int):
            raise TypeError(childnr)
        if childnr < 0:
            childnr += self._len
        if childnr < 0 or childnr >= self._len:
            raise IndexError(childnr)
        from .silkarray import SilkArray
        if self.storage == "numpy":
            # numpy children are views built on demand; the local name keeps
            # the view alive until it is returned (the cache is weak).
            child = self._element(
                _mode="parent",
                parent=self,
                storage="numpy",
                data_store=self._data[childnr],
                len_data_store=self._get_child_lengths(childnr))
            self._children[childnr] = child
        return self._children[childnr]

    def _get_children(self):
        """Yield the children within the logical length."""
        if self.storage == "numpy":
            for n in range(self._len):
                yield self._get_child(n)
        else:
            for child in self._children:
                yield child

    def set(self, *args, prop_setter=_prop_setter_any):
        """Replace the contents of the array.

        A single ``None`` argument marks the array as none and clears it.
        Otherwise a cascade of construction strategies is tried (numpy
        array, string to parse, SilkArray, generic iterable, and finally
        the positional arguments as elements); the exception of the last
        strategy propagates if all fail.
        """
        # Local import: ``collections.Iterable`` was removed in Python 3.10.
        # Inside the broad ``except`` below the resulting AttributeError
        # was swallowed and silently changed how iterables were handled.
        from collections.abc import Iterable

        if len(args) == 1:
            if args[0] is None:
                self._is_none = True
                self._len = 0
                self._clear_data()
                return

        # TODO: make a nice composite exception that stores all exceptions
        try:
            # ``set`` takes no **kwargs, so only the positional argument is
            # inspected (the original test referenced an undefined name).
            if self.storage == "numpy" and \
               len(args) == 1 and \
               isinstance(args[0], np.ndarray):
                self._construct_from_numpy(args[0], lengths=None)
            else:
                raise TypeError("Not a numpy array")
        except Exception:
            try:
                keep_trying = True
                ok = False
                if len(args) == 1:
                    a = args[0]
                    if isinstance(a, str):
                        self._parse(a)
                    elif isinstance(a, SilkArray):
                        if a.storage == "numpy":
                            if isinstance(a, type(self)):
                                keep_trying = False
                                self._construct_from_numpy(a._data, a._Len)
                            else:
                                # NOTE(review): ``prop_setter`` is passed
                                # both positionally and by keyword here,
                                # which raises TypeError and drops into the
                                # fallback below - the intended call is
                                # unclear, so it is left as found.
                                self._construct(
                                    prop_setter,
                                    a.json(),
                                    prop_setter=_prop_setter_json)
                        else:
                            self._construct(prop_setter, *a)
                    elif isinstance(a, Iterable) or isinstance(a, np.void):
                        self._construct(prop_setter, *a)
                    else:
                        raise TypeError(a)
                else:
                    raise TypeError(args)
                ok = True
            except Exception:
                if not ok:
                    if not keep_trying:
                        raise
                    # Last resort: treat the arguments as the elements.
                    try:
                        self._construct(prop_setter, *args)
                    except Exception:
                        raise
        self.validate()
        self._is_none = False

    def validate(self):
        """Validation hook; does nothing in this class."""
        pass

    def json(self):
        """Returns a JSON representation of the Silk object
        """
        if self.storage == "json":
            return _filter_json(self._data)

        if self._elementary:
            return [dd for dd in self._data]
        else:
            d = []
            for child in self._get_children():
                dd = child.json()
                d.append(dd)
            return d

    def numpy(self):
        """Returns a numpy representation of the Silk object
        NOTE: for all numpy arrays,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          the length of each array is stored in the LEN_xxx field
          TODO: document multidimensional length vector, PTR_LEN_xxx
        TODO: add and document SHAPE field
        """
        if self.storage == "numpy":
            return datacopy(self._data)
        new_obj = self.copy("json")
        return new_obj.make_numpy()

    def make_json(self):
        """Switch the internal storage to json and return the json data."""
        if self.storage == "json":
            return self._data
        elif self.storage == "numpy":
            json = _filter_json(self.json(), self)
            parent = self._parent()
            if parent is not None and parent.storage == "numpy":
                parent.numpy_shatter()
            self._init(parent, "json", None, None)
            self.set(json, prop_setter=_prop_setter_json)
            if parent is not None:
                parent._remove_nonjson_child(self)
                myname = parent._find_child(id(self))
                parent._data[myname] = self._data
            return self._data
        elif self.storage == "mixed":
            for child_id in list(self._storage_nonjson_children):  # copy!
                for child in self._get_children():
                    if id(child) == child_id:
                        child.make_json()
                        break
                else:
                    raise Exception(
                        "Cannot find child that was marked as 'non-JSON'")
            # Above will automatically update storage status to "json"
            assert self.storage == "json"
            return self._data

    def _get_outer_shape(self):
        """Return, per dimension, the maximum length found at that depth."""
        shape = [len(self)]
        d = self
        for n in range(1, self._arity):
            maxlen = max([len(dd) for dd in d])
            shape.append(maxlen)
            d2 = []
            for dd in d:
                for ddd in dd:
                    d2.append(ddd)
            d = d2
        return shape

    def _get_child_lengths(self, child):
        """Return the slice of ``_Len`` that belongs to child ``child``.

        Returns ``None`` for non-numpy storage and for arity-1 arrays.
        """
        if self.storage != "numpy":
            return None
        if self._arity == 1:
            return None
        child_size = _get_lenarray_size(self._data.shape[1:])
        # Slot 0 of ``_Len`` is this array's own length.
        start = 1 + child_size * child
        assert start + child_size <= len(self._Len)
        return self._Len[start:start + child_size]

    def _del_child_lengths(self, child):
        """Remove child ``child``'s block from ``_Len`` and shift the rest."""
        if self.storage != "numpy":
            return
        if self._arity == 1:
            return
        size = _get_lenarray_size(self._data.shape[1:])
        offset = 1 + size * child
        lsize = len(self._Len)
        self._Len[offset:lsize - size] = self._Len[offset + size:lsize]
        self._Len[lsize - size:] = 0
        for n in range(child + 1, len(self._children)):
            c_offset = 1 + size * n
            c = self._children[n]
            c._Len = self._Len[c_offset:c_offset + size]

    def _insert_child_lengths(self, child, child_lengths):
        """Insert ``child_lengths`` as the block of child ``child``."""
        if self.storage != "numpy":
            assert child_lengths is None
            return
        if self._arity == 1:
            assert child_lengths is None
            return
        assert child_lengths is not None
        size = _get_lenarray_size(self._data.shape[1:])
        offset = 1 + size * child
        lsize = len(self._Len)
        self._Len[offset + size:lsize] = self._Len[offset:lsize - size]
        self._Len[offset:offset + size] = child_lengths
        for n in range(child, len(self._children)):
            c_offset = 1 + size * (n + 1)
            c = self._children[n]
            c._Len = self._Len[c_offset:c_offset + size]

    def _restore_array_coupling(self, data=None, myname=None):
        """
        Array members have their length vector stored in the parent data
        In addition, var_arrays have a pointer to their data stored
        If the parent data gets reallocated or copied, then this information
        gets decoupled, so it must be restored
        """
        assert self.storage == "numpy"
        if data is None:
            parent = self._parent()
            if parent is None:
                return
            if parent.storage != "numpy":
                return
            myname = parent._find_child(id(self))
            if not isinstance(parent, SilkArray) and \
               parent._props[myname].get("var_array", False):
                data = parent._data
                assert data is not None
        if data is not None:
            assert myname is not None
            data[myname] = self._data
            data["PTR_" + myname] = self._data.ctypes.data
            data["LEN_" + myname] = self._Len.copy()
            self._Len = data["LEN_" + myname]
            if self._arity > 1:
                data["SHAPE_" + myname] = self._data.shape
                data["PTR_LEN_" + myname] = self._Len.ctypes.data

    def make_numpy(self, _toplevel=None):
        """Sets the internal storage to 'numpy'
        Returns the numpy array that is used as internal storage buffer
        NOTE: for optional members,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          an extra field "HAS_xxx" indicates if the data is present.
        TODO: update doc
        NOTE: for numpy array members of variable shape,
          an extra field "PTR_xxx" contains a C pointer to the data
          For this, the dimensionality of the array does not matter,
           e.g. both for IntegerArray and IntegerArrayArray,
            the C pointer will be "int *"
           and both for MyStructArray and MyStructArrayArray,
            the C pointer will be "MyStruct *"
        """
        from .silkarray import SilkArray
        if self.storage == "numpy":
            return self._data

        dtype = np.dtype(self.dtype, align=True)
        shape = self._get_outer_shape()
        data = np.zeros(dtype=dtype, shape=shape)
        lengths = _get_lenarray_empty(shape)
        lengths[0] = len(self)
        if self._elementary:
            # NOTE(review): called as a method here but as a module-level
            # function in _construct, with a different argument list -
            # confirm which one is intended.
            self._set_numpy_ele_range(self, 0, len(self._data), self._data,
                                      self._arity, data)
        else:
            for childnr, child in enumerate(self._get_children()):
                child.make_numpy(_toplevel=False)
                if self._arity > 1:
                    # A multidimensional index must be a tuple; numpy no
                    # longer accepts a list of slices for this.
                    slices = tuple(slice(0, v) for v in child._data.shape)
                    data[childnr][slices] = child._data
                else:
                    try:
                        data[childnr] = child._data
                    except ValueError:  # numpy bug
                        for field in child._data.dtype.names:
                            data[childnr][field] = child._data[field]
                if self._arity > 1:
                    child_size = _get_lenarray_size(shape[1:])
                    start = 1 + child_size * childnr
                    arr1 = lengths[start:start + child_size]
                    shape1 = data.shape[1:]
                    arr2 = child._Len
                    shape2 = child._data.shape
                    _lenarray_copypad(arr1, shape1, arr2, shape2)

        self._init(self._parent(), "numpy", data, lengths)
        parent = self._parent()
        if parent is not None:
            if parent.storage != "numpy":
                parent._add_nonjson_child(self)
        for child in self._get_children():
            child._restore_array_coupling()

        return data

    def lengths(self):
        """Return the length vector (numpy storage only)."""
        assert self.storage == "numpy"
        return self._Len

    def realloc(self, *shape):
        """Grow the numpy buffer to ``shape``, preserving the contents.

        ``shape`` may be given as separate ints or as one tuple. Raises
        ``ValueError`` for a wrong number of dimensions or a dimension
        smaller than the current one, and ``Exception`` when the array is
        part of a larger numpy buffer.
        """
        assert self.storage == "numpy"
        if len(shape) == 1 and isinstance(shape[0], tuple):
            shape = shape[0]
        parent = self._parent()
        if parent is not None:
            myname = parent._find_child(id(self))
            if parent.storage == "numpy":
                if not parent._props[myname].get("var_array", False):
                    # Implicit concatenation instead of backslash
                    # continuations, which embedded stray whitespace.
                    raise Exception(
                        "Cannot reallocate numpy array that is "
                        "part of a larger numpy buffer. "
                        "Use numpy_shatter() on the parent to allow "
                        "reallocation")
        if len(shape) != self._arity:
            msg = "Shape must have %d dimensions, not %d"
            raise ValueError(msg % (self._arity, len(shape)))
        min_shape = self._data.shape
        for n in range(self._arity):
            msg = "Dimension %d: shape must have at least length %d, not %d"
            if min_shape[n] > shape[n]:
                raise ValueError(msg % (n + 1, min_shape[n], shape[n]))
        old_data = self._data
        old_len = self._Len
        # Same aligned dtype that _init asserts on.
        self._data = np.zeros(dtype=np.dtype(self.dtype, align=True),
                              shape=shape)
        slices = tuple(slice(0, s) for s in min_shape)
        self._data[slices] = old_data
        self._Len = _get_lenarray_empty(shape)
        _lenarray_copypad(self._Len, shape, old_len, old_data.shape)
        self._init(parent, "numpy", self._data, self._Len)
        self._restore_array_coupling()

    def _find_child(self, child_id):
        """Return the index of the child whose ``id()`` is ``child_id``.

        Raises ``KeyError`` if no such child is registered.
        """
        if self.storage == "numpy":
            for childname, ch in self._children.items():
                if child_id == id(ch):
                    return childname
        else:
            for childname, ch in enumerate(self._children):
                if child_id == id(ch):
                    return childname
        raise KeyError

    def _add_nonjson_child(self, child):
        """Record that ``child`` left json storage; propagate upwards."""
        assert self.storage != "numpy"
        njc = self._storage_nonjson_children
        child_id = id(child)
        if child_id not in njc:
            njc.add(child_id)
            if self.storage == "json":
                self.storage = "mixed"
                parent = self._parent()
                if parent is not None:
                    parent._add_nonjson_child(self)

    def _remove_nonjson_child(self, child):
        """Record that ``child`` is json again; propagate upwards."""
        assert self.storage != "numpy"
        njc = self._storage_nonjson_children
        child_id = id(child)
        if child_id in njc:
            assert self.storage == "mixed", self.storage
            njc.remove(child_id)
            if len(njc) == 0:
                self.storage = "json"
                parent = self._parent()
                if parent is not None:
                    # ``parent`` is already dereferenced (mirrors
                    # _add_nonjson_child); it must not be called again.
                    parent._remove_nonjson_child(self)

    def numpy_shatter(self):
        """
        Breaks up a unified numpy storage into one numpy storage per child
        """
        assert self.storage == "numpy"
        assert not self._elementary
        parent = self._parent()
        if parent is not None and parent.storage == "numpy":
            parent.numpy_shatter()
        data = []
        children = []
        for child in self._get_children():
            d = datacopy(child._data)
            data.append(d)
            child._data = d
            children.append(child)
        self._data = data
        self._children = children
        self._storage_nonjson_children = set([id(p) for p in children])
        self.storage = "mixed"

    def _construct(self, prop_setter, *args):
        """Make ``args`` the new elements of the array.

        Raises ``IndexError`` when numpy storage has too little space.
        """
        old_data = self._data
        old_children = self._children
        with _ArrayConstructContext(self):
            if self.storage == "numpy":
                if len(args) > len(self._data):
                    msg = "index {0} is out of bounds for axis with size {1}"\
                          .format(len(args), len(self._data))
                    raise IndexError(msg)
                if self._elementary:
                    _set_numpy_ele_range(self, 0, len(args), args,
                                         self._arity)
                else:
                    for anr, a in enumerate(args):
                        child = self._get_child(anr)
                        child.set(args[anr], prop_setter=prop_setter)
            else:
                if self._elementary:
                    newdata = []
                    for anr, a in enumerate(args):
                        v = self._element(a)
                        newdata.append(v)
                    self._data[:] = newdata
                else:
                    # Grow: add empty children up to the new length.
                    for n in range(self._len, len(args)):
                        if issubclass(self._element, SilkArray):
                            self._data.append([])
                        else:
                            self._data.append({})
                        child = self._element(
                            _mode="parent",
                            storage=self.storage,
                            parent=self,
                            data_store=self._data[n],
                            len_data_store=self._get_child_lengths(n))
                        self._children.append(child)

                    for n in range(len(args)):
                        child = self._children[n]
                        child.set(args[n], prop_setter=prop_setter)

                    # Shrink: drop surplus children and data.
                    if len(args) < self._len:
                        self._children[:] = self._children[:len(args)]
                        # NOTE(review): this sits in the non-numpy branch,
                        # so the numpy case below looks unreachable.
                        if self.storage == "numpy":
                            self._data[len(args):] = \
                                np.zeros_like(self._data[len(args):])
                        else:
                            self._data[:] = self._data[:len(args)]
            self._len = len(args)

    def _construct_from_numpy(self, arr, lengths):
        """Take over the contents of ``arr`` / ``lengths``."""
        if self.storage != "numpy":
            self._init(self._parent(), "numpy", arr, lengths)
            self.make_json()
            return

        # NOTE(review): with length_can_be_none=False a ``None`` lengths is
        # rejected inside the check, so the ``None`` fallback below looks
        # unreachable - confirm the intended flag value.
        self._check_numpy_args(arr, lengths, self_data=self._data,
                               length_can_be_none=False)
        if lengths is None:
            lengths = _get_lenarray_full(arr.shape)

        self._data = datacopy(arr)
        self._Len = lengths.copy()
        self._restore_array_coupling()

    def _parse(self, s):
        raise NotImplementedError  # can be user-defined

    _storage_names = ("numpy", "json", "mixed")

    @property
    def storage(self):
        """Current storage mode name (one of ``_storage_names``)."""
        return self._storage_names[self._storage_enum]

    @storage.setter
    def storage(self, storage):
        assert storage in self._storage_names, storage
        self._storage_enum = self._storage_names.index(storage)

    def __dir__(self):
        return dir(type(self))

    def __setattr__(self, attr, value):
        # Private attributes and ``storage`` are real attributes; anything
        # else is routed through the property setter.
        if attr.startswith("_") or attr == "storage":
            object.__setattr__(self, attr, value)
        else:
            self._set_prop(attr, value, _prop_setter_any)

    def __getitem__(self, item):
        """Return element ``item``, or a new array for a slice."""
        if isinstance(item, slice):
            return type(self)(
                [self[v] for v in range(*item.indices(len(self)))])
        if not isinstance(item, int):
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(
                msg.format(self.__class__.__name__, item.__class__.__name__))
        if self._elementary:
            if self.storage == "numpy":
                return _get_numpy_ele_prop(self, item, self._len)
            else:
                return self._data[:self._len][item]
        else:
            return self._get_child(item)

    def _set_prop(self, item, value, prop_setter=_prop_setter_any):
        """Assign ``value`` to element ``item``.

        Raises ``IndexError`` for an out-of-range index.
        """
        if self._elementary:
            if self.storage == "numpy":
                _set_numpy_ele_prop(self, item, value)
            else:
                # Negative indices count from the end, as in _get_child.
                if item < 0:
                    item += self._len
                if item < 0 or item >= self._len:
                    raise IndexError(item)
                self._data[item] = self._element(value)
        else:
            child = self._get_child(item)
            child.set(value, prop_setter=prop_setter)

    def __setitem__(self, item, value):
        """Assign one element, or a same-length sequence to a slice."""
        if isinstance(item, slice):
            start, stop, stride = item.indices(self._len)
            indices = list(range(start, stop, stride))
            if len(indices) != len(value):
                msg = ("Cannot assign to a slice of length %d using "
                       "a sequence of length %d")
                raise IndexError(msg % (len(indices), len(value)))
            # Pair each target index with the value at the same position
            # (not with ``value[target_index]``).
            for n, element in zip(indices, value):
                self._set_prop(n, element)
            return
        elif isinstance(item, int):
            self._set_prop(item, value)
        else:
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(
                msg.format(self.__class__.__name__, item.__class__.__name__))

    def __delitem__(self, item):
        """Delete one element or every element selected by a slice."""
        if isinstance(item, slice):
            start, stop, stride = item.indices(self._len)
            indices = list(range(start, stop, stride))
            # Back to front, so earlier indices stay valid.
            for n in reversed(indices):
                self.pop(n)
            return
        if not isinstance(item, int):
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(
                msg.format(self.__class__.__name__, item.__class__.__name__))
        self.pop(item)

    def pop(self, index=-1):
        """Remove and return the element at ``index`` (default: last).

        Raises ``TypeError`` for non-int indices and ``IndexError`` when the
        index is outside the logical length.
        """
        if not isinstance(index, int):
            msg = "{0} indices must be integers, not {1}"
            raise TypeError(
                msg.format(self.__class__.__name__, index.__class__.__name__))
        if index < 0:
            index += self._len
        # The upper bound matters for numpy storage, where the buffer may
        # be larger than the logical length.
        if index < 0 or index >= self._len:
            raise IndexError(index)
        if self.storage == "numpy":
            ret_data = datacopy(self._data[index])
            ret_lengths = None
            if self._arity > 1:
                ret_lengths = _get_lenarray_empty(ret_data.shape)
            ret = self._element(
                _mode="from_numpy",
                data_store=ret_data,
                len_data_store=ret_lengths,
            )
            # Close the gap, then zero the vacated last slot.
            self._data[index:self._len - 1] = self._data[index + 1:self._len]
            try:
                self._data[self._len - 1] = np.zeros_like(
                    self._data[self._len - 1])
            except ValueError:  # numpy bug
                for field in self._data.dtype.fields:
                    self._data[self._len - 1][field] = np.zeros_like(
                        self._data[self._len - 1][field])
            self._del_child_lengths(index)
        elif self._elementary:
            ret = self._data[:self._len][index]
            self._data.__delitem__(index)
        else:
            ret = self._children[:self._len][index].copy()
            self._children.__delitem__(index)
            self._data.__delitem__(index)
        self._len -= 1
        return ret

    def _Len__(self):
        return self._len

    def _print(self, spaces):
        """Return a multi-line representation indented by ``spaces``."""
        ret = "{0} (\n".format(self.__class__.__name__)
        for n in range(self._len):
            if self._elementary:
                value = self._data[n]
                if self.storage == "numpy":
                    if value.dtype.kind == 'S':
                        substr = '"' + value.decode() + '"'
                    else:
                        substr = str(value)
                else:
                    substr = value._print(spaces + 2)
            else:
                value = self._get_child(n)
                substr = value._print(spaces + 2)
            ret += "{0}{1},\n".format(" " * (spaces + 2), substr)
        ret += "{0})".format(" " * spaces)
        return ret

    def __str__(self):
        return self._print(0)

    def __repr__(self):
        return self._print(0)

    def clear(self):
        """Remove all elements."""
        self.set([])

    def append(self, item):
        """Insert ``item`` at the end."""
        self.insert(self._len, item)

    def insert(self, index, item):
        """Insert ``item`` before position ``index``.

        Raises ``TypeError`` for non-int indices and ``IndexError`` for a
        negative out-of-range index or a full numpy buffer.
        """
        if not isinstance(index, int):
            msg = "{0} indices must be integers, not {1}"
            raise TypeError(
                msg.format(self.__class__.__name__, index.__class__.__name__))
        if index < 0:
            index += self._len
        if index < 0:
            raise IndexError
        if self.storage == "numpy":
            if self._len >= len(self._data):
                raise IndexError("Numpy array overflows allocated space")
            if not self._elementary:
                ele = self._element(item)
                child_data = ele.make_numpy()
                child_lengths = None
                if self._arity > 1:
                    child_lengths = ele.lengths()
                self._data[index + 1:self._len + 1] = \
                    self._data[index:self._len]
                if self._arity > 1:
                    # Tuple index: see make_numpy.
                    slices = tuple(slice(0, v) for v in child_data.shape)
                    self._data[index][slices] = child_data
                else:
                    self._data[index] = child_data
                self._insert_child_lengths(index, child_lengths)
            else:
                self._data[self._len] = item  # dry run
                self._data[index + 1:self._len + 1] = \
                    self._data[index:self._len]
                self._data[index] = item  # should give no exception now
            self._len += 1
        else:
            # NOTE(review): this branch never changes ``_len`` itself;
            # presumably _ArrayInsertContext does the bookkeeping - confirm.
            with _ArrayInsertContext(self, index):
                if self._elementary:
                    self._data[index] = item
                else:
                    child = self._element(
                        _mode="parent",
                        storage=self.storage,
                        parent=self,
                        data_store=self._data[index],
                        len_data_store=self._get_child_lengths(index))
                    self._children.insert(index, child)
                    child.set(item)

    def __eq__(self, other):
        """Equal when ``other`` is a SilkArray with the same json content."""
        if not isinstance(other, SilkArray):
            return False
        if self.storage == other.storage == "json":
            return self._data == other._data
        else:
            # can't use numpy _data because of PTR and different
            # allocation sizes
            return self.json() == other.json()

    def __len__(self):
        return self._len

    def _clear_data(self):
        """Zero the numpy buffer, or clear every child recursively."""
        d = self._data
        if self.storage == "numpy":
            d[:] = np.zeros_like(d)
        else:
            for child in self._get_children():
                child._clear_data()
class Application(object):
    """The main application class.

    The Application class is responsible for holding global data that will be
    shared between the whole application. It also provides a message
    dispatching system for intercommunication. Methods for loading resources
    are also available.

    This class is a singleton, so the constructor must be called just once
    (the constructor asserts that no previous instance exists).
    """
    # Singleton instance
    _INSTANCE = None

    # This option tells the application to pump raw frames to the preview queue
    OPT_PREVIEW_RAW = 0x1
    # This option tells the application to pump frames right after they got
    # processed by the analysis plugin
    OPT_PREVIEW_POST_ANALYSIS = 0x2
    OPT_PREVIEW_FILTER_PAGE = 0x3

    def __init__(self, argv):
        """Create the singleton, the QApplication and the message system.

        Parameters
        ----------
        argv : list
            The command line arguments forwarded to `QApplication`.
        """
        # Assert that there is no previous created instance
        assert Application._INSTANCE is None
        # Set this instance to the singleton _INSTANCE constant
        Application._INSTANCE = self

        # Create a new QApplication instance
        self._qapp = QApplication(argv)

        pjoin = os.path.join

        # Save the application main path (the place where the main script was
        # invoked by the interpreter)
        self.PATH = sys.path[0]

        # Create a default settings dictionary
        self.settings = {
            # The max frames that can be put in the raw queue
            'raw_queue_len': 1,
            # The max frames that can be put in the preview queue
            'preview_queue_len': 1,
            # The directory of the installed plugins
            'plugins_dir': pjoin(self.PATH, 'plugins'),
            # The directory of the application resources (imgs, icons, ui, etc.)
            'resources_dir': pjoin(self.PATH, 'resources'),
            # Max time to wait for a worker thread to finish
            'thread_wait_timeout': 500,
            'lang_dir': pjoin(self.PATH, 'lang'),
        }

        # This dictionary holds all the ui files loaded by calling the method
        # load_ui. They can be retrieved by calling the method get_ui
        self._loaded_ui_objects = {}

        # A thread-safe queue that contains the messages to be dispatched
        self._messages = Queue()

        # Maps a unique identifier to every object that can receive messages.
        # To insert an object, the method register_message_listener must be
        # used. The objects must have a `receive_message` method. Values are
        # held weakly, so registration does not keep a listener alive.
        self._message_listeners = WeakValueDictionary()

        # The timer that will be responsible for dispatching the messages
        self._message_timer = QTimer()
        self._message_timer.setInterval(20)
        # Connect the timeout signal to the method responsible for dispatching
        # the messages in the queue
        self._message_timer.timeout.connect(self._dispatch_messages)

        # A dictionary containing options the user can change by interacting
        # with the GUI
        self.user_options = {
            "preview_source": Application.OPT_PREVIEW_POST_ANALYSIS,
            "filter_group": "Raw",
        }

        # Currently installed translator
        self._translator = None

        # Start the message-system timer
        self._message_timer.start()

    @staticmethod
    def _normalize_ui_filename(ui_file):
        """Return `ui_file` with a `.ui` extension; raise IOError for any other."""
        fname, ext = os.path.splitext(ui_file)
        if ext == "":
            return fname + ".ui"
        if ext != ".ui":
            raise IOError(
                "The file name has an invalid extension '{}'".format(ext))
        return ui_file

    @staticmethod
    def _instantiate_ui(ui_path, base_instance):
        """Load `ui_path` with uic, running the ui hooks on a base instance."""
        if not base_instance:
            return uic.loadUi(ui_path)
        instance = uic.loadUi(ui_path, base_instance)
        # A failing hook is logged but does not abort the load.
        for hook in ("setupUi", "retranslateUi"):
            if hasattr(instance, hook):
                try:
                    getattr(instance, hook)()
                except Exception:
                    log.exception("")
        return instance

    @staticmethod
    def _retranslate(ui):
        """Call `ui.retranslateUi()` if it exists, logging any failure."""
        if hasattr(ui, "retranslateUi"):
            try:
                ui.retranslateUi()
            except Exception:
                log.exception("")

    def get_ui(self, name):
        """Return the instance of a previously loaded ui file.

        The ui object will be a QObject. To load a new ui file use the
        `load_ui` method.

        Parameters
        ----------
        name : str
            The ui object's name/alias.

        Returns
        -------
        QObject

        Raises
        ------
        KeyError : If there is no ui object with the given name.
        """
        if name not in self._loaded_ui_objects:
            raise KeyError("There is no ui object with name '{}'".format(name))
        return self._loaded_ui_objects[name]

    def load_ui(self, name, ui_file, base_instance = None):
        """Load a ui_file from the resources directory and return it.

        This method loads a QtDesigner `.ui` file from the resources directory
        using the `uic` module and returns the instance. A name must be given
        for saving the instance in the application's dictionary. The name must
        be an alias, like 'main_window' or `console_window` so it can be shared
        across the application.

        Parameters
        ----------
        name : str
            The name that will be used to share the object between the
            application modules. If the string is empty, the ui object will
            not be saved.
        ui_file : str
            The name of the ui file located in the resources directory. The
            `.ui` extension is optional.
        base_instance : QObject, None
            The base instance to load the ui file. The default value is None.
            When it is `None`, the uic module will create a new instance; when
            a proper QWidget is given, the uic will use it as a base.

        Returns
        -------
        QObject

        Raises
        ------
        IOError : If the file was not found or has an invalid extension.
        KeyError : If there is an already loaded ui instance with the same
            name.
        UILoadError : If loading the file with the `uic` module raises any
            exception.
        """
        # Check if there is a loaded ui with the given name
        if name in self._loaded_ui_objects:
            raise KeyError("A ui with name '{}' is already loaded.".format(name))

        # Append/validate the extension, then expand to the absolute path
        ui_file = self._normalize_ui_filename(ui_file)
        ui_path = os.path.join(self.settings["resources_dir"], "ui", ui_file)

        # Check if the file exists
        if not os.path.isfile(ui_path):
            raise IOError("The file '{}' does not exist.".format(ui_file))

        try:
            instance = self._instantiate_ui(ui_path, base_instance)
            # Only named ui objects are shared through `get_ui`
            if name.strip() != '':
                self._loaded_ui_objects[name] = instance
            return instance
        except Exception:
            print("------------------------------------------------------------")
            log.exception("")
            print("------------------------------------------------------------")
            raise UILoadError()

    def register_message_listener(self, listener):
        """Register an object so the application dispatches messages to it.

        The message listener is registered using a weak reference, so the
        messages system will not keep the objects alive when all the other
        references are deleted.

        Parameters
        ----------
        listener : obj
            The object that will receive the message.

        Returns
        -------
        int
            A unique identifier to use when referencing this listener in the
            messages system.

        Raises
        ------
        ValueError : If the listener object is already registered
        """
        # Check if the listener is registered already (identity comparison)
        for r in self._message_listeners.itervaluerefs():
            if r() is listener:
                raise ValueError("The object is already registered")

        id_ = find_free_key(self._message_listeners)
        self._message_listeners[id_] = listener
        return id_

    def unregister_message_listener(self, identifier):
        """Tell the application to stop dispatching messages to a listener.

        Parameters
        ----------
        identifier : int
            The identifier of the listener that will stop receiving messages.

        Raises
        ------
        ValueError : If there is no listener with the given identifier
        """
        try:
            del self._message_listeners[identifier]
        except KeyError:
            raise ValueError("There is no listener with id '{}'".format(identifier))

    def get_message_listener_instance(self, identifier):
        """Return a registered message listener with the given identifier.

        Parameters
        ----------
        identifier : int
            The identifier of the listener.

        Returns
        -------
        object
            The instance of the listener.

        Raises
        ------
        ValueError : If there is no listener with the given identifier.
        """
        try:
            return self._message_listeners[identifier]
        except KeyError:
            raise ValueError(("There is no message listener "
                              "registered with id {}").format(identifier))

    def post_message(self, message_type, message_data, sender):
        """Post a message to the messages system.

        Parameters
        ----------
        message_type : str
            A string containing a name/tag/alias for the message type. The
            string will be trimmed and changed to lower case.
        message_data : Any kind of object
        sender : int, class instance or None
            The id or reference of the sender. This prevents the sender from
            receiving this message. If the type is int, it should be the
            identifier of the listener in the messages system. It can also be
            a reference to the sender instead of its id. If None, the sender
            is considered anonymous, and will receive the message. If the id
            provided is invalid, it will be ignored.
        """
        self._messages.put((message_type.lower().strip(), message_data, sender))

    def _dispatch_messages(self):
        """Dispatch all the messages in the queue to the registered listeners.

        A listener whose `receive_message` raises is logged, unregistered and
        receives none of the remaining queued messages.
        """
        # Snapshot with strong references to the listeners
        listeners = list(self._message_listeners.items())

        while not self._messages.empty():
            mtype, mdata, sender = self._messages.get(False)

            if _DEBUG_MESSAGES:
                print("<{}, {}>: {}".format(sender, mtype, mdata))

            # Assign an invalid identifier
            sender_id = -1
            # If the sender is an int it should be the sender identifier
            if isinstance(sender, int):
                sender_id = sender
            # If the sender is not an int, it should be a ref to a listener, so
            # try to locate its id
            elif sender is not None:
                for (identifier, instance) in listeners:
                    if instance is sender:
                        sender_id = identifier

            failed = []
            for (identifier, instance) in listeners:
                # Don't dispatch the message to the sender
                if identifier == sender_id:
                    continue
                try:
                    # Call the receive_message method
                    instance.receive_message(mtype, mdata, sender_id)
                except Exception:
                    print("!! A message listener raised an exception when "
                          "receiving a message and will be removed from the list.")
                    log.exception("")
                    failed.append(identifier)

            if failed:
                # Drop the failing listeners from both the registry and the
                # snapshot; otherwise the next queued message would reach them
                # again and the second removal would raise KeyError.
                for identifier in failed:
                    self._message_listeners.pop(identifier, None)
                listeners = [(identifier, instance)
                             for (identifier, instance) in listeners
                             if identifier not in failed]

    def import_resources(self):
        """Load the resource files contained in the gui package path.

        Every `*_rc.py` file in the package directory is imported; a module
        that fails to import is logged and skipped.
        """
        # Get the path to the `gui` package, where the resource files will be
        # located
        _1, path, _3 = imp.find_module("gui")

        if path not in sys.path:
            sys.path.append(path)

        # Import all the files that end with `_rc.py`
        for m in [f for f in os.listdir(path) if f.endswith("_rc.py")]:
            try:
                import_module(m[:-3])
            except Exception:
                # Best effort: keep loading the other resource modules
                log.exception(
                    "Could not import resource module '{}'".format(m[:-3]))

    def load_plugin_ui(self, plugin_id, name, ui_file, base_instance = None):
        """Load a ui_file from the `ui` directory of the given plugin id.

        This method loads a QtDesigner `.ui` file from the `ui` directory
        under the plugin's root path using the `uic` module and returns the
        instance. A name must be given for saving the instance in the plugin's
        dictionary. The name must be an alias, like 'tools', so it can be
        shared across the application.

        Parameters
        ----------
        plugin_id : int
            The plugin's identifier
        name : str
            The name that will be used to share the object.
        ui_file : str
            The name of the ui file located in the plugin's `ui` directory.
            The `.ui` extension is optional.
        base_instance : QObject, None
            The base instance to load the ui file. The default value is None.
            When it is `None`, the uic module will create a new instance; when
            a proper QWidget is given, the uic will use it as a base.

        Returns
        -------
        QObject

        Raises
        ------
        IOError : If the file was not found or has an invalid extension.
        KeyError : If there is an already loaded ui instance with the same
            name.
        UILoadError : If loading the file with the `uic` module raises any
            exception.
        """
        # Temporary solution for resolving circular dependencies
        engine = sys.modules["ic.engine"]
        plugin = engine.get_plugin(plugin_id)

        # Check if there is a loaded ui with the given name
        if name in plugin.loaded_ui_objects:
            raise KeyError("A ui with name '{}' is already loaded.".format(name))

        # Append/validate the extension, then expand to the absolute path
        ui_file = self._normalize_ui_filename(ui_file)
        ui_path = os.path.join(plugin.root_path, "ui", ui_file)

        # Check if the file exists
        if not os.path.isfile(ui_path):
            raise IOError("The file '{}' does not exist.".format(ui_file))

        try:
            instance = self._instantiate_ui(ui_path, base_instance)
            plugin.loaded_ui_objects[name] = instance
            return instance
        except Exception:
            log.exception("")
            raise UILoadError()

    def get_plugin_ui(self, plugin_id, name):
        """Return the instance of a previously loaded ui file of a plugin.

        The ui object will be a QObject. To load a new ui file use the
        `load_plugin_ui` method.

        Parameters
        ----------
        plugin_id : int
            The identifier of the plugin
        name : str
            The ui object's name/alias.

        Returns
        -------
        QObject

        Raises
        ------
        KeyError : If there is no ui object with the given name.
        """
        # Temporary solution for resolving circular dependencies
        engine = sys.modules["ic.engine"]
        plugin = engine.get_plugin(plugin_id)

        if name not in plugin.loaded_ui_objects:
            raise KeyError("There is no ui object with name '{}'".format(name))
        return plugin.loaded_ui_objects[name]

    def set_language(self, locale_str):
        """Remove the current translator and install one based on locale_str.

        This method will look for an installed qm file with the locale str
        provided. If one is found it will remove the current translator and
        install the new one. After the installation it will call
        `retranslateUi` on all the loaded ui objects (the application's and
        every plugin's). A loaded ui object without a `retranslateUi` method
        is ignored. Finally a "language_changed" message is posted.

        Parameters
        ----------
        locale_str : str
            A str containing the locale, i.e. "pt_BR", "en_US". It can also be
            "default" and if so, the current translator will be removed and
            the language will be set to default.

        Raises
        ------
        ValueError : If no qm file exists for `locale_str` and it is not
            "default".
        """
        # Temporary solution for resolving circular dependencies
        engine = sys.modules["ic.engine"]

        # This is the path where the qm file should be located if installed:
        # `lang_dir`/qm/`locale_str`.qm
        qm_file = os.path.join(
            self.settings["lang_dir"], "qm", locale_str + ".qm")
        qm_exists = os.path.isfile(qm_file)

        # If the file does not exist and the string is not "default", raise
        # an error
        if not (qm_exists or locale_str == "default"):
            raise ValueError(
                "There is no locale '{}' installed".format(locale_str))

        # Remove the current translator if there is one installed
        if self._translator is not None:
            self._qapp.removeTranslator(self._translator)
            self._translator = None

        # If the qm file exists, load it and install the translator to the
        # QApplication instance
        if qm_exists:
            self._translator = QTranslator()
            self._translator.load(qm_file)
            self._qapp.installTranslator(self._translator)

        # Call the retranslateUi method of all the loaded ui objects that
        # have it.
        for ui in self._loaded_ui_objects.values():
            self._retranslate(ui)

        # Translate the loaded ui objects of the plugins
        for plugin in engine.loaded_plugins().values():
            # `loaded_ui_objects` maps names to ui objects; iterate the
            # objects, not the names.
            for ui in plugin.loaded_ui_objects.values():
                self._retranslate(ui)

            if plugin.gui_interface is not None:
                plugin.gui_interface.retranslateUi()

        # Post an anonymous message signaling the language change
        self.post_message("language_changed", {"locale_str": locale_str}, -1)

    def exec_(self):
        """Just wraps the QApplication instance `exec_` method."""
        return self._qapp.exec_()

    def release(self):
        """Release the application resources (not implemented yet)."""
        # TODO
        pass
class MapImageExporter(MapExporter):
    """Exporter that draws nodes and ways onto an image and saves it."""

    def __init__(self, nodes, ways, min_lat, max_lat, min_lon, max_lon, *args,
                 node_color=(0, 0, 0), way_color="allrandom",
                 bg_color="white", enlargement=50000):
        """Export map data (nodes and ways) as a map like image.

        Params:
        nodes - The raw nodes as read by any OSM file reader
        ways - The raw ways as read by any OSM file reader
        min_lat - The southern border of the map
        max_lat - The northern border of the map
        min_lon - The western border of the map
        max_lon - The eastern border of the map
        node_color - The colour of the nodes in the image
        way_color - The colour of the ways in the image
        bg_color - The colour of the image background
        enlargement - Multiplication factor from map coordinate to pixel
                      coordinate. Determines image size.
        """
        super(MapImageExporter, self).__init__(
            min_lat, max_lat, min_lon, max_lon, bg_color, enlargement)

        # Logger named "<module>.<class name>"
        logger_name = '.'.join((__name__, type(self).__name__))
        self.logger = logging.getLogger(logger_name)

        # Only weak references to the map data are kept by the exporter
        self.nodes = WeakValueDictionary(nodes)
        self.ways = WeakValueDictionary(ways)

        self.node_color = node_color
        self.way_color = way_color

    def export(self, filename="export.png"):
        """Export the information to an image file

        Params:
        filename - The filename to export to, must have a valid image
                   extention. Default: export.png
        """
        def to_pixel(point):
            # Offset from the south-west corner, scaled to pixel coordinates
            return ((point.lon - self.min_lon) * self.enlargement,
                    (point.lat - self.min_lat) * self.enlargement)

        self.logger.info('Exporting a map image to %s', filename)

        # Every way becomes one poly-line through its member nodes
        self.logger.info('Drawing the ways')
        for way in self.ways.values():
            outline = [to_pixel(self.nodes[ref]) for ref in way.nodes]
            self.draw.line(outline, fill=self.way_color)

        # Every node becomes a single point
        self.logger.info('Drawing the nodes')
        for point in self.nodes.values():
            self.draw.point(to_pixel(point), fill=self.node_color)

        self._save_image(filename)
class ConfigManager(object):
    '''Manager for a set of config files.

    The config manager abstracts the lookup of files using the XDG
    search paths and ensures that there is only a single instance used
    for each config file.

    The config manager can switch the config file based on the config
    X{profile} that is used. The profile is determined by the notebook
    properties. However this object relies on its creator to set up
    the hooks to get the property from the notebook. Changes to the
    profile are communicated to all users of the config by means of the
    "changed" signals on L{ConfigFile} and L{ConfigDict} objects.
    '''

    def __init__(self, dir=None, dirs=None, profile=None):
        '''Constructor
        @param dir: the folder for reading and writing config files,
        e.g. a C{Dir} or a C{VirtualConfigBackend} object.
        If no dir is given, the XDG basedirs are used and C{dirs} is
        ignored.
        @param dirs: list or generator of C{Dir} objects used as
        search path when a config file does not exist on C{dir}
        @param profile: initial profile name
        '''
        self.profile = profile

        # Caches keyed by the requested filename; values are held weakly
        self._config_files = WeakValueDictionary()
        self._config_dicts = WeakValueDictionary()

        if dir is None:
            assert dirs is None, "Do not provide 'dirs' without 'dir'"

        self._dir = dir
        self._dirs = dirs

    def set_profile(self, profile):
        '''Set the profile to use for the configuration
        @param profile: the profile name or C{None}
        '''
        assert profile is None or isinstance(profile, basestring)
        if profile == self.profile:
            return  # nothing changed

        self.profile = profile
        for name, conffile in self._config_files.items():
            if not name.startswith('<profile>/'):
                continue
            file, defaults = self._get_file(name)
            conffile.set_files(file, defaults)
        # Updates will cascade through the dicts by the
        # "changed" signals on various objects

    def _get_file(self, filename):
        '''Resolve C{filename} to a C{(file, defaults)} pair
        @param filename: the config file name, optionally starting
        with the C{<profile>/} placeholder
        @returns: the file to read and write, and an iterator object
        for the default files
        '''
        placeholder = '<profile>/'
        basepath = filename.replace(placeholder, '')
        if self.profile:
            path = filename.replace(
                placeholder, 'profiles/%s/' % self.profile)
        else:
            path = basepath

        if self._dir:
            file = self._dir.file(path)
            search_path = self._dirs if self._dirs else []
            defaults = DefaultFileIter(search_path, path)

            # With a profile, the non-profile file is the first default
            if self.profile and filename.startswith(placeholder):
                defaults.extra.insert(0, self._dir.file(basepath))
        else:
            file = basedirs.XDG_CONFIG_HOME.file('zim/' + path)
            defaults = XDGConfigFileIter(basepath)

        ## Backward compatibility for profiles
        legacy_names = (
            '<profile>/preferences.conf',
            '<profile>/style.conf',
        )
        if self.profile and filename in legacy_names:
            defaults.extra.insert(0, self._get_backward_file(filename))

        return file, defaults

    def _get_backward_file(self, filename):
        '''Return the file at the old-style profile location
        @param filename: either C{<profile>/preferences.conf} or
        C{<profile>/style.conf}
        @raises AssertionError: for any other filename
        '''
        if filename == '<profile>/preferences.conf':
            template = 'profiles/%s.conf'
        elif filename == '<profile>/style.conf':
            template = 'styles/%s.conf'
        else:
            raise AssertionError

        path = template % self.profile
        if not self._dir:
            return basedirs.XDG_CONFIG_HOME.file('zim/' + path)
        return self._dir.file(path)

    def get_config_file(self, filename):
        '''Returns a C{ConfigFile} object for C{filename}'''
        cache = self._config_files
        if filename in cache:
            return cache[filename]

        file, defaults = self._get_file(filename)
        config_file = ConfigFile(file, defaults)
        cache[filename] = config_file
        return config_file

    def get_config_dict(self, filename):
        '''Returns a C{SectionedConfigDict} object for C{filename}'''
        cache = self._config_dicts
        if filename in cache:
            return cache[filename]

        config_dict = ConfigManagerINIConfigFile(
            self.get_config_file(filename))
        cache[filename] = config_dict
        return config_dict
from weakref import WeakValueDictionary
import gc


class BigDataClass:
    """Example class used as the weakly referenced value in the demo below."""

    def method(self):
        print("Hello")


# One strong reference (`bdc`) plus one weak reference (the dict entry).
bdc = BigDataClass()
wvd = WeakValueDictionary()
wvd['bookid'] = bdc

for key, value in wvd.items():
    print(key, value)

# The loop variable `value` is still bound after the loop, so it is one more
# strong reference to the object; both names have to be deleted.
del bdc
del value
gc.collect()

# No strong reference is left, so on CPython the entry has been dropped and
# this lookup raises KeyError.
print(wvd['bookid'])
if other is None: return False if other == self.kind: return True cheese_list = [Cheese(item) for item in ["奶油", "芝士", "水果", "谷物"]] # [Cheese( 奶油 ), Cheese( 芝士 ), Cheese( 水果 ), Cheese( 谷物 )] print(cheese_list) # 创建对象 weak_dict = WeakValueDictionary() for cheese in cheese_list: weak_dict[cheese.kind] = cheese # {'奶油': Cheese( 奶油 ), '芝士': Cheese( 芝士 ), '水果': Cheese( 水果 ), '谷物': Cheese( 谷物 )} """ 4 -> 4 """ print(dict(weak_dict.items())) cheese_list.remove("水果") cheese_list.remove("芝士") # {'奶油': Cheese( 奶油 ), '谷物': Cheese( 谷物 )} """ 已经被删除了两个item """ print(dict(weak_dict.items()))
class Silk(SilkObject):
    """Structured object whose properties live in a "json" dict, a "numpy"
    record, or a "mixed" combination of the two.

    Subclass attributes such as `_props`, `dtype` and `_positional_args` are
    filled in elsewhere (see the "overridden during registration" note on
    `validate`).
    """
    _anonymous = None           # bool
    # mapping: property name -> descriptor dict (read below through the keys
    # "elementary", "optional", "typeclass"/"typename", "var_array")
    _props = None
    dtype = None                # passed to np.dtype(..., align=True)
    _positional_args = None     # list of property names, in positional order
    __slots__ = [
        "_parent", "_storage_enum", "_storage_nonjson_children",
        "_data", "_children", "_is_none", "__weakref__"
    ]

    def __init__(self, *args, _mode="any", **kwargs):
        """Construct a Silk object.

        _mode selects the construction path:
          "parent"     - child of `kwargs["parent"]`, sharing
                         `kwargs["data_store"]` in `kwargs["storage"]` format
          "from_numpy" - stand-alone object on top of `kwargs["data_store"]`
          "any"        - stand-alone json object, set from args/kwargs
          "empty"      - stand-alone json object, nothing set
          "from_json"  - like "any", using the json property setter
        Any other value raises ValueError.
        """
        self._storage_enum = None
        self._storage_nonjson_children = set()
        self._children = None
        if _mode == "parent":
            self._init(
                kwargs["parent"],
                kwargs["storage"],
                kwargs["data_store"],
            )
        elif _mode == "from_numpy":
            assert "parent" not in kwargs
            self._init(
                None,
                "numpy",
                kwargs["data_store"],
            )
        else:
            assert "parent" not in kwargs
            assert "storage" not in kwargs
            assert "data_store" not in kwargs
            self._init(None, "json", None)
            if _mode == "any":
                self.set(*args, **kwargs)
            elif _mode == "empty":
                pass
            elif _mode == "from_json":
                self.set(*args, prop_setter=_prop_setter_json, **kwargs)
            else:
                raise ValueError(_mode)

    def _init(self, parent, storage, data_store):
        """(Re)initialize parent link, storage mode, children and data store.

        Existing children are detached. For "json" storage a child object is
        created for every non-elementary property; for "numpy" storage
        children are created lazily by `_get_child`.
        """
        from .silkarray import SilkArray
        if parent is not None:
            if storage == "numpy":
                self._parent = lambda: parent  # hard ref
            # NOTE(review): this assignment also runs for "numpy" storage and
            # replaces the hard ref above with a weak one - confirm intended.
            self._parent = weakref.ref(parent)
        else:
            self._parent = lambda: None
        self.storage = storage
        self._is_none = False
        self._storage_nonjson_children.clear()

        # Detach the children of a previous initialization
        if self._children is not None:
            for child in self._children.values():
                child._parent = lambda: None

        if storage == "json":
            self._children = {}
            if data_store is None:
                data_store = {}
        elif storage == "numpy":
            self._children = WeakValueDictionary()
            assert data_store is not None
            assert data_store.dtype == np.dtype(self.dtype, align=True)
            assert data_store.shape == ()
            self._data = data_store
            return
        else:
            raise ValueError(storage)

        assert storage == "json"
        for pname, p in self._props.items():
            if p["elementary"]:
                continue
            t = self._get_typeclass(pname)
            if pname not in data_store:
                if issubclass(t, SilkArray):
                    data_store[pname] = []
                else:
                    data_store[pname] = {}
            c_data_store = data_store[pname]
            self._children[pname] = t(
                _mode="parent",
                storage="json",
                parent=self,
                data_store=c_data_store,
                len_data_store=None,
            )
        self._data = data_store

    def _get_typeclass(self, propname):
        """Return the class for a property: its "typeclass" entry, else the
        registered type named by its "typename" entry."""
        p = self._props[propname]
        if "typeclass" in p:
            t = p["typeclass"]
        else:
            typename = p["typename"]
            t = typenames._silk_types[typename]
        return t

    def copy(self, storage="json"):
        """Returns a copy with the storage in the specified format"""
        cls = type(self)
        if storage == "json":
            json = self.json()
            ret = cls.from_json(json)
            # The json form does not carry the "is None" state of children
            for prop in self._props:
                if not self._props[prop]["elementary"]:
                    child = self._children[prop]
                    is_none = child._is_none
                    ret._children[prop]._is_none = is_none
        elif storage == "numpy":
            ret = cls.from_numpy(self.numpy())
        else:
            raise ValueError(storage)
        return ret

    @classmethod
    def from_json(cls, data):
        """Constructs from JSON data (filtered through `_filter_json`)"""
        data = _filter_json(data)
        return cls(data, _mode="from_json")

    @classmethod
    def from_numpy(cls, data, copy=True, validate=True):
        """Constructs from a numpy array singleton "data"

        Raises TypeError if "data" is not 0-dimensional or has another dtype
        than the class. With copy=True (default) the data is copied first;
        with validate=True (default) the result is validated.
        """
        if data.shape != ():
            raise TypeError("Data must be a singleton")
        if data.dtype != np.dtype(cls.dtype, align=True):
            raise TypeError("Data has the wrong dtype")

        if copy:
            data = datacopy(data)
        ret = cls(_mode="from_numpy", data_store=data)
        if validate:
            ret.validate()
        return ret

    @classmethod
    def empty(cls):
        """Constructs an object without setting any property"""
        return cls(_mode="empty")

    def _get_child(self, childname, force=False):
        """Return the child object for a non-elementary property.

        In "numpy" storage a new child wrapper is built on every call; for an
        optional property whose "HAS_<name>" field is false, NoneChild is
        returned unless `force` is true.
        """
        from .silkarray import SilkArray
        if self.storage == "numpy":
            prop = self._props[childname]
            is_none = False
            if prop["optional"]:
                if not self._data["HAS_" + childname]:
                    is_none = True
            if is_none and not force:
                return NoneChild
            t = self._get_typeclass(childname)
            len_data_store = None
            if issubclass(t, SilkArray):
                if prop.get("var_array", False):
                    len_data_store = self._data["LEN_" + childname]
            child = t(
                _mode="parent",
                parent=self,
                storage="numpy",
                data_store=self._data[childname],
                len_data_store=len_data_store,
            )
            self._children[childname] = child
        return self._children[childname]

    def set(self, *args, prop_setter=_prop_setter_any, **kwargs):
        """Set all properties from args/kwargs.

        A single None (or "none" Silk object) argument clears the object.
        Otherwise the arguments are used as property values; if that fails
        and there is a single positional argument, it is interpreted in turn
        as numpy record, dict, string (`_parse`), iterable, Silk object or
        object with a `__dict__`. Unsupported arguments raise TypeError.
        """
        # Function-scope import: collections.Iterable is gone in Python 3.10+
        from collections.abc import Iterable

        if len(args) == 1 and len(kwargs) == 0:
            if args[0] is None or \
              isinstance(args[0], SilkObject) and args[0]._is_none:
                self._is_none = True
                self._clear_data()
                return

        # TODO: make a nice composite exception that stores all exceptions
        try:
            self._construct(prop_setter, *args, **kwargs)
        except Exception:
            if len(args) != 1 or len(kwargs) != 0:
                raise
            a = args[0]
            try:
                if not isinstance(a, np.void):
                    raise TypeError
                d = {}
                for name in a.dtype.fields:
                    if name.startswith("HAS_"):
                        continue
                    # Skip fields flagged as absent by their HAS_ companion
                    name2 = "HAS_" + name
                    if name2 in a.dtype.names and not a[name2]:
                        continue
                    d[name] = a[name]
                self._construct(prop_setter, **d)
            except Exception:
                if isinstance(a, dict):
                    self._construct(prop_setter, **a)
                elif isinstance(a, str):
                    self._parse(a)
                elif isinstance(a, (Iterable, np.void)):
                    self._construct(prop_setter, *a)
                elif isinstance(a, SilkObject):
                    d = {prop: getattr(a, prop) for prop in dir(a)}
                    self._construct(prop_setter, **d)
                elif hasattr(a, "__dict__"):
                    self._construct(prop_setter, **a.__dict__)
                else:
                    raise TypeError(a)
        self.validate()
        self._is_none = False

    def validate(self):
        pass  # overridden during registration

    def json(self):
        """Returns a JSON representation of the Silk object
        """
        if self.storage == "json":
            return _filter_json(self._data)

        d = {}
        for attr in self._props:
            p = self._props[attr]
            ele = p["elementary"]
            value = None
            if ele:
                if self.storage == "numpy":
                    value = _get_numpy_ele_prop(self, attr)
                else:
                    # A property may be absent from the dict (`_clear_data`
                    # pops entries); treat that like None, as `__getattr__`
                    # does.
                    value = self._data.get(attr)
                if value is not None:
                    t = self._get_typeclass(attr)
                    value = t(value)
            else:
                child = self._get_child(attr)
                if not child._is_none:
                    value = child.json()
            if value is not None:
                d[attr] = value
        return d

    def numpy(self):
        """Returns a numpy representation of the Silk object
        NOTE: for optional members,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          the extra field "HAS_xxx" indicates if the data is present.
        NOTE: for all numpy array members,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          the length of each array is stored in the LEN_xxx field
          TODO: document multidimensional length vector, PTR_LEN_xxx
        NOTE: for numpy array members of variable shape,
          an extra field "PTR_xxx" contains a C pointer to the data
          For this, the dimensionality of the array does not matter,
           e.g. both for IntegerArray and IntegerArrayArray,
            the C pointer will be "int *"
           and both for MyStructArray and MyStructArrayArray,
            the C pointer will be "MyStruct *"
           TODO: add and document SHAPE field
        """
        if self.storage == "numpy":
            return datacopy(self._data)
        new_obj = self.copy("json")
        return new_obj.make_numpy()

    def make_json(self):
        """Sets the internal storage to 'json'
        Returns the dict that is used as internal storage
        """
        if self.storage == "json":
            return self._data
        elif self.storage == "numpy":
            json = _filter_json(self.json(), self)
            parent = self._parent()
            if parent is not None and parent.storage == "numpy":
                parent.numpy_shatter()
            self._init(parent, "json", None)
            self.set(json, prop_setter=_prop_setter_json)
            if parent is not None:
                parent._remove_nonjson_child(self)
                myname = parent._find_child(id(self))
                parent._data[myname] = self._data
            return self._data
        elif self.storage == "mixed":
            for child_id in list(self._storage_nonjson_children):  # copy!
                for child in self._children.values():
                    if id(child) == child_id:
                        child.make_json()
                        break
                else:
                    raise Exception(
                        "Cannot find child that was marked as 'non-JSON'")
            # Above will automatically update storage status to "json"
            return self._data

    def _restore_array_coupling(self):
        pass

    def make_numpy(self, _toplevel=None):
        """Sets the internal storage to 'numpy'
        Returns the numpy array that is used as internal storage buffer
        NOTE: for optional members,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          an extra field "HAS_xxx" indicates if the data is present.
        TODO: update doc
        NOTE: for numpy array members of variable shape,
          an extra field "PTR_xxx" contains a C pointer to the data
          For this, the dimensionality of the array does not matter,
           e.g. both for IntegerArray and IntegerArrayArray,
            the C pointer will be "int *"
           and both for MyStructArray and MyStructArrayArray,
            the C pointer will be "MyStruct *"
        """
        from .silkarray import SilkArray
        if self.storage == "numpy":
            return self._data

        dtype = np.dtype(self.dtype, align=True)
        data = np.zeros(dtype=dtype, shape=(1,))
        for propname, prop in self._props.items():
            if prop["elementary"]:
                value = getattr(self, propname)
                _set_numpy_ele_prop(self, propname, value, data)
            else:
                child = self._get_child(propname)
                if not child._is_none:
                    child.make_numpy(_toplevel=False)
                    if isinstance(child, SilkArray):
                        if prop.get("var_array", False):
                            child._restore_array_coupling(data[0], propname)
                        else:
                            data[0][propname] = np.zeros_like(dtype[propname])
                            # Copy the child into the leading corner of the
                            # buffer; a tuple of slices indexes every axis.
                            slices = tuple(
                                slice(0, v) for v in child._data.shape)
                            data[0][propname][slices] = child._data
                    else:
                        data[0][propname] = child._data
                    child._data = None

        self._init(self._parent(), "numpy", data[0])
        parent = self._parent()
        if parent is not None:
            if parent.storage != "numpy":
                parent._add_nonjson_child(self)
        return data[0]

    def _find_child(self, child_id):
        """Return the property name of the child with id() `child_id`;
        raise KeyError if there is none."""
        for childname, ch in self._children.items():
            if child_id == id(ch):
                return childname
        raise KeyError

    def _add_nonjson_child(self, child):
        """Record that `child` no longer uses json storage; the first such
        child turns this object "mixed" and notifies its parent."""
        childname = self._find_child(id(child))
        if self._props[childname].get("var_array", False) and \
          self.storage == "numpy":
            return
        assert self.storage != "numpy"
        njc = self._storage_nonjson_children
        child_id = id(child)
        if child_id not in njc:
            njc.add(child_id)
            if self.storage == "json":
                self.storage = "mixed"
                parent = self._parent()
                if parent is not None:
                    parent._add_nonjson_child(self)

    def _remove_nonjson_child(self, child):
        """Record that `child` uses json storage again; when no non-json
        child is left this object turns "json" and notifies its parent."""
        assert self.storage != "numpy"
        njc = self._storage_nonjson_children
        child_id = id(child)
        if child_id in njc:
            assert self.storage == "mixed", self.storage
            njc.remove(child_id)
            if len(njc) == 0:
                self.storage = "json"
                parent = self._parent()
                if parent is not None:
                    # `parent` is already dereferenced, as in
                    # `_add_nonjson_child`
                    parent._remove_nonjson_child(self)

    def numpy_shatter(self):
        """
        Breaks up a unified numpy storage into one numpy storage per child
        """
        assert self.storage == "numpy"
        parent = self._parent()
        if parent is not None and parent.storage == "numpy":
            parent.numpy_shatter()
        data = {}
        children = {}
        for prop in self._props:
            p = self._props[prop]
            if p["elementary"]:
                value = getattr(self, prop)
                if value is not None:
                    t = self._get_typeclass(prop)
                    value = t(value)
                data[prop] = value
            else:
                child = self._get_child(prop)
                d = datacopy(child._data)
                data[prop] = d
                child._data = d
                children[prop] = child
        self._data = data
        self._children = children
        self._storage_nonjson_children = set(
            [id(p) for p in children.values()])
        self.storage = "mixed"

    def _construct(self, prop_setter, *args, **kwargs):
        """Assign every property from positional/keyword arguments.

        Mirrors function-call semantics: TypeError on too many positional
        arguments, on a property given twice, and on a missing required
        property (non-optional and without an entry in `_props_init`).
        Keyword arguments that are not properties are ignored.
        """
        propdict = {}
        if len(args) > len(self._positional_args):
            message = ("{0}() takes {1} positional arguments "
                       "but {2} were given").format(
                self.__class__.__name__,
                len(self._positional_args),
                len(args)
            )
            raise TypeError(message)
        for anr, a in enumerate(args):
            propdict[self._positional_args[anr]] = a
        for argname, a in kwargs.items():
            if argname in propdict:
                message = "{0}() got multiple values for argument '{1}'"
                message = message.format(
                    self.__class__.__name__,
                    argname
                )
                raise TypeError(message)
            propdict[argname] = a

        missing = [p for p in self._props if p not in propdict]
        missing_required = [
            p for p in missing
            if not self._props[p]["optional"] and p not in self._props_init]
        if missing_required:
            missing_required = ["'{0}'".format(p) for p in missing_required]
            if len(missing_required) == 1:
                plural = ""
                missing_txt = missing_required[0]
            elif len(missing_required) == 2:
                plural = "s"
                missing_txt = missing_required[0] + " and " + \
                    missing_required[1]
            else:
                plural = "s"
                missing_txt = ", ".join(missing_required[:-1]) + \
                    ", and " + missing_required[-1]
            message = "{0}() missing {1} positional argument{2}: {3}".format(
                self.__class__.__name__,
                len(missing_required),
                plural,
                missing_txt
            )
            raise TypeError(message)

        for propname in self._props:
            value = propdict.get(propname, None)
            if value is None and propname in self._props_init:
                value = self._props_init[propname]
            self._set_prop(propname, value, prop_setter)

    def _parse(self, s):
        raise NotImplementedError  # can be user-defined

    _storage_names = ("numpy", "json", "mixed")

    @property
    def storage(self):
        """Name of the current storage: "numpy", "json" or "mixed"."""
        return self._storage_names[self._storage_enum]

    @storage.setter
    def storage(self, storage):
        assert storage in self._storage_names, storage
        self._storage_enum = self._storage_names.index(storage)

    def __dir__(self):
        return dir(type(self))

    def __setattr__(self, attr, value):
        # Private names and "storage" are real attributes; every other name
        # is treated as a property.
        if attr.startswith("_") or attr == "storage":
            object.__setattr__(self, attr, value)
        else:
            self._set_prop(attr, value, _prop_setter_any)

    def _set_prop(self, prop, value, child_prop_setter):
        """Set one property.

        Raises AttributeError for an unknown property and TypeError when a
        non-optional property is set to None.
        """
        try:
            p = self._props[prop]
        except KeyError:
            raise AttributeError(prop)
        if value is None and not p["optional"]:
            raise TypeError("'%s' cannot be None" % prop)
        ele = p["elementary"]
        if ele:
            if self.storage == "numpy":
                _set_numpy_ele_prop(self, prop, value)
            else:
                if value is not None:
                    t = self._get_typeclass(prop)
                    value = t(value)
                self._data[prop] = value
        else:
            child = self._get_child(prop)
            do_set = True
            if child is NoneChild:
                if value is None:
                    do_set = False
                else:
                    child = self._get_child(prop, force=True)
            if do_set:
                if self.storage == "numpy" and p.get("var_array", False):
                    child.set(value)
                else:
                    child_prop_setter(child, value)
            if self.storage == "numpy" and p["optional"]:
                self._data["HAS_" + prop] = (value is not None)

    def __getattribute__(self, attr):
        value = object.__getattribute__(self, attr)
        if attr.startswith("_") or attr in ("storage", "dtype"):
            return value
        # A value that is just the class-level attribute is rejected here, so
        # the lookup falls through to __getattr__ (the property lookup).
        class_value = getattr(type(self), attr)
        if value is class_value:
            raise AttributeError(value)
        return value

    def __getattr__(self, attr):
        try:
            ele = self._props[attr]["elementary"]
        except KeyError:
            raise AttributeError(attr) from None
        if ele:
            if self.storage == "numpy":
                ret = _get_numpy_ele_prop(self, attr)
            else:
                ret = self._data.get(attr, None)
                if ret is None:
                    assert self._props[attr]["optional"]
        else:
            ret = self._get_child(attr)
            if ret._is_none:
                ret = None
        return ret

    def _print(self, spaces):
        """Return the multi-line text representation, indented by `spaces`."""
        name = ""
        if not self._anonymous:
            name = self.__class__.__name__ + " "
        ret = "{0}(\n".format(name)
        for propname in self._props:
            prop = self._props[propname]
            value = getattr(self, propname)
            if prop["optional"]:
                if value is None:
                    continue
            if self.storage == "numpy" and prop["elementary"]:
                if self._data[propname].dtype.kind == 'S':
                    substr = '"' + value + '"'
                else:
                    substr = str(value)
            else:
                substr = value._print(spaces + 2)
            ret += "{0}{1} = {2},\n".format(
                " " * (spaces + 2), propname, substr)
        ret += "{0})".format(" " * spaces)
        return ret

    def __str__(self):
        return self._print(0)

    def __repr__(self):
        return self._print(0)

    def __eq__(self, other):
        if not isinstance(other, SilkObject):
            return False
        if self.storage == other.storage == "json":
            return self._data == other._data
        else:
            # can't use numpy _data because of PTR and different
            # allocation sizes
            return self.json() == other.json()

    def _clear_data(self):
        """Zero the numpy record, or drop all elementary values and clear
        every child recursively."""
        d = self._data
        if self.storage == "numpy":
            d.fill(np.zeros_like(d))
        else:
            for propname in self._props:
                prop = self._props[propname]
                if prop["elementary"]:
                    d.pop(propname, None)
                else:
                    child = self._get_child(propname)
                    child._clear_data()
class TaskManager(object):
    """
    Provides a set of tools to maintain a list of asyncio Tasks that are to be
    executed during the lifetime of an arbitrary object, usually getting killed with it.

    Tasks are tracked by name in a WeakValueDictionary; all bookkeeping is
    guarded by a re-entrant thread lock.
    """

    def __init__(self):
        self._pending_tasks = WeakValueDictionary()
        self._task_lock = RLock()
        self._shutdown = False
        self._counter = 0
        self._logger = logging.getLogger(self.__class__.__name__)

        # Periodic self-check that warns about long-running non-interval tasks.
        self._checker = self.register_task('_check_tasks', self._check_tasks,
                                           interval=MAX_TASK_AGE, delay=MAX_TASK_AGE * 1.5)

    def _check_tasks(self):
        """
        Log a warning for every non-interval task that is older than MAX_TASK_AGE.
        """
        now = time.time()
        for name, task in self._pending_tasks.items():
            if not task.interval and now - task.start_time > MAX_TASK_AGE:
                self._logger.warning(
                    'Non-interval task "%s" has been running for %.2f!',
                    name, now - task.start_time)

    def replace_task(self, name, *args, **kwargs):
        """
        Replace named task with the new one, cancelling the old one in the process.

        :param name: the name of the task to replace
        :param args: positional arguments forwarded to register_task
        :param kwargs: keyword arguments forwarded to register_task
        :return: a Future that resolves to the newly registered task, or carries
                 the exception raised while registering it
        """
        new_task = Future()

        def cancel_cb(_):
            # Only register the replacement once the old task has finished.
            try:
                new_task.set_result(self.register_task(name, *args, **kwargs))
            except Exception as e:
                new_task.set_exception(e)

        old_task = self.cancel_pending_task(name)
        old_task.add_done_callback(cancel_cb)
        return new_task

    def register_task(self, name, task, *args, delay=None, interval=None, ignore=()):
        """
        Register a Task/(coroutine)function so it can be canceled at shutdown time or by name.

        :param name: the name under which the task is tracked
        :param task: a Task, a coroutine function or a plain callable
        :param args: positional arguments passed to the (coroutine)function
        :param delay: optional delay before the first run
        :param interval: optional interval at which to repeat the call
        :param ignore: tuple of Exception classes that are logged instead of
                       propagated when the task fails; None is treated as ()
        :return: the registered Task; after shutdown the ``task`` argument is
                 returned as-is (cancelled first if it is a pending Task/Future)
        :raises ValueError: on an invalid task, delay/interval or ignore argument
        :raises RuntimeError: if an active task with this name already exists
        """
        if not isinstance(task, Task) and not iscoroutinefunction(task) and not callable(task):
            raise ValueError(
                'Register_task takes a Task or a (coroutine)function as a parameter'
            )
        if (interval or delay) and isinstance(task, Task):
            raise ValueError('Cannot run Task at an interval or with a delay')
        # The error message below offers None, so accept it as "ignore nothing".
        if ignore is None:
            ignore = ()
        # issubclass() raises TypeError on non-classes; check for a class first
        # so that every malformed value ends up as the documented ValueError.
        if not isinstance(ignore, tuple) or not all(
                isinstance(e, type) and issubclass(e, Exception) for e in ignore):
            raise ValueError('Ignore should be a tuple of Exceptions or None')

        with self._task_lock:
            if self._shutdown:
                self._logger.warning("Not adding task %s due to shutdown!", str(task))
                if isinstance(task, (Task, Future)):
                    if not task.done():
                        task.cancel()
                return task

            if self.is_pending_task_active(name):
                raise RuntimeError("Task already exists: '%s'" % name)

            if iscoroutinefunction(task) or callable(task):
                task = task if iscoroutinefunction(task) else coroutine(task)
                if interval:
                    # The default delay for looping calls is the same as the interval
                    delay = interval if delay is None else delay
                    task = ensure_future(
                        interval_runner(delay, interval, task, *args))
                elif delay:
                    task = ensure_future(delay_runner(delay, task, *args))
                else:
                    task = ensure_future(task(*args))
            # Since weak references to list/tuple are not allowed, we're not storing start_time/interval
            # in _pending_tasks. Instead we add them as attributes to the task.
            task.start_time = time.time()
            task.interval = interval

            assert isinstance(task, Task)

            def done_cb(future):
                self._pending_tasks.pop(name, None)
                try:
                    future.result()
                except CancelledError:
                    pass
                except ignore as e:
                    self._logger.error('Task resulted in error: %s', e)

            self._pending_tasks[name] = task
            task.add_done_callback(done_cb)
            return task

    def register_anonymous_task(self, basename, task, *args, **kwargs):
        """
        Wrapper for register_task to derive a unique name from the basename.

        :param basename: prefix of the generated task name
        :return: whatever register_task returns for the generated name
        """
        # Bump the counter under the same lock that guards the rest of the
        # state, so two threads can never derive the same name.
        with self._task_lock:
            self._counter += 1
            name = basename + ' ' + str(self._counter)
        return self.register_task(name, task, *args, **kwargs)

    def cancel_pending_task(self, name):
        """
        Cancels the named task

        :return: the task that was registered under ``name``, or the result of
                 ``succeed(None)`` if there is no such task
        """
        with self._task_lock:
            task = self._pending_tasks.get(name, None)
            if not task:
                return succeed(None)

            if not task.done():
                task.cancel()
                self._pending_tasks.pop(name, None)
            return task

    def cancel_all_pending_tasks(self):
        """
        Cancels all the registered tasks.
        This usually should be called when stopping or destroying the object so no tasks are left floating around.

        :return: list with the result of cancel_pending_task for every registered name
        """
        with self._task_lock:
            assert all(isinstance(t, (Task, Future))
                       for t in self._pending_tasks.values()), self._pending_tasks
            # Iterate over a snapshot of the names: cancelling mutates the mapping.
            return [self.cancel_pending_task(name)
                    for name in list(self._pending_tasks.keys())]

    def is_pending_task_active(self, name):
        """
        Return a boolean determining if a task is active.
        """
        with self._task_lock:
            task = self._pending_tasks.get(name, None)
            return not task.done() if task else False

    def get_tasks(self):
        """
        Returns a list of all registered tasks, excluding tasks the are created by the TaskManager itself.
        """
        with self._task_lock:
            return [t for t in self._pending_tasks.values() if t != self._checker]

    async def wait_for_tasks(self):
        """
        Waits until all registered tasks are done.
        """
        with self._task_lock:
            tasks = self.get_tasks()
        # Await after releasing the thread lock, so other threads are not
        # blocked on it for as long as the tasks take to finish.
        if tasks:
            await gather(*tasks, return_exceptions=True)

    async def shutdown_task_manager(self):
        """
        Clear the task manager, cancel all pending tasks and disallow new tasks being added.
        """
        with self._task_lock:
            self._shutdown = True
            tasks = self.cancel_all_pending_tasks()
        # The shutdown flag already keeps new tasks out; no need to hold the
        # thread lock while the cancelled tasks wind down.
        if tasks:
            with suppress(CancelledError):
                await gather(*tasks)
class SilkArray(SilkObject):
    """Array-like Silk container.

    The contents are kept in one of three storage modes (see ``storage``):
    "json" (plain Python lists in ``_data``), "numpy" (a numpy buffer in
    ``_data`` plus a length vector in ``_Len``) or "mixed" (list storage in
    which at least one child has been registered as non-JSON).
    """
    # Class-level configuration; presumably filled in by generated
    # subclasses -- TODO confirm against the code that creates them.
    _element = None      # class used for the array elements
    dtype = None         # numpy dtype description of the array
    _elementary = None   # truthy if elements are stored directly (no child objects)
    _arity = None        # number of array dimensions
    __slots__ = [
        "_parent", "_storage_enum", "_storage_nonjson_children",
        "_data", "_children", "_Len", "_is_none", "__weakref__"
    ]

    def __init__(self, *args, _mode="any", **kwargs):
        """Construct the array.

        _mode selects how the arguments are interpreted:
          "parent"     - bind to the parent/storage/data_store/len_data_store
                         given in kwargs
          "from_numpy" - wrap the numpy data_store/len_data_store in kwargs
          "any"        - JSON storage, filled with set(*args)
          "empty"      - JSON storage, left empty
          "from_json"  - JSON storage, filled with the JSON prop setter
        Raises ValueError for any other mode.
        """
        self._storage_enum = None
        self._storage_nonjson_children = set()
        self._children = None
        if _mode == "parent":
            self._init(
                kwargs["parent"],
                kwargs["storage"],
                kwargs["data_store"],
                kwargs["len_data_store"],
            )
        elif _mode == "from_numpy":
            assert "parent" not in kwargs
            self._init(
                None,
                "numpy",
                kwargs["data_store"],
                kwargs["len_data_store"],
            )
        else:
            assert "parent" not in kwargs
            assert "storage" not in kwargs
            assert "data_store" not in kwargs
            self._init(None, "json", None, None)
            if _mode == "any":
                self.set(*args)
            elif _mode == "empty":
                pass
            elif _mode == "from_json":
                self.set(*args, prop_setter=_prop_setter_json, **kwargs)
            else:
                raise ValueError(_mode)

    @property
    def _len(self):
        """Current number of elements (first entry of the length vector)."""
        return int(self._Len[0])

    @_len.setter
    def _len(self, value):
        self._Len[0] = value

    def _init(self, parent, storage, data_store, len_data_store):
        """(Re)bind this array to a parent and a data store.

        storage must be "json" or "numpy"; anything else raises ValueError.
        Existing children are detached from this array first.
        """
        if parent is not None:
            if storage == "numpy":
                self._parent = lambda: parent # hard ref
            else:
                self._parent = weakref.ref(parent)
        else:
            self._parent = lambda: None
        self.storage = storage
        self._is_none = False
        self._storage_nonjson_children.clear()

        if self._children is not None:
            # In numpy storage the children are kept in a WeakValueDictionary;
            # iterating it directly would yield the integer keys, not the
            # children, so take the values in that case.
            if hasattr(self._children, "values"):
                former_children = list(self._children.values())
            else:
                former_children = list(self._children)
            for child in former_children:
                child._parent = lambda: None
        if storage == "json":
            self._children = []
            if data_store is None:
                data_store = []
            self._data = data_store
            self._Len = [0]
        elif storage == "numpy":
            self._children = WeakValueDictionary()
            assert data_store is not None
            assert len_data_store is not None
            assert len(len_data_store), len_data_store
            dtype = np.dtype(self.dtype, align=True)
            assert data_store.dtype == dtype
            self._data = data_store
            self._Len = len_data_store
            return
        else:
            raise ValueError(storage)

        assert storage == "json"
        for n in range(len(self._data)):
            # NOTE(review): data_store is self._data here, so this condition
            # cannot hold; kept as in the original.
            if n > len(data_store):
                if issubclass(self._element, SilkArray):
                    self._data.append([])
                else:
                    self._data.append({})
            if not self._elementary:
                child = self._element(
                    _mode="parent",
                    storage="json",
                    parent=self,
                    data_store=self._data[n],
                    len_data_store=None,
                )
                self._children.append(child)
        self._len = len(self._data)

    def copy(self, storage="json"):
        """Returns a copy with the storage in the specified format

        Raises ValueError if storage is neither "json" nor "numpy".
        """
        cls = type(self)
        if storage == "json":
            json = self.json()
            return cls.from_json(json)
        elif storage == "numpy":
            if self.storage != "numpy":
                # lengths() is only available in numpy storage, so build a
                # JSON copy first and convert that copy in place.
                new_obj = self.copy("json")
                new_obj.make_numpy()
                return new_obj
            numpydata = self.numpy()
            # Copy the length vector too, so the copy does not share it with self
            lengths = self.lengths().copy()
            return cls.from_numpy(numpydata, lengths, copy=False)
        else:
            raise ValueError(storage)

    @classmethod
    def from_json(cls, data):
        """Constructs from JSON data (passed through _filter_json first)"""
        data = _filter_json(data)
        return cls(data, _mode="from_json")

    @classmethod
    def _check_numpy_args(cls, arr, lengths, length_can_be_none, self_data):
        """Validate a numpy array and its length vector against this class.

        Raises TypeError if the dimensionality or dtype of arr (or the shape
        of lengths) does not match.
        """
        if self_data is not None:
            d = self_data
            if len(arr.shape) != len(d.shape) or arr.dtype != d.dtype:
                err = TypeError((len(arr.shape), len(d.shape), arr.dtype, d.dtype))
                raise err
        if len(arr.shape) != cls._arity:
            raise TypeError("Array must be %d-dimensional" % cls._arity)
        if arr.dtype != np.dtype(cls.dtype, align=True):
            raise TypeError("Array has the wrong dtype")
        if lengths is None and length_can_be_none:
            return
        assert lengths.dtype == np.uint32
        lenarray_shape = (_get_lenarray_size(arr.shape),)
        if lengths.shape != lenarray_shape:
            err = TypeError((lengths.shape, lenarray_shape, arr.shape))
            raise err

    @classmethod
    def from_numpy(cls, arr, lengths=None, *, copy=True, validate=True):
        """Constructs from a numpy array "arr"
        "lengths": The lengths of the array elements
          If not specified, it is assumed that "arr" is unpadded,
            i.e. that all elements have a valid value
        "arr" may also be an (array, lengths) tuple of two numpy arrays
        """
        if isinstance(arr, tuple) and len(arr) == 2 and \
          isinstance(arr[0], np.ndarray) and isinstance(arr[1], np.ndarray):
            return cls.from_numpy(arr[0], arr[1],
                copy=copy, validate=validate
            )
        cls._check_numpy_args(arr, lengths, length_can_be_none=True, self_data=None)

        if copy:
            arr = datacopy(arr)
        if lengths is None:
            lengths = _get_lenarray_full(arr.shape)
        ret = cls(_mode="from_numpy", data_store=arr, len_data_store=lengths)
        if validate:
            ret.validate()
        return ret

    @classmethod
    def empty(cls):
        """Constructs an empty array in JSON storage"""
        return cls(_mode="empty")

    def _get_child(self, childnr):
        """Return the child object at index childnr (negative counts from the end).

        Raises TypeError for non-int indices and IndexError when out of range.
        In numpy storage a fresh child view is created on every call.
        """
        if not isinstance(childnr, int):
            raise TypeError(childnr)
        if childnr < 0:
            childnr += self._len
        if childnr < 0 or childnr >= self._len:
            raise IndexError(childnr)
        from .silkarray import SilkArray
        if self.storage == "numpy":
            child = self._element(
                _mode="parent",
                parent=self,
                storage="numpy",
                data_store=self._data[childnr],
                len_data_store=self._get_child_lengths(childnr)
            )
            self._children[childnr] = child
        return self._children[childnr]

    def _get_children(self):
        """Iterate over all child objects"""
        if self.storage == "numpy":
            for n in range(self._len):
                yield self._get_child(n)
        else:
            for child in self._children:
                yield child

    def set(self, *args, prop_setter=_prop_setter_any):
        """Replace the contents of the array.

        A single None marks the array as none and clears it. Otherwise the
        argument may be a string (handed to _parse), another SilkArray, or
        an iterable of elements; as a last resort the positional arguments
        themselves are used as the elements.
        """
        # collections.Iterable no longer exists on Python 3.10+; the lookup
        # would raise AttributeError inside the blanket handler below and a
        # list argument would silently be stored as one single element.
        from collections.abc import Iterable

        if len(args) == 1:
            if args[0] is None:
                self._is_none = True
                self._len = 0
                self._clear_data()
                return

        # TODO: make a nice composite exception that stores all exceptions
        try:
            if self.storage == "numpy" and \
              len(args) == 1 and \
              isinstance(args[0], np.ndarray):
                self._construct_from_numpy(args[0], lengths=None)
            else:
                raise TypeError("Not a numpy array")
        except Exception:
            try:
                keep_trying = True
                ok = False
                if len(args) == 1:
                    a = args[0]
                    if isinstance(a, str):
                        self._parse(a)
                    elif isinstance(a, SilkArray):
                        if a.storage == "numpy":
                            if isinstance(a, type(self)):
                                keep_trying = False
                                self._construct_from_numpy(a._data, a._Len)
                            else:
                                # Go through the JSON form of the other array;
                                # _construct takes the elements as varargs and
                                # the prop setter only once.
                                self._construct(_prop_setter_json, *a.json())
                        else:
                            self._construct(prop_setter, *a)
                    elif isinstance(a, Iterable) or isinstance(a, np.void):
                        self._construct(prop_setter, *a)
                    else:
                        raise TypeError(a)
                else:
                    raise TypeError(args)
                ok = True
            except Exception:
                if not ok:
                    if not keep_trying:
                        raise
                    try:
                        self._construct(prop_setter, *args)
                    except Exception:
                        raise
        self.validate()
        self._is_none = False

    def validate(self):
        """Hook for validation; does nothing here"""
        pass

    def json(self):
        """Returns a JSON representation of the Silk object
        """
        if self.storage == "json":
            return _filter_json(self._data)

        if self._elementary:
            return [dd for dd in self._data]
        else:
            d = []
            for child in self._get_children():
                dd = child.json()
                d.append(dd)
            return d

    def numpy(self):
        """Returns a numpy representation of the Silk object
        NOTE: for all numpy arrays,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          the length of each array is stored in the LEN_xxx field
          TODO: document multidimensional length vector, PTR_LEN_xxx
        TODO: add and document SHAPE field
        """
        if self.storage == "numpy":
            return datacopy(self._data)
        new_obj = self.copy("json")
        return new_obj.make_numpy()

    def make_json(self):
        """Sets the internal storage to 'json' and returns the JSON data"""
        if self.storage == "json":
            return self._data
        elif self.storage == "numpy":
            json = _filter_json(self.json(), self)
            parent = self._parent()
            if parent is not None and parent.storage == "numpy":
                parent.numpy_shatter()
            self._init(parent, "json", None, None)
            self.set(json, prop_setter=_prop_setter_json)
            if parent is not None:
                parent._remove_nonjson_child(self)
                myname = parent._find_child(id(self))
                parent._data[myname] = self._data
            return self._data
        elif self.storage == "mixed":
            for child_id in list(self._storage_nonjson_children):  # copy!
                for child in self._get_children():
                    if id(child) == child_id:
                        child.make_json()
                        break
                else:
                    raise Exception("Cannot find child that was marked as 'non-JSON'")
            # Above will automatically update storage status to "json"
            assert self.storage == "json"
            return self._data

    def _get_outer_shape(self):
        """Return the shape (list of maximum lengths per dimension)"""
        shape = [len(self)]
        d = self
        for n in range(1, self._arity):
            # default=0: an empty level must not make max() raise
            maxlen = max((len(dd) for dd in d), default=0)
            shape.append(maxlen)
            d2 = []
            for dd in d:
                for ddd in dd:
                    d2.append(ddd)
            d = d2
        return shape

    def _get_child_lengths(self, child):
        """Return the slice of the length vector that belongs to child number
        "child"; None unless storage is numpy and the array is multi-dimensional
        """
        if self.storage != "numpy":
            return None
        if self._arity == 1:
            return None
        child_size = _get_lenarray_size(self._data.shape[1:])
        start = 1 + child_size * child
        assert start+child_size <= len(self._Len)
        return self._Len[start:start+child_size]

    def _del_child_lengths(self, child):
        """Remove the length entries of child number "child" from the length
        vector, shifting the later entries down"""
        if self.storage != "numpy":
            return
        if self._arity == 1:
            return
        size = _get_lenarray_size(self._data.shape[1:])
        offset = 1 + size * child
        lsize = len(self._Len)
        self._Len[offset:lsize-size] = self._Len[offset+size:lsize]
        self._Len[lsize-size:] = 0
        for n in range(child+1, len(self._children)):
            c_offset = 1 + size * n
            c = self._children[n]
            c._Len = self._Len[c_offset:c_offset+size]

    def _insert_child_lengths(self, child, child_lengths):
        """Insert child_lengths into the length vector at child position
        "child", shifting the later entries up"""
        if self.storage != "numpy":
            assert child_lengths is None
            return
        if self._arity == 1:
            assert child_lengths is None
            return
        assert child_lengths is not None
        size = _get_lenarray_size(self._data.shape[1:])
        offset = 1 + size * child
        lsize = len(self._Len)
        self._Len[offset+size:lsize] = self._Len[offset:lsize-size]
        self._Len[offset:offset+size] = child_lengths
        for n in range(child, len(self._children)):
            c_offset = 1 + size * (n+1)
            c = self._children[n]
            c._Len = self._Len[c_offset:c_offset+size]

    def _restore_array_coupling(self, data=None, myname=None):
        """
        Array members have their length vector stored in the parent data
        In addition, var_arrays have a pointer to their data stored
        If the parent data gets reallocated or copied, then this information
         gets decoupled, so it must be restored
        """
        assert self.storage == "numpy"
        if data is None:
            parent = self._parent()
            if parent is None:
                return
            if parent.storage != "numpy":
                return
            myname = parent._find_child(id(self))
            if not isinstance(parent, SilkArray) and \
              parent._props[myname].get("var_array", False):
                data = parent._data
                assert data is not None
        if data is not None:
            assert myname is not None
            data[myname] = self._data
            data["PTR_"+myname] = self._data.ctypes.data
            data["LEN_"+myname] = self._Len.copy()
            self._Len = data["LEN_"+myname]
            if self._arity > 1:
                data["SHAPE_"+myname] = self._data.shape
                data["PTR_LEN_"+myname] = self._Len.ctypes.data

    def make_numpy(self, _toplevel=None):
        """Sets the internal storage to 'numpy'
        Returns the numpy array that is used as internal storage buffer
        NOTE: for optional members,
          the entire storage buffer is returned,
          including (zeroed) elements if the data is not present!
          an extra field "HAS_xxx" indicates if the data is present.
        TODO: update doc
        NOTE: for numpy array members of variable shape,
          an extra field "PTR_xxx" contains a C pointer to the data
          For this, the dimensionality of the array does not matter,
           e.g. both for IntegerArray and IntegerArrayArray,
            the C pointer will be "int *"
           and both for MyStructArray and MyStructArrayArray,
            the C pointer will be "MyStruct *"
        """
        from .silkarray import SilkArray
        if self.storage == "numpy":
            return self._data

        dtype = np.dtype(self.dtype, align=True)
        shape = self._get_outer_shape()
        data = np.zeros(dtype=dtype, shape=shape)
        lengths = _get_lenarray_empty(shape)
        lengths[0] = len(self)
        if self._elementary:
            # _set_numpy_ele_range is a module-level helper (it is called that
            # way in _construct), not a method of this class.
            _set_numpy_ele_range(self, 0, len(self._data), self._data, self._arity, data)
        else:
            for childnr, child in enumerate(self._get_children()):
                child.make_numpy(_toplevel=False)
                if self._arity > 1:
                    # numpy needs a tuple (not a list) of slices here
                    slices = tuple(slice(0, v) for v in child._data.shape)
                    data[childnr][slices] = child._data
                else:
                    try:
                        data[childnr] = child._data
                    except ValueError: #numpy bug
                        for field in child._data.dtype.names:
                            data[childnr][field] = child._data[field]
                if self._arity > 1:
                    child_size = _get_lenarray_size(shape[1:])
                    start = 1 + child_size * childnr
                    arr1 = lengths[start:start+child_size]
                    shape1 = data.shape[1:]
                    arr2 = child._Len
                    shape2 = child._data.shape
                    _lenarray_copypad(arr1, shape1, arr2, shape2)

        self._init(self._parent(), "numpy", data, lengths)
        parent = self._parent()
        if parent is not None:
            if parent.storage != "numpy":
                parent._add_nonjson_child(self)
        for child in self._get_children():
            child._restore_array_coupling()

        return data

    def lengths(self):
        """Returns the length vector (numpy storage only)"""
        assert self.storage == "numpy"
        return self._Len

    def realloc(self, *shape):
        """Reallocate the numpy buffer to a new, larger shape.

        shape may be given as separate integers or as one tuple.
        Raises ValueError if the dimensionality is wrong or a dimension would
        shrink, and Exception if the array is part of a larger numpy buffer.
        """
        assert self.storage == "numpy"
        if len(shape) == 1 and isinstance(shape[0], tuple):
            shape = shape[0]
        parent = self._parent()
        if parent is not None:
            myname = parent._find_child(id(self))
            if parent.storage == "numpy":
                if not parent._props[myname].get("var_array", False):
                    raise Exception(
                        "Cannot reallocate numpy array that is "
                        "part of a larger numpy buffer. "
                        "Use numpy_shatter() on the parent to allow "
                        "reallocation"
                    )
        if len(shape) != self._arity:
            msg = "Shape must have %d dimensions, not %d"
            raise ValueError(msg % (self._arity, len(shape)))
        min_shape = self._data.shape
        for n in range(self._arity):
            msg = "Dimension %d: shape must have at least length %d, not %d"
            if min_shape[n] > shape[n]:
                raise ValueError(msg % (n+1, min_shape[n], shape[n]))
        old_data = self._data
        old_len = self._Len
        # Use the aligned dtype: _init asserts that the buffer has exactly
        # np.dtype(self.dtype, align=True).
        dtype = np.dtype(self.dtype, align=True)
        self._data = np.zeros(dtype=dtype, shape=shape)
        # numpy needs a tuple (not a list) of slices here
        slices = tuple(slice(0, s) for s in min_shape)
        self._data[slices] = old_data
        self._Len = _get_lenarray_empty(shape)
        _lenarray_copypad(self._Len, shape, old_len, old_data.shape)
        self._init(parent, "numpy", self._data, self._Len)
        self._restore_array_coupling()

    def _find_child(self, child_id):
        """Return the index of the child whose id() is child_id.

        Raises KeyError if there is no such child.
        """
        if self.storage == "numpy":
            for childname, ch in self._children.items():
                if child_id == id(ch):
                    return childname
        else:
            for childname, ch in enumerate(self._children):
                if child_id == id(ch):
                    return childname
        raise KeyError

    def _add_nonjson_child(self, child):
        """Register child as non-JSON; a "json" array becomes "mixed" and the
        change is propagated to the parent"""
        assert self.storage != "numpy"
        njc = self._storage_nonjson_children
        child_id = id(child)
        if child_id not in njc:
            njc.add(child_id)
            if self.storage == "json":
                self.storage = "mixed"
                parent = self._parent()
                if parent is not None:
                    parent._add_nonjson_child(self)

    def _remove_nonjson_child(self, child):
        """Unregister child as non-JSON; when no non-JSON child is left the
        array becomes "json" again and the change is propagated to the parent"""
        assert self.storage != "numpy"
        njc = self._storage_nonjson_children
        child_id = id(child)
        if child_id in njc:
            assert self.storage == "mixed", self.storage
            njc.remove(child_id)
            if len(njc) == 0:
                self.storage = "json"
                parent = self._parent()
                if parent is not None:
                    # self._parent() already returned the parent object;
                    # it must not be called a second time.
                    parent._remove_nonjson_child(self)

    def numpy_shatter(self):
        """
        Breaks up a unified numpy storage into one numpy storage per child
        """
        assert self.storage == "numpy"
        assert not self._elementary
        parent = self._parent()
        if parent is not None and parent.storage == "numpy":
            parent.numpy_shatter()
        data = []
        children = []
        for child in self._get_children():
            d = datacopy(child._data)
            data.append(d)
            child._data = d
            children.append(child)
        self._data = data
        self._children = children
        self._storage_nonjson_children = set([id(p) for p in children])
        self.storage = "mixed"

    def _construct(self, prop_setter, *args):
        """Fill the array with the elements given as positional arguments.

        Raises IndexError in numpy storage when there are more elements than
        the buffer can hold.
        """
        with _ArrayConstructContext(self):
            if self.storage == "numpy":
                if len(args) > len(self._data):
                    msg = "index {0} is out of bounds for axis with size {1}"\
                          .format(len(args), len(self._data))
                    raise IndexError(msg)
                if self._elementary:
                    _set_numpy_ele_range(self, 0, len(args), args, self._arity)
                else:
                    for anr, a in enumerate(args):
                        child = self._get_child(anr)
                        child.set(args[anr], prop_setter=prop_setter)
            else:
                if self._elementary:
                    newdata = []
                    for anr, a in enumerate(args):
                        v = self._element(a)
                        newdata.append(v)
                    self._data[:] = newdata
                else:
                    for n in range(self._len, len(args)):
                        if issubclass(self._element, SilkArray):
                            self._data.append([])
                        else:
                            self._data.append({})
                        child = self._element(
                            _mode="parent",
                            storage=self.storage,
                            parent=self,
                            data_store=self._data[n],
                            len_data_store=self._get_child_lengths(n)
                        )
                        self._children.append(child)

                    for n in range(len(args)):
                        child = self._children[n]
                        child.set(args[n], prop_setter=prop_setter)

            if len(args) < self._len:
                if self.storage == "numpy":
                    # Children are kept in a WeakValueDictionary keyed by
                    # index here, which cannot be sliced: drop stale entries
                    # by key and zero the unused tail of the buffer.
                    stale = [k for k in self._children.keys() if k >= len(args)]
                    for k in stale:
                        self._children.pop(k, None)
                    self._data[len(args):] = \
                      np.zeros_like(self._data[len(args):])
                else:
                    self._children[:] = self._children[:len(args)]
                    self._data[:] = self._data[:len(args)]
            self._len = len(args)

    def _construct_from_numpy(self, arr, lengths):
        """Take over a copy of the numpy array arr and its length vector"""
        if self.storage != "numpy":
            self._init(self._parent(), "numpy", arr, lengths)
            self.make_json()
            return

        self._check_numpy_args(arr, lengths, self_data=self._data, length_can_be_none=False)
        if lengths is None:
            lengths = _get_lenarray_full(arr.shape)

        self._data = datacopy(arr)
        self._Len = lengths.copy()
        self._restore_array_coupling()

    def _parse(self, s):
        raise NotImplementedError  # can be user-defined

    _storage_names = ("numpy", "json", "mixed")

    @property
    def storage(self):
        """Name of the current storage mode: "numpy", "json" or "mixed" """
        return self._storage_names[self._storage_enum]

    @storage.setter
    def storage(self, storage):
        assert storage in self._storage_names, storage
        self._storage_enum = self._storage_names.index(storage)

    def __dir__(self):
        return dir(type(self))

    def __setattr__(self, attr, value):
        # Private attributes and "storage" are real attributes; everything
        # else is routed to the element setter.
        if attr.startswith("_") or attr == "storage":
            object.__setattr__(self, attr, value)
        else:
            self._set_prop(attr, value, _prop_setter_any)

    def __getitem__(self, item):
        """Return the element at an integer index, or a new array of the same
        type for a slice. Raises TypeError for other index types."""
        if isinstance(item, slice):
            return type(self)([self[v] for v in range(*item.indices(len(self)))])
        if not isinstance(item, int):
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       item.__class__.__name__))
        if self._elementary:
            if self.storage == "numpy":
                return _get_numpy_ele_prop(self, item, self._len)
            else:
                return self._data[:self._len][item]
        else:
            return self._get_child(item)

    def _set_prop(self, item, value, prop_setter=_prop_setter_any):
        """Assign value to the element at index item"""
        if self._elementary:
            if self.storage == "numpy":
                _set_numpy_ele_prop(self, item, value)
            else:
                # Negative indices count from the end
                if item < 0:
                    item += self._len
                if item < 0 or item >= self._len:
                    raise IndexError(item)
                self._data[item] = self._element(value)
        else:
            child = self._get_child(item)
            child.set(value, prop_setter=prop_setter)

    def __setitem__(self, item, value):
        """Assign to an integer index or to a slice.

        Raises IndexError when a slice and the assigned sequence differ in
        length, and TypeError for other index types.
        """
        if isinstance(item, slice):
            start, stop, stride = item.indices(self._len)
            indices = list(range(start, stop, stride))
            if len(indices) != len(value):
                msg = "Cannot assign to a slice of length %d using " \
                      "a sequence of length %d"
                raise IndexError(msg % (len(indices), len(value)))
            # Pair every target index with the value at the same *position*
            for pos, n in enumerate(indices):
                self._set_prop(n, value[pos])
            return
        elif isinstance(item, int):
            self._set_prop(item, value)
        else:
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       item.__class__.__name__))

    def __delitem__(self, item):
        """Delete the element(s) at an integer index or slice"""
        if isinstance(item, slice):
            start, stop, stride = item.indices(self._len)
            indices = list(range(start, stop, stride))
            # Delete from the back so the remaining indices stay valid
            for n in reversed(indices):
                self.pop(n)
            return
        if not isinstance(item, int):
            msg = "{0} indices must be integers or slices, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       item.__class__.__name__))
        self.pop(item)

    def pop(self, index=-1):
        """Remove and return the element at index (default: the last one).

        Raises TypeError for non-int indices and IndexError when out of range.
        """
        if not isinstance(index, int):
            msg = "{0} indices must be integers, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       index.__class__.__name__))
        if index < 0:
            index += self._len
        # The upper bound matters for numpy storage, where the buffer may be
        # larger than the number of valid elements.
        if index < 0 or index >= self._len:
            raise IndexError
        if self.storage == "numpy":
            ret_data = datacopy(self._data[index])
            ret_lengths = None
            if self._arity > 1:
                ret_lengths = _get_lenarray_empty(ret_data.shape)
            ret = self._element(
                _mode="from_numpy",
                data_store=ret_data,
                len_data_store=ret_lengths,
            )
            self._data[index:self._len-1] = self._data[index+1:self._len]
            try:
                self._data[self._len-1] = np.zeros_like(self._data[self._len-1])
            except ValueError:  # numpy bug
                for field in self._data.dtype.fields:
                    self._data[self._len-1][field] = \
                      np.zeros_like(self._data[self._len-1][field])
            self._del_child_lengths(index)
        elif self._elementary:
            ret = self._data[:self._len][index]
            self._data.__delitem__(index)
        else:
            ret = self._children[:self._len][index].copy()
            self._children.__delitem__(index)
            self._data.__delitem__(index)
        self._len -= 1
        return ret

    def _Len__(self):
        return self._len

    def _print(self, spaces):
        """Return an indented multi-line text representation"""
        ret = "{0} (\n".format(self.__class__.__name__)
        for n in range(self._len):
            if self._elementary:
                value = self._data[n]
                if self.storage == "numpy":
                    if value.dtype.kind == 'S':
                        substr = '"' + value.decode() + '"'
                    else:
                        substr = str(value)
                else:
                    substr = value._print(spaces+2)
            else:
                value = self._get_child(n)
                substr = value._print(spaces+2)
            ret += "{0}{1},\n".format(" " * (spaces+2), substr)
        ret += "{0})".format(" " * spaces)
        return ret

    def __str__(self):
        return self._print(0)

    def __repr__(self):
        return self._print(0)

    def clear(self):
        """Remove all elements"""
        self.set([])

    def append(self, item):
        """Add item at the end of the array"""
        self.insert(self._len, item)

    def insert(self, index, item):
        """Insert item before index.

        Raises TypeError for non-int indices and IndexError when the index is
        out of range or the numpy buffer is full.
        """
        if not isinstance(index, int):
            msg = "{0} indices must be integers, not {1}"
            raise TypeError(msg.format(self.__class__.__name__,
                                       index.__class__.__name__))
        if index < 0:
            index += self._len
        if index < 0:
            raise IndexError
        if self.storage == "numpy":
            if self._len >= len(self._data):
                raise IndexError("Numpy array overflows allocated space")
            if not self._elementary:
                ele = self._element(item)
                child_data = ele.make_numpy()
                child_lengths = None
                if self._arity > 1:
                    child_lengths = ele.lengths()
                self._data[index+1:self._len+1] = self._data[index:self._len]
                if self._arity > 1:
                    # numpy needs a tuple (not a list) of slices here
                    slices = tuple(slice(0, v) for v in child_data.shape)
                    self._data[index][slices] = child_data
                else:
                    self._data[index] = child_data
                self._insert_child_lengths(index, child_lengths)
            else:
                self._data[self._len] = item  # dry run
                self._data[index+1:self._len+1] = self._data[index:self._len]
                self._data[index] = item  # should give no exception now
            self._len += 1
        else:
            with _ArrayInsertContext(self, index):
                if self._elementary:
                    self._data[index] = item
                else:
                    child = self._element(
                        _mode="parent",
                        storage=self.storage,
                        parent=self,
                        data_store=self._data[index],
                        len_data_store=self._get_child_lengths(index)
                    )
                    self._children.insert(index, child)
                    child.set(item)

    def __eq__(self, other):
        if not isinstance(other, SilkArray):
            return False
        if self.storage == other.storage == "json":
            return self._data == other._data
        else:  # can't use numpy _data because of PTR and different allocation sizes
            return self.json() == other.json()

    def __len__(self):
        return self._len

    def _clear_data(self):
        """Zero the numpy buffer, or clear the data of every child"""
        d = self._data
        if self.storage == "numpy":
            d[:] = np.zeros_like(d)
        else:
            for child in self._get_children():
                child._clear_data()
class PersistentDict(object):
    """
    Mapping object that is persistently stored

    Items are assigned to one of ``bucket_count`` buckets by hashing their key.
    Buckets are read from and written to the bucket store selected by
    ``store_uri``; every modification stores the affected bucket immediately.

    :param store_uri: URI for storing buckets; see :py:class:`~BaseBucketStore`
    :type store_uri: :py:class:`str`

    :param bucket_count: number of buckets to use for storing data
    :type bucket_count: :py:class:`int`

    :param bucket_salt: salt for finding buckets to store data
    :type bucket_salt: :py:class:`int`

    :param cache_size: number of most recently fetched buckets to keep in memory
    :type cache_size: :py:class:`int`

    :param cache_keys: whether to cache all keys in memory
    :type cache_keys: :py:class:`bool`
    """
    # settings used when neither the stored head nor the caller provides a value
    persistent_defaults = {
        'bucket_count': 32,
        'bucket_salt': 0,
    }

    def __init__(self, store_uri, bucket_count=NOTSET, bucket_salt=NOTSET, cache_size=3, cache_keys=True):
        self._bucket_store = BaseBucketStore.from_uri(store_uri=store_uri, default_scheme='file')
        # set empty fields
        self._bucket_count = None
        self._bucket_salt = None
        self._bucket_keys = set()
        self.bucket_key_fmt = None
        self._keys_cache = None
        self._bucket_cache = None
        self._cache_size = None
        # load current settings
        try:
            for attr, value in self._bucket_store.fetch_head().items():
                setattr(self, attr, value)
            self._update_bucket_key_fmt()
        except BucketNotFound:
            # no head stored yet: this is a fresh mapping
            pass
        # apply new settings
        self.bucket_count = bucket_count
        self.bucket_salt = bucket_salt
        # bounded store keeping recently fetched buckets alive
        self.cache_size = cache_size
        # weakref store for objects still in use
        self._active_buckets = WeakValueDictionary()
        self._active_items = WeakValueDictionary()
        # store new settings
        self._store_head()
        # cache keys in memory
        self.cache_keys = cache_keys

    @property
    def store_uri(self):
        """URI of the underlying bucket store"""
        return self._bucket_store.store_uri

    # Settings
    def _store_head(self):
        """
        Store the meta-information of the dict
        """
        self._bucket_store.store_head({
            attr: getattr(self, attr) for attr in
            # work directly on internal values, setters are called as part of init for finalization
            ('_bucket_count', '_bucket_salt', '_bucket_keys')
        })

    def _bucket_fmt_digits(self, bucket_count=None):
        """Return the number of hex digits required for the bucket name"""
        bucket_count = bucket_count or self._bucket_count
        return max(int(math.ceil(math.log(bucket_count, 16))), 1)

    # exposed settings
    @property
    def cache_size(self):
        """
        Get/Set the number of fetched buckets kept alive in memory

        :note: Setting ``cache_size`` replaces the bucket cache by an empty one.
               Falsy values are treated as ``1``.
        """
        return self._cache_size

    @cache_size.setter
    def cache_size(self, value):
        self._cache_size = int(value or 1)
        self._bucket_cache = deque(maxlen=self.cache_size)

    @property
    def bucket_salt(self):
        """
        Get/Set the ``bucket_salt`` of the persistent mapping

        :note: The salt can only be set while it is uninitialised. Changing
               the salt of an initialised mapping is not implemented yet and
               raises :py:exc:`NotImplementedError`.
        """
        return self._bucket_salt

    @bucket_salt.setter
    def bucket_salt(self, value):
        # default if unset
        if value == NOTSET:
            # keep whatever has been loaded from the store
            if self._bucket_salt is not None:
                return
            self._bucket_salt = self.persistent_defaults['bucket_salt']
        else:
            value = int(value)
            # no change
            if self._bucket_salt == value:
                return
            # uninitialized, we don't have content yet
            elif self._bucket_salt is None:
                self._bucket_salt = value
            # TODO: allow resalting backend
            else:
                raise NotImplementedError('Changing bucket salt not implemented yet')
        self._update_bucket_key_fmt()

    @property
    def bucket_count(self):
        """
        Get/Set the ``bucket_count`` of the persistent mapping

        :note: The count can only be set while it is uninitialised. Changing
               the count of an initialised mapping is not implemented yet and
               raises :py:exc:`NotImplementedError`.

        :raises ValueError: if set to less than one bucket
        """
        return self._bucket_count

    @bucket_count.setter
    def bucket_count(self, value):
        # default if unset
        if value == NOTSET:
            # keep whatever has been loaded from the store
            if self._bucket_count is not None:
                return
            self._bucket_count = self.persistent_defaults['bucket_count']
        else:
            value = int(value)
            if value < 1:
                raise ValueError('At least one bucket must be used')
            # no change
            elif self._bucket_count == value:
                return
            # uninitialized, we don't have content yet
            elif self._bucket_count is None:
                self._bucket_count = value
            # TODO: allow resizing backend
            else:
                raise NotImplementedError('Changing bucket count not implemented yet')
        # apply secondary settings
        self._update_bucket_key_fmt()

    @property
    def cache_keys(self):
        """
        Get/Set whether all keys are cached in memory

        :note: Switching the cache on reads all keys of the mapping once.
        """
        return self._keys_cache is not None

    @cache_keys.setter
    def cache_keys(self, value):
        if value and self._keys_cache is None:  # switch on
            self._keys_cache = set(self.keys())
        elif not value and self._keys_cache is not None:  # switch off
            self._keys_cache = None

    def _update_bucket_key_fmt(self):
        """Rebuild the format string for bucket names from count and salt"""
        # key: count, salt, index
        self.bucket_key_fmt = "pdictbkt_%(bucket_count)x%(bucket_salt)s%%0%(index_digits)dx" % {
            'bucket_count': self.bucket_count,
            'bucket_salt': HASHKEY_HEXFMT % hashkey(self.bucket_salt, self.bucket_salt),
            'index_digits': self._bucket_fmt_digits(),
        }

    # bucket management
    def _bucket_key(self, key):
        """
        Create the bucket identifier for a given key

        :param key: key to the content in-memory
        :return: key to the bucket stored persistently
        :rtype: str
        """
        return self.bucket_key_fmt % (hashkey(key) % self._bucket_count)

    def _fetch_bucket(self, bucket_key):
        """
        Return a bucket from disk or create a new one

        The bucket is registered as active and pushed to the bucket cache.

        :param bucket_key: key for the bucket
        :return: bucket for ``bucket_key``
        :rtype: :py:class:`~DictBucket`
        """
        try:
            bucket = self._bucket_store.fetch_bucket(bucket_key=bucket_key)
        except BucketNotFound:
            bucket = DictBucket()
        self._active_buckets[bucket_key] = bucket
        # the strong reference in the deque keeps the weakly held bucket alive
        self._bucket_cache.appendleft(bucket)
        return bucket

    def _get_bucket(self, bucket_key):
        """
        Return the appropriate bucket

        May return the cached bucket if available.

        :param bucket_key: key for the bucket
        :return: bucket for ``bucket_key``
        :rtype: :py:class:`~DictBucket`
        """
        try:
            return self._active_buckets[bucket_key]
        except KeyError:
            return self._fetch_bucket(bucket_key)

    def _store_bucket(self, bucket_key, bucket=None):
        """
        Store a bucket on disk

        Empty buckets are freed from the store instead of being written.

        :param bucket_key: key for the entire bucket
        :param bucket: the bucket to store; if ``None``, the active bucket for
                       ``bucket_key`` is used and nothing happens if there is none
        """
        if bucket is None:
            try:
                bucket = self._active_buckets[bucket_key]
            except KeyError:
                return
        if bucket:
            self._bucket_store.store_bucket(bucket_key=bucket_key, bucket=bucket)
            self._add_bucket_key(bucket_key)
        # free empty buckets
        else:
            self._bucket_store.free_bucket(bucket_key)
            self._discard_bucket_key(bucket_key)

    def _add_bucket_key(self, bucket_key):
        """Record a bucket as in use; the head is stored only on change"""
        if bucket_key not in self._bucket_keys:
            self._bucket_keys.add(bucket_key)
            self._store_head()

    def _discard_bucket_key(self, bucket_key):
        """Record a bucket as unused; the head is stored only on change"""
        if bucket_key in self._bucket_keys:
            self._bucket_keys.remove(bucket_key)
            self._store_head()

    # cache management
    # Item cache
    def _set_cached_item(self, key, item):
        """Cache reference to existing item"""
        try:
            self._active_items[key] = item
        except TypeError:
            # best effort: the weak dictionary rejected this key/item, skip caching
            pass

    def _get_cached_item(self, key):
        """Get reference to existing item; raises KeyError if item cannot be fetched"""
        try:
            return self._active_items[key]
        except TypeError:
            raise KeyError

    def _del_cached_item(self, key):
        """Release reference to existing item"""
        try:
            del self._active_items[key]
        except (TypeError, KeyError):
            pass

    # paths and files
    def flush(self):
        """
        Commit all outstanding changes to persistent store
        """
        for bucket_key, bucket in self._active_buckets.items():
            self._store_bucket(bucket_key, bucket)

    # dictionary interface
    def __getitem__(self, key):
        """Return the item for ``key``; raises KeyError if the bucket lacks it"""
        # - use cached reference to existing item
        # - fetch item from cached reference to existing bucket
        # - fetch item from fetched bucket
        try:
            return self._get_cached_item(key)
        except KeyError:
            bucket = self._get_bucket(self._bucket_key(key))
            item = bucket[key]
            self._set_cached_item(key, item)
            return item

    def __setitem__(self, key, value):
        """Insert ``value`` for ``key`` and store the affected bucket"""
        bucket_key = self._bucket_key(key)
        bucket = self._get_bucket(bucket_key)
        bucket[key] = value
        self._store_bucket(bucket_key, bucket)
        if self._keys_cache is not None:
            self._keys_cache.add(key)
        # update item cache
        self._set_cached_item(key, value)

    def __delitem__(self, key):
        """Remove ``key`` and store the affected bucket; raises KeyError if missing"""
        bucket_key = self._bucket_key(key)
        bucket = self._get_bucket(bucket_key)
        del bucket[key]
        # hand over the bucket we already hold, same as ``__setitem__``
        self._store_bucket(bucket_key, bucket)
        if self._keys_cache is not None:
            self._keys_cache.discard(key)
        self._del_cached_item(key)

    # container protocol
    def __contains__(self, key):
        """Test whether ``key`` is in the mapping, preferring in-memory caches"""
        if self._keys_cache is not None:
            return key in self._keys_cache
        elif key in self._active_items:
            return True
        else:
            bucket = self._get_bucket(self._bucket_key(key))
            return key in bucket

    def __len__(self):
        """Return the number of items, reading all buckets if keys are not cached"""
        # try cached
        if self._keys_cache is not None:
            return len(self._keys_cache)
        # count each bucket, see 'keys' for iteration scheme
        read_buckets, length = set(), 0
        # start with the buckets we have in memory
        # ``items`` hands out the bucket itself, so no second lookup in the
        # weak dictionary is needed
        for bucket_key, bucket in self._active_buckets.items():
            length += len(bucket)
            read_buckets.add(bucket_key)
        # pull in remaining buckets
        for bucket_key in self._bucket_keys:
            if bucket_key not in read_buckets:
                length += len(self._fetch_bucket(bucket_key))
                read_buckets.add(bucket_key)
        return length

    def __bool__(self):
        # can only have items if we have buckets
        return bool(self._bucket_keys)

    __nonzero__ = __bool__

    def __eq__(self, other):
        """
        Compare the content of this mapping to another mapping

        :param other: object to compare to
        :return: whether ``other`` is a mapping with the same keys and values
        :rtype: bool
        """
        # other is pdict, try some fast comparisons
        if isinstance(other, PersistentDict):
            # we are the same store
            if (
                self._bucket_store == other._bucket_store
                and self.bucket_count == other.bucket_count
                and self.bucket_salt == other.bucket_salt
            ):
                return True
            # different keys, cannot be equal
            # only decisive if *both* sides cache keys: a disabled cache is
            # ``None`` and always differs from a set of keys
            if (
                self._keys_cache is not None
                and other._keys_cache is not None
                and self._keys_cache != other._keys_cache
            ):
                return False
        # not a mapping, cannot be equal
        elif not isinstance(other, abc.Mapping):
            return False
        # no fast path resolved...
        own_length = len(self)
        # different sizes, cannot be equal; this also makes the item-wise
        # check below sufficient, as ``other`` cannot hold additional keys
        if own_length != len(other):
            return False
        # try a not-quite slow path
        if own_length // self.bucket_count <= self.cache_size:
            # we're probably in memory already, just rewrap content
            return self.copy() == other
        try:
            return all(other[key] == value for key, value in self.items())
        except KeyError:
            # other is missing one of our keys
            return False

    def __ne__(self, other):
        return not self == other

    def __iter__(self):
        """:see: :py:meth:`~.PersistentDict.keys`"""
        read_buckets = set()
        # start with the buckets we have in memory
        for bucket_key, bucket in self._active_buckets.items():
            for item_key in bucket.keys():
                yield item_key
            read_buckets.add(bucket_key)
        # pull in all buckets
        for bucket_key in self._bucket_keys:
            if bucket_key not in read_buckets:
                bucket = self._fetch_bucket(bucket_key)
                for item_key in bucket.keys():
                    yield item_key
                read_buckets.add(bucket_key)

    # dictionary methods
    def get(self, key, default=None):
        """
        Return the value for key if key is in the dictionary, else default. If
        default is not given, it defaults to ``None``, so that this method
        never raises a :py:exc:`KeyError`.

        :param key: key to an item in the dictionary
        :param default: default to return if no item exists
        """
        try:
            return self[key]
        except KeyError:
            return default

    def pop(self, key, default=NOTSET):
        """
        If ``key`` is in the dictionary, remove it and return its value, else
        return ``default``. If ``default`` is not given and ``key`` is not in
        the dictionary, a KeyError is raised.

        :param key: key to an item in the dictionary
        :param default: default to return if no item exists
        :raises KeyError: if no items exists and no default is given
        """
        try:
            item = self[key]
            del self[key]
        except KeyError:
            if default is NOTSET:
                raise
            item = default
        return item

    def popitem(self):
        """
        Remove and return an arbitrary (key, value) pair from the dictionary.

        popitem() is useful to destructively iterate over a dictionary, as
        often used in set algorithms. If the dictionary is empty, calling
        popitem() raises a KeyError.

        :raises KeyError: if the dictionary is empty
        """
        try:
            key = next(iter(self))
        except StopIteration:
            raise KeyError
        else:
            return key, self.pop(key)

    def setdefault(self, key, default=None):
        """
        If key is in the dictionary, return its value. If not, insert key with a
        value of ``default`` and return ``default``. ``default`` defaults to
        ``None``.

        :param key: key to an item in the dictionary
        :param default: default to insert and return if no item exists
        """
        try:
            return self[key]
        except KeyError:
            self[key] = default
            return default

    def clear(self):
        """Remove all items from the dictionary."""
        # clear persistent storage
        for bucket_key in self._bucket_keys:
            self._bucket_store.free_bucket(bucket_key=bucket_key)
        self._bucket_keys = type(self._bucket_keys)()
        self._store_head()
        # reset caches
        self._bucket_cache = deque(maxlen=self.cache_size)
        self._active_buckets = type(self._active_buckets)()
        self._active_items = type(self._active_items)()
        self._keys_cache = None if self._keys_cache is None else type(self._keys_cache)()

    def update(self, other=None, **kwargs):
        """
        Update the dictionary with the ``(key,value)`` pairs from other,
        overwriting existing keys.

        :py:meth:`~.PersistentDict.update` accepts either another dictionary
        object or an iterable of ``(key,value)`` pairs (as tuples or other
        iterables of length two). If keyword arguments are specified, the
        dictionary is then updated with those ``(key,value)`` pairs:
        ``d.update(red=1, blue=2)``.

        :param other: mapping or iterable of ``(key,value)`` pairs
        :param kwargs: ``key=value`` arguments to insert
        :return: None

        :note: This function is faster for large collections as changes are made
               per bucket, not per item. The drawback is a larger memory
               consumption as the entire input is sorted in memory.
        """
        def updatebuckets(key_values):
            """
            Commit entire buckets from key, value pairs

            :param key_values: iterable of ``(key, value)`` pairs
            """
            # sort kvs by bucket
            key_values = sorted(key_values, key=lambda key_val: self._bucket_key(key_val[0]))
            # insert kvs by bucket
            last_bucket_key, bucket = None, None
            for key, value in key_values:
                bucket_key = self._bucket_key(key)
                # cycle to next bucket if current one is done
                if bucket_key != last_bucket_key:
                    if last_bucket_key is not None:
                        # ``bucket`` still refers to the finished bucket here
                        self._store_bucket(last_bucket_key, bucket)
                    last_bucket_key = bucket_key
                    bucket = self._get_bucket(bucket_key)
                # update bucket
                bucket[key] = value
                # update caches
                if self._keys_cache is not None:
                    self._keys_cache.add(key)
                self._set_cached_item(key, value)
            # commit outstanding bucket, if any
            if last_bucket_key is not None:
                self._store_bucket(last_bucket_key, bucket)

        if other is not None:
            # mapping types
            if hasattr(other, "items"):  # dictionary
                updatebuckets(other.items())
            elif hasattr(other, "keys"):  # partial dictionary
                updatebuckets((key, other[key]) for key in other.keys())
            elif isinstance(other, abc.Mapping):
                updatebuckets((key, other[key]) for key in other)
            else:  # sequence
                updatebuckets(other)
        updatebuckets(kwargs.items())

    # iterations
    def keys(self):
        """
        :__doc__:

        If ``d.cache_keys == True``, the view provides keys without access to
        the persistent backend, but in arbitrary order. This is likely to jump
        between persistent buckets.

        If ``d.cache_keys == False``, iteration is aligned to buckets - this is
        only an implementation detail and may change in the future. If you need
        aligned iteration, use ``for key in d`` or directly access
        :py:meth:`.items`.

        :note: See the note on iterator equivalency for
               :py:meth:`~.PersistentDict.items`.
        """
        return PersistentDictKeysView(self)

    def items(self):
        """
        :__doc__:

        This iterates over all keys in a semi-deterministic way. First, all keys
        from buckets cached in memory are returned. Following this, keys from
        the remaining buckets are returned.

        :note: Due to aligning keys to buckets, this function does not benefit
               from ``d.cache_keys == True``.

        :note: Since the state of the mapping also depends on accesses, the
               strict guarantee for iteration sequence equivalence given by
               ``dict`` is not replicated. Thus, it cannot be assumed that
               ``d.items() == zip(d.values(), d.keys()) == [(v, k) for (k, v) in d]``
               holds true in any case.
        """
        return PersistentDictItemsView(self)

    def values(self):
        """
        :__doc__:

        :note: See the note on iterator equivalency for
               :py:meth:`~.PersistentDict.items`.
        """
        return PersistentDictValuesView(self)

    # high level operations
    def copy(self):
        """
        :__doc__:

        :note: This will return a ``dict``, not a :py:class:`~.PersistentDict`.
        """
        return dict(self.items())

    def __repr__(self):
        return "%s(bucket_store=%r, bucket_count=%r, cache_size=%r, cache_keys=%r, items={%s})" % (
            self.__class__.__name__,
            self._bucket_store,
            self.bucket_count,
            self.cache_size,
            self._keys_cache is not None,
            self.__repr_content(),
        )

    def __repr_content(self):  # pragma: no cover
        """Represent the items available in memory without touching the store"""
        reprs = []
        read_keys = set()
        for bucket_key in self._active_buckets.keys():
            try:
                bucket = self._active_buckets[bucket_key]
                if not bucket:
                    continue
                # strip the enclosing braces/brackets of the bucket repr
                reprs.append(repr(bucket)[1:-1])
                read_keys.update(bucket.keys())
            except KeyError:
                pass
        if self._keys_cache is None:
            # without key cache we cannot tell what else is stored
            reprs.append(", ...")
        elif self._keys_cache:
            # keys are known but their values are not in memory
            cache_repr = ": <?>, ".join(repr(key) for key in self._keys_cache if key not in read_keys)
            if cache_repr:
                reprs.append(cache_repr + ": <?>")
        return ",".join(reprs)