Example #1
    def set_caching(self, caching):
        """
        Set the caching mode for this store

        Parameters
        ----------
        caching : :class:`openpathsampling.netcdfplus.Cache`
            the cache to be used. Besides a cache instance the following
            shortcuts are understood: `None` selects `self.default_cache`,
            `True` a `MaxCache`, `False` a `NoCache` and an `int` a
            `WeakLRUCache` constructed from that number. Any other value
            leaves the current cache untouched.

        """
        if caching is None:
            caching = self.default_cache

        if caching is True:
            caching = MaxCache()
        elif caching is False:
            caching = NoCache()
        elif type(caching) is int:
            caching = WeakLRUCache(caching)

        if isinstance(caching, Cache):
            # the new cache is given the old one; what `transfer` returns
            # becomes the active cache
            self.cache = caching.transfer(self.cache)
Example #2
class ObjectStore(StorableNamedObject):
    """
    Base Class for storing complex objects in a netCDF4 file. It holds a
    reference to the store file.

    Attributes
    ----------
    content_class : :obj:`openpathsampling.netcdfplus.base.StorableObject`
        a reference to the class type to be stored using this Storage. Must be
        subclassed from :obj:`openpathsampling.netcdfplus.base.StorableObject`
    json : string
        if already computed a JSON Serialized string of the object
    cache : :py:class:`openpathsampling.netcdfplus.cache.Cache`
        a dictionary that holds references to all stored elements by index
        or string for named objects. This is only used for cached access
        if caching is not `False`. Must be of type
        :obj:`openpathsampling.netcdfplus.base.StorableObject` or subclassed.

    """
    _restore_non_initial_attr = False

    # NOTE(review): presumably the variable type names a store accepts.
    # The 16 bit entries were spelled `numpy.inf16` / `numpy.uinf16`, which
    # are not numpy types; they now read `numpy.int16` / `numpy.uint16`.
    allowed_types = [
        'int', 'float', 'long', 'str', 'bool', 'numpy.float32',
        'numpy.float64', 'numpy.int8', 'numpy.int16', 'numpy.int32',
        'numpy.int64', 'numpy.uint8', 'numpy.uint16', 'numpy.uint32',
        'numpy.uint64', 'index', 'length', 'uuid'
    ]

    # default chunk size along the store's own dimension (see
    # `create_variable`)
    default_store_chunk_size = 256

    # if `True`, `load` emits additional debug log messages
    _log_debug = False

    class DictDelegator(object):
        """
        Item access to a dict with the store prefix prepended to each key

        Looking up `item` returns `dct[store.prefix + '_' + item]`, where the
        prefix is read once at construction time.
        """

        def __init__(self, store, dct):
            separator = '_'
            self.prefix = store.prefix + separator
            self.dct = dct

        def __getitem__(self, item):
            full_key = self.prefix + item
            return self.dct[full_key]

    def prefix_delegate(self, dct):
        """Return a `DictDelegator` for `dct` that is bound to this store."""
        delegator_class = ObjectStore.DictDelegator
        return delegator_class(self, dct)

    default_cache = 10000

    def __init__(self, content_class, json=True, nestable=False):
        """
        Parameters
        ----------
        content_class : class or `None`
            the class of the objects to be stored. If not `None` it must be
            a subclass of `StorableObject`.
        json : bool or str `json` or `jsonobj`
            if `False` the store will not create a json variable for
            serialization if `True` the store will use the json pickling to
            store objects and a single storable object will be serialized and
            not referenced. If a string is given the string is taken as the
            variable type of the json variable. Here only two values are
            allowed: `jsonobj` (equivalent to `True`) or `json` which will
            also reference directly given storable objects.
        nestable : bool
            if `True` this marks the content_class to be saved as nested dict
            objects and not a pointing to saved objects. So the saved complex
            object is only stored once and not split into several objects that
            are referenced by each other in a tree-like fashion

        Raises
        ------
        ValueError
            if `json` is not one of the allowed settings or if
            `content_class` is not a subclass of `StorableObject`

        Notes
        -----
        Usually you want caching, but limited. Recommended is to use an LRUCache
        with a reasonable maximum number of objects that depends on the typical
        number of objects to cache and their size

        The class that takes care of storing data in a file is called a
        `Storage`, so the netCDF+ subclassed `Storage` is a storage.
        The classes that know how to load and save an object from the storage
        are called `Store`, like ObjectStore, SampleStore, etc...

        The difference between `json` and `jsonobj` is subtle. Consider
        storing a complex object. Then there are two ways to do that.

        1. `json`: Store a reference to the object (provided it is stored) and
        2. `jsonobj`: serialize the object and only use references for contained
           objects. All inner objects will always be stored using references.

        The only exception is using nestable. Consider objects that contain
        references to objects of the same type, like e.g. operations in an
        equation (2*3 + 3). Each operation represents a value but each
        operation needs values to operate on. To save such an object you have
        again two options:

        1. `nestable=False`. Store all single objects and always reference
           the contained objects. For an equation that would mean to store
           several objects `op1 = plus(op2, 3), op2 = times(2, 3)`. Since this
           is correct though not intuitive you can also use
        2. `nestable=True`. Store all the serialized objects nested into one
           object (string). For our example this corresponds to
           `plus(times(2,3), 3)`.

        """

        super(ObjectStore, self).__init__()
        self._storage = None
        self.content_class = content_class
        self.prefix = None
        self.cache = NoCache()
        self._free = set()
        self._cached_all = False
        self.nestable = nestable
        self._created = False

        # This will not be stored since its information is contained in the
        # dimension names
        self._dimension_prefix_store = None

        self.variables = dict()
        self.vars = dict()
        self.units = dict()

        self.index = None

        self.proxy_index = WeakValueDictionary()

        if json in [True, False, 'json', 'jsonobj']:
            self.json = json
        else:
            raise ValueError(
                'Valid settings for json are only True, False, `json` or '
                '`jsonobj`.')

        if self.content_class is not None \
                and not issubclass(self.content_class, StorableObject):
            raise ValueError(
                'Content class "%s" must be subclassed from StorableObject.' %
                self.content_class.__name__)

        self.fallback_store = None

    def is_created(self):
        """Return the value of the store's `_created` flag."""
        created = self._created
        return created

    def to_dict(self):
        """
        Return the settings of this store as a dict

        Returns
        -------
        dict
            the values of `content_class`, `json` and `nestable`, keyed by
            the names of the corresponding `__init__` parameters
        """
        return {
            'content_class': self.content_class,
            'json': self.json,
            'nestable': self.nestable
        }

    def register_fallback(self, store):
        """
        Remember `store` as the fallback store of this store

        Parameters
        ----------
        store : object or `None`
            the store kept in `self.fallback_store`; it is consulted by
            `__contains__` and `load`
        """
        setattr(self, 'fallback_store', store)

    def register(self, storage, prefix):
        """
        Associate the object store to a specific storage with a given prefix

        Parameters
        ----------
        storage : :class:`openpathsampling.netcdfplus.NetCDFPlus`
            the storage to be associated with
        prefix : str
            the name under which the store is registered. It is prepended
            (followed by an underscore) to the keys used for `variables`,
            `units` and `vars` of the storage.

        """
        self._storage = storage
        self.prefix = prefix

        # after this the store addresses its own variables by short name
        self.variables = self.prefix_delegate(self.storage.variables)
        self.units = self.prefix_delegate(self.storage.units)
        self.vars = self.prefix_delegate(self.storage.vars)

        self.index = self.create_uuid_index()

    def create_uuid_index(self):
        """Return a new, empty `HashedList` to be used as the uuid index."""
        fresh_index = HashedList()
        return fresh_index

    def restore(self):
        # NOTE(review): no body for this method is visible here, so its
        # behavior cannot be documented -- TODO recover it from the original
        # file before using this code.

    def load_indices(self):
        # NOTE(review): no body for this method is visible here, so its
        # behavior cannot be documented -- TODO recover it from the original
        # file before using this code.

    @property
    def storage(self):
        """Return the associated storage object

        Returns
        -------
        :class:`openpathsampling.netcdfplus.NetCDFPlus`
            the referenced storage object

        Raises
        ------
        RuntimeError
            if no storage has been attached with `register()` yet

        """
        # this is a property: the rest of the class reads it as an
        # attribute, e.g. `self.storage.variables`
        if self._storage is None:
            raise RuntimeError(
                'A storage needs to be added to this store to be used! '
                'Use .register() to do so.')

        return self._storage

    def __str__(self):
        """Use the `repr` of the store as its string form."""
        text = repr(self)
        return text

    def __repr__(self):
        """
        Return a short description of the store

        The text names the prefix, the content class (or `None/ANY`) and
        either the number of objects or the note that the store has not been
        created.
        """
        if self.content_class is not None:
            class_name = self.content_class.__name__
        else:
            class_name = 'None/ANY'

        if self._created:
            # the length is only queried for a created store
            status = str(len(self)) + ' object(s)'
        else:
            status = '(not created)'

        return 'store.%s[%s] : %s' % (self.prefix, class_name, status)

    @property
    def simplifier(self):
        """
        Return the simplifier instance used to create JSON serialization

        Returns
        -------
        object
            the simplifier object used in the associated storage

        """
        # this is a property: `add_single_to_cache` reads it as
        # `self.simplifier.from_json`
        return self.storage.simplifier

    def set_caching(self, caching):
        """
        Set the caching mode for this store

        Parameters
        ----------
        caching : :class:`openpathsampling.netcdfplus.Cache`
            the cache to be used. Besides a cache instance the following
            shortcuts are understood: `None` selects `self.default_cache`,
            `True` a `MaxCache`, `False` a `NoCache` and an `int` a
            `WeakLRUCache` constructed from that number. Any other value
            leaves the current cache untouched.

        """
        if caching is None:
            caching = self.default_cache

        if caching is True:
            caching = MaxCache()
        elif caching is False:
            caching = NoCache()
        elif type(caching) is int:
            caching = WeakLRUCache(caching)

        if isinstance(caching, Cache):
            # the new cache is given the old one; what `transfer` returns
            # becomes the active cache
            self.cache = caching.transfer(self.cache)

    def idx(self, obj):
        """
        Return the index in this store for a given object

        Parameters
        ----------
        obj : :class:`openpathsampling.netcdfplus.base.StorableObject`
            the object that can be stored in this store for which its index is
            to be returned

        Returns
        -------
        int
            the entry of `self.index` for the uuid of the given object.
            NOTE(review): the lookup is not guarded, so the result for an
            object that is not stored yet depends on the index container.

        """
        return self.index[obj.__uuid__]

    def __iter__(self):
        """
        Add iteration over all elements in the storage

        Yields
        ------
        object
            the result of `self.load(uuid)` for each uuid in the index
        """
        # we want to iterate in the order the objects were saved!
        for uuid in self.index._list:
            yield self.load(uuid)

    def __len__(self):
        """
        Return the number of stored objects

        Returns
        -------
        int
            number of stored objects, taken from the length of the storage
            dimension named after this store's prefix

        """
        return len(self.storage.dimensions[self.prefix])

    def write(self, variable, idx, obj, attribute=None):
        """
        Write one attribute of an object into a variable of this store

        Parameters
        ----------
        variable : str
            the name of the store variable to write to
        idx : int
            the position in the variable; it is converted with `int()`
        obj : object
            the object the value is read from
        attribute : str or `None`
            the name of the attribute to read from `obj`. If `None` the
            variable name is used.

        Notes
        -----
        For variables whose `var_type` starts with `lazy` the attribute is
        afterwards replaced by a proxy created by the variable's store. For a
        `LoaderProxy` the replacement is applied to the real object behind
        it, otherwise to `obj` itself.

        """
        if attribute is None:
            attribute = variable

        var = self.vars[variable]
        val = getattr(obj, attribute)

        var[int(idx)] = val

        if var.var_type.startswith('lazy'):
            proxy = var.store.proxy(val)
            if isinstance(obj, LoaderProxy):
                # for a loader proxy apply it to the real object
                setattr(obj.__subject__, attribute, proxy)
            else:
                setattr(obj, attribute, proxy)

    def proxy(self, item):
        """
        Return a proxy of a object for this store

        Parameters
        ----------
        item : :py:class:`openpathsampling.netcdfplus.base.StorableObject`
            or int The item or index that points to an object in this store
            and to which a proxy is requested.

        Returns
        -------
        `LoaderProxy` or `None`
            a proxy bound to this store. `None` is returned if `item` is
            `None` or a string starting with `-`.

        """
        if item is None:
            return None

        tt = type(item)
        if tt is int:
            # a position in the store: look up the stored uuid
            idx = self.vars['uuid'][item]
        elif tt is long:
            idx = item
        elif tt in [str, unicode]:
            if item[0] == '-':
                return None
            idx = int(UUID(item))
        else:
            # anything else is expected to carry its own uuid
            idx = item.__uuid__

        return LoaderProxy(self, idx)

    def __contains__(self, item):
        """
        Tell whether an object is known to this store or one of its fallbacks

        The uuid index of the store is checked first, then the fallback
        store and finally the fallback of the storage.
        """
        if item.__uuid__ in self.index:
            return True

        fallback_store = self.fallback_store
        if fallback_store is not None and item in fallback_store:
            return True

        storage_fallback = self.storage.fallback
        return storage_fallback is not None and item in storage_fallback

    def __getitem__(self, item):
        """
        Enable numpy style selection of object in the store

        Parameters
        ----------
        item : int, str, long, slice, list or Ellipsis
            an `int` is a position (negative values count from the end), a
            `str` or `long` is passed to `load` unchanged, a `slice` or
            `list` selects several objects and `...` iterates over all.

        Returns
        -------
        object, list, iterator or `None`
            the selected object(s). `None` if loading raised a `KeyError` or
            if the type of `item` is not supported.

        """
        try:
            if type(item) is int:
                if item < 0:
                    item += len(self)
                return self.load(item)
            elif type(item) is str or type(item) is long:
                return self.load(item)
            elif type(item) is slice:
                return [
                    self.load(idx) for idx in range(*item.indices(len(self)))
                ]
            elif type(item) is list:
                return [self.load(idx) for idx in item]
            elif item is Ellipsis:
                return iter(self)
        except KeyError:
            return None

    def get(self, item):
        """
        Return `self[item]` or `None` if the lookup raises a `KeyError`

        Parameters
        ----------
        item : object
            anything accepted by `__getitem__`

        """
        try:
            return self[item]
        except KeyError:
            return None

    def _load(self, idx):
        """Return the entry at position `idx` of the store's `json` variable."""
        json_variable = self.vars['json']
        return json_variable[idx]

    def clear_cache(self):
        """Clear the cache and force reloading

        All entries are removed from `self.cache` and the flag that marks the
        cache as completely filled is reset, so `cache_all` will load again.
        """
        # the cache itself has to be emptied as well, otherwise `load` would
        # keep returning the cached objects
        self.cache.clear()
        self._cached_all = False

    def cache_all(self):
        """Load all samples as fast as possible into the cache

        The complete `json` variable is read in one go and every entry is
        handed to `add_single_to_cache`. Nothing happens if this has already
        been done.
        """
        if not self._cached_all:
            idxs = range(len(self))
            jsons = self.variables['json'][:]

            # a plain loop: the calls are made for their side effect only
            for position, json in zip(idxs, jsons):
                self.add_single_to_cache(position, json)

            self._cached_all = True

    def _save(self, obj, idx):
        """Write `obj` to position `idx` of the store's `json` variable."""
        json_variable = self.vars['json']
        json_variable[idx] = obj

    def last(self):
        """
        Returns the last stored object. Useful to continue a run.

        Returns
        -------
        :py:class:`openpathsampling.netcdfplus.base.StorableObject`
            the last stored object in this store
        """
        return self.load(len(self) - 1)

    def first(self):
        """
        Returns the first stored object.

        Returns
        -------
        :py:class:`openpathsampling.netcdfplus.base.StorableObject`
            the actual first stored object
        """
        return self.load(0)

    def free(self):
        """
        Return the number of the next free index for this store

        Returns
        -------
        index : int
            the number of the next free index in the storage.
            Used to store a new object.

        """
        # start at first free position in the storage
        idx = len(self)

        # and skip also reserved potential stored ones
        while idx in self._free:
            idx += 1

        return idx

    def reserve_idx(self, idx):
        """
        Locks an idx as used

        Parameters
        ----------
        idx : int
            the integer index to be reserved

        """
        # NOTE(review): the statement of this method was not visible in the
        # source; it is reconstructed from `free()`, which skips every index
        # contained in the set `self._free` -- confirm against the original.
        self._free.add(idx)

    def release_idx(self, idx):
        """
        Releases a lock on an idx

        Parameters
        ----------
        idx : int
            the integer index to be released

        """
        # NOTE(review): the statement of this method was not visible in the
        # source; it is reconstructed as the inverse of reserving an index in
        # the set `self._free` -- confirm against the original. `discard` is
        # used so releasing an index that is not reserved is not an error.
        self._free.discard(idx)

    def initialize(self):
        """
        Initialize the associated storage to allow for object storage. Mainly
        creates an index dimension with the name of the object.

        Besides the dimension a `json` variable (only if `self.json` is set)
        and a `uuid` variable are created. Afterwards the store is marked as
        created.
        """
        # define dimensions used for the specific object

        self.storage.create_dimension(self.prefix, 0)

        # NOTE(review): of both `create_variable` calls below only the
        # `description` argument was visible in the source. Variable names
        # and types are reconstructed from `self.vars['json']`,
        # `self.vars['uuid']` and the `uuid` entry in `allowed_types`;
        # further arguments (e.g. chunk sizes) may be missing -- confirm
        # against the original.
        if self.json:
            jsontype = 'jsonobj'
            if type(self.json) is str:
                jsontype = self.json

            self.create_variable(
                'json',
                jsontype,
                description='A json serialized version of the object',
            )

        # TODO: Change to 16byte string
        self.create_variable('uuid', 'uuid',
                             description='The uuid of the object',
                             )

        self._created = True

    # ==========================================================================
    # ==========================================================================

    # NOTE(review): this method is damaged in the visible source. The
    # parameter list after `self,` is cut off, the docstring below has lost
    # its quotes, several `else:` lines and closing brackets are missing and
    # the final call only shows its first argument. The code is left
    # untouched because parameter order and defaults cannot be determined
    # from here -- TODO recover it from the original file.
    def create_variable(self,
        Create a new variable in the netCDF storage. This is just a helper
        function to structure the code better.

        var_name : str
            The var_name of the variable to be created
        var_type : str
            The string representing the type of the data stored in the
            variable.  Allowed are strings of native python types in which
            case the variables will be treated as python or a string of the
            form 'numpy.type' which will refer to the numpy data types.
            Numpy is preferred sinec the api to netCDF uses numpy and thus
            it is faster. Possible input strings are
            `int`, `float`, `long`, `str`, `numpy.float32`, `numpy.float64`,
            `numpy.int8`, `numpy.int16`, `numpy.int32`, `numpy.int64`, `json`,
            `obj.<store>`, `lazyobj.<store>`
        dimensions : str or tuple of str
            A tuple representing the dimensions used for the netcdf variable.
            If not specified then the default dimension of the storage is used.
            If the last dimension is `'...'` then it is assumed that the
            objects are of variable length. In netCDF this is usually
            referred to as a VLType.  We will treat is just as another
            dimension, but it can only be the last dimension.
        description : str
            A string describing the variable in a readable form.
        chunksizes : tuple of int
            A tuple of ints per number of dimensions. This specifies in what
            block sizes a variable is stored. Usually for object related stuff
            we want to store everything of one object at once so this is often
            (1, ..., ...)
        simtk_unit : str
            A string representing the units used for this variable. Can be
            used with all var_types although it makes sense only for numeric
        maskable : bool, default: False
            If set to `True` the values in this variable can only partially
            exist and if they have not yet been written they are filled with
            a fill_value which is treated as a non-set variable. The created
            variable will interpret this values as `None` when returned

        # add the main dimension to the var_type

        # a single dimension name is wrapped into a list
        if type(dimensions) is str:
            dimensions = [dimensions]

        # NOTE(review): an `else:` appears to be missing between the next two
        # assignments; as written the second one always overrides the first.
        if type(dimensions) is int:
            if dimensions == 1:
                dimensions = ['scalar']
                dimensions = [dimensions]

        # NOTE(review): an `else:` appears to be missing here as well; the
        # second assignment is meant to put the store's own dimension first.
        if dimensions is None:
            dimensions = (self.prefix, )
            dimensions = tuple([self.prefix] + list(dimensions))

        store_chunk_size = ObjectStore.default_store_chunk_size

        # the chunk size for the store's own (first) dimension is prepended
        # whenever the given chunk sizes only cover the remaining dimensions
        if chunksizes is None and len(dimensions) == 1:
            chunksizes = (store_chunk_size, )
        elif chunksizes is not None and dimensions[-1] == '...' \
                and len(dimensions) == len(chunksizes) + 2:
            chunksizes = tuple([store_chunk_size] + list(chunksizes))
        elif chunksizes is not None and dimensions[-1] != '...' \
                and len(dimensions) == len(chunksizes) + 1:
            chunksizes = tuple([store_chunk_size] + list(chunksizes))

        # all named dimensions except the first get the dimension prefix
        # NOTE(review): the closing `])` of both expressions is missing
        if self.dimension_prefix:
            dimensions = tuple([dimensions[0]] + [
                self.dimension_prefix +
                dim if type(dim) is str and dim != '...' else dim
                for dim in dimensions[1:]
            chunksizes = tuple([chunksizes[0]] + [
                self.dimension_prefix + chs if type(chs) is str else chs
                for chs in chunksizes[1:]

        # the variable is created in the storage under `<prefix>_<var_name>`
        # NOTE(review): the remaining arguments of this call are not visible
        self.storage.create_variable(self.prefix + '_' + var_name,

    @property
    def dimension_prefix(self):
        """
        Return the prefix to be used for dimension names

        Returns
        -------
        str
            the prefix of the store selected with
            `set_dimension_prefix_store` or an empty string if none is set

        """
        # this is a property: `create_variable` uses it as a string
        if self._dimension_prefix_store is not None:
            return self._dimension_prefix_store.prefix
        else:
            return ''

    def set_dimension_prefix_store(self, prefix_store=None):
        """
        Select which store or none should be used to prefix dimension names

        If you want to create multiple instances of a store and these should
        have differently long dimensions you need unique names for these. This
        way you can select a store and the dimensions will be prefixed with the
        stores prefix

        Parameters
        ----------
        prefix_store : :obj:`openpathsampling.netcdf.ObjectStore`
            the store from which to use its prefix / name to prefix
            dimension names. `None` (the default) removes the selection.

        """
        self._dimension_prefix_store = prefix_store

    # ==========================================================================
    # ==========================================================================

    def load(self, idx):
        """
        Returns an object from the storage.

        Parameters
        ----------
        idx : int or long
            the integer index of the object to be loaded. A `long` is looked
            up in the uuid index of this store first; if it is unknown there
            the request is passed on to the fallback store or to the
            fallback of the storage.

        Returns
        -------
        :py:class:`openpathsampling.netcdfplus.base.StorableObject` or `None`
            the loaded object. `None` is returned for a negative index and
            for an index beyond the number of stored objects.

        Raises
        ------
        ValueError
            if `idx` is neither `int` nor `long` or if a `long` is not known
            to this store and no fallback exists

        """
        if type(idx) is long:
            if idx in self.index:
                n_idx = self.index[idx]
            else:
                if self.fallback_store is not None:
                    return self.fallback_store.load(idx)
                elif self.storage.fallback is not None:
                    return self.storage.fallback.stores[self.name].load(idx)
                else:
                    raise ValueError(
                        'str %s not found in storage or fallback' % idx)

        elif type(idx) is not int:
            raise ValueError(
                ('indices of type "%s" are not allowed in named storage '
                 '(only str and int)') % type(idx).__name__)
        else:
            n_idx = int(idx)

        if n_idx < 0:
            return None

        # if it is in the cache, return it
        try:
            obj = self.cache[n_idx]
        except KeyError:
            pass
        else:
            if self._log_debug:
                logger.debug('Found IDX #' + str(idx) +
                             ' in cache. Not loading!')
            return obj

        if self._log_debug:
            logger.debug('Calling load object of type `%s` @ IDX #%d' %
                         (self.content_class.__name__, n_idx))

        # negative indices have already been handled above, so only the
        # upper bound needs to be checked here
        if n_idx >= len(self):
            logger.warning(
                'Trying to load from IDX #%d > number of object %d' %
                (n_idx, len(self)))
            return None

        obj = self._load(n_idx)

        if self._log_debug:
            logger.debug(
                'Calling load object of type %s and IDX # %d ... DONE' %
                (self.content_class.__name__, n_idx))

        if obj is not None:
            self._get_id(n_idx, obj)

            # update cache there might have been a change due to naming
            self.cache[n_idx] = obj

            if self._log_debug:
                logger.debug(
                    'Try loading UUID object of type %s and IDX # %d ... DONE'
                    % (self.content_class.__name__, n_idx))

        if self._log_debug:
            logger.debug(
                'Finished load object of type %s and IDX # %d ... DONE' %
                (self.content_class.__name__, n_idx))

        return obj

    @staticmethod
    def reference(obj):
        """
        Return the reference under which an object is known to the store

        Parameters
        ----------
        obj : object
            the object to be referenced

        Returns
        -------
        object
            the `__uuid__` of the object

        """
        # a static method: it takes no `self` but is called as
        # `self.reference(obj)` in `save`
        return obj.__uuid__

    # NOTE(review): the text below is the docstring of this method without
    # its quotes, and no statement of the body is visible. How an object is
    # marked as stored cannot be determined from here -- TODO recover the
    # body from the original file.
    def remember(self, obj):
        Tell a store that an obj should be assumed as stored

        This is useful, if you do not want to store an object in a specific
        store. Especially to make sure snapshots are not stored multiple times

        obj : :py:class:`openpathsampling.netcdfplus.base.StorableObject`
            the object to be fake stored


    # NOTE(review): the text below is the docstring of this method without
    # its quotes, and no statement of the body is visible. How a remembered
    # object is removed again cannot be determined from here -- TODO recover
    # the body from the original file.
    def forget(self, obj):
        This will revert remembering non-stored objects.

        Stored objects cannot be forgotten

        obj : :py:class:`openpathsampling.netcdfplus.base.StorableObject`
            the object to be forgotten



    # NOTE(review): this method is damaged in the visible source. The
    # docstring has lost its quotes, the second half of both `and \`
    # conditions is missing, and so are the `else:`, the `try:` / `except`
    # lines and the statement that marks the object as saved. The code is
    # left untouched because the missing conditions cannot be determined
    # from here -- TODO recover it from the original file.
    def save(self, obj, idx=None):
        Saves an object to the storage.

        obj : :class:`openpathsampling.netcdfplus.base.StorableObject`
            the object to be stored
        idx : int or string or `None`
            the index to be used for storing. This is highly discouraged since
            it changes an immutable object (at least in the storage). It is
            better to store also the new object and just ignore the
            previously stored one.

        uuid = obj.__uuid__

        # an index entry of -1 is treated as "not saved yet", any other entry
        # means the object is already in the store
        if uuid in self.index:
            # has been saved so quit and do nothing
            if not self.index[uuid] == -1:
                return self.reference(obj)

            # numbers other than -1 are reserved for other things

        if isinstance(obj, LoaderProxy):
            if obj._store is self:
                # is a proxy of a saved object so do nothing
                return uuid
                # NOTE(review): an `else:` appears to be missing here
                # it is stored but not in this store so we try storing the
                # full snapshot which might be still in cache or memory
                # if that is not the case it will be stored again. This can
                # happen when you load from one store save to another. And load
                # again after some time while the cache has been changed and try
                # to save again the loaded object. We will not explicitly store
                # a table that matches objects between different storages.
                return self.save(obj.__subject__)

        # objects that are already present in a fallback are not saved again
        # NOTE(review): the continuation of both conditions is not visible
        if self.fallback_store is not None and \
            if obj in self.fallback_store:
                return self.reference(obj)

        elif self.storage.fallback is not None and \
            if obj in self.storage.fallback:
                return self.reference(obj)

        if not isinstance(obj, self.content_class):
            raise ValueError(
                ('This store can only store object of base type "%s". Given '
                 'obj is of type "%s". You might need to use another store.') %
                (self.content_class, obj.__class__.__name__))

        # the new object is written behind all entries known to the index
        # n_idx = self.free()
        n_idx = len(self.index)

        # mark as saved so circular dependencies will not cause infinite loops
        # NOTE(review): the statement belonging to this comment is missing

        # make sure in nested saving that an IDX is not used twice!
        # self.reserve_idx(n_idx)

        logger.debug('Saving ' + str(type(obj)) + ' using IDX #' + str(n_idx))

        # NOTE(review): a `try:` appears to be missing before this line
            self._save(obj, n_idx)

            # store the name in the cache
            # if hasattr(self, 'cache'):
            self.cache[n_idx] = obj

        # NOTE(review): an `except` clause appears to be missing before the
        # next comment
            # in case we did not succeed remove the mark as being saved
            del self.index[uuid]
            # self.release_idx(n_idx)

        # self.release_idx(n_idx)
        self._set_id(n_idx, obj)

        return self.reference(obj)

    def __setitem__(self, key, value):
        """
        Enable saving using __setitem__

        This only supports writing `store[...] = value`. Not sure if this is
        ever used.

        Parameters
        ----------
        key : object
            `...` (Ellipsis) is replaced by `None`; any other key is passed
            to `save` as the index
        value : object
            the object to be saved

        """
        if key is Ellipsis:
            key = None

        self.save(value, key)

    def load_single(self, idx):
        """Return what `_load` gives for position `idx` (cache not involved)."""
        loaded = self._load(idx)
        return loaded

    def load_range(self, start, end):
        """
        Apply `_load` to every position from `start` up to `end` (exclusive)

        The result is whatever `map` returns for the running interpreter.
        """
        positions = range(start, end)
        loader = self._load
        return map(loader, positions)

    def add_single_to_cache(self, idx, json):
        """
        Add a single object to cache by json

        Parameters
        ----------
        idx : int
            the index where the object was stored
        json : str
            json string that represents a serialized version of the stored
            object

        Returns
        -------
        object or `None`
            the object created from the json string. `None` if the index is
            already present in the cache, in which case nothing is done.

        """
        if idx not in self.cache:
            obj = self.simplifier.from_json(json)

            self._get_id(idx, obj)

            self.cache[idx] = obj
            self.index[obj.__uuid__] = idx

            return obj

    def uuid(self, uuid):
        """
        Return last object with a given uuid

        Parameters
        ----------
        uuid : str
            the uuid to be searched for

        Returns
        -------
        :py:class:`openpathsampling.netcdfplus.base.StorableObject`
            the last object with a given uuid. This is to mimic an immutable
            object. Once you (re-)save with the same uuid you replace the old
            one and hence you need to load the last stored one.

        """
        # the lookup is delegated to `load` unchanged
        return self.load(uuid)

    def _set_id(self, idx, obj):
        """Write the `__uuid__` of `obj` to position `idx` of the `uuid` variable."""
        uuid_variable = self.vars['uuid']
        uuid_variable[idx] = obj.__uuid__

    def _get_id(self, idx, obj):
        """
        Set `obj.__uuid__` from the index entry for position `idx`

        The value is what `self.index.index(int(idx))` returns (presumably
        the uuid stored at that position -- depends on the index container).
        """
        position = int(idx)
        obj.__uuid__ = self.index.index(position)
