Example #1
    def __init__(self, name, content_class):
        """

        Parameters
        ----------
        name : str
            the name of the store
        content_class : class
            the base class of the content, must be subclassed from
            `StorableMixin`

        """

        super(ObjectStore, self).__init__()
        self._storage = None
        self.content_class = content_class
        self.cache = NoCache()
        self._free = set()
        self._cached_all = False
        self._created = False
        self._document = None

        self.name = name

        self.attribute_list = {}
        self.cv = {}

        # This will not be stored since its information is contained in the
        # dimension names
        self._dimension_name_store = None

        self.variables = dict()
        self.units = dict()

        self.index = None

        self.proxy_index = WeakValueDictionary()

        if self.content_class is not None \
                and not issubclass(self.content_class, StorableMixin):
            raise ValueError(
                'Content class "%s" must be subclassed from StorableMixin.' %
                self.content_class.__name__)
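A minimal usage sketch for the constructor above; the import path and the `Sample` class are illustrative assumptions, only the signature and the StorableMixin check come from the example:

    # illustrative only; the import path and Sample are assumptions
    from mongodb import ObjectStore, StorableMixin

    class Sample(StorableMixin):
        # any content class must derive from StorableMixin
        pass

    store = ObjectStore('samples', Sample)   # accepted
    ObjectStore('bad', dict)                 # raises ValueError: dict is not a
                                             # StorableMixin subclass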
Example #2
    def set_caching(self, caching):
        if caching is None:
            caching = self.default_cache

        if caching is True:
            caching = MaxCache()
        elif caching is False:
            caching = NoCache()
        elif type(caching) is int:
            caching = WeakLRUCache(caching)

        if isinstance(caching, Cache):
            self.cache = caching.transfer(self.cache)
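A short sketch of the argument coercion performed by set_caching() above; `store` is assumed to be an instance of the surrounding store class, and `default_cache` is assumed to be an integer class attribute (it is 10000 in Example #7 below):

    store.set_caching(None)    # -> store.default_cache (an int) -> WeakLRUCache
    store.set_caching(True)    # -> MaxCache(), unlimited caching
    store.set_caching(False)   # -> NoCache(), caching disabled
    store.set_caching(5000)    # -> WeakLRUCache(5000), bounded caching
    # in each case the resulting Cache takes over the previous one via
    # caching.transfer(self.cache)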
Example #3
    def __init__(
        self,
        auth_token,
        url="http://content-api.p2p.tribuneinteractive.com",
        debug=False,
        cache=NoCache(),
        image_services_url=None,
        product_affiliate_code='lanews',
        source_code='latimes',
        webapp_name='tRibbit',
        state_filter='working,live,pending,copyready',
        preserve_embedded_tags=True
    ):
        self.config = {
            'P2P_API_ROOT': url,
            'P2P_API_KEY': auth_token,
            'IMAGE_SERVICES_URL': image_services_url,
        }
        self.cache = cache
        self.debug = debug
        self.product_affiliate_code = product_affiliate_code
        self.source_code = source_code
        self.webapp_name = webapp_name
        self.state_filter = state_filter
        self.preserve_embedded_tags = preserve_embedded_tags

        self.default_filter = {
            'product_affiliate': self.product_affiliate_code,
            'state': self.state_filter
        }

        self.default_content_item_query = {
            'include': [
                'web_url',
                'section',
                'related_items',
                'content_topics',
                'embedded_items'
            ],
            'filter': self.default_filter
        }

        self.content_item_defaults = {
            "content_item_type_code": "blurb",
            "product_affiliate_code": self.product_affiliate_code,
            "source_code": self.source_code,
            "content_item_state_code": "live",
        }

        self.s = requests.Session()
        self.s.mount('https://', TribAdapter())
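A hedged usage sketch for the client constructor above; only the signature is taken from the example, while the class name `P2P` and the import path are assumptions:

    from p2p import P2P   # assumed import path and class name

    client = P2P(auth_token='my-api-key')   # uses the default content API root
    # the constructor wires up a requests.Session and mounts TribAdapter
    # for all https:// requests
    print(client.config['P2P_API_ROOT'])
    print(client.default_content_item_query)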
Example #4
    def __init__(self,
                 url,
                 auth_token,
                 debug=False,
                 cache=NoCache(),
                 image_services_url=None,
                 product_affiliate_code='chinews',
                 source_code='chicagotribune',
                 webapp_name='tRibbit'):
        self.config = {
            'P2P_API_ROOT': url,
            'P2P_API_KEY': auth_token,
            'IMAGE_SERVICES_URL': image_services_url,
        }
        self.cache = cache
        self.debug = debug
        self.product_affiliate_code = product_affiliate_code
        self.source_code = source_code
        self.webapp_name = webapp_name

        self.default_filter = {
            'product_affiliate': self.product_affiliate_code,
            'state': 'live'
        }

        self.default_content_item_query = {
            'include': [
                'web_url', 'section', 'related_items', 'content_topics',
                'embedded_items'
            ],
            'filter':
            self.default_filter
        }

        self.content_item_defaults = {
            "content_item_type_code": "blurb",
            "product_affiliate_code": self.product_affiliate_code,
            "source_code": self.source_code,
            "content_item_state_code": "live",
        }

        self.s = requests.Session()
        self.s.mount('https://', TribAdapter())
Example #5
    def set_caching(self, caching):
        """
        Set the caching mode for this store

        Parameters
        ----------
        caching : :class:`mongodb.Cache`

        """
        if caching is None:
            caching = self.default_cache

        if caching is True:
            caching = MaxCache()
        elif caching is False:
            caching = NoCache()
        elif type(caching) is int:
            caching = WeakLRUCache(caching)

        if isinstance(caching, Cache):
            self.cache = caching.transfer(self.cache)
Example #6
    def __init__(self,
                 content_class,
                 json=True,
                 caching=None,
                 nestable=False,
                 has_name=False):
        """

        Parameters
        ----------
        storage
        content_class
        json
        dimension_units
        caching : dict-like or bool or int or None
            the dict-like object used for caching.
            `True` means to use a Python built-in dict with unlimited caching.
            Be careful.
            `False` means no caching at all. If a dict-like object is passed,
            it will be used.
            An integer `n` means to use LRU caching with at most `n` elements
            and is equivalent to `cache=LRUCache(n)`.
            Default (`None`) is equivalent to `cache=ObjectStore.default_cache`
        nestable : bool
            if `True` the content_class is saved as nested dict objects rather
            than as pointers to saved objects. The saved complex object is then
            stored only once and not split into several objects that reference
            each other in a tree-like fashion

        Notes
        -----
        Usually you want caching, but limited. It is recommended to use an
        LRUCache with a reasonable size that depends on the typical number of
        objects to cache and their size

        Attributes
        ----------

        storage : Storage
            a reference to the Storage object where all data is stored
        content_class : class
            a reference to the class type to be stored using this Storage
        has_name : bool
            if `True` objects can also be loaded by a string name
        json : string
            the JSON-serialized string of the object, if already computed
        simplifier : util.StorableObjectJSON
            an instance of a JSON serializer
        identifier : str
            name of the netCDF variable that contains the string by which the
            object is identified. So far this is `name`
        cache : dict-like (int or str : object)
            a dictionary that holds references to all stored elements by index
            or by string for named objects. This is only used for cached access
            if caching is not `False`

        Notes
        -----
        The class that takes care of storing data in a file is called a Storage,
        so the netCDF subclassed Storage is a storage. The classes that know how
        to load and save an object from the storage are called stores,
        like ObjectStore, SampleStore, etc...

        """

        self._storage = None
        self.content_class = content_class
        self.prefix = None
        self.cache = NoCache()
        self.has_name = has_name
        self.json = json
        self._free = set()
        self._cached_all = False
        self._names_loaded = False
        self.nestable = nestable
        self.name_idx = dict()

        self.variables = dict()
        self.vars = dict()
        self.units = dict()

        self.index = weakref.WeakKeyDictionary()
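A sketch of how this constructor might be called; `Sample` is a hypothetical storable class used only for illustration:

    # per the docstring above, caching accepts None, True, False, an int,
    # or a dict-like object; the constructor itself starts out with NoCache()
    store = ObjectStore(Sample, json=True, caching=1000,
                        nestable=False, has_name=True)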
Example #7
class ObjectStore(object):
    """
    Base Class for storing complex objects in a netCDF4 file. It holds a
    reference to the store file.
    """

    allowed_types = [
        'int', 'float', 'long', 'str', 'bool',
        'numpy.float32', 'numpy.float64', 'numpy.int8', 'numpy.int16',
        'numpy.int32', 'numpy.int64', 'numpy.uint8', 'numpy.uint16',
        'numpy.uint32', 'numpy.uint64', 'index', 'length'
    ]

    class DictDelegator(object):
        def __init__(self, store, dct):
            self.prefix = store.prefix + '_'
            self.dct = dct

        def __getitem__(self, item):
            return self.dct[self.prefix + item]

    def prefix_delegate(self, dct):
        return ObjectStore.DictDelegator(self, dct)

    default_cache = 10000

    def __init__(self,
                 content_class,
                 json=True,
                 caching=None,
                 nestable=False,
                 has_name=False):
        """

        Parameters
        ----------
        storage
        content_class
        json
        dimension_units
        caching : dict-like or bool or int or None
            the dict-like object used for caching.
            `True` means to use a Python built-in dict with unlimited caching.
            Be careful.
            `False` means no caching at all. If a dict-like object is passed,
            it will be used.
            An integer `n` means to use LRU caching with at most `n` elements
            and is equivalent to `cache=LRUCache(n)`.
            Default (`None`) is equivalent to `cache=ObjectStore.default_cache`
        nestable : bool
            if `True` the content_class is saved as nested dict objects rather
            than as pointers to saved objects. The saved complex object is then
            stored only once and not split into several objects that reference
            each other in a tree-like fashion

        Notes
        -----
        Usually you want caching, but limited. It is recommended to use an
        LRUCache with a reasonable size that depends on the typical number of
        objects to cache and their size

        Attributes
        ----------

        storage : Storage
            a reference to the Storage object where all data is stored
        content_class : class
            a reference to the class type to be stored using this Storage
        has_name : bool
            if `True` objects can also be loaded by a string name
        json : string
            the JSON-serialized string of the object, if already computed
        simplifier : util.StorableObjectJSON
            an instance of a JSON serializer
        identifier : str
            name of the netCDF variable that contains the string by which the
            object is identified. So far this is `name`
        cache : dict-like (int or str : object)
            a dictionary that holds references to all stored elements by index
            or by string for named objects. This is only used for cached access
            if caching is not `False`

        Notes
        -----
        The class that takes care of storing data in a file is called a Storage,
        so the netCDF subclassed Storage is a storage. The classes that know how
        to load and save an object from the storage are called stores,
        like ObjectStore, SampleStore, etc...

        """

        self._storage = None
        self.content_class = content_class
        self.prefix = None
        self.cache = NoCache()
        self.has_name = has_name
        self.json = json
        self._free = set()
        self._cached_all = False
        self._names_loaded = False
        self.nestable = nestable
        self.name_idx = dict()

        self.variables = dict()
        self.vars = dict()
        self.units = dict()

        self.index = weakref.WeakKeyDictionary()

    def register(self, storage, name):
        self._storage = storage
        self.prefix = name

        self.variables = self.prefix_delegate(self.storage.variables)
        self.units = self.prefix_delegate(self.storage.units)
        self.vars = self.prefix_delegate(self.storage.vars)

    @property
    def storage(self):
        if self._storage is None:
            raise RuntimeError(
                'A store needs to be added to a storage to be used!')

        return self._storage

    @property
    def dimension_units(self):
        return self.storage.dimension_units

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return "store.%s[%s]" % (self.prefix, self.content_class.__name__)

    @property
    def simplifier(self):
        return self.storage.simplifier

    def set_caching(self, caching):
        if caching is None:
            caching = self.default_cache

        if caching is True:
            caching = MaxCache()
        elif caching is False:
            caching = NoCache()
        elif type(caching) is int:
            caching = WeakLRUCache(caching)

        if isinstance(caching, Cache):
            self.cache = caching.transfer(self.cache)

    def idx(self, obj):
        """
        Return the index in this store for a given object

        Parameters
        ----------
        obj : object
            the object that can be stored in this store for which its index is
            to be returned

        Returns
        -------
        int or None
            The integer index of the given object or None if it is not stored yet
        """
        return self.index.get(obj, None)

    def update_name_cache(self):
        """
        Update the internal cache with all stored names in the store.
        This allows named objects to be loaded by name
        """
        if self.has_name:
            if not self._names_loaded:
                for idx, name in enumerate(self.storage.variables[self.prefix +
                                                                  "_name"][:]):
                    self._update_name_in_cache(name, idx)

                self._names_loaded = True

    def _update_name_in_cache(self, name, idx):
        if name != '':
            if name not in self.name_idx:
                self.name_idx[name] = [idx]
            else:
                if idx not in self.name_idx[name]:
                    self.name_idx[name].append(idx)

    def find(self, name):
        """
        Return all objects with a given name

        Parameters
        ----------
        name : str
            the name to be searched for

        Returns
        -------
        list of objects
            a list of found objects, can be empty [] if no objects with
            that name exist

        """
        if self.has_name:
            if name not in self.name_idx:
                self.update_name_cache()

            return self[self.name_idx[name]]

        return []

    def find_indices(self, name):
        """
        Return indices for all objects with a given name

        Parameters
        ----------
        name : str
            the name to be searched for

        Returns
        -------
        list of int
            a list of indices in the storage for all found objects,
            can be empty [] if no objects with that name exist

        """
        if self.has_name:
            if name not in self.name_idx:
                self.update_name_cache()

            return self.name_idx[name]

        return []

    def find_first(self, name):
        """
        Return first object with a given name

        Parameters
        ----------
        name : str
            the name to be searched for

        Returns
        -------
        object or None
            the first found object, can be None if no object with the given
            name exists

        """
        if self.has_name:
            if name not in self.name_idx:
                self.update_name_cache()

            if len(self.name_idx[name]) > 0:
                return self[self.name_idx[name][0]]

        return None

    def __iter__(self):
        """
        Add iteration over all elements in the storage
        """
        return self.iterator()

    def __len__(self):
        """
        Return the number of stored objects

        Returns
        -------
        int
            number of stored objects

        Notes
        -----
        Equal to `store.count()`
        """
        return len(self.storage.dimensions[self.prefix])

    def iterator(this, iter_range=None):
        """
        Return an iterator over all objects in the storage

        Parameters
        ----------
        iter_range : slice or None
            if this is not `None` it confines the iterator to objects specified
            in the slice

        Returns
        -------
        Iterator()
            The iterator that iterates the objects in the store

        """
        class ObjectIterator:
            def __init__(self):
                self.storage = this
                self.iter_range = iter_range
                if iter_range is None:
                    self.idx = 0
                    self.end = len(self.storage)
                else:
                    self.idx = iter_range.start
                    self.end = iter_range.stop

            def __iter__(self):
                return self

            def next(self):
                if self.idx < self.end:
                    obj = self.storage.load(self.idx)
                    if self.iter_range is not None and self.iter_range.step is not None:
                        self.idx += self.iter_range.step
                    else:
                        self.idx += 1
                    return obj
                else:
                    raise StopIteration()

        return ObjectIterator()

    def write(self, variable, idx, obj, attribute=None):
        if attribute is None:
            attribute = variable

        var = self.vars[variable]
        val = getattr(obj, attribute)

        var[int(idx)] = val

        if var.var_type.startswith('lazy'):
            proxy = var.store.proxy(val)
            setattr(obj, attribute, proxy)

    def proxy(self, item):
        if item is None:
            return None

        if type(item) is not int:
            idx = self.index.get(item, None)
        else:
            idx = item

        if idx is None:
            return item
        else:
            return LoaderProxy(self, idx)

    def __getitem__(self, item):
        """
        Enable numpy style selection of object in the store
        """
        try:
            if type(item) is int or type(item) is str:
                return self.load(item)
            elif type(item) is slice:
                return [
                    self.load(idx) for idx in range(*item.indices(len(self)))
                ]
            elif type(item) is list:
                return [self.load(idx) for idx in item]
            elif item is Ellipsis:
                return self.iterator()
        except KeyError:
            return None

    def _load(self, idx):
        obj = self.vars['json'][idx]
        return obj

    #        return self.load_json(self.prefix + '_json', idx)

    def clear_cache(self):
        """Clear the cache and force reloading

        """

        self.cache.clear()
        self._cached_all = False

    def cache_all(self):
        """Load all samples as fast as possible into the cache

        """
        if not self._cached_all:
            idxs = range(len(self))
            jsons = self.variables['json'][:]

            [self.add_single_to_cache(i, j) for i, j in zip(idxs, jsons)]

            self._cached_all = True

    def add_single_to_cache(self, idx, json):
        """
        Add a single object to cache by json
        """

        if idx not in self.cache:
            simplified = yaml.load(json)
            obj = self.simplifier.build(simplified)

            obj.json = json
            self.index[obj] = idx
            self.cache[idx] = obj

            if self.has_name:
                name = self.storage.variables[self.prefix + '_name'][idx]
                setattr(obj, '_name', name)
                if name != '':
                    self._update_name_in_cache(obj._name, idx)

    def _save(self, obj, idx):
        self.vars['json'][idx] = obj

    @property
    def last(self):
        """
        Returns the last generated trajectory. Useful to continue a run.

        Returns
        -------
        Trajectory
            the actual trajectory object
        """
        return self.load(len(self) - 1)

    @property
    def first(self):
        """
        Returns the first stored object.

        Returns
        -------
        Object
            the actual last stored object
        """
        return self.load(0)

    def free(self):
        """
        Return the number of the next free index

        Returns
        -------
        index : int
            the number of the next free index in the storage.
            Used to store a new object.
        """
        count = len(self)
        self._free = set([idx for idx in self._free if idx >= count])
        idx = count
        while idx in self._free:
            idx += 1

        return idx

    def reserve_idx(self, idx):
        """
        Locks an idx as used
        """
        self._free.add(idx)

    def _init(self):
        """
        Initialize the associated storage to allow for object storage. Mainly
        creates an index dimension with the name of the object.

        Parameters
        ----------
        units : dict of {str : simtk.unit.Unit} or None
            representing a dict of string representing a dimension
            ('length', 'velocity', 'energy') pointing to
            the simtk.unit.Unit to be used. If not None overrides the standard
            units used in the storage
        """
        # define dimensions used for the specific object
        self.storage.createDimension(self.prefix, 0)

        if self.has_name:
            self.init_variable("name",
                               'str',
                               description='A name',
                               chunksizes=tuple([10240]))

        if self.json:
            self.init_variable(
                "json",
                'json',
                description='A json serialized version of the object',
                chunksizes=tuple([10240]))

    def _restore(self):
        pass

    # ==============================================================================
    # INITIALISATION UTILITY FUNCTIONS
    # ==============================================================================

    def init_variable(self, name, var_type, dimensions=None, **kwargs):
        """
        Create a new variable in the netCDF storage. This is just a helper
        function to structure the code better.

        Parameters
        ==========
        name : str
            The name of the variable to be created
        var_type : str
            The string representing the type of the data stored in the variable.
            Allowed are strings of native Python types, in which case the variables
            will be treated as Python objects, or a string of the form 'numpy.type'
            which refers to the numpy data types. Numpy is preferred since the API
            to netCDF uses numpy and is thus faster. Possible input strings are
            `int`, `float`, `long`, `str`, `numpy.float32`, `numpy.float64`,
            `numpy.int8`, `numpy.int16`, `numpy.int32`, `numpy.int64`
        dimensions : str or tuple of str
            A tuple representing the dimensions used for the netcdf variable.
            If not specified then the default dimension of the storage is used.
        units : str
            A string representing the units used. If the var_type is `float`
            the units are set to `none`
        description : str
            A string describing the variable in a readable form.
        variable_length : bool
            If true the variable is treated as a variable-length (list) of the
            given type. A built-in example for this type is a string, which is
            a variable-length sequence of chars. This makes special handling
            of mixed types superfluous
        chunksizes : tuple of int
            A tuple of ints, one per dimension. This specifies in what
            block sizes a variable is stored. Usually for object-related data
            we want to store everything of one object at once, so this is often
            (1, ..., ...)
        """

        # add the main dimension to the var_type

        if type(dimensions) is str:
            dimensions = [dimensions]

        if type(dimensions) is int:
            if dimensions == 1:
                dimensions = ['scalar']
            else:
                dimensions = [dimensions]

        if dimensions is None:
            dimensions = (self.prefix, )
        else:
            dimensions = tuple([self.prefix] + list(dimensions))

        self.storage.create_variable(self.prefix + '_' + name,
                                     var_type=var_type,
                                     dimensions=dimensions,
                                     **kwargs)

    # ==============================================================================
    # COLLECTIVE VARIABLE UTILITY FUNCTIONS
    # ==============================================================================

    @property
    def op_idx(self):
        """
        Returns a function that, for an object of this storage, returns the idx.
        This can be used to construct order parameters that return the index
        in this storage. Useful for visualization

        Returns
        -------
        function
            the function that reports the index (int) in this store or None if it is not stored
        """
        def idx(obj):
            return self.index.get(obj, None)

        return idx

    # =============================================================================
    # LOAD/SAVE DECORATORS FOR CACHE HANDLING
    # =============================================================================

    def load(self, idx):
        """
        Returns an object from the storage.

        Parameters
        ----------
        idx : int or str
            either the integer index of the object to be loaded or a string
            (name) for named objects. This will always return the first object
            found with the specified name.

        Returns
        -------
        object
            the loaded object
        """

        if type(idx) is not str and idx < 0:
            return None

        if not hasattr(self, 'cache'):
            return self._load(idx)

        n_idx = idx

        if type(idx) is str:
            # we want to load by name and it was not in cache.
            if self.has_name:
                if idx in self.name_idx:
                    if len(self.name_idx[idx]) > 1:
                        logger.debug(
                            'Found name "%s" multiple (%d) times in storage! Loading first!'
                            % (idx, len(self.name_idx[idx])))

                    n_idx = self.name_idx[idx][0]
                else:
                    # since it is not found in the cache before. Refresh the cache
                    self.update_name_cache()

                    # and give it another shot
                    if idx in self.name_idx:
                        if len(self.name_idx[idx]) > 1:
                            logger.debug(
                                'Found name "%s" multiple (%d) times in storage! Loading first!'
                                % (idx, len(self.name_idx[idx])))

                        n_idx = self.name_idx[idx][0]
                    else:
                        raise ValueError('str "' + idx +
                                         '" not found in storage')

        elif type(idx) is not int:
            raise ValueError(
                'indices of type "%s" are not allowed in named storage' %
                type(idx).__name__)

        # if it is in the cache, return it
        try:
            obj = self.cache[n_idx]
            logger.debug('Found IDX #' + str(idx) + ' in cache. Not loading!')
            return obj

        except KeyError:
            pass

        # turn into python int if it was a numpy int (in some rare cases!)
        n_idx = int(n_idx)

        logger.debug('Calling load object of type ' +
                     self.content_class.__name__ + ' and IDX #' + str(idx))

        if n_idx >= len(self):
            logger.warning('Trying to load from IDX #' + str(n_idx) +
                           ' >= number of objects ' + str(len(self)))
            return None
        elif n_idx < 0:
            logger.warning('Trying to load negative IDX #' + str(n_idx) +
                           ' < 0. This should never happen!!!')
            raise RuntimeError(
                'Loading of negative int should result in no object. This should never happen!'
            )
        else:
            obj = self._load(idx)

        self.index[obj] = n_idx

        if self.has_name and hasattr(obj, '_name'):
            setattr(obj, '_name',
                    self.storage.variables[self.prefix + '_name'][idx])
            # make sure that you cannot change the name of loaded objects
            obj.fix_name()

        if obj is not None:
            # update cache there might have been a change due to naming
            self.cache[n_idx] = obj

            # finally store the name of a named object in cache
            if self.has_name and obj._name != '':
                self._update_name_in_cache(obj._name, n_idx)

        return obj

    def save(self, obj, idx=None):
        """
        Saves an object to the storage.

        Parameters
        ----------
        obj : object
            the object to be stored
        idx : int or string or `None`
            the index to be used for storing. This is highly discouraged since
            it changes an immutable object (at least in the storage). It is
            better to also store the new object and just ignore the
            previously stored one.

        """

        if idx is None:
            if obj in self.index:
                # has been saved so quit and do nothing
                return self.index[obj]
            elif type(obj) is LoaderProxy:
                return obj._idx
            else:
                idx = self.free()
        elif type(idx) is str:
            # Not yet supported
            if self.has_name and obj._name_fixed is False:
                obj.name = idx
        else:
            #assume int like
            idx = int(idx)

        self.index[obj] = idx

        # make sure in nested saving that an IDX is not used twice!
        self.reserve_idx(idx)

        logger.debug('Saving ' + str(type(obj)) + ' using IDX #' + str(idx))
        self._save(obj, idx)

        if self.has_name and hasattr(obj, '_name'):
            # logger.debug('Object ' + str(type(obj)) + ' with IDX #' + str(idx))
            # logger.debug(repr(obj))
            # logger.debug("Cleaning up name; currently: " + str(obj._name))
            if obj._name is None:
                # this should not happen!
                logger.debug(
                    "Nameable object has not been initialized correctly. Has None in _name"
                )
                raise AttributeError(
                    '_name needs to be a string for nameable objects.')

            obj.fix_name()

            self.storage.variables[self.prefix + '_name'][idx] = obj._name

        # store the name in the cache
        if hasattr(self, 'cache'):
            self.cache[idx] = obj
            if self.has_name and obj._name != '':
                # and also the name, if it has one so we can load by
                # name afterwards from cache
                self._update_name_in_cache(obj._name, idx)

        return idx
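A sketch of the access patterns Example #7 provides once a store has been registered with a storage; the object names and indices below are illustrative:

    obj   = store[3]                  # numpy-style: load by integer index
    objs  = store[2:10:2]             # slice -> list of loaded objects
    picks = store[[0, 4, 7]]          # list of indices -> list of objects
    named = store.find_first('tps')   # first object with that name (has_name=True)
    for item in store:                # iterates over all stored objects
        pass
    idx = store.save(obj)             # returns the index used for storage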
Example #8
    def __init__(self, content_class, json=True,
                 caching=None, nestable=False, has_name=False):

        """

        Parameters
        ----------
        storage
        content_class
        json
        dimension_units
        caching : dict-like or bool or int or None
            the dict-like object used for caching.
            `True` means to use a Python built-in dict with unlimited caching.
            Be careful.
            `False` means no caching at all. If a dict-like object is passed,
            it will be used.
            An integer `n` means to use LRU caching with at most `n` elements
            and is equivalent to `cache=LRUCache(n)`.
            Default (`None`) is equivalent to `cache=ObjectStore.default_cache`
        nestable : bool
            if `True` the content_class is saved as nested dict objects rather
            than as pointers to saved objects. The saved complex object is then
            stored only once and not split into several objects that reference
            each other in a tree-like fashion

        Notes
        -----
        Usually you want caching, but limited. It is recommended to use an
        LRUCache with a reasonable size that depends on the typical number of
        objects to cache and their size

        Attributes
        ----------

        storage : Storage
            a reference to the Storage object where all data is stored
        content_class : class
            a reference to the class type to be stored using this Storage
        has_name : bool
            if `True` objects can also be loaded by a string name
        json : string
            the JSON-serialized string of the object, if already computed
        simplifier : util.StorableObjectJSON
            an instance of a JSON serializer
        identifier : str
            name of the netCDF variable that contains the string by which the
            object is identified. So far this is `name`
        cache : dict-like (int or str : object)
            a dictionary that holds references to all stored elements by index
            or by string for named objects. This is only used for cached access
            if caching is not `False`

        Notes
        -----
        The class that takes care of storing data in a file is called a Storage,
        so the netCDF subclassed Storage is a storage. The classes that know how
        to load and save an object from the storage are called stores,
        like ObjectStore, SampleStore, etc...

        """

        self._storage = None
        self.content_class = content_class
        self.prefix = None
        self.cache = NoCache()
        self.has_name = has_name
        self.json = json
        self._free = set()
        self._cached_all = False
        self._names_loaded = False
        self.nestable = nestable
        self.name_idx = dict()

        self.variables = dict()
        self.vars = dict()
        self.units = dict()

        self.index = weakref.WeakKeyDictionary()
Example #9
class ObjectStore(object):
    """
    Base Class for storing complex objects in a netCDF4 file. It holds a
    reference to the store file.
    """

    allowed_types = [
        'int', 'float', 'long', 'str', 'bool',
        'numpy.float32', 'numpy.float64', 'numpy.int8', 'numpy.int16',
        'numpy.int32', 'numpy.int64', 'numpy.uint8', 'numpy.uint16',
        'numpy.uint32', 'numpy.uint64', 'index', 'length'
    ]

    class DictDelegator(object):
        def __init__(self, store, dct):
            self.prefix = store.prefix + '_'
            self.dct = dct

        def __getitem__(self, item):
            return self.dct[self.prefix + item]

    def prefix_delegate(self, dct):
        return ObjectStore.DictDelegator(self, dct)

    default_cache = 10000

    def __init__(self, content_class, json=True,
                 caching=None, nestable=False, has_name=False):

        """

        Parameters
        ----------
        storage
        content_class
        json
        dimension_units
        caching : dict-like or bool or int or None
            the dict-like object used for caching.
            `True` means to use a Python built-in dict with unlimited caching.
            Be careful.
            `False` means no caching at all. If a dict-like object is passed,
            it will be used.
            An integer `n` means to use LRU caching with at most `n` elements
            and is equivalent to `cache=LRUCache(n)`.
            Default (`None`) is equivalent to `cache=ObjectStore.default_cache`
        nestable : bool
            if `True` the content_class is saved as nested dict objects rather
            than as pointers to saved objects. The saved complex object is then
            stored only once and not split into several objects that reference
            each other in a tree-like fashion

        Notes
        -----
        Usually you want caching, but limited. It is recommended to use an
        LRUCache with a reasonable size that depends on the typical number of
        objects to cache and their size

        Attributes
        ----------

        storage : Storage
            a reference to the Storage object where all data is stored
        content_class : class
            a reference to the class type to be stored using this Storage
        has_name : bool
            if `True` objects can also be loaded by a string name
        json : string
            the JSON-serialized string of the object, if already computed
        simplifier : util.StorableObjectJSON
            an instance of a JSON serializer
        identifier : str
            name of the netCDF variable that contains the string by which the
            object is identified. So far this is `name`
        cache : dict-like (int or str : object)
            a dictionary that holds references to all stored elements by index
            or by string for named objects. This is only used for cached access
            if caching is not `False`

        Notes
        -----
        The class that takes care of storing data in a file is called a Storage,
        so the netCDF subclassed Storage is a storage. The classes that know how
        to load and save an object from the storage are called stores,
        like ObjectStore, SampleStore, etc...

        """

        self._storage = None
        self.content_class = content_class
        self.prefix = None
        self.cache = NoCache()
        self.has_name = has_name
        self.json = json
        self._free = set()
        self._cached_all = False
        self._names_loaded = False
        self.nestable = nestable
        self.name_idx = dict()

        self.variables = dict()
        self.vars = dict()
        self.units = dict()

        self.index = weakref.WeakKeyDictionary()

    def register(self, storage, name):
        self._storage = storage
        self.prefix = name

        self.variables = self.prefix_delegate(self.storage.variables)
        self.units = self.prefix_delegate(self.storage.units)
        self.vars = self.prefix_delegate(self.storage.vars)

    @property
    def storage(self):
        if self._storage is None:
            raise RuntimeError('A store needs to be added to a storage to be used!')

        return self._storage

    @property
    def dimension_units(self):
        return self.storage.dimension_units

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return "store.%s[%s]" % (
            self.prefix, self.content_class.__name__)

    @property
    def simplifier(self):
        return self.storage.simplifier

    def set_caching(self, caching):
        if caching is None:
            caching = self.default_cache

        if caching is True:
            caching = MaxCache()
        elif caching is False:
            caching = NoCache()
        elif type(caching) is int:
            caching = WeakLRUCache(caching)

        if isinstance(caching, Cache):
            self.cache = caching.transfer(self.cache)

    def idx(self, obj):
        """
        Return the index in this store for a given object

        Parameters
        ----------
        obj : object
            the object that can be stored in this store for which its index is
            to be returned

        Returns
        -------
        int or None
            The integer index of the given object or None if it is not stored yet
        """
        return self.index.get(obj, None)

    def update_name_cache(self):
        """
        Update the internal cache with all stored names in the store.
        This allows named objects to be loaded by name
        """
        if self.has_name:
            if not self._names_loaded:
                for idx, name in enumerate(self.storage.variables[self.prefix + "_name"][:]):
                    self._update_name_in_cache(name, idx)

                self._names_loaded = True

    def _update_name_in_cache(self, name, idx):
        if name != '':
            if name not in self.name_idx:
                self.name_idx[name] = [idx]
            else:
                if idx not in self.name_idx[name]:
                    self.name_idx[name].append(idx)

    def find(self, name):
        """
        Return all objects with a given name

        Parameters
        ----------
        name : str
            the name to be searched for

        Returns
        -------
        list of objects
            a list of found objects, can be empty [] if no objects with
            that name exist

        """
        if self.has_name:
            if name not in self.name_idx:
                self.update_name_cache()

            return self[self.name_idx[name]]

        return []

    def find_indices(self, name):
        """
        Return indices for all objects with a given name

        Parameters
        ----------
        name : str
            the name to be searched for

        Returns
        -------
        list of int
            a list of indices in the storage for all found objects,
            can be empty [] if no objects with that name exist

        """
        if self.has_name:
            if name not in self.name_idx:
                self.update_name_cache()

            return self.name_idx[name]

        return []

    def find_first(self, name):
        """
        Return first object with a given name

        Parameters
        ----------
        name : str
            the name to be searched for

        Returns
        -------
        object or None
            the first found object, can be None if no object with the given
            name exists

        """
        if self.has_name:
            if name not in self.name_idx:
                self.update_name_cache()

            if len(self.name_idx[name]) > 0:
                return self[self.name_idx[name][0]]

        return None

    def __iter__(self):
        """
        Add iteration over all elements in the storage
        """
        return self.iterator()

    def __len__(self):
        """
        Return the number of stored objects

        Returns
        -------
        int
            number of stored objects

        Notes
        -----
        Equal to `store.count()`
        """
        return len(self.storage.dimensions[self.prefix])

    def iterator(this, iter_range=None):
        """
        Return an iterator over all objects in the storage

        Parameters
        ----------
        iter_range : slice or None
            if this is not `None` it confines the iterator to objects specified
            in the slice

        Returns
        -------
        Iterator()
            The iterator that iterates the objects in the store

        """

        class ObjectIterator:
            def __init__(self):
                self.storage = this
                self.iter_range = iter_range
                if iter_range is None:
                    self.idx = 0
                    self.end = len(self.storage)
                else:
                    self.idx = iter_range.start
                    self.end = iter_range.stop

            def __iter__(self):
                return self

            def next(self):
                if self.idx < self.end:
                    obj = self.storage.load(self.idx)
                    if self.iter_range is not None and self.iter_range.step is not None:
                        self.idx += self.iter_range.step
                    else:
                        self.idx += 1
                    return obj
                else:
                    raise StopIteration()

        return ObjectIterator()

    def write(self, variable, idx, obj, attribute=None):
        if attribute is None:
            attribute = variable

        var = self.vars[variable]
        val = getattr(obj, attribute)

        var[int(idx)] = val

        if var.var_type.startswith('lazy'):
            proxy = var.store.proxy(val)
            setattr(obj, attribute, proxy)

    def proxy(self, item):
        if item is None:
            return None

        if type(item) is not int:
            idx = self.index.get(item, None)
        else:
            idx = item

        if idx is None:
            return item
        else:
            return LoaderProxy(self, idx)

    def __getitem__(self, item):
        """
        Enable numpy style selection of object in the store
        """
        try:
            if type(item) is int or type(item) is str:
                return self.load(item)
            elif type(item) is slice:
                return [self.load(idx) for idx in range(*item.indices(len(self)))]
            elif type(item) is list:
                return [self.load(idx) for idx in item]
            elif item is Ellipsis:
                return self.iterator()
        except KeyError:
            return None

    def _load(self, idx):
        obj = self.vars['json'][idx]
        return obj

    #        return self.load_json(self.prefix + '_json', idx)

    def clear_cache(self):
        """Clear the cache and force reloading

        """

        self.cache.clear()
        self._cached_all = False

    def cache_all(self):
        """Load all samples as fast as possible into the cache

        """
        if not self._cached_all:
            idxs = range(len(self))
            jsons = self.variables['json'][:]

            [self.add_single_to_cache(i, j) for i, j in zip(
                idxs,
                jsons)]

            self._cached_all = True

    def add_single_to_cache(self, idx, json):
        """
        Add a single object to cache by json
        """

        if idx not in self.cache:
            simplified = yaml.load(json)
            obj = self.simplifier.build(simplified)

            obj.json = json
            self.index[obj] = idx
            self.cache[idx] = obj

            if self.has_name:
                name = self.storage.variables[self.prefix + '_name'][idx]
                setattr(obj, '_name', name)
                if name != '':
                    self._update_name_in_cache(obj._name, idx)

    def _save(self, obj, idx):
        self.vars['json'][idx] = obj

    @property
    def last(self):
        """
        Returns the last generated trajectory. Useful to continue a run.

        Returns
        -------
        Trajectory
            the actual trajectory object
        """
        return self.load(len(self) - 1)

    @property
    def first(self):
        """
        Returns the first stored object.

        Returns
        -------
        Object
            the actual last stored object
        """
        return self.load(0)

    def free(self):
        """
        Return the number of the next free index

        Returns
        -------
        index : int
            the number of the next free index in the storage.
            Used to store a new object.
        """
        count = len(self)
        self._free = set([idx for idx in self._free if idx >= count])
        idx = count
        while idx in self._free:
            idx += 1

        return idx

    def reserve_idx(self, idx):
        """
        Locks an idx as used
        """
        self._free.add(idx)

    def _init(self):
        """
        Initialize the associated storage to allow for object storage. Mainly
        creates an index dimension with the name of the object.

        Parameters
        ----------
        units : dict of {str : simtk.unit.Unit} or None
            representing a dict of string representing a dimension
            ('length', 'velocity', 'energy') pointing to
            the simtk.unit.Unit to be used. If not None overrides the standard
            units used in the storage
        """
        # define dimensions used for the specific object
        self.storage.createDimension(self.prefix, 0)

        if self.has_name:
            self.init_variable("name", 'str',
                               description='A name',
                               chunksizes=tuple([10240]))

        if self.json:
            self.init_variable("json", 'json',
                               description='A json serialized version of the object',
                               chunksizes=tuple([10240]))

    def _restore(self):
        pass

    # ==============================================================================
    # INITIALISATION UTILITY FUNCTIONS
    # ==============================================================================

    def init_variable(self, name, var_type, dimensions=None, **kwargs):
        """
        Create a new variable in the netCDF storage. This is just a helper
        function to structure the code better.

        Parameters
        ==========
        name : str
            The name of the variable to be created
        var_type : str
            The string representing the type of the data stored in the variable.
            Allowed are strings of native Python types, in which case the variables
            will be treated as Python objects, or a string of the form 'numpy.type'
            which refers to the numpy data types. Numpy is preferred since the API
            to netCDF uses numpy and is thus faster. Possible input strings are
            `int`, `float`, `long`, `str`, `numpy.float32`, `numpy.float64`,
            `numpy.int8`, `numpy.int16`, `numpy.int32`, `numpy.int64`
        dimensions : str or tuple of str
            A tuple representing the dimensions used for the netcdf variable.
            If not specified then the default dimension of the storage is used.
        units : str
            A string representing the units used. If the var_type is `float`
            the units are set to `none`
        description : str
            A string describing the variable in a readable form.
        variable_length : bool
            If true the variable is treated as a variable-length (list) of the
            given type. A built-in example for this type is a string, which is
            a variable-length sequence of chars. This makes special handling
            of mixed types superfluous
        chunksizes : tuple of int
            A tuple of ints, one per dimension. This specifies in what
            block sizes a variable is stored. Usually for object-related data
            we want to store everything of one object at once, so this is often
            (1, ..., ...)
        """

        # add the main dimension to the var_type

        if type(dimensions) is str:
            dimensions = [dimensions]

        if type(dimensions) is int:
            if dimensions == 1:
                dimensions = ['scalar']
            else:
                dimensions = [dimensions]

        if dimensions is None:
            dimensions = (self.prefix,)
        else:
            dimensions = tuple([self.prefix] + list(dimensions))

        self.storage.create_variable(
            self.prefix + '_' + name,
            var_type=var_type,
            dimensions=dimensions,
            **kwargs
        )

    # ==============================================================================
    # COLLECTIVE VARIABLE UTILITY FUNCTIONS
    # ==============================================================================

    @property
    def op_idx(self):
        """
        Returns a function that, for an object of this storage, returns the idx.
        This can be used to construct order parameters that return the index
        in this storage. Useful for visualization

        Returns
        -------
        function
            the function that reports the index (int) in this store or None if it is not stored
        """

        def idx(obj):
            return self.index.get(obj, None)

        return idx


    # =============================================================================
    # LOAD/SAVE DECORATORS FOR CACHE HANDLING
    # =============================================================================

    def load(self, idx):
        """
        Returns an object from the storage.

        Parameters
        ----------
        idx : int or str
            either the integer index of the object to be loaded or a string
            (name) for named objects. This will always return the first object
            found with the specified name.

        Returns
        -------
        object
            the loaded object
        """

        if type(idx) is not str and idx < 0:
            return None

        if not hasattr(self, 'cache'):
            return self._load(idx)

        n_idx = idx

        if type(idx) is str:
            # we want to load by name and it was not in cache.
            if self.has_name:
                if idx in self.name_idx:
                    if len(self.name_idx[idx]) > 1:
                        logger.debug('Found name "%s" multiple (%d) times in storage! Loading first!' % (
                            idx, len(self.name_idx[idx])))

                    n_idx = self.name_idx[idx][0]
                else:
                    # since it is not found in the cache before. Refresh the cache
                    self.update_name_cache()

                    # and give it another shot
                    if idx in self.name_idx:
                        if len(self.name_idx[idx]) > 1:
                            logger.debug('Found name "%s" multiple (%d) times in storage! Loading first!' % (
                                idx, len(self.name_idx[idx])))

                        n_idx = self.name_idx[idx][0]
                    else:
                        raise ValueError('str "' + idx + '" not found in storage')

        elif type(idx) is not int:
            raise ValueError('indices of type "%s" are not allowed in named storage' % type(idx).__name__)

        # if it is in the cache, return it
        try:
            obj = self.cache[n_idx]
            logger.debug('Found IDX #' + str(idx) + ' in cache. Not loading!')
            return obj

        except KeyError:
            pass

        # turn into python int if it was a numpy int (in some rare cases!)
        n_idx = int(n_idx)

        logger.debug('Calling load object of type ' + self.content_class.__name__ + ' and IDX #' + str(idx))

        if n_idx >= len(self):
            logger.warning('Trying to load from IDX #' + str(n_idx) + ' >= number of objects ' + str(len(self)))
            return None
        elif n_idx < 0:
            logger.warning('Trying to load negative IDX #' + str(n_idx) + ' < 0. This should never happen!!!')
            raise RuntimeError('Loading of negative int should result in no object. This should never happen!')
        else:
            obj = self._load(idx)

        self.index[obj] = n_idx

        if self.has_name and hasattr(obj, '_name'):
            setattr(obj, '_name',
                    self.storage.variables[self.prefix + '_name'][idx])
            # make sure that you cannot change the name of loaded objects
            obj.fix_name()

        if obj is not None:
            # update cache there might have been a change due to naming
            self.cache[n_idx] = obj

            # finally store the name of a named object in cache
            if self.has_name and obj._name != '':
                self._update_name_in_cache(obj._name, n_idx)

        return obj

    def save(self, obj, idx=None):
        """
        Saves an object to the storage.

        Parameters
        ----------
        obj : object
            the object to be stored
        idx : int or string or `None`
            the index to be used for storing. This is highly discouraged since
            it changes an immutable object (at least in the storage). It is
            better to also store the new object and simply ignore the
            previously stored one.

        """

        if idx is None:
            if obj in self.index:
                # has been saved so quit and do nothing
                return self.index[obj]
            elif type(obj) is LoaderProxy:
                return obj._idx
            else:
                idx = self.free()
        elif type(idx) is str:
            # Not yet supported
            if self.has_name and obj._name_fixed is False:
                obj.name = idx
        else:
            # assume int-like
            idx = int(idx)

        self.index[obj] = idx

        # make sure in nested saving that an IDX is not used twice!
        self.reserve_idx(idx)

        logger.debug('Saving ' + str(type(obj)) + ' using IDX #' + str(idx))
        self._save(obj, idx)

        if self.has_name and hasattr(obj, '_name'):
            # logger.debug('Object ' + str(type(obj)) + ' with IDX #' + str(idx))
            # logger.debug(repr(obj))
            # logger.debug("Cleaning up name; currently: " + str(obj._name))
            if obj._name is None:
                # this should not happen!
                logger.debug("Nameable object has not been initialized correctly. Has None in _name")
                raise AttributeError('_name needs to be a string for nameable objects.')

            obj.fix_name()

            self.storage.variables[self.prefix + '_name'][idx] = obj._name

        # store the name in the cache
        if hasattr(self, 'cache'):
            self.cache[idx] = obj
            if self.has_name and obj._name != '':
                # and also the name, if it has one so we can load by
                # name afterwards from cache
                self._update_name_in_cache(obj._name, idx)

        return idx
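
A hedged round-trip sketch for the save/load pair above; `store` and `obj` are hypothetical stand-ins for an instance of this store class (already attached to an open storage) and a nameable `StorableMixin` object whose name has not been fixed yet.

# assumption: `store` is a registered instance of the store class above and
# `obj` is a nameable StorableMixin; none of these names are defined here
obj.name = 'initial_frame'             # optional: name the object before saving

idx = store.save(obj)                  # returns the integer index used
same = store.load(idx)                 # repeated loads are served from store.cache
by_name = store.load('initial_frame')  # first object found with that name
assert same is by_name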
Exemple #10
0
class ObjectStore(StorableMixin):
    """
    Base Class for storing complex objects in a netCDF4 file. It holds a
    reference to the store file.

    Attributes
    ----------
    content_class : :obj:`mongodb.base.StorableMixin`
        a reference to the class type to be stored using this Storage. Must be
        subclassed from :obj:`mongodb.base.StorableMixin`
    cache : :py:class:`mongodb.cache.Cache`
        a dictionary that holds references to all stored elements by index
        or string for named objects. This is only used for cached access
        if caching is not False. Must be of type
        :py:class:`mongodb.cache.Cache` or subclassed.

    """
    _restore_non_initial_attr = False

    allowed_types = [
        'int', 'float', 'long', 'str', 'bool', 'numpy.float32',
        'numpy.float64', 'numpy.int8', 'numpy.int16', 'numpy.int32',
        'numpy.int64', 'numpy.uint8', 'numpy.uint16', 'numpy.uint32',
        'numpy.uint64', 'index', 'length', 'uuid'
    ]

    default_store_chunk_size = 256
    default_cache = 10000

    def __init__(self, name, content_class):
        """

        Parameters
        ----------
        name : str
            the name of the store
        content_class : class
            the base class of the content, must be subclassed from
            `StorableMixin`

        """

        super(ObjectStore, self).__init__()
        self._storage = None
        self.content_class = content_class
        self.cache = NoCache()
        self._free = set()
        self._cached_all = False
        self._created = False
        self._document = None

        self.name = name

        self.attribute_list = {}
        self.cv = {}

        # This will not be stored since its information is contained in the
        # dimension names
        self._dimension_name_store = None

        self.variables = dict()
        self.units = dict()

        self.index = None

        self.proxy_index = WeakValueDictionary()

        if self.content_class is not None \
                and not issubclass(self.content_class, StorableMixin):
            raise ValueError(
                'Content class "%s" must be subclassed from StorableMixin.' %
                self.content_class.__name__)

    def is_created(self):
        return self._created

    def to_dict(self):
        return {'content_class': self.content_class, 'name': self.name}

    def check_size(self):
        """
        Perform an update in case the DB has been extended by an external source

        Returns
        -------
        bool
            returns True if an update was performed

        """
        if len(self) > len(self.index):
            self.load_indices()
            return True

        return False

    def register(self, storage):
        """
        Associate the object store to a specific storage with a given name

        Parameters
        ----------
        storage : :class:`mongodb.NetCDFPlus`
            the storage to be associated with

        """
        self._storage = storage
        self.name = self.name

        self.index = self.create_uuid_index()
        self._document = storage.db[self.name]

    @staticmethod
    def create_uuid_index():
        return []

    def restore(self):
        self.load_indices()

    def load_indices(self):
        # self.index.clear()
        # self.index.extend(
        self.index = [int(UUID(x)) for x in self._document.distinct('_id')]

    @property
    def storage(self):
        """Return the associated storage object

        Returns
        -------
        :class:`mongodb.NetCDFPlus`
            the referenced storage object
        """

        if self._storage is None:
            raise RuntimeError(
                'A storage needs to be added to this store to be used! '
                'Use .register() to do so.')

        return self._storage

    def __str__(self):
        return repr(self)

    def __repr__(self):

        return 'store.%s[%s] : %s' % (
            self.name, self.content_class.__name__ if self.content_class
            is not None else 'None/ANY', str(len(self)) + ' object(s)')

    @property
    def simplifier(self):
        """
        Return the simplifier instance used to create JSON serialization

        Returns
        -------
        :class:`mongodb.dictify.StorableObjectJSON`
            the simplifier object used in the associated storage

        """
        return self.storage.simplifier

    def set_caching(self, caching):
        """
        Set the caching mode for this store

        Parameters
        ----------
        caching : bool, int, :class:`mongodb.Cache` or None
            if `True` an unbounded `MaxCache` is used, if `False` caching is
            disabled via `NoCache`, an integer installs a `WeakLRUCache` of
            that size, and `None` falls back to the store's `default_cache`

        """
        if caching is None:
            caching = self.default_cache

        if caching is True:
            caching = MaxCache()
        elif caching is False:
            caching = NoCache()
        elif type(caching) is int:
            caching = WeakLRUCache(caching)

        if isinstance(caching, Cache):
            self.cache = caching.transfer(self.cache)
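
    # A hedged sketch of how the caching arguments above map onto cache
    # objects (`store` is a hypothetical instance; MaxCache, NoCache and
    # WeakLRUCache are the cache classes referenced in this module):
    #
    #   store.set_caching(True)    # unbounded MaxCache
    #   store.set_caching(False)   # NoCache, caching disabled
    #   store.set_caching(2500)    # WeakLRUCache holding up to 2500 objects
    #   store.set_caching(None)    # default_cache (10000) -> WeakLRUCache(10000)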

    def idx(self, obj):
        """
        Return the index in this store for a given object

        Parameters
        ----------
        obj : :class:`mongodb.base.StorableMixin`
            the object stored in this store whose index is to be returned

        Returns
        -------
        int or None
            The integer index of the given object or None if it is not
            stored yet
        """
        if obj.__uuid__ in self.index:
            return self.index.index(obj.__uuid__)

        return None

    def __iter__(self):
        """
        Iterate over all objects in the store
        """
        self.check_size()
        for uuid in list(self.index):
            yield self.load(uuid)

    def __len__(self):
        """
        Return the number of stored objects

        Returns
        -------
        int
            number of stored objects

        """
        if hasattr(self, '_document'):
            if self._document:
                return self._document.count()

        return 0

    def proxy(self, item):
        """
        Return a proxy of an object in this store

        Parameters
        ----------
        item : :py:class:`mongodb.base.StorableMixin`, long or str
            the item, UUID or UUID string that points to an object in this
            store and to which a proxy is requested

        Returns
        -------
        :class:`LoaderProxy` or None
            a lazy proxy pointing to the object in this store, or `None` if
            `item` is `None`
        """
        if item is None:
            return None

        tt = type(item)
        if tt is long:
            idx = item
        elif tt in [str, unicode]:
            if item[0] == '-':
                return None
            idx = int(UUID(item))
        else:
            idx = item.__uuid__

        return LoaderProxy(self, idx)
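
    # Hedged sketch of the three input forms proxy() accepts (`store` and
    # `obj` are hypothetical):
    #
    #   store.proxy(obj)                           # a stored StorableMixin, by __uuid__
    #   store.proxy(obj.__uuid__)                  # the UUID as a long
    #   store.proxy(str(UUID(int=obj.__uuid__)))   # the UUID string kept in '_id'
    #
    # each call returns a LoaderProxy that loads the real object lazily.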

    def __contains__(self, item):
        if item.__uuid__ in self.index:
            return True

        if self.check_size():
            if item.__uuid__ in self.index:
                return True

        return False

    def __getitem__(self, item):
        """
        Enable numpy-style selection of objects in the store
        """
        try:
            if type(item) is int:
                if item < 0:
                    item += len(self)
                return self.load(item)
            elif type(item) is str or type(item) is long:
                return self.load(item)
            elif type(item) is list:
                return [self.load(idx) for idx in item]
            elif item is Ellipsis:
                return iter(self)
        except KeyError:
            return None

    def get(self, item):
        try:
            return self[item]
        except KeyError:
            if self.check_size():
                try:
                    return self[item]
                except KeyError:
                    pass

        return None

    def consume_one(self, test_fnc=None):
        """
        Remove one object and return it in the process

        Parameters
        ----------
        test_fnc : function
            only objects for which this function returns True are considered

        Returns
        -------
        None or `StorableMixin`
            if None then no object was removed, otherwise the consumed object
            is returned

        """
        consumed = None
        while consumed is None and len(self) > 0:
            if test_fnc is None:
                one = self.one
            else:

                try:
                    one = next(t for t in self if test_fnc(t))
                except StopIteration:
                    break

            idx = one.__uuid__
            erg = self._document.remove({'_id': str(UUID(int=idx))})
            if erg['ok']:
                consumed = one
            else:
                # this means we hit a race condition and the one we found
                # had been deleted in the meantime
                # just retry and get another
                pass

        if consumed is not None:
            self.index.remove(consumed.__uuid__)
            if consumed.__uuid__ in self.cache:
                del self.cache[consumed.__uuid__]

        return consumed
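
    # Usage sketch for consume_one (the predicate and `run` are hypothetical):
    #
    #   task = store.consume_one(lambda t: getattr(t, 'state', None) == 'queued')
    #   if task is not None:
    #       run(task)   # the object has been removed from the store and cache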

    def modify_one(self, key, value, update):
        """
        Change an attribute of one object

        Parameters
        ----------
        key : str
            the name of the attribute to be changed
        value : object
            the old value to be found and changed
        update : object
            the new value to change it to

        Returns
        -------
        None or `StorableMixin`
            if None then no object was altered, otherwise the changed object
            is returned

        """
        modified = None
        while modified is None and len(self) > 0:

            erg = self._document.find_and_modify(
                query={key: value},
                update={"$set": {
                    key: update
                }},
                upsert=False)

            if erg is not None:
                # success, we got it
                idx = int(UUID(erg['_id']))

                # remove from cache
                if idx in self.cache:
                    del self.cache[idx]

                modified = self.load(idx)

        return modified

    def modify_test_one(self, test_fnc, key, value, update):
        """
        Change an attribute of one object that matches a function

        Parameters
        ----------
        test_fnc : function
            only objects for which this function returns True are considered
        key : str
            the name of the attribute to be changed
        value : object
            the old value to be found and changed
        update : object
            the new value to change it to

        Returns
        -------
        None or `StorableMixin`
            if None then no object was altered, otherwise the changed object
            is returned

        """
        modified = None
        while modified is None and len(self) > 0:
            try:
                found_ones = self._document.find({key: value})
                one = next(t for t in (self.load(int(UUID(f['_id'])))
                                       for f in found_ones) if test_fnc(t))

            except StopIteration:
                break

            idx = one.__uuid__

            erg = self._document.find_and_modify(
                query={
                    key: value,
                    '_id': str(UUID(int=idx))
                },
                update={"$set": {
                    key: update
                }},
                upsert=False)

            if erg is not None:
                # success, we got it

                # remove from cache
                if idx in self.cache:
                    del self.cache[idx]

                modified = self.load(idx)

        return modified
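
    # Sketch of the two modify helpers above (the field names are
    # hypothetical):
    #
    #   # set 'state' from 'queued' to 'running' on one matching document
    #   task = store.modify_one('state', 'queued', 'running')
    #
    #   # same, but only for documents that also pass a Python-side test
    #   task = store.modify_test_one(
    #       lambda t: t.priority > 5, 'state', 'queued', 'running')
    #
    # the reloaded, modified object is returned once a match was changed.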

    def _load(self, idx):
        obj = self.storage.simplifier.from_simple_dict(
            self._document.find_one({'_id': str(UUID(int=idx))}))
        obj.__store__ = self
        return obj

    def clear_cache(self):
        """Clear the cache and force reloading"""

        self.cache.clear()
        self._cached_all = False

    def cache_all(self):
        """Load all samples as fast as possible into the cache"""
        if not self._cached_all:
            idxs = range(len(self))
            jsons = self.variables['json'][:]

            for i, j in zip(idxs, jsons):
                self.add_single_to_cache(i, j)

            self._cached_all = True

    def _save(self, obj):
        dct = self.storage.simplifier.to_simple_dict(obj)
        self._document.insert(dct)
        obj.__store__ = self

    @property
    def one(self):
        """
        Returns one random object.

        Returns
        -------
        `StorableMixin`
            the content of the store
        """
        idx = int(UUID(self._document.find_one()['_id']))
        return self.load(idx)

    @property
    def last(self):
        """
        Returns the last saved object.

        This is only accurate to seconds!

        Returns
        -------
        `StorableMixin`
            the content of the store
        """
        idx = int(UUID(self._document.find_one(sort=[("_time", -1)])['_id']))
        return self.load(idx)

    @property
    def first(self):
        """
        Returns the first saved object.

        This is only accurate to seconds!

        Returns
        -------
        `StorableMixin`
            the content of the store
        """
        idx = int(UUID(self._document.find_one(sort=[("_time", 1)])['_id']))
        return self.load(idx)

    def free(self):
        """
        Return the number of the next free index for this store

        Returns
        -------
        index : int
            the number of the next free index in the storage.
            Used to store a new object.
        """

        idx = len(self)

        return idx

    def initialize(self):
        """
        Initialize the associated storage to allow for object storage. Mainly
        creates an index dimension with the name of the object.
        """

        self._created = True

    # ==========================================================================
    # LOAD/SAVE DECORATORS FOR CACHE HANDLING
    # ==========================================================================

    def find_one(self, dct):
        idx = self._document.find_one(dct)['_id']
        return self.load(int(UUID(idx)))

    def load(self, idx):
        """
        Returns an object from the storage.

        Parameters
        ----------
        idx : long or str
            the UUID (as a long) of the object to be loaded, or the name of a
            named object

        Returns
        -------
        :py:class:`mongodb.base.StorableMixin`
            the loaded object
        """

        if type(idx) is str:
            idx = int(UUID(self._document.find_one({'name': idx})['_id']))

        if type(idx) is long:
            if idx not in self.index:
                self.check_size()
                if idx not in self.index:
                    raise ValueError('UUID %s not found in storage' % idx)

        else:
            raise ValueError(
                ('indices of type "%s" are not allowed in named storage '
                 '(only str and long)') % type(idx).__name__)

        # if it is in the cache, return it
        try:
            obj = self.cache[idx]
            logger.debug('Found IDX #' + str(idx) + ' in cache. Not loading!')
            return obj

        except KeyError:
            pass

        logger.debug('Calling load object of type `%s` @ IDX #%d' %
                     (self.content_class.__name__, idx))

        obj = self._load(idx)

        logger.debug('Calling load object of type %s and IDX # %d ... DONE' %
                     (self.content_class.__name__, idx))

        if obj is not None:
            # update cache there might have been a change due to naming
            self.cache[idx] = obj

            logger.debug(
                'Try loading UUID object of type %s and IDX # %d ... DONE' %
                (self.content_class.__name__, idx))

        logger.debug('Finished load object of type %s and IDX # %d ... DONE' %
                     (self.content_class.__name__, idx))

        return obj
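
    # Hedged sketch of the two lookup forms load() accepts (the names are
    # hypothetical):
    #
    #   obj = store.load(some_uuid_long)        # by UUID (a long), as kept in store.index
    #   obj = store.load('initial_snapshot')    # by the document's 'name' field
    #
    # plain ints are rejected; repeated loads of the same UUID are served
    # from store.cache.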

    @staticmethod
    def reference(obj):
        return obj.__uuid__

    def save(self, obj):
        """
        Saves an object to the storage.

        Parameters
        ----------
        obj : :class:`mongodb.base.StorableMixin`
            the object to be stored

        """
        uuid = obj.__uuid__

        if uuid in self.index:
            # has been saved so quit and do nothing
            return self.reference(obj)

        if isinstance(obj, LoaderProxy):
            if obj._store is self:
                # is a proxy of a saved object so do nothing
                return uuid
            else:
                # it is stored but not in this store so we try storing the
                # full attribute which might be still in cache or memory
                # if that is not the case it will be stored again. This can
                # happen when you load from one store save to another. And load
                # again after some time while the cache has been changed and try
                # to save again the loaded object. We will not explicitly store
                # a table that matches objects between different storages.
                return self.save(obj.__subject__)

        if not isinstance(obj, self.content_class):
            raise ValueError(
                ('This store can only store objects of base type "%s". Given '
                 'obj is of type "%s". You might need to use another store.') %
                (self.content_class.__name__, obj.__class__.__name__))

        # mark as saved so circular dependencies will not cause infinite loops
        n_idx = len(self.index)
        self.index.append(uuid)

        logger.debug('Saving ' + str(type(obj)) + ' using IDX #' + str(uuid))

        try:
            self._save(obj)
            self.cache[uuid] = obj

        except:
            # in case we did not succeed remove the mark as being saved
            del self.index[n_idx]
            raise

        return self.reference(obj)

    def add_single_to_cache(self, idx, json):
        """
        Add a single object to cache by json

        Parameters
        ----------
        idx : int
            the index where the object was stored
        json : str
            json string that represents a serialized version of the stored object
        """

        if idx not in self.cache:
            obj = self.simplifier.from_json(json)

            # self._get_id(idx, obj)

            self.cache[idx] = obj
            self.index[obj.__uuid__] = idx

            return obj
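
A minimal end-to-end sketch of how the MongoDB-backed ObjectStore above might be wired up and used. `Sample` and `storage` are hypothetical placeholders for a `StorableMixin` subclass and an already-opened storage object that exposes the `db` collection mapping and the `simplifier` relied on by `register()`, `_save()` and `_load()`.

# hypothetical wiring; `storage` and `Sample` are assumptions, not part of
# the example above
store = ObjectStore('samples', content_class=Sample)
store.register(storage)             # binds store._document to storage.db['samples']
store.set_caching(1000)             # WeakLRUCache with up to 1000 entries

uuid_ref = store.save(Sample())     # returns the object's __uuid__
obj = store.load(uuid_ref)          # cached after the first load
lazy = store.proxy(uuid_ref)        # LoaderProxy, resolved on attribute access

print(store)                        # e.g. store.samples[Sample] : 1 object(s)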