Esempio n. 1
0
class CacheSet:
    """
    A Set-like Cache that wraps :class:`diskcache.Index`
    """
    def __init__(self, iterable=(), directory=None):
        self.index = Index(directory)
        self.update(*iterable)

    def add(self, obj: object):
        if not isinstance(obj, Hashable):
            raise TypeError(f'{type(obj)} is not Hashable',
                            f'{str(obj)[:100]}...')
        self.index[hash(obj)] = obj

    def remove(self, obj):
        try:
            self.index.pop(hash(obj))
        except KeyError:
            raise KeyError(obj)

    def pop(self):
        return self.index.popitem()[1]

    def update(self, *obj):
        for o in obj:
            self.add(o)

    def clear(self):
        self.index.clear()

    def difference(self, other):
        self.__sub__(other)

    def copy(self):
        return set(self).copy()

    def __iter__(self):
        return iter(self.index.values())

    def __contains__(self, item):
        self__hash = hash(item)
        return self__hash in self.index

    def __sub__(self, other: Iterable):
        return set(self) - set(other)

    def __len__(self):
        return len(self.index)

    def __str__(self):
        return f'CacheSet({", ".join(self)})'

    def __repr__(self):
        return str(self)

    @property
    def directory(self):
        return self.index.directory
Esempio n. 2
0
File: index.py Progetto: efiop/dvc
class ObjectDBIndex(ObjectDBIndexBase):
    """Class for indexing hashes in an ODB."""

    INDEX_SUFFIX = ".idx"
    INDEX_DIR = "index"

    def __init__(
        self,
        tmp_dir: "StrPath",
        name: str,
    ):  # pylint: disable=super-init-not-called
        from diskcache import Index

        from dvc.fs.local import LocalFileSystem
        from dvc.utils.fs import makedirs

        self.index_dir = os.path.join(tmp_dir, self.INDEX_DIR, name)
        makedirs(self.index_dir, exist_ok=True)
        self.fs = LocalFileSystem()
        self.index = Index(self.index_dir)

    def __iter__(self):
        return iter(self.index)

    def __contains__(self, hash_):
        return hash_ in self.index

    def dir_hashes(self):
        """Iterate over .dir hashes stored in the index."""
        yield from (hash_ for hash_, is_dir in self.index.items() if is_dir)

    def clear(self):
        """Clear this index (to force re-indexing later)."""
        from diskcache import Timeout

        try:
            self.index.clear()
        except Timeout as exc:
            raise ObjectDBError("Failed to clear ODB index") from exc

    def update(self, dir_hashes: Iterable[str], file_hashes: Iterable[str]):
        """Update this index, adding the specified hashes."""
        from diskcache import Timeout

        try:
            with self.index.transact():
                for hash_ in dir_hashes:
                    self.index[hash_] = True
            with self.index.transact():
                for hash_ in file_hashes:
                    self.index[hash_] = False
        except Timeout as exc:
            raise ObjectDBError("Failed to update ODB index") from exc

    def intersection(self, hashes: Set[str]):
        """Iterate over values from `hashes` which exist in the index."""
        yield from hashes.intersection(self.index.keys())
Esempio n. 3
0
    utc = UTC()

cache = Index(get_cachedir())
max_age = NODE_SETTINGS['max_cache_age']
utc_stamp = datetime.datetime.now(utc)  # use local time for console

# reset timestamp if needed
if 'utc-time' in cache:
    stamp = cache['utc-time']
    cache_age = utc_stamp - stamp  # this is a timedelta
    print('Cache age is: {} sec'.format(cache_age.seconds))
    print('Maximum cache age: {} sec'.format(max_age))
    if cache_age.seconds > max_age:
        print('Cache data is too old!!')
        print('Stale data will be removed!!')
        cache.clear()

size = len(cache)
print('{} items currently in cache.'.format(size))
print('Cache items: {}'.format(list(cache)))

try:
    res = update_state()
    size = len(cache)
except:  # noqa: E722
    print('No data available, cache was NOT updated')
else:
    if size < 1:
        print('No data available (live or cached)')
        exit(1)
Esempio n. 4
0
class connection(connection_base):
    def __init__(self, dbdir=None, baseiri=None, clear=False):
        '''
        Versa connection object built from DiskCache collection object
        '''
        self._dbdir = dbdir
        self._db = Index(dbdir)
        if clear: self._db.clear()
        self._ensure_abbreviations()
        #self.create_model()
        self._baseiri = baseiri
        self._abbr_index = 0
        return

    def copy(self, contents=True):
        '''Create a copy of this model, optionally without contents (i.e. just configuration)'''
        cp = connection(dbdir=self._dbdir, baseiri=self._baseiri)
        # FIXME!!!!!
        if contents: cp.add_many(self._relationships)
        return cp

    def query(self, expr):
        '''Execute a Versa query'''
        raise NotImplementedError

    def size(self):
        '''Return the number of links in the model'''
        count = 0
        for origin in self._db:
            if origin.startswith('@'):
                continue
            for rel, targetplus in self._db[origin].items():
                count += len(targetplus)
        return count
        #return  self._db_coll.count() - connection.META_ITEM_COUNT

    def __iter__(self):
        abbrevs = self._abbreviations()
        cursor = self._db_coll.find()
        index = 0
        for origin in self._db:
            if origin.startswith('@'):
                continue
            for rel, targetplus in self._db[origin].items():
                count += len(targetplus)
                for target, attribs in targetplus:
                    yield index, (origin, rel.format(**abbrevs), target.format(**abbrevs), attribs)
                    index += 1

    # FIXME: Statement indices don't work sensibly without some inefficient additions. Use e.g. match for delete instead
    def match(self, origin=None, rel=None, target=None, attrs=None, include_ids=False):
        '''
        Iterator over relationship IDs that match a pattern of components

        origin - (optional) origin of the relationship (similar to an RDF subject). If omitted any origin will be matched.
        rel - (optional) type IRI of the relationship (similar to an RDF predicate). If omitted any relationship will be matched.
        target - (optional) target of the relationship (similar to an RDF object), a boolean, floating point or unicode object. If omitted any target will be matched.
        attrs - (optional) attribute mapping of relationship metadata, i.e. {attrname1: attrval1, attrname2: attrval2}. If any attribute is specified, an exact match is made (i.e. the attribute name and value must match).
        include_ids - If true include statement IDs with yield values
        '''
        abbrevs = self._abbreviations()
        index = 0
        if origin is None:
            extent = self._db
        else:
            extent = [origin]

        for origin in extent:
            if origin.startswith('@'):
                continue
            for xrel, xtargetplus in self._db.get(origin, {}).items():
                xrel = xrel.format(**abbrevs)
                if rel and rel != xrel:
                    continue
                for xtarget, xattrs in xtargetplus:
                    index += 1
                    # FIXME: only expand target abbrevs if of resource type?
                    xtarget = xtarget.format(**abbrevs)
                    if target and target != xtarget:
                        continue
                    matches = True
                    if attrs:
                        for k, v in attrs.items():
                            if k not in xattrs or xattrs.get(k) != v:
                                matches = False
                    if matches:
                        if include_ids:
                            yield index, (origin, xrel, xtarget, xattrs)
                        else:
                            yield origin, xrel, xtarget, xattrs

        return

    def multimatch(self, origin=None, rel=None, target=None, attrs=None, include_ids=False):
        '''
        Iterator over relationship IDs that match a pattern of components, with multiple options provided for each component

        origin - (optional) origin of the relationship (similar to an RDF subject), or set of values. If omitted any origin will be matched.
        rel - (optional) type IRI of the relationship (similar to an RDF predicate), or set of values. If omitted any relationship will be matched.
        target - (optional) target of the relationship (similar to an RDF object), a boolean, floating point or unicode object, or set of values. If omitted any target will be matched.
        attrs - (optional) attribute mapping of relationship metadata, i.e. {attrname1: attrval1, attrname2: attrval2}. If any attribute is specified, an exact match is made (i.e. the attribute name and value must match).
        include_ids - If true include statement IDs with yield values
        '''
        raise NotImplementedError
        origin = origin if origin is None or isinstance(origin, set) else set([origin])
        rel = rel if rel is None or isinstance(rel, set) else set([rel])
        target = target if target is None or isinstance(target, set) else set([target])
        for index, curr_rel in enumerate(self._relationships):
            matches = True
            if origin and curr_rel[ORIGIN] not in origin:
                matches = False
            if rel and curr_rel[RELATIONSHIP] not in rel:
                matches = False
            if target and curr_rel[TARGET] not in target:
                matches = False
            if attrs:
                for k, v in attrs.items():
                    if k not in curr_rel[ATTRIBUTES] or curr_rel[ATTRIBUTES].get(k) != v:
                        matches = False
            if matches:
                if include_ids:
                    yield index, (curr_rel[0], curr_rel[1], curr_rel[2], curr_rel[3].copy())
                else:
                    yield (curr_rel[0], curr_rel[1], curr_rel[2], curr_rel[3].copy())
        return

    def add(self, origin, rel, target, attrs=None):
        '''
        Add one relationship to the model

        origin - origin of the relationship (similar to an RDF subject)
        rel - type IRI of the relationship (similar to an RDF predicate)
        target - target of the relationship (similar to an RDF object), a boolean, floating point or unicode object
        attrs - optional attribute mapping of relationship metadata, i.e. {attrname1: attrval1, attrname2: attrval2}
        '''
        if not origin: 
            raise ValueError('Relationship origin cannot be null')
        if not rel: 
            raise ValueError('Relationship ID cannot be null')

        attrs = attrs or {}

        origin_obj = self._db.get(origin)
        rel = self._abbreviate(rel)
        target = self._abbreviate(target)
        
        
        rel_info = {'rid': rel, 'instances': [[target, attrs]]}
        if origin_obj is None:
            self._db[origin] = {rel: [(target, attrs)]}
        else:
            origin_obj.setdefault(rel, []).append((target, attrs))
            self._db[origin] = origin_obj
        return

    def add_many(self, rels):
        '''
        Add a list of relationships to the extent

        rels - a list of 0 or more relationship tuples, e.g.:
        [
            (origin, rel, target, {attrname1: attrval1, attrname2: attrval2}),
        ]

        origin - origin of the relationship (similar to an RDF subject)
        rel - type IRI of the relationship (similar to an RDF predicate)
        target - target of the relationship (similar to an RDF object), a boolean, floating point or unicode object
        attrs - optional attribute mapping of relationship metadata, i.e. {attrname1: attrval1, attrname2: attrval2}
        '''
        for curr_rel in rels:
            attrs = {}
            if len(curr_rel) == 3:
                origin, rel, target = curr_rel
            elif len(curr_rel) == 4:
                origin, rel, target, attrs = curr_rel
            else:
                raise ValueError
            self.add(origin, rel, target, attrs)
        return

    #FIXME: Replace with a match_to_remove method
    def remove(self, index):
        '''
        Delete one or more relationship, by index, from the extent

        index - either a single index or a list of indices
        '''
        raise NotImplementedError
        if hasattr(index, '__iter__'):
            ind = set(index)
        else:
            ind = [index]

        # Rebuild relationships, excluding the provided indices
        self._relationships = [r for i, r in enumerate(self._relationships) if i not in ind]

    def __getitem__(self, i):
        raise NotImplementedError

    def __eq__(self, other):
        return repr(other) == repr(self)

    def _abbreviations(self):
        abbrev_obj = self._db['@_abbreviations']
        return abbrev_obj
        
    def _abbreviate(self, rid):
        '''
        Abbreviate a relationship or resource ID target for efficient storage
        in the DB. Works only with a prefix/suffix split of hierarchical HTTP-like IRIs,
        e.g. 'http://example.org/spam/eggs' becomes something like '{a23}eggs'
        and afterward there will be an entry in the prefix map from 'a23' to 'http://example.org/spam/'
        The map can then easily be used with str.format
        '''
        if not isinstance(rid, str) or '/' not in rid or not iri.matches_uri_syntax(rid):
            return rid
        head, tail = rid.rsplit('/', 1)
        head += '/'
        pmap = self._db['@_abbreviations']
        assert pmap is not None
        #FIXME: probably called too often to do this every time
        inv_pmap = {v: k for k, v in pmap.items()}
        if head in inv_pmap:
            prefix = inv_pmap[head]
        else:
            prefix = f'a{self._abbr_index}'
            pmap[prefix] = head
            self._abbr_index += 1
            self._db['@_abbreviations'] = pmap
        post_rid = '{' + prefix + '}' + tail
        return post_rid
        
    def _ensure_abbreviations(self):
        if '@_abbreviations' not in self._db:
            self._db['@_abbreviations'] = {}
        return
Esempio n. 5
0
class connection(connection_base):
    def __init__(self, dbdir=None, baseiri=None, clear=False):
        '''
        Versa connection object built from DiskCache collection object
        '''
        self._dbdir = dbdir
        self._db = Index(dbdir)
        if clear: self._db.clear()
        self._ensure_abbreviations()
        #self.create_model()
        self._baseiri = baseiri
        self._abbr_index = 0
        return

    def copy(self, contents=True):
        '''Create a copy of this model, optionally without contents (i.e. just configuration)'''
        cp = connection(dbdir=self._dbdir, baseiri=self._baseiri)
        # FIXME!!!!!
        if contents: cp.add_many(self._relationships)
        return cp

    def query(self, expr):
        '''Execute a Versa query'''
        raise NotImplementedError

    def size(self):
        '''Return the number of links in the model'''
        count = 0
        for origin in self._db:
            if origin.startswith('@'):
                continue
            for rel, targetplus in self._db[origin].items():
                count += len(targetplus)
        return count
        #return  self._db_coll.count() - connection.META_ITEM_COUNT

    def __iter__(self):
        abbrevs = self._abbreviations()
        cursor = self._db_coll.find()
        index = 0
        for origin in self._db:
            if origin.startswith('@'):
                continue
            for rel, targetplus in self._db[origin].items():
                try:
                    rel = rel.format(**abbrevs)
                except (KeyError, ValueError):
                    pass
                count += len(targetplus)
                for target, attribs in targetplus:
                    try:
                        target = target.format(**abbrevs)
                    except (KeyError, ValueError):
                        pass
                    yield index, (origin, rel, target, attribs)
                    index += 1

    # FIXME: Statement indices don't work sensibly without some inefficient additions. Use e.g. match for delete instead
    def match(self,
              origin=None,
              rel=None,
              target=None,
              attrs=None,
              include_ids=False):
        '''
        Iterator over relationship IDs that match a pattern of components

        origin - (optional) origin of the relationship (similar to an RDF subject). If omitted any origin will be matched.
        rel - (optional) type IRI of the relationship (similar to an RDF predicate). If omitted any relationship will be matched.
        target - (optional) target of the relationship (similar to an RDF object), a boolean, floating point or unicode object. If omitted any target will be matched.
        attrs - (optional) attribute mapping of relationship metadata, i.e. {attrname1: attrval1, attrname2: attrval2}. If any attribute is specified, an exact match is made (i.e. the attribute name and value must match).
        include_ids - If true include statement IDs with yield values
        '''
        abbrevs = self._abbreviations()
        index = 0
        if origin is None:
            extent = self._db
        else:
            extent = [origin]

        for origin in extent:
            if origin.startswith('@'):
                continue
            for xrel, xtargetplus in self._db.get(origin, {}).items():
                try:
                    xrel = xrel.format(**abbrevs)
                except (KeyError, ValueError):
                    pass
                if rel and rel != xrel:
                    continue
                for xtarget, xattrs in xtargetplus:
                    index += 1
                    # FIXME: only expand target abbrevs if of resource type?
                    try:
                        xtarget = xtarget.format(**abbrevs)
                    except (KeyError, ValueError):
                        pass
                    if target and target != xtarget:
                        continue
                    matches = True
                    if attrs:
                        for k, v in attrs.items():
                            if k not in xattrs or xattrs.get(k) != v:
                                matches = False
                    if matches:
                        if include_ids:
                            yield index, (origin, xrel, xtarget, xattrs)
                        else:
                            yield origin, xrel, xtarget, xattrs

        return

    def multimatch(self,
                   origin=None,
                   rel=None,
                   target=None,
                   attrs=None,
                   include_ids=False):
        '''
        Iterator over relationship IDs that match a pattern of components, with multiple options provided for each component

        origin - (optional) origin of the relationship (similar to an RDF subject), or set of values. If omitted any origin will be matched.
        rel - (optional) type IRI of the relationship (similar to an RDF predicate), or set of values. If omitted any relationship will be matched.
        target - (optional) target of the relationship (similar to an RDF object), a boolean, floating point or unicode object, or set of values. If omitted any target will be matched.
        attrs - (optional) attribute mapping of relationship metadata, i.e. {attrname1: attrval1, attrname2: attrval2}. If any attribute is specified, an exact match is made (i.e. the attribute name and value must match).
        include_ids - If true include statement IDs with yield values
        '''
        raise NotImplementedError
        origin = origin if origin is None or isinstance(origin, set) else set(
            [origin])
        rel = rel if rel is None or isinstance(rel, set) else set([rel])
        target = target if target is None or isinstance(target, set) else set(
            [target])
        for index, curr_rel in enumerate(self._relationships):
            matches = True
            if origin and curr_rel[ORIGIN] not in origin:
                matches = False
            if rel and curr_rel[RELATIONSHIP] not in rel:
                matches = False
            if target and curr_rel[TARGET] not in target:
                matches = False
            if attrs:
                for k, v in attrs.items():
                    if k not in curr_rel[
                            ATTRIBUTES] or curr_rel[ATTRIBUTES].get(k) != v:
                        matches = False
            if matches:
                if include_ids:
                    yield index, (curr_rel[0], curr_rel[1], curr_rel[2],
                                  curr_rel[3].copy())
                else:
                    yield (curr_rel[0], curr_rel[1], curr_rel[2],
                           curr_rel[3].copy())
        return

    def add(self, origin, rel, target, attrs=None):
        '''
        Add one relationship to the model

        origin - origin of the relationship (similar to an RDF subject)
        rel - type IRI of the relationship (similar to an RDF predicate)
        target - target of the relationship (similar to an RDF object), a boolean, floating point or unicode object
        attrs - optional attribute mapping of relationship metadata, i.e. {attrname1: attrval1, attrname2: attrval2}
        '''
        if not origin:
            raise ValueError('Relationship origin cannot be null')
        if not rel:
            raise ValueError('Relationship ID cannot be null')

        attrs = attrs or {}

        origin_obj = self._db.get(origin)
        rel = self._abbreviate(rel)
        target = self._abbreviate(target)

        if origin_obj is None:
            self._db[origin] = {rel: [(target, attrs)]}
        else:
            origin_obj.setdefault(rel, []).append((target, attrs))
            self._db[origin] = origin_obj
        return

    def add_many(self, rels):
        '''
        Add a list of relationships to the extent

        rels - a list of 0 or more relationship tuples, e.g.:
        [
            (origin, rel, target, {attrname1: attrval1, attrname2: attrval2}),
        ]

        origin - origin of the relationship (similar to an RDF subject)
        rel - type IRI of the relationship (similar to an RDF predicate)
        target - target of the relationship (similar to an RDF object), a boolean, floating point or unicode object
        attrs - optional attribute mapping of relationship metadata, i.e. {attrname1: attrval1, attrname2: attrval2}
        '''
        for curr_rel in rels:
            attrs = {}
            if len(curr_rel) == 3:
                origin, rel, target = curr_rel
            elif len(curr_rel) == 4:
                origin, rel, target, attrs = curr_rel
            else:
                raise ValueError
            self.add(origin, rel, target, attrs)
        return

    #FIXME: Replace with a match_to_remove method
    def remove(self, index):
        '''
        Delete one or more relationship, by index, from the extent

        index - either a single index or a list of indices
        '''
        raise NotImplementedError
        if hasattr(index, '__iter__'):
            ind = set(index)
        else:
            ind = [index]

        # Rebuild relationships, excluding the provided indices
        self._relationships = [
            r for i, r in enumerate(self._relationships) if i not in ind
        ]

    def __getitem__(self, i):
        raise NotImplementedError

    def __eq__(self, other):
        return repr(other) == repr(self)

    def _abbreviations(self):
        abbrev_obj = self._db['@_abbreviations']
        return abbrev_obj

    def _abbreviate(self, rid):
        '''
        Abbreviate a relationship or resource ID target for efficient storage
        in the DB. Works only with a prefix/suffix split of hierarchical HTTP-like IRIs,
        e.g. 'http://example.org/spam/eggs' becomes something like '{a23}eggs'
        and afterward there will be an entry in the prefix map from 'a23' to 'http://example.org/spam/'
        The map can then easily be used with str.format
        '''
        if not isinstance(
                rid, str) or '/' not in rid or not iri.matches_uri_syntax(rid):
            return rid
        head, tail = rid.rsplit('/', 1)
        head += '/'
        pmap = self._db['@_abbreviations']
        assert pmap is not None
        #FIXME: probably called too often to do this every time
        inv_pmap = {v: k for k, v in pmap.items()}
        if head in inv_pmap:
            prefix = inv_pmap[head]
        else:
            prefix = f'a{self._abbr_index}'
            pmap[prefix] = head
            self._abbr_index += 1
            self._db['@_abbreviations'] = pmap
        post_rid = '{' + prefix + '}' + tail.replace('{', '{{').replace(
            '}', '}}')
        return post_rid

    def _ensure_abbreviations(self):
        if '@_abbreviations' not in self._db:
            self._db['@_abbreviations'] = {}
        return