def bodies():
    """Yield ``(index, key, text)`` for each annotation key with stored content.

    Keys come from ``annotations()`` as bytes and are decoded to ``str``
    before the lookup in ``storage``.  Keys whose content is missing
    (``KeyError``) or ``None`` are reported and skipped; they do not consume
    an index, so the yielded indices are consecutive, starting at 1.

    The content bytes are flattened before decoding: tabs, NUL bytes and
    carriage returns become spaces, double quotes are doubled and newlines
    become a literal backslash-n.  When ``SHORTEN`` is positive, the decoded
    text is cut to that many characters and "..." is appended.
    """

    def report(message):
        # Always log at debug level; echo to stdout only when very verbose.
        logger.debug(message)
        if DEBUG > 5:
            print(message)

    # Applied in this order, on the raw bytes, before decoding.
    substitutions = (
        (b"\t", b" "),
        (b'"', b'""'),
        (b"\n", b"\\n"),
        (b"\000", b" "),
        (b"\x0d", b" "),
    )

    index = 1
    for raw_key in annotations():
        # !!! NOTE Sent as hexdigest, but received as bytes, must be decoded.
        key = raw_key.decode('utf-8')
        try:
            content = storage.get(key)
        except KeyError:
            report("Text for id: {} not found.".format(hexdigest(key)))
            continue
        if content is None:  # Due to a bug, e.g.
            report("Text for id: {} is empty.".format(hexdigest(key)))
            continue
        for old, new in substitutions:
            content = content.replace(old, new)
        text = content.decode('utf8')
        if SHORTEN > 0:
            text = text[:SHORTEN] + "..."
        yield (index, key, text)
        index += 1
    def get(self, key):
        """Return the content stored under the supplied key.

        Arguments:
        - `key`: Key of the content to be retrieved.

        The key is converted with `intdigest`.  When `self.locs.check`
        reports a non-negative result for it, the content is fetched through
        `self._get_from_dirs`; otherwise it is read from `self.db`.

        Returns the content, or None when the database holds no value for
        the resolved key.  Content that starts with a zlib header is
        decompressed; if decompression fails it is returned unchanged.  The
        hash of the returned content is compared with the key and the
        outcome is only logged -- a mismatch does not raise.

        Raises KeyError (from `resolve_compressed`) for an unknown key.
        """
        key = intdigest(key)

        if self.locs.check(key) >= 0:
            return self._get_from_dirs(key)

        c_key = self.resolve_compressed(key)
        logger.debug("PhysKey: %d", c_key)
        content = self.db.get(c_key)
        if content is None:
            return None

        # A zlib stream starts with a two-byte header (RFC 1950): the low
        # nibble of the first byte is 8 (deflate) and the big-endian 16-bit
        # value is a multiple of 31.  The second byte varies with the
        # compression level (78 01 / 78 5e / 78 9c / 78 da), so testing only
        # for b'x\x9c' would miss content written with a non-default
        # `zlib_level`.
        header = content[:2]
        looks_like_zlib = (
            isinstance(header, (bytes, bytearray))
            and len(header) == 2
            and header[0] & 0x0F == 8
            and (header[0] << 8 | header[1]) % 31 == 0
        )
        if looks_like_zlib:
            try:
                content = zlib.decompress(content)
            except zlib.error:
                pass  # Not a compressed format

        loaded_hash = self.hash(content)
        stored_hash = hexdigest(key)
        if loaded_hash != stored_hash:
            logger.error("Hashes are different!")
        else:
            logger.info("Hashes are ok! %s", loaded_hash)
        return content
    def remove(self, key):
        """Delete the content stored under the supplied key.

        Arguments:
        - `key`: Key of a content to be deleted.

        Returns the `hexdigest` form of `key`.  Whatever
        `resolve_compressed` raises for the key propagates to the caller.
        """
        self.db.remove(self.resolve_compressed(key))
        return hexdigest(key)
    def resolve(self, key):
        """Check that content exists under the supplied key.

        Arguments:
        - `key`: Key of a content to be checked.  A `str` key is used as
          is; any other key is first converted with `hexdigest`.

        Returns the string key when `self.conn.check` accepts it.
        Raises KeyError otherwise.
        """
        # isinstance (rather than an exact type comparison) so that str
        # subclasses are also treated as ready-to-use keys.
        if not isinstance(key, str):
            key = hexdigest(key)
        if self.conn.check(key):
            return key
        raise KeyError("no such key")
 def resolve_compressed(self, key, no_raise=False):
     """Resolve `key` to the key under which the database holds content.

     Arguments:
     - `key`: Key to look up; it is converted with `intdigest` first.
     - `no_raise`: When true, return False for an unknown key instead of
       raising.

     Returns the converted key when `self.db.check` reports a non-negative
     result for it.  Raises KeyError for an unknown key unless `no_raise`
     is set.
     """
     int_key = intdigest(key)
     found = self.db.check(int_key) >= 0
     if found:
         return int_key
     if not no_raise:
         raise KeyError("no content for key: " + hexdigest(int_key))
     return False
    def put(self, content, metadata=None):
        """Store `content` in the database and return its hex digest key.

        Arguments:
        - `content`: Data to store; its hash (via `self._hash`) becomes the
          key.
        - `metadata`: Optional mapping.  Its mime-type entries
          ("Content-Type", "mimetype", "mime-type", "Mime-Type") and its
          "File-Name" entry are inspected to decide whether the content is
          already compressed.  When the content gets compressed here, the
          mapping is updated in place with 'nfo:uncompressedSize'.

        The content is zlib-compressed only when nothing marks it as already
        compressed, its length does not exceed `self.size_tr` and
        `self.zlib_level` is positive; the compressed form is kept only when
        it is strictly smaller than the original.
        """
        key = intdigest(self._hash(content))
        compressed = False
        org_size = len(content)
        if metadata is not None:
            for mk in ["Content-Type", "mimetype", "mime-type", "Mime-Type"]:
                if mk not in metadata:
                    continue
                md = metadata[mk]
                # A mime entry may be a single value or a list of values.
                mdl = md if isinstance(md, list) else [md]
                if any(md_.find('compressed') >= 0 or md_ in COMP_MIMES
                       for md_ in mdl):
                    compressed = True
                if compressed:
                    break
                # The mime type did not settle it; fall back to the
                # file-name extension.
                filename = metadata.get("File-Name", None)
                if filename and any(
                        filename.endswith(ext) for ext in COMP_EXT):
                    compressed = True
                logger.debug("STORAGE got mime(s):" + str(md))

        new_md = {}
        if (not compressed and len(content) <= self.size_tr
                and self.zlib_level > 0):
            new_content = zlib.compress(content, self.zlib_level)
            if len(content) > len(new_content):
                content = new_content
                new_md['nfo:uncompressedSize'] = org_size
            else:
                logger.info("STORAGE: Compressed is bigger, than original.")
        self.db.set(key, content)
        # With the default metadata=None there is no mapping to update;
        # calling .update() on None would fail after the content has already
        # been stored.
        if new_md and metadata is not None:
            metadata.update(new_md)
        return hexdigest(key)
Exemple #7
0
    def proc_attrs(self, attrs):
        """Yield the SPARQL results for annotations whose body has a given id.

        Arguments:
        - `attrs`: Mapping with a 'hid' entry.  The entry is converted with
          `hexdigest` and embedded in the query as the body's
          nie:identifier.

        The query is run through `self.sparql` against `self.doc`, and its
        results are yielded one by one.
        """
        hash_id = attrs['hid']
        identifier = hexdigest(hash_id)
        logger.debug((hash_id, identifier))

        # The identifier is spliced in between the two literal halves of
        # the query text.
        query_head = '''
        SELECT DISTINCT ?date ?title ?file ?id ?mimetype
        WHERE {
           ?ann a oa:Annotation .
           ?ann oa:annotatedAt ?date .
           ?ann oa:hasTarget ?target .
           ?ann oa:hasBody ?body .
        OPTIONAL { ?target nie:title ?title } .
           ?body nie:identifier "'''
        query_tail = '''" .
           ?target nfo:fileName ?file .
           ?target nmo:mimeType ?mimetype .
           ?target nie:identifier ?id .
        }
        '''
        query = query_head + identifier + query_tail

        logger.debug(query)
        yield from self.sparql(query, self.doc)
 def hash(self, content):
     """Return the hex digest of `content`, computed via `self._hash`.

     NOTE: Digest for original non-compressed content.
     """
     raw_digest = self._hash(content)
     return hexdigest(raw_digest)