def bodies():
    """Yield ``(index, key, text)`` for every annotation key that has stored text.

    Keys are taken from ``annotations()`` and looked up in ``storage``.
    Keys whose text is missing (``KeyError``) or stored as ``None`` are
    reported through ``logger.debug`` (and printed when ``DEBUG > 5``)
    and skipped; skipped keys do not consume an index, so indices are
    consecutive starting from 1.

    The text is escaped at the bytes level (tab, NUL and CR become a
    space, a double quote is doubled, a newline becomes a literal
    backslash-n) and then decoded as UTF-8.  When ``SHORTEN > 0`` the
    text is cut to that many characters and ``"..."`` is appended, but
    only if something was actually cut off.
    """
    # (raw byte, replacement) pairs; the replacements never produce a byte
    # that a later pair would touch, so the order is irrelevant.
    escapes = (
        (b"\t", b" "),
        (b'"', b'""'),
        (b"\n", b"\\n"),
        (b"\000", b" "),
        (b"\x0d", b" "),
    )
    index = 1
    for key in annotations():
        # NOTE: Sent as hexdigest, but received as bytes, must be decoded.
        key = key.decode('utf-8')
        try:
            content = storage.get(key)
        except KeyError:
            problem = "Text for id: {} not found."
        else:
            # storage.get() may hand back None instead of raising.
            problem = "Text for id: {} is empty." if content is None else None
        if problem is not None:
            msg = problem.format(hexdigest(key))
            logger.debug(msg)
            if DEBUG > 5:
                print(msg)
            continue
        for raw, replacement in escapes:
            content = content.replace(raw, replacement)
        content = content.decode('utf8')
        # Mark the text as shortened only when it really was shortened.
        # NOTE(review): the cut is made after escaping, so it can still
        # split a doubled quote or a "\\n" pair - confirm consumers cope.
        if 0 < SHORTEN < len(content):
            content = content[:SHORTEN] + "..."
        yield (index, key, content)
        index += 1
def get(self, key):
    """Return the content stored under the supplied key.

    Arguments:
    - `key`: Key of the content to be retrieved.

    Returns the stored content, transparently decompressed when it
    carries a zlib header, or ``None`` when the database has no value
    for the resolved key.  Keys known to ``self.locs`` are served by
    ``self._get_from_dirs`` instead.  ``resolve_compressed`` is called
    without ``no_raise``, so an unknown key propagates its ``KeyError``.

    A mismatch between the hash of the loaded content and the key is
    only logged; the content is returned regardless.
    """
    # Two-byte headers zlib emits for compression levels 0-1, 2-5,
    # 6 (default) and 7-9 respectively (RFC 1950, CMF=0x78).  Checking only
    # b'x\x9c' would miss data compressed at a non-default level.
    zlib_headers = (b'x\x01', b'x\x5e', b'x\x9c', b'x\xda')

    key = intdigest(key)
    if self.locs.check(key) >= 0:
        return self._get_from_dirs(key)
    c_key = self.resolve_compressed(key)
    logger.debug("PhysKey: %d", c_key)
    content = self.db.get(c_key)
    if content is None:
        return None
    if content[:2] in zlib_headers:
        try:
            content = zlib.decompress(content)
        except zlib.error:
            # Not a compressed format after all: keep the raw bytes.
            pass
    loaded_hash = self.hash(content)
    stored_hash = hexdigest(key)
    if loaded_hash != stored_hash:
        logger.error("Hashes are different!")
    else:
        logger.info("Hashes are ok! %s", loaded_hash)
    return content
def remove(self, key):
    """Delete the content stored under the supplied key.

    Arguments:
    - `key`: Key of the content to be deleted.

    The key is first resolved through ``resolve_compressed`` (whose
    ``KeyError`` for an unknown key propagates), the resolved entry is
    removed from ``self.db`` and the hex digest of `key` is returned.
    """
    physical_key = self.resolve_compressed(key)
    self.db.remove(physical_key)
    digest = hexdigest(key)
    return digest
def resolve(self, key):
    """Check that content exists under the supplied key.

    Arguments:
    - `key`: Key of the content to be checked.  Anything that is not a
      string is first converted with ``hexdigest``.

    Returns the (string) key when ``self.conn.check`` reports it as
    present; raises ``KeyError`` otherwise.
    """
    # isinstance() rather than an exact type comparison, so that str
    # subclasses are accepted as ready-made digests too.
    if not isinstance(key, str):
        key = hexdigest(key)
    if self.conn.check(key):
        return key
    raise KeyError("no such key")
def resolve_compressed(self, key, no_raise=False):
    """Resolve a key to the integer key under which the database holds it.

    Arguments:
    - `key`: Key to resolve; it is normalised with ``intdigest``.
    - `no_raise`: When true, a missing key yields ``False`` instead of
      an exception.

    Returns the integer key when ``self.db.check`` reports a
    non-negative result for it.  Otherwise returns ``False`` if
    `no_raise` is set, or raises ``KeyError``.
    """
    digest = intdigest(key)
    present = self.db.check(digest) >= 0
    if present:
        return digest
    if not no_raise:
        raise KeyError("no content for key: " + hexdigest(digest))
    return False
def put(self, content, metadata=None):
    """Store `content` in the database and return its key as a hex digest.

    Arguments:
    - `content`: The data to store.  The key is derived from the hash of
      this original (uncompressed) data.
    - `metadata`: Optional mapping describing the content.  Its MIME
      entries ("Content-Type", "mimetype", "mime-type", "Mime-Type") and
      its "File-Name" entry are inspected to detect content that is
      already compressed.

    Content that is not recognised as already compressed, is no longer
    than ``self.size_tr`` and for which ``self.zlib_level > 0`` is
    zlib-compressed; the compressed form is stored only when it is
    actually smaller.  In that case, and if a `metadata` mapping was
    supplied, ``'nfo:uncompressedSize'`` is added to it.
    """
    key = intdigest(self._hash(content))
    org_size = len(content)
    compressed = False
    # Last MIME value inspected; kept only for the debug message below.
    # Initialised here so the log line cannot hit an unbound local when
    # the metadata carries no MIME entry at all.
    mimes = None

    if metadata is not None:
        for mime_key in ("Content-Type", "mimetype", "mime-type", "Mime-Type"):
            if mime_key not in metadata:
                continue
            mimes = metadata[mime_key]
            candidates = mimes if isinstance(mimes, list) else [mimes]
            if any('compressed' in mime or mime in COMP_MIMES
                   for mime in candidates):
                compressed = True
                break
        filename = metadata.get("File-Name", None)
        if filename and any(filename.endswith(ext) for ext in COMP_EXT):
            compressed = True
        logger.debug("STORAGE got mime(s):" + str(mimes))

    new_md = {}
    if (not compressed and len(content) <= self.size_tr
            and self.zlib_level > 0):
        new_content = zlib.compress(content, self.zlib_level)
        if len(content) > len(new_content):
            content = new_content
            new_md['nfo:uncompressedSize'] = org_size
        else:
            logger.info("STORAGE: Compressed is bigger, than original.")

    self.db.set(key, content)
    # metadata defaults to None; only record the extra fields when the
    # caller actually supplied a mapping to record them in.
    if new_md and metadata is not None:
        metadata.update(new_md)
    return hexdigest(key)
def proc_attrs(self, attrs):
    """Yield the SPARQL results describing the annotation target of a body.

    Arguments:
    - `attrs`: Mapping with a ``'hid'`` entry; its ``hexdigest`` is used
      as the ``nie:identifier`` of the annotation body to look up.

    The query selects date, title (optional), file name, identifier and
    MIME type, and is run via ``self.sparql`` against ``self.doc``; the
    results are yielded as produced.
    """
    hash_id = attrs['hid']
    body_id = hexdigest(hash_id)
    logger.debug((hash_id, body_id))
    query_head = (
        ' SELECT DISTINCT ?date ?title ?file ?id ?mimetype'
        ' WHERE {'
        ' ?ann a oa:Annotation .'
        ' ?ann oa:annotatedAt ?date .'
        ' ?ann oa:hasTarget ?target .'
        ' ?ann oa:hasBody ?body .'
        ' OPTIONAL { ?target nie:title ?title } .'
        ' ?body nie:identifier "'
    )
    query_tail = (
        '" .'
        ' ?target nfo:fileName ?file .'
        ' ?target nmo:mimeType ?mimetype .'
        ' ?target nie:identifier ?id .'
        ' } '
    )
    query = query_head + body_id + query_tail
    logger.debug(query)
    yield from self.sparql(query, self.doc)
def hash(self, content):
    """Return the hex digest of `content` as computed by ``self._hash``.

    NOTE: Digest for original non-compressed content.
    """
    raw_digest = self._hash(content)
    return hexdigest(raw_digest)