def export_graph(
        graph: Graph,
        s3_file_name: str,
        s3_endpoint: S3ObjectStore = None,
        data_source_code: str = None
) -> bool:
    """Serialize an rdflib Graph as Turtle and upload it to S3 in parts.

    The graph is serialized through the 'ttl' serializer plugin into an
    in-memory, gzip-compressed buffer, which is then handed to the uploader
    obtained from ``s3_endpoint`` in pieces of at most 5 MiB.

    :param graph: the graph to serialize.
    :param s3_file_name: target object name; a ``.gz`` suffix makes the
        uploader get ``content_encoding='gzip'``, otherwise ``None``.
    :param s3_endpoint: object providing ``uploader_for(...)``.
    :param data_source_code: passed to the uploader as ``dataset_code``.
    :return: whatever ``uploader.complete()`` returns.
    """
    log_rule(f"Uploading in-memory graph as {s3_file_name} to S3")

    content_encoding = 'gzip' if Path(s3_file_name).suffix == '.gz' else None
    uploader = s3_endpoint.uploader_for(
        s3_file_name,
        mime=MIME_TURTLE,
        content_encoding=content_encoding,
        dataset_code=data_source_code
    )

    # Everything below happens in memory: the rdflib serializer does not seem
    # to support streaming, so the whole document is built before uploading.
    part_limit = 5 * 1024 * 1024  # 5Mb is minimum
    turtle_serializer = plugin.get('ttl', plugin.Serializer)(graph)

    # NOTE(review): the payload is gzip-compressed unconditionally, even when
    # content_encoding is None because the name has no '.gz' suffix -- confirm
    # that this is intended.
    compressed = BytesIO()
    with gzip.GzipFile(mode='wb', fileobj=compressed) as gz_stream:
        turtle_serializer.serialize(gz_stream, encoding="UTF-8")
    compressed.seek(0)

    # One reusable buffer; a short (or empty) read marks the final part.
    part_buffer = bytearray(part_limit)
    while True:
        bytes_read = compressed.readinto(part_buffer)
        log_item(f"{s3_file_name} chunk_size", str(bytes_read))
        if bytes_read > 0:
            uploader.part(part_buffer[0:bytes_read])
        if bytes_read != part_limit:
            break

    log_item(f"{s3_file_name} completing with", str(bytes_read))
    return uploader.complete()
class VCRHTTPResponse(HTTPResponse):
    """
    Stub response class that gets returned instead of a HTTPResponse.

    The response is rebuilt from ``recorded_response``, a mapping with the
    keys ``status`` (``code`` and ``message``), ``headers`` and ``body``
    (``string``, the raw body bytes). The body is served from an in-memory
    ``BytesIO``; the parent ``__init__`` is not called.
    """

    def __init__(self, recorded_response):
        """Build the stub from an already recorded response mapping."""
        self.fp = None
        self.recorded_response = recorded_response
        self.reason = recorded_response["status"]["message"]
        self.status = self.code = recorded_response["status"]["code"]
        self.version = None
        self._content = BytesIO(self.recorded_response["body"]["string"])
        self._closed = False

        headers = self.recorded_response["headers"]
        # Since we are loading a response that has already been serialized,
        # our response is no longer chunked. That means we don't want any
        # libraries trying to process a chunked response. By removing the
        # transfer-encoding: chunked header, this should cause the downstream
        # libraries to process this as a non-chunked response.
        # The header is deleted from the recorded mapping itself, so info()
        # and getheaders() no longer report it either.
        te_key = [h for h in headers.keys() if h.upper() == "TRANSFER-ENCODING"]
        if te_key:
            del headers[te_key[0]]

        self.headers = self.msg = parse_headers(headers)
        self.length = compat.get_header(self.msg, "content-length") or None

    @property
    def closed(self):
        # In Python 3 the value of self.closed cannot be changed directly, so
        # we toggle self._closed and use this property to shadow the real
        # self.closed from the superclass.
        return self._closed

    def read(self, *args, **kwargs):
        """Read from the recorded body; arguments go to ``BytesIO.read``."""
        return self._content.read(*args, **kwargs)

    def readall(self):
        """Return all remaining bytes of the recorded body."""
        # BytesIO is a buffered stream and has no readall() (that method
        # belongs to RawIOBase); read() without a size returns the remainder.
        return self._content.read()

    def readinto(self, *args, **kwargs):
        """Read body bytes into a caller-supplied buffer."""
        return self._content.readinto(*args, **kwargs)

    def readline(self, *args, **kwargs):
        """Read one line from the recorded body."""
        return self._content.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        """Read the remaining lines of the recorded body."""
        return self._content.readlines(*args, **kwargs)

    def seekable(self):
        """Report whether the underlying body buffer is seekable."""
        return self._content.seekable()

    def tell(self):
        """Return the current position in the body buffer."""
        return self._content.tell()

    def isatty(self):
        """Report whether the body buffer is a tty."""
        return self._content.isatty()

    def seek(self, *args, **kwargs):
        """Move the position in the body buffer."""
        return self._content.seek(*args, **kwargs)

    def close(self):
        """Mark the response as closed; the body buffer itself stays open."""
        self._closed = True
        return True

    def getcode(self):
        """Return the recorded status code."""
        return self.status

    def isclosed(self):
        """Return True once close() has been called."""
        return self.closed

    def info(self):
        """Return the recorded headers, freshly parsed."""
        return parse_headers(self.recorded_response["headers"])

    def getheaders(self):
        """Return the recorded headers as a list of header items."""
        message = parse_headers(self.recorded_response["headers"])
        return list(compat.get_header_items(message))

    def getheader(self, header, default=None):
        """Return all values of ``header`` joined by ", ", or ``default``.

        The header name is matched case-insensitively.
        """
        wanted = header.lower()
        values = [v for (k, v) in self.getheaders() if k.lower() == wanted]
        if values:
            return ", ".join(values)
        return default

    def readable(self):
        """Report whether the underlying body buffer is readable."""
        return self._content.readable()