Beispiel #1
0
def export_graph(
        graph: Graph,
        s3_file_name: str,
        s3_endpoint: S3ObjectStore = None,
        data_source_code: str = None
) -> bool:
    """Export the content of the given rdflib.Graph as a Turtle file to the given S3 bucket.

    The graph is serialized fully in memory and then handed to the uploader in
    parts of at most 5 MiB. The payload is gzip-compressed only when
    ``s3_file_name`` ends in ``.gz``, so the uploaded bytes always agree with
    the ``content_encoding`` announced to the uploader.

    :param graph: the graph to serialize as Turtle
    :param s3_file_name: name of the object to upload; a ``.gz`` suffix turns on gzip
    :param s3_endpoint: object store providing ``uploader_for``; required even
        though the signature defaults it to ``None``
    :param data_source_code: passed to the uploader as ``dataset_code``
    :return: the result of ``uploader.complete()``
    :raises ValueError: if ``s3_endpoint`` is ``None``
    """
    if s3_endpoint is None:
        # Fail with a clear message instead of an AttributeError on None below.
        raise ValueError("export_graph requires an s3_endpoint to upload to")
    log_rule(f"Uploading in-memory graph as {s3_file_name} to S3")
    compress = Path(s3_file_name).suffix == '.gz'
    uploader = s3_endpoint.uploader_for(
        s3_file_name,
        mime=MIME_TURTLE,
        content_encoding='gzip' if compress else None,
        dataset_code=data_source_code
    )
    #
    # Unfortunately this is all happening in memory, the rdflib serializer does not seem
    # to support streaming.
    #
    chunk_max_size = 5 * 1024 * 1024  # 5Mb is minimum
    serializer = plugin.get('ttl', plugin.Serializer)(graph)
    buf = BytesIO()
    if compress:
        with gzip.GzipFile(mode='wb', fileobj=buf) as stream:
            serializer.serialize(stream, encoding="UTF-8")
    else:
        # No gzip content encoding was announced, so upload the plain Turtle bytes.
        serializer.serialize(buf, encoding="UTF-8")
    chunk = bytearray(chunk_max_size)
    chunk_size = chunk_max_size
    buf.seek(0)

    # A short (or empty) read means the buffer is exhausted, which ends the loop.
    while chunk_size == chunk_max_size:
        chunk_size = buf.readinto(chunk)
        log_item(f"{s3_file_name} chunk_size", str(chunk_size))
        if chunk_size > 0:
            # Slice to a copy: ``chunk`` is overwritten by the next read.
            uploader.part(chunk[0:chunk_size])
    log_item(f"{s3_file_name} completing with", str(chunk_size))
    return uploader.complete()
Beispiel #2
0
class VCRHTTPResponse(HTTPResponse):
    """
    Stub response class that gets returned instead of a HTTPResponse.

    Status, headers and body are taken from an already recorded response; the
    file-like methods delegate to an in-memory ``BytesIO`` wrapping the body.
    """

    def __init__(self, recorded_response):
        """Populate the stub from ``recorded_response``.

        ``recorded_response`` is read as ``["status"]["message"]``,
        ``["status"]["code"]``, ``["body"]["string"]`` and ``["headers"]``.
        Transfer-encoding entries are deleted from
        ``recorded_response["headers"]`` in place.
        """
        # HTTPResponse.__init__ is not called; the attributes are set by hand.
        self.fp = None
        self.recorded_response = recorded_response
        self.reason = recorded_response["status"]["message"]
        self.status = self.code = recorded_response["status"]["code"]
        self.version = None
        self._content = BytesIO(self.recorded_response["body"]["string"])
        self._closed = False

        headers = self.recorded_response["headers"]
        # Since we are loading a response that has already been serialized, our
        # response is no longer chunked.  That means we don't want any
        # libraries trying to process a chunked response.  By removing the
        # transfer-encoding: chunked header, this should cause the downstream
        # libraries to process this as a non-chunked response.
        # Every case variant of the key is removed, not just the first match.
        te_keys = [h for h in headers.keys() if h.upper() == "TRANSFER-ENCODING"]
        for te_key in te_keys:
            del headers[te_key]
        self.headers = self.msg = parse_headers(headers)

        # Falsy header values (missing or empty) are normalized to None.
        self.length = compat.get_header(self.msg, "content-length") or None

    @property
    def closed(self):
        """Return whether ``close()`` has been called on this stub."""
        # In python3, I can't change the value of self.closed.  So I'm
        # twiddling self._closed and using this property to shadow the real
        # self.closed from the superclass.
        return self._closed

    def read(self, *args, **kwargs):
        """Read from the recorded body; arguments are passed to ``BytesIO.read``."""
        return self._content.read(*args, **kwargs)

    def readall(self):
        """Return all remaining bytes of the recorded body."""
        # BytesIO has no readall() (that is a RawIOBase method), so delegating
        # to it raised AttributeError; an argument-less read() does the job.
        return self._content.read()

    def readinto(self, *args, **kwargs):
        """Read body bytes into a caller-supplied buffer via ``BytesIO.readinto``."""
        return self._content.readinto(*args, **kwargs)

    def readline(self, *args, **kwargs):
        """Read one line of the recorded body via ``BytesIO.readline``."""
        return self._content.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        """Read the remaining lines of the recorded body via ``BytesIO.readlines``."""
        return self._content.readlines(*args, **kwargs)

    def seekable(self):
        """Report whether the underlying body buffer is seekable."""
        return self._content.seekable()

    def tell(self):
        """Return the current position in the body buffer."""
        return self._content.tell()

    def isatty(self):
        """Report whether the underlying body buffer is a tty."""
        return self._content.isatty()

    def seek(self, *args, **kwargs):
        """Move the position in the body buffer via ``BytesIO.seek``."""
        return self._content.seek(*args, **kwargs)

    def close(self):
        """Mark the stub as closed and return ``True``; the body buffer is left open."""
        self._closed = True
        return True

    def getcode(self):
        """Return the recorded status code."""
        return self.status

    def isclosed(self):
        """Return whether ``close()`` has been called (same value as ``closed``)."""
        return self.closed

    def info(self):
        """Return the recorded headers, freshly parsed with ``parse_headers``."""
        return parse_headers(self.recorded_response["headers"])

    def getheaders(self):
        """Return the recorded headers as a list of the items ``compat.get_header_items`` yields."""
        message = parse_headers(self.recorded_response["headers"])
        return list(compat.get_header_items(message))

    def getheader(self, header, default=None):
        """Return the value(s) of ``header`` (case-insensitive), or ``default``.

        Multiple values for the same header are joined with ``", "``.
        """
        wanted = header.lower()
        values = [v for (k, v) in self.getheaders() if k.lower() == wanted]

        if values:
            return ", ".join(values)
        return default

    def readable(self):
        """Report whether the underlying body buffer is readable."""
        return self._content.readable()