Exemple #1
0
def open_stream(filename):
    """
    Open a file and return a binary stream to the file.

    If filename starts with "http:" or "https:" then file is assumed
    to be a URL. The whole response body is read into memory.

    If filename starts with "blob:" then file is assumed to be held
    within Azure as a BLOB. This expects the following environment
    variables to be set:

    * BLOB_SAS_TOKEN
    * BLOB_ACCOUNT_NAME
    * BLOB_CONTAINER_NAME

    Otherwise, the filename is assumed to be held on the file
    system.

    The prefix checks are case-insensitive. The caller is responsible
    for closing the returned stream.

    :param filename: file name or URL
    :type filename: str or unicode
    :return: open stream
    :rtype: io.BytesIO (URL or blob) OR binary file object (file system)
    :raises KeyError: if a required BLOB_* environment variable is
        not set (blob only)
    """
    assert filename, "Filename must not be ''"

    lowered = filename.lower()
    is_url = lowered.startswith(HTTP) or lowered.startswith(HTTPS)
    is_blob = lowered.startswith(BLOB)

    if is_url:
        import io
        import requests

        response = requests.get(filename, stream=True)
        try:
            raw = response.raw
            # Ask the raw stream to undo any Content-Encoding (gzip/deflate).
            raw.decode_content = True
            # The raw body is bytes, so it must be wrapped in BytesIO;
            # io.StringIO only accepts text and raises TypeError on bytes.
            stream = io.BytesIO(raw.read())
        finally:
            # The body is fully buffered, so the connection can be released.
            response.close()
        # NOTE(review): the HTTP status code is not checked, so an error
        # page is returned as if it were the file content.

    elif is_blob:
        import io
        import os
        from azure.storage.blob import BlobService

        sas_token = os.environ['BLOB_SAS_TOKEN']
        # Drop a leading '?' from the token; startswith() is safe on an
        # empty token, unlike indexing [0].
        if sas_token.startswith('?'):
            sas_token = sas_token[1:]

        blob_service = BlobService(
            account_name=os.environ['BLOB_ACCOUNT_NAME'], sas_token=sas_token)
        # Strip the "blob:" scheme prefix to obtain the blob name.
        blob_name = filename[len(BLOB):]
        blob = blob_service.get_blob_to_bytes(
            os.environ['BLOB_CONTAINER_NAME'], blob_name)
        stream = io.BytesIO(blob)

    else:
        stream = open(filename, 'rb')

    return stream
Exemple #2
0
def open_stream(filename):
    """
    Open a file and return an in-memory stream holding its content.

    If filename starts with the HTTP or HTTPS prefix it is fetched as
    a URL. If it starts with the BLOB prefix it is read from Azure
    blob storage, using the environment variables BLOB_SAS_TOKEN,
    BLOB_ACCOUNT_NAME and BLOB_CONTAINER_NAME. Otherwise it is read
    from the file system. The prefix checks are case-insensitive and
    the content is always read completely into memory.

    :param filename: file name or URL
    :type filename: str or unicode
    :return: stream positioned at the start of the content
    :rtype: cStringIO.StringI (URL or file system) OR io.BytesIO (blob)
    :raises KeyError: if a required BLOB_* environment variable is
        not set (blob only)
    """
    assert filename, "Filename must not be ''"

    lowered = filename.lower()
    is_url = lowered.startswith(HTTP) or lowered.startswith(HTTPS)
    is_blob = lowered.startswith(BLOB)

    if is_url:
        import requests
        from cStringIO import StringIO

        response = requests.get(filename, stream=True)
        try:
            raw = response.raw
            # Ask the raw stream to undo any Content-Encoding (gzip/deflate).
            raw.decode_content = True
            stream = StringIO(raw.read())
        finally:
            # The body is fully buffered, so the connection can be released.
            response.close()

    elif is_blob:
        import io
        import os
        from azure.storage.blob import BlobService

        sas_token = os.environ['BLOB_SAS_TOKEN']
        # Drop a leading '?' from the token; startswith() is safe on an
        # empty token, unlike indexing [0].
        if sas_token.startswith('?'):
            sas_token = sas_token[1:]

        blob_service = BlobService(
            account_name=os.environ['BLOB_ACCOUNT_NAME'], sas_token=sas_token)
        # Strip the blob scheme prefix to obtain the blob name.
        blob_name = filename[len(BLOB):]
        blob = blob_service.get_blob_to_bytes(
            os.environ['BLOB_CONTAINER_NAME'], blob_name)
        stream = io.BytesIO(blob)

    else:
        from cStringIO import StringIO
        # Close the file handle as soon as its content has been copied
        # into memory instead of leaving it for the garbage collector.
        with open(filename) as handle:
            stream = StringIO(handle.read())

    return stream
Exemple #3
0
class BlobReader(Reader):
    """
    Reader that fetches blobs from a single storage container.

    Download operations are attempted up to BLOB_RETRIES times; when
    every attempt fails with an AzureException a RuntimeError is
    raised instead.
    """

    def __init__(self, account, key, container):
        """
        Create the blob service client.

        :param account: storage account name
        :param key: storage account key
        :param container: name of the container all reads go to
        """
        self.block_blob_service = BlobService(account_name=account,
                                              account_key=key)
        self.container = container

    def _call_with_retries(self, operation, failure_message):
        """
        Run operation() up to BLOB_RETRIES times and return its result.

        :param operation: zero-argument callable performing the request
        :param failure_message: prefix of the RuntimeError message
        :return: whatever operation() returns on the first success
        :raises RuntimeError: if no attempt succeeds
        """
        # The last error is kept in its own variable: the name bound by
        # "except ... as" is unbound when the except clause ends on
        # Python 3, so it cannot be used after the loop.
        last_error = None
        for _ in range(BLOB_RETRIES):
            try:
                return operation()
            except AzureException as azure_exc:
                last_error = azure_exc

        # Fall back to the exception itself when it carries no args
        # (or to None when BLOB_RETRIES allowed no attempt at all).
        if last_error is not None and last_error.args:
            detail = last_error.args[0]
        else:
            detail = last_error
        raise RuntimeError("%s, %s" % (failure_message, detail))

    def get_data(self, name):
        """
        Return the content of blob ``name`` from the container.

        :param name: blob name
        :return: result of ``get_blob_to_bytes`` for the blob
        :raises RuntimeError: if every attempt fails
        """
        return self._call_with_retries(
            lambda: self.block_blob_service.get_blob_to_bytes(
                self.container, name),
            "Couldn't read from blob")

    def to_file(self, handle, blobpath):
        """
        Download blob ``blobpath`` into the open file object ``handle``.

        :param handle: writable file object receiving the content
        :param blobpath: blob name
        :raises RuntimeError: if every attempt fails
        """
        # NOTE(review): a failed attempt may already have written part
        # of the blob to handle; it is not rewound before retrying.
        self._call_with_retries(
            lambda: self.block_blob_service.get_blob_to_file(
                self.container,
                blobpath,
                handle,
                max_connections=2,
                progress_callback=None),
            "Couldn't download blob")

    def list(self, prefix):
        """
        List the blobs in the container whose names start with prefix.

        :param prefix: blob name prefix
        :return: result of ``list_blobs`` (not retried)
        """
        return self.block_blob_service.list_blobs(self.container, prefix)