def open_stream(filename):
    """
    Open a file and return a binary stream to its contents.

    If filename starts with "http:" or "https:" then file is assumed to
    be a URL. The whole response body is downloaded and buffered in
    memory.

    If filename starts with "blob:" then file is assumed to be held
    within Azure as a BLOB. This expects the following environment
    variables to be set:

    * BLOB_SAS_TOKEN
    * BLOB_ACCOUNT_NAME
    * BLOB_CONTAINER_NAME

    Otherwise, the filename is assumed to be held on the file system
    and is opened in binary mode; the caller is responsible for closing
    the returned file object.

    :param filename: file name or URL
    :type filename: str or unicode
    :return: open stream
    :rtype: io.BytesIO (URL or blob) OR binary file object (file system)
    :raises AssertionError: if filename is empty
    :raises KeyError: if a required BLOB_* environment variable is
        missing when opening a blob
    """
    # Raised explicitly (instead of using ``assert``) so the check is
    # not stripped under ``python -O``; the exception type is unchanged.
    if not filename:
        raise AssertionError("Filename must not be ''")

    lowered = filename.lower()
    if lowered.startswith((HTTP, HTTPS)):
        import io
        import requests

        response = requests.get(filename, stream=True)
        try:
            raw = response.raw
            # Ask urllib3 to undo any Content-Encoding (gzip/deflate).
            raw.decode_content = True
            # raw.read() yields bytes, which io.StringIO rejects with a
            # TypeError; io.BytesIO matches the other two branches.
            stream = io.BytesIO(raw.read())
        finally:
            response.close()
    elif lowered.startswith(BLOB):
        import io
        import os
        from azure.storage.blob import BlobService

        sas_token = os.environ['BLOB_SAS_TOKEN']
        # startswith() is safe on an empty token, unlike sas_token[0].
        if sas_token.startswith('?'):
            sas_token = sas_token[1:]
        blob_service = BlobService(
            account_name=os.environ['BLOB_ACCOUNT_NAME'],
            sas_token=sas_token)
        # Strip the "blob:" scheme prefix to obtain the blob name.
        blob_name = filename[len(BLOB):]
        blob = blob_service.get_blob_to_bytes(
            os.environ['BLOB_CONTAINER_NAME'],
            blob_name)
        stream = io.BytesIO(blob)
    else:
        stream = open(filename, 'rb')
    return stream
def open_stream(filename):
    """
    Open a file and return an in-memory stream holding its contents.

    If filename starts with the HTTP or HTTPS prefix the file is
    fetched as a URL. If it starts with the BLOB prefix it is read from
    Azure blob storage, using the environment variables BLOB_SAS_TOKEN,
    BLOB_ACCOUNT_NAME and BLOB_CONTAINER_NAME. Otherwise it is read
    from the file system. In every case the full content is loaded
    into memory before the stream is returned.

    The URL and file system branches use ``cStringIO``, which is only
    available on Python 2.

    :param filename: file name or URL
    :type filename: str or unicode
    :return: open stream
    :rtype: cStringIO input stream (URL or file system) OR
        io.BytesIO (blob)
    :raises AssertionError: if filename is empty
    :raises KeyError: if a required BLOB_* environment variable is
        missing when opening a blob
    """
    # Raised explicitly (instead of using ``assert``) so the check is
    # not stripped under ``python -O``; the exception type is unchanged.
    if not filename:
        raise AssertionError("Filename must not be ''")

    lowered = filename.lower()
    if lowered.startswith((HTTP, HTTPS)):
        import requests
        from cStringIO import StringIO

        response = requests.get(filename, stream=True)
        try:
            raw = response.raw
            # Ask urllib3 to undo any Content-Encoding (gzip/deflate).
            raw.decode_content = True
            stream = StringIO(raw.read())
        finally:
            response.close()
    elif lowered.startswith(BLOB):
        import io
        import os
        from azure.storage.blob import BlobService

        sas_token = os.environ['BLOB_SAS_TOKEN']
        # startswith() is safe on an empty token, unlike sas_token[0].
        if sas_token.startswith('?'):
            sas_token = sas_token[1:]
        blob_service = BlobService(
            account_name=os.environ['BLOB_ACCOUNT_NAME'],
            sas_token=sas_token)
        # Strip the "blob:" scheme prefix to obtain the blob name.
        blob_name = filename[len(BLOB):]
        blob = blob_service.get_blob_to_bytes(
            os.environ['BLOB_CONTAINER_NAME'],
            blob_name)
        stream = io.BytesIO(blob)
    else:
        from cStringIO import StringIO

        # Close the file handle as soon as its content is buffered
        # rather than leaving it to the garbage collector.
        with open(filename) as source:
            stream = StringIO(source.read())
    return stream
class BlobReader(Reader):
    """
    Reader that fetches data from a single Azure blob container.

    Downloads are attempted up to BLOB_RETRIES times; an AzureException
    triggers another attempt, and a RuntimeError is raised once the
    attempts are exhausted.
    """

    def __init__(self, account, key, container):
        """
        Create a blob service client bound to one container.

        :param account: storage account name
        :param key: storage account key
        :param container: name of the container all reads go to
        """
        self.block_blob_service = BlobService(account_name=account,
                                              account_key=key)
        self.container = container

    @staticmethod
    def _failure_reason(error):
        """Return a printable reason for the last failed attempt."""
        if error is None:
            # Only reachable when BLOB_RETRIES allowed no attempt at all.
            return "no attempt was made"
        if error.args:
            return error.args[0]
        return error

    def get_data(self, name):
        """
        Download a blob and return its content.

        :param name: name of the blob within the container
        :return: whatever ``get_blob_to_bytes`` returns for the blob
        :raises RuntimeError: if every attempt raised AzureException
        """
        # The name bound by ``except ... as`` is unbound again when the
        # handler exits on Python 3, so keep the error in its own variable.
        last_error = None
        attempts_left = BLOB_RETRIES
        while attempts_left > 0:
            try:
                return self.block_blob_service.get_blob_to_bytes(
                    self.container, name)
            except AzureException as azure_exc:
                last_error = azure_exc
                attempts_left -= 1
        raise RuntimeError("Couldn't read from blob, %s"
                           % (self._failure_reason(last_error),))

    def to_file(self, handle, blobpath):
        """
        Download a blob into an already open file-like object.

        :param handle: writable stream that receives the blob content
        :param blobpath: name of the blob within the container
        :raises RuntimeError: if every attempt raised AzureException
        """
        # NOTE(review): a failed attempt may already have written part of
        # the blob to ``handle``; it is not rewound before the next try --
        # confirm whether callers or the SDK account for that.
        last_error = None
        attempts_left = BLOB_RETRIES
        while attempts_left > 0:
            try:
                self.block_blob_service.get_blob_to_file(
                    self.container, blobpath, handle,
                    max_connections=2, progress_callback=None)
            except AzureException as azure_exc:
                last_error = azure_exc
                attempts_left -= 1
            else:
                return
        raise RuntimeError("Couldn't download blob, %s"
                           % (self._failure_reason(last_error),))

    def list(self, prefix):
        """
        List the blobs in the container whose names start with prefix.

        :param prefix: blob name prefix to filter on
        :return: whatever ``list_blobs`` returns for the container
        """
        return self.block_blob_service.list_blobs(self.container, prefix)