def get_blob_client_by_uri(storage_uri):
        container_name, storage_name, key = StorageUtilities.get_storage_from_uri(storage_uri)

        blob_service = BlockBlobService(account_name=storage_name, account_key=key)
        blob_service.create_container(container_name)

        return blob_service, container_name
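A brief, hypothetical usage sketch for the helper above (the URI format is an assumption; StorageUtilities.get_storage_from_uri is expected to resolve the account name, container, and key from it):

# Hypothetical usage; the storage URI below is only a placeholder.
service, container = get_blob_client_by_uri('https://myaccount.blob.core.windows.net/mycontainer')
service.create_blob_from_text(container, 'example.txt', u'hello world')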
Example #2
class BlobUploader(object):

    def __init__(self, blob_container=None, make_container_public=False):
        """
        Class to handle uploading to an azure blob connection.

        :param make_container_public: True iff you are okay with public read access to your data. Useful for teaching a course
        :return:
        """
        self.blob_container = blob_container or BLOB_CONTAINER
        self.blob_service = BlockBlobService(account_name=BLOB_ACCOUNTNAME, account_key=BLOB_ACCOUNTKEY)
        # if make_container_public:
        #     self.blob_service.create_container(self.blob_container, public_access=PublicAccess.Container)
        # else:
        #     self.blob_service.create_container(self.blob_container)

    def put_json_file(self, file_obj, filename):
        """
        Put a file into azure blob store.

        Allows the user to specify the blob name format. For example, one could use:
        <prefix>/YYYYMMDD.json
        """
        file_obj.seek(0)
        self.blob_service.create_blob_from_path(
            self.blob_container, 
            filename, 
            file_obj.name, 
            content_settings=ContentSettings(content_type="text/json")
        )
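A short, hypothetical usage sketch for BlobUploader (it assumes the module-level BLOB_ACCOUNTNAME/BLOB_ACCOUNTKEY settings are configured; the container and file names are placeholders):

# Hypothetical usage of BlobUploader; values are placeholders.
import json

uploader = BlobUploader(blob_container='my-course-data')
with open('20240101.json', 'w+') as f:
    json.dump({'status': 'ok'}, f)
    f.flush()  # make sure the bytes are on disk before uploading by path
    uploader.put_json_file(f, 'prefix/20240101.json')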
    def test_sas_signed_identifier(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._create_block_blob()

        access_policy = AccessPolicy()
        access_policy.start = '2011-10-11'
        access_policy.expiry = '2018-10-12'
        access_policy.permission = BlobPermissions.READ
        identifiers = {'testid': access_policy}

        resp = self.bs.set_container_acl(self.container_name, identifiers)

        token = self.bs.generate_blob_shared_access_signature(
            self.container_name,
            blob_name,
            id='testid'
            )

        # Act
        service = BlockBlobService(
            self.settings.STORAGE_ACCOUNT_NAME,
            sas_token=token,
            request_session=requests.Session(),
        )
        self._set_test_proxy(service, self.settings)
        result = service.get_blob_to_bytes(self.container_name, blob_name)

        # Assert
        self.assertEqual(self.byte_data, result.content)
    def test_sas_access_blob(self):
        # SAS URL is calculated from storage key, so this test runs live only
        if TestMode.need_recording_file(self.test_mode):
            return

        # Arrange
        blob_name = self._create_block_blob()
        
        token = self.bs.generate_blob_shared_access_signature(
            self.container_name,
            blob_name,
            permission=BlobPermissions.READ,
            expiry=datetime.utcnow() + timedelta(hours=1),
        )

        # Act
        service = BlockBlobService(
            self.settings.STORAGE_ACCOUNT_NAME,
            sas_token=token,
            request_session=requests.Session(),
        )
        self._set_test_proxy(service, self.settings)
        result = service.get_blob_to_bytes(self.container_name, blob_name)

        # Assert
        self.assertEqual(self.byte_data, result.content)
Example #5
        def update(self, area, selector, content_type, buffer):
            assert area is not None, 'area is none; should already be validated'

            area_config = config.load_area(area)
            
            storage_config = config.load_storage(area_config['storage'])
            
            area = area.lower()

            # httplib.HTTPConnection.debuglevel = 1
            # http.client.HTTPConnection.debuglevel = 1

            blob_service = BlockBlobService(account_name=storage_config['name'], account_key=storage_config['key1'])

            hash = base64.b64encode(hashlib.md5(buffer).digest())

            content_settings = ContentSettings(content_md5=hash)
            if content_type is not None and len(content_type) > 0:
                content_settings.content_type = content_type

            blob_service.create_blob_from_bytes(
                area_config['container'],
                selector,
                buffer,
                content_settings=content_settings,
                validate_content=False
            )

            return hash
Example #6
class azureobject(object):
    def __init__(self, azure_config):
        if ('account name' in azure_config and azure_config['account name'] is not None
                and 'account key' in azure_config and azure_config['account key'] is not None
                and 'container' in azure_config and azure_config['container'] is not None):
            self.conn = BlockBlobService(account_name=azure_config['account name'], account_key=azure_config['account key'])
            self.container = azure_config['container']
        else:
            raise Exception("Cannot connect to Azure without account name, account key, and container specified")
    def get_key(self, key_name):
        new_key = azurekey(self, key_name, load=False)
        if new_key.exists():
            new_key.get_properties()
            new_key.does_exist = True
        else:
            new_key.does_exist = False
        return new_key
    def search_key(self, key_name):
        for blob in self.conn.list_blobs(self.container, prefix=key_name, delimiter='/'):
            if blob.name == key_name:
                return azurekey(self, blob.name)
        return None
    def list_keys(self, prefix):
        output = list()
        for blob in self.conn.list_blobs(self.container, prefix=prefix, delimiter='/'):
            output.append(azurekey(self, blob.name))
        return output
    def sas_with_signed_identifiers(self):
        container_name = self._create_container()
        self.service.create_blob_from_text(container_name, 'blob1', b'hello world')

        # Set access policy on container
        access_policy = AccessPolicy(permission=ContainerPermissions.READ,
                                     expiry=datetime.utcnow() + timedelta(hours=1))
        identifiers = {'id': access_policy}
        acl = self.service.set_container_acl(container_name, identifiers)

        # Wait 30 seconds for acl to propagate
        time.sleep(30)

        # Indicates to use the access policy set on the container
        token = self.service.generate_container_shared_access_signature(
            container_name,
            id='id'
        )

        # Create a service and use the SAS
        sas_service = BlockBlobService(
            account_name=self.account.account_name,
            sas_token=token,
        )

        blob = sas_service.get_blob_to_text(container_name, 'blob1')
        content = blob.content # hello world
        
        self.service.delete_container(container_name)
    def get_blob_client_by_uri(storage_uri, session):
        storage = StorageUtilities.get_storage_from_uri(storage_uri, session)

        blob_service = BlockBlobService(
            account_name=storage.storage_name,
            token_credential=storage.token)
        blob_service.create_container(storage.container_name)
        return blob_service, storage.container_name, storage.file_prefix
    def request_session(self):
        # A custom request session may be used to set special network options
        session = requests.Session()
        client = BlockBlobService(account_name='<account_name>', account_key='<account_key>', 
                                  request_session=session)

        # Set later
        client = BlockBlobService(account_name='<account_name>', account_key='<account_key>')
        client.request_session = session
    def protocol(self):
        # https is the default protocol and is strongly recommended for security 
        # However, http may be used if desired
        client = BlockBlobService(account_name='<account_name>', account_key='<account_key>', 
                                  protocol='http')

        # Set later
        client = BlockBlobService(account_name='<account_name>', account_key='<account_key>')
        client.protocol = 'http'
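Building on the request_session and protocol snippets above, a hedged sketch of what a customised session might look like in practice (the retry count and proxy address are illustrative assumptions, not recommendations):

# Illustrative only: configure retries and a proxy on the session passed to BlockBlobService.
import requests
from requests.adapters import HTTPAdapter
from azure.storage.blob import BlockBlobService

session = requests.Session()
session.mount('https://', HTTPAdapter(max_retries=3))          # retry transient connection errors
session.proxies = {'https': 'http://proxy.example.com:8080'}   # hypothetical corporate proxy

client = BlockBlobService(account_name='<account_name>', account_key='<account_key>',
                          request_session=session)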
Example #11
    def store(self):
        from azure.storage.blob import BlockBlobService

        container = uuid()
        conn_string = create_azure_conn_string(load_azure_credentials())
        s = BlockBlobService(connection_string=conn_string)

        yield AzureBlockBlobStore(conn_string=conn_string, container=container,
                                  public=False)
        s.delete_container(container)
Example #12
	def test_get_put_blob(self):
		import config as config
		account_name = config.STORAGE_ACCOUNT_NAME
		account_key = config.STORAGE_ACCOUNT_KEY
		block_blob_service = BlockBlobService(account_name, account_key)
		block_blob_service.create_blob_from_path(
			'cont2',
			'sunset.png',
			'sunset.png',)	
		block_blob_service.get_blob_to_path('cont2', 'sunset.png', 'out-sunset.png')
Example #13
 def block_blob_service(self):
     from azure.storage.blob import BlockBlobService, PublicAccess
     block_blob_service = BlockBlobService(
         connection_string=self.conn_string)
     if self.create_if_missing:
         block_blob_service.create_container(
             self.container,
             public_access=PublicAccess.Container if self.public else None
         )
     return block_blob_service
Example #14
class _BlobStorageFileHandler(object):

    def __init__(self,
                  account_name=None,
                  account_key=None,
                  protocol='https',
                  container='logs',
                  zip_compression=False,
                  max_connections=1,
                  max_retries=5,
                  retry_wait=1.0,
                  is_emulated=False):
        self.service = BlockBlobService(account_name=account_name,
                                        account_key=account_key,
                                        is_emulated=is_emulated,
                                        protocol=protocol)
        self.container_created = False
        hostname = gethostname()
        self.meta = {'hostname': hostname.replace('_', '-'),
                     'process': os.getpid()}
        self.container = (container % self.meta).lower()
        self.meta['hostname'] = hostname
        self.zip_compression = zip_compression
        self.max_connections = max_connections
        self.max_retries = max_retries
        self.retry_wait = retry_wait

    def put_file_into_storage(self, dirName, fileName):
        """
        Ship the outdated log file to the specified blob container.
        """
        if not self.container_created:
            self.service.create_container(self.container)
            self.container_created = True
        fd, tmpfile_path = None, ''
        try:
            file_path = os.path.join(dirName, fileName)
            if self.zip_compression:
                suffix, content_type = '.zip', 'application/zip'
                fd, tmpfile_path = mkstemp(suffix=suffix)
                with os.fdopen(fd, 'wb') as f:
                    with ZipFile(f, 'w', ZIP_DEFLATED) as z:
                        z.write(file_path, arcname=fileName)
                file_path = tmpfile_path
            else:
                suffix, content_type = '', 'text/plain'
            self.service.create_blob_from_path(container_name=self.container,
                                               blob_name=fileName+suffix,
                                               file_path=file_path,
                                               content_settings=ContentSettings(content_type=content_type),
                                               max_connections=self.max_connections
                                               )  # max_retries and retry_wait no longer arguments in azure 0.33
        finally:
            if self.zip_compression and fd:
                os.remove(tmpfile_path)
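A small, hypothetical sketch of driving the handler base above directly (the account values and log path are placeholders; a real deployment would typically wrap this class in a rotating logging handler):

# Hypothetical, direct use of _BlobStorageFileHandler; values are placeholders.
handler = _BlobStorageFileHandler(account_name='<account_name>',
                                  account_key='<account_key>',
                                  container='logs-%(hostname)s',  # expanded against self.meta in __init__
                                  zip_compression=True)
handler.put_file_into_storage('/var/log/myapp', 'app.log.2019-01-01')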
Example #15
def upload_file(STORAGE_NAME, STORAGE_KEY, NEW_CONTAINER_NAME, file, path, extension, content_type):
    """create blob service, and upload files to container"""
    
    blob_service = BlockBlobService(account_name=STORAGE_NAME, account_key=STORAGE_KEY)

    try:
        blob_service.create_blob_from_path(NEW_CONTAINER_NAME, file, path,
                                           content_settings=ContentSettings(content_type=content_type + extension))
        print("{} // BLOB upload status: successful".format(file))

    except Exception:
        print("{} // BLOB upload status: failed".format(file))
Example #16
def make_public_container(STORAGE_NAME, STORAGE_KEY, NEW_CONTAINER_NAME):
    """Create the blob service and a blob container, set the container to public access, and return the blob service."""

    blob_service = BlockBlobService(account_name=STORAGE_NAME, account_key=STORAGE_KEY)
    new_container_status = blob_service.create_container(NEW_CONTAINER_NAME)
    blob_service.set_container_acl(NEW_CONTAINER_NAME, public_access=PublicAccess.Container)

    if new_container_status == True:
        print('{} BLOB container has been successfully created: {}'.format(NEW_CONTAINER_NAME, new_container_status))
    else:
        print('{} something went wrong: check parameters and subscription'.format(NEW_CONTAINER_NAME))

    return blob_service
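Taken together with upload_file above, a hedged end-to-end sketch (the storage name, key, container, and file values are placeholders):

# Hypothetical end-to-end use of the two helpers above; all values are placeholders.
STORAGE_NAME = '<storage_account_name>'
STORAGE_KEY = '<storage_account_key>'
CONTAINER = 'public-assets'

make_public_container(STORAGE_NAME, STORAGE_KEY, CONTAINER)
upload_file(STORAGE_NAME, STORAGE_KEY, CONTAINER,
            file='report.csv', path='./report.csv',
            extension='csv', content_type='text/')  # upload_file concatenates content_type + extension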
    def test_create_container_with_public_access_container(self):
        # Arrange
        container_name = self._get_container_reference()

        # Act
        created = self.bs.create_container(container_name, None, 'container')
        anonymous_service = BlockBlobService(self.settings.STORAGE_ACCOUNT_NAME)

        # Assert
        self.assertTrue(created)
        anonymous_service.list_blobs(container_name)
    def test_create_container_with_public_access_blob(self):
        # Arrange
        container_name = self._get_container_reference()

        # Act
        created = self.bs.create_container(container_name, None, 'blob')
        self.bs.create_blob_from_text(container_name, 'blob1', u'xyz')
        anonymous_service = BlockBlobService(self.settings.STORAGE_ACCOUNT_NAME)

        # Assert
        self.assertTrue(created)
        anonymous_service.get_blob_to_text(container_name, 'blob1')
Example #19
        def prepare(self, area):
            assert area is not None, 'area is none; should already be validated'

            area_config = config.load_area(area)

            storage_config = config.load_storage(area_config['storage'])

            blob_service = BlockBlobService(account_name=storage_config['name'], account_key=storage_config['key1'])

            blob_service.create_container(area_config['container'])

            blob_service.set_container_acl(area_config['container'], public_access=PublicAccess.Container)
Example #20
    def store(self):
        class ExtendedKeysStore(ExtendedKeyspaceMixin, AzureBlockBlobStore):
            pass
        from azure.storage.blob import BlockBlobService

        container = uuid()
        conn_string = create_azure_conn_string(load_azure_credentials())
        s = BlockBlobService(connection_string=conn_string)

        yield ExtendedKeysStore(conn_string=conn_string,
                                container=container, public=False)
        s.delete_container(container)
def get_wav_file(account, item):
    # define blob service

    block_blob_service = BlockBlobService(
        account_name=account,
    )

    # get wav file
        # note: code currently sends 'audio'; should I split, or hardcode?
    blob = block_blob_service.get_blob_to_bytes('audio', item[6:])

    return BytesIO(blob.content)
Example #22
def test_azure_setgetstate():
    from azure.storage.blob import BlockBlobService
    container = uuid()
    conn_string = create_azure_conn_string(load_azure_credentials())
    s = BlockBlobService(connection_string=conn_string)
    store = AzureBlockBlobStore(conn_string=conn_string, container=container)
    store.put(u'key1', b'value1')

    buf = pickle.dumps(store, protocol=2)
    store = pickle.loads(buf)

    assert store.get(u'key1') == b'value1'
    s.delete_container(container)
Example #23
def initialize_backend():
    global _blob_service
    global _container
    global _timeout

    _blob_service = BlockBlobService(
        account_name=getenv_required(_ENV_ACCOUNT_NAME),
        account_key=getenv_required(_ENV_ACCOUNT_KEY))
    _container = getenv(_ENV_CONTAINER, _DEFAULT_CONTAINER)
    _timeout = getenv_int(_ENV_TIMEOUT, _DEFAULT_TIMEOUT)

    _blob_service.create_container(
        _container, fail_on_exist=False, timeout=_timeout)
Example #24
def delete_container(STORAGE_NAME, STORAGE_KEY, CONTAINER_NAME):

    ##############################################################
    #RUN THIS ONLY IF YOU WANT TO DELETE A CONTAINER             #
    #REMEMBER TO DOWNLOAD YOUR DATA BEFORE DELETING THE CONTAINER#
    #IMPORTANT: YOU WILL LOSE THE BLOBS INSIDE THE CONTAINER     #
    ##############################################################

    blob_service = BlockBlobService(account_name=STORAGE_NAME, account_key=STORAGE_KEY)

    #delete container
    delete_container = blob_service.delete_container(CONTAINER_NAME)
    print("{} deletion status success: {}".format(CONTAINER_NAME, delete_container))
Example #25
def loaddata():
    ACCOUNT_NAME = "<account name>"
    ACCOUNT_KEY = "<account key>"
    CONTAINER_NAME = "<container name>"

    blobService = BlockBlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
    blobService.get_blob_to_path(CONTAINER_NAME, 'startups.csv', 'startups.csv')

    dataset = pd.read_csv('startups.csv')
    #print ('Startups dataset shape: {}'.format(dataset.shape))

    X = dataset.iloc[:,:-1].values
    y = dataset.iloc[:,4].values
    return X, y
Example #26
 def spider_closed(self, spider):
     self.exporter.finish_exporting()
     file = self.files.pop(spider)
     filename = file.name
     newname = filename[:-5]+'-'+datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")+'.tsv'
     file.close()
     os.rename(filename, newname)
     if UPLOAD_TO_AZURE_STORAGE:
         block_blob_service = BlockBlobService(account_name=AZURE_ACCOUNT_NAME, account_key=AZURE_ACCOUNT_KEY)
         block_blob_service.create_blob_from_path(AZURE_CONTAINER,
                                                 newname,
                                                 newname,
                                                 content_settings=ContentSettings(content_type='text/tab-separated-values')
                                                         )
    def create_blob_sas_defintion(self, storage_account_name, vault_url):
        """
        Creates a service SAS definition with access to a blob container.
        """

        from azure.storage.blob import BlockBlobService, ContainerPermissions
        from azure.keyvault.models import SasTokenType, SasDefinitionAttributes
        from azure.keyvault import SecretId

        # create the blob sas definition template
        # the sas template uri for service sas definitions contains the storage entity url with the template token
        # this sample demonstrates constructing the template uri for a blob container, but a similar approach can
        # be used for all other storage services, i.e. File, Queue, Table

        # create a template sas token for the container
        service = BlockBlobService(account_name=storage_account_name,
                                   # don't sign the template with the storage account key; use key 00000000
                                   account_key='00000000')
        permissions = ContainerPermissions(read=True, write=True, delete=True, list=True)
        temp_token = service.generate_container_shared_access_signature(container_name='blobcontainer',
                                                                        permission=permissions,
                                                                        expiry='2020-01-01')

        # use the BlockBlobService to construct the template uri for the container sas definition
        blob_sas_template_uri = service.make_container_url(container_name='blobcontainer',
                                                           protocol='https',
                                                           sas_token=temp_token)
        # create the sas definition in the vault
        attributes = SasDefinitionAttributes(enabled=True)
        blob_sas_def = self.client.set_sas_definition(vault_base_url=vault_url,
                                                               storage_account_name=storage_account_name,
                                                               sas_definition_name='blobcontall',
                                                               template_uri=blob_sas_template_uri,
                                                               sas_type=SasTokenType.service,
                                                               validity_period='PT2H',
                                                               sas_definition_attributes=attributes)

        # use the sas definition to provision a sas token and use it to create a BlockBlobService
        # which can interact with blobs in the container

        # get the secret_id of the container sas definition and get the token from the vault as a secret
        sas_secret_id = SecretId(uri=blob_sas_def.secret_id)
        blob_sas_token = self.client.get_secret(vault_base_url=sas_secret_id.vault,
                                                         secret_name=sas_secret_id.name,
                                                         secret_version=sas_secret_id.version).value
        service = BlockBlobService(account_name=storage_account_name,
                                   sas_token=blob_sas_token)
        service.create_blob_from_text(container_name='blobcontainer',
                                      blob_name='blob2',
                                      text=u'test blob2 data')
        blobs = list(service.list_blobs(container_name='blobcontainer'))

        for blob in blobs:
            service.delete_blob(container_name='blobcontainer',
                                blob_name=blob.name)
    def upload_assets(self, blob_client: azureblob.BlockBlobService):
        """
        Uploads the file specified in the JSON parameters file into a storage container that will
        delete itself after 7 days.

        :param blob_client: A blob service client used for making blob operations.
        :type blob_client: `azure.storage.blob.BlockBlobService`
        """
        input_container_name = "fgrp-" + self.job_id
        output_container_name = "fgrp-" + self.job_id + '-output'

        # Create input container
        blob_client.create_container(input_container_name, fail_on_exist=False)
        logger.info('creating a storage container: {}'.format(input_container_name))

        # Create output container
        blob_client.create_container(output_container_name, fail_on_exist=False)
        logger.info('creating a storage container: {}'.format(output_container_name))

        full_sas_url_input = 'https://{}.blob.core.windows.net/{}?{}'.format(
            blob_client.account_name,
            input_container_name,
            utils.get_container_sas_token(
                blob_client,
                input_container_name,
                ContainerPermissions.READ +
                ContainerPermissions.LIST))
        full_sas_url_output = 'https://{}.blob.core.windows.net/{}?{}'.format(
            blob_client.account_name,
            output_container_name,
            utils.get_container_sas_token(
                blob_client,
                output_container_name,
                ContainerPermissions.READ +
                ContainerPermissions.LIST +
                ContainerPermissions.WRITE))

        # Set the storage info for the container.
        self.storage_info = utils.StorageInfo(
            input_container_name,
            output_container_name,
            full_sas_url_input,
            full_sas_url_output)

        # Upload the asset file that will be rendered and
        scenefile = ctm.get_scene_file(self.parameters_file)
        for file in os.listdir("Assets"):
            if scenefile == file:
                file_path = Path("Assets/" + file)
                utils.upload_file_to_container(blob_client, input_container_name, file_path)
    def test_response_callback(self):
        # Arrange
        service = BlockBlobService(self.account_name, self.account_key)
        name = self.get_resource_name('cont')

        # Act
        def callback(response):
            response.status = 200
            response.headers.clear()

        # Force an exists call to succeed by resetting the status
        service.response_callback = callback

        # Assert
        exists = service.exists(name)
        self.assertTrue(exists)
Example #30
 def _client(self):
     if not self.__client:
         self.__client = BlockBlobService(
             connection_string=self._connection_string)
         self.__client.create_container(
             self._container_name, fail_on_exist=False)
     return self.__client
Example #31
 def container_client(self):
     return BlockBlobService(self.storage_account, self.storage_key)
from ckanapi.errors import CKANAPIError
from datetime import datetime
from dateutil import parser as dateparser
from tempfile import mkdtemp
# noinspection PyPackageRequirements
from azure.storage.blob.models import ResourceProperties

# Read configuration information and initialize

Config = ConfigParser.ConfigParser()
Config.read('azure.ini')

ckanjson_dir = Config.get('working', 'ckanjson_directory')

block_blob_service = BlockBlobService(
    Config.get('azure-blob-storage', 'account_name'),
    Config.get('azure-blob-storage', 'account_key'))

ckan_container = Config.get('azure-blob-storage', 'account_obd_container')
gcdocs_container = Config.get('azure-blob-storage', 'account_gcdocs_container')
doc_intake_dir = Config.get('working', 'intake_directory')

# Setup logging

logger = logging.getLogger('base')
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
fh = logging.FileHandler(datetime.now().strftime(
    Config.get('working', 'error_logfile')))
fh.setLevel(logging.INFO)
Example #33
# How this works:
Example #34
sto_accountKey = configData['sto_accountKey']
log_name = configData['logName']
log_level = configData['logLevel']
purge_log = configData['purgeLog']

# Get a fresh API access token...
response = amspy.get_access_token(account_name, account_key)
resjson = response.json()
access_token = resjson["access_token"]

# Get Asset by using the list_media_asset method and the Asset ID
response = amspy.list_media_asset(access_token, OUTPUTASSETID)
if (response.status_code == 200):
    resjson = response.json()
    # Get the container name from the Uri
    outputAssetContainer = resjson['d']['Uri'].split('/')[3]
    print(outputAssetContainer)

### Use the Azure Blob Service library from the Azure Storage SDK.
block_blob_service = BlockBlobService(account_name=sto_account_name,
                                      account_key=sto_accountKey)
generator = block_blob_service.list_blobs(outputAssetContainer)
for blob in generator:
    print(blob.name)
    if (blob.name.endswith(".vtt")):
        blobText = block_blob_service.get_blob_to_text(outputAssetContainer,
                                                       blob.name)
        print("\n\n##### WEB VTT ######")
        print(blobText.content)
        block_blob_service.get_blob_to_path(outputAssetContainer, blob.name,
                                            "output/" + blob.name)
Example #35
def upload_recording(filename: str, config):
    upload_light = led.led(16) # GPIO 20 is the Uploading indicator
    upload_light.on()

    try:
        logger.write('Uploading status file...')
        status_file = status.update_status(False, True, False)
        status.upload_status(status_file, False)
        logger.write('Upload complete.')
    except Exception as e:
        logger.write('An error occurred while uploading a status file.')
        logger.write(str(e))

    try:
        start = time.time()
        logger.write('Uploading...')

        credential_path = 'credentials.ini'
        credentials = configparser.ConfigParser()
        credentials.read(credential_path)

        container = config.get('Cloud', 'container')

        username = credentials.get('Azure', 'Username')
        password = credentials.get('Azure', 'Password')

        block_blob_service = BlockBlobService(account_name=username, account_key=password)

        # Force chunked uploading and set upload block sizes to 8KB
        block_blob_service.MAX_SINGLE_PUT_SIZE=16
        block_blob_service.MAX_BLOCK_SIZE=8*1024

        timestamp = os.path.basename(filename).split('.')[0]
        extension = os.path.basename(filename).split('.')[1]

        timestamp_day = timestamp.split('_')[1]
        timestamp_time = timestamp.split('_')[2]

        timestamp_day = timestamp_day.replace('-', '_')
        timestamp_time = timestamp_time.replace('-', '_')

        blob_name = timestamp_day + '/' + timestamp_time + '/recording.' + extension

        block_blob_service.create_blob_from_path(container, blob_name, filename)

        end = time.time()
        elapsed = end - start

        logger.write('Upload Succeeded: ' + blob_name)
        logger.write('Upload took ' + str(elapsed) + ' seconds.\n')

        if REMOVE_RECORDINGS:
            os.remove(filename)

    except Exception as e:
        logger.write('CheckConfig: There was an error uploading to the cloud.')
        logger.write(str(e))
        upload_light.off()
        return

    upload_light.off()
    logger.write('Upload complete')

    try:
        logger.write('Uploading status file...')
        status_file = status.update_status(False, False, False)
        status.upload_status(status_file, False)
        logger.write('Upload complete.')
    except Exception as e:
        logger.write('An error occurred while uploading a status file.')
        logger.write(str(e))
Example #36
class AMLMonitor:
    def __init__(self, request_id, list_jobs_submitted, request_name,
                 request_submission_timestamp, model_version):
        self.request_id = request_id
        self.jobs_submitted = list_jobs_submitted
        self.request_name = request_name  # None if not provided by the user
        self.request_submission_timestamp = request_submission_timestamp  # str
        self.model_version = model_version  # str

        storage_account_name = os.getenv('STORAGE_ACCOUNT_NAME')
        storage_account_key = os.getenv('STORAGE_ACCOUNT_KEY')
        self.internal_storage_service = BlockBlobService(
            account_name=storage_account_name, account_key=storage_account_key)
        self.internal_datastore = {
            'account_name': storage_account_name,
            'account_key': storage_account_key,
            'container_name': api_config.INTERNAL_CONTAINER
        }
        self.aml_output_container = api_config.AML_CONTAINER
        self.internal_container = api_config.INTERNAL_CONTAINER

    def get_total_jobs(self):
        return len(self.jobs_submitted)

    def check_job_status(self):
        print('AMLMonitor, check_job_status() called.')
        all_jobs_finished = True
        status_tally = defaultdict(int)

        for job_id, job in self.jobs_submitted.items():
            pipeline_run = job['pipeline_run']
            status = pipeline_run.get_status()
            # Common values include Running, Completed, and Failed; as of March 2019 the
            # completed state appears to be reported as Finished.

            print('request_id {}, job_id {}, status is {}'.format(
                self.request_id, job_id, status))
            status_tally[status] += 1

            if status not in api_config.AML_CONFIG[
                    'completed_status']:  # otherwise all_jobs_finished will never be flipped
                all_jobs_finished = False

        return all_jobs_finished, status_tally

    def _download_read_json(self, blob_path):
        blob = self.internal_storage_service.get_blob_to_text(
            self.aml_output_container, blob_path)
        stream = io.StringIO(blob.content)
        result = json.load(stream)
        return result

    def _generate_urls_for_outputs(self):
        try:
            request_id = self.request_id
            request_name, request_submission_timestamp = self.request_name, self.request_submission_timestamp

            blob_paths = {
                'detections':
                '{}/{}_detections_{}_{}.json'.format(
                    request_id, request_id, request_name,
                    request_submission_timestamp),
                'failed_images':
                '{}/{}_failed_images_{}_{}.json'.format(
                    request_id, request_id, request_name,
                    request_submission_timestamp),
                # the list of images does not have request_name and timestamp in the file name, so score.py can locate it easily
                'images':
                '{}/{}_images.json'.format(request_id, request_id)
            }

            expiry = datetime.utcnow() + timedelta(
                days=api_config.EXPIRATION_DAYS)

            urls = {}
            for output, blob_path in blob_paths.items():
                sas = self.internal_storage_service.generate_blob_shared_access_signature(
                    self.internal_container,
                    blob_path,
                    permission=BlobPermissions.READ,
                    expiry=expiry)
                url = self.internal_storage_service.make_blob_url(
                    self.internal_container, blob_path, sas_token=sas)
                urls[output] = url
            return urls
        except Exception as e:
            raise RuntimeError(
                'An error occurred while generating URLs for the output files. '
                + 'Please contact us to retrieve your results. ' +
                'Error: {}'.format(str(e)))

    def aggregate_results(self):
        print('AMLMonitor, aggregate_results() called')

        # The more efficient method would be to know the run_id, which is the folder name that the
        # result is written to. Since we can't reliably get the run_id after submitting the run, we
        # resort to listing all blobs in the output container and matching by the request_id.

        # Listing all (up to a large limit) because we don't want to worry about the generator's next_marker.
        datastore_aml_container = copy.deepcopy(self.internal_datastore)
        datastore_aml_container['container_name'] = self.aml_output_container
        list_blobs = SasBlob.list_blobs_in_container(
            api_config.MAX_BLOBS_IN_OUTPUT_CONTAINER,
            datastore=datastore_aml_container,
            blob_suffix='.json')
        all_detections = []
        failures = []
        num_aggregated = 0
        for blob_path in list_blobs:
            if blob_path.endswith('.json'):
                # blob_path is azureml/run_id/output_requestID/out_file_name.json
                out_file_name = blob_path.split('/')[-1]
                # "request" is part of the AML job_id
                if out_file_name.startswith('detections_request{}_'.format(
                        self.request_id)):
                    all_detections.extend(self._download_read_json(blob_path))
                    num_aggregated += 1
                    print('Number of results aggregated: ', num_aggregated)
                elif out_file_name.startswith('failures_request{}_'.format(
                        self.request_id)):
                    failures.extend(self._download_read_json(blob_path))

        print('aggregate_results(), length of all_detections: {}'.format(
            len(all_detections)))

        detection_output_content = {
            'info': {
                'detector': 'megadetector_v{}'.format(self.model_version),
                'detection_completion_time': get_utc_time(),
                'format_version': api_config.OUTPUT_FORMAT_VERSION
            },
            'detection_categories': api_config.DETECTION_CATEGORIES,
            'images': all_detections
        }
        # order the json output keys
        detection_output_content = OrderedDict([
            ('info', detection_output_content['info']),
            ('detection_categories',
             detection_output_content['detection_categories']),
            ('images', detection_output_content['images'])
        ])

        detection_output_str = json.dumps(detection_output_content, indent=1)

        # upload aggregated results to output_store
        self.internal_storage_service.create_blob_from_text(
            self.internal_container,
            '{}/{}_detections_{}_{}.json'.format(
                self.request_id, self.request_id, self.request_name,
                self.request_submission_timestamp),
            detection_output_str,
            max_connections=4)
        print('aggregate_results(), detections uploaded')

        print('aggregate_results(), number of failed images: {}'.format(
            len(failures)))
        failures_str = json.dumps(failures, indent=1)
        self.internal_storage_service.create_blob_from_text(
            self.internal_container, '{}/{}_failed_images_{}_{}.json'.format(
                self.request_id, self.request_id, self.request_name,
                self.request_submission_timestamp), failures_str)
        print('aggregate_results(), failures uploaded')

        output_file_urls = self._generate_urls_for_outputs()
        return output_file_urls
Example #37
import string


def find_person_in_string(s):
    if "Nicholas" in s:
        return "Nicholas"
    if "Ben" in s:
        return "Ben"
    if "Rob" in s:
        return "Rob"


# initialise blob service
block_blob_service = BlockBlobService(
    account_name='nikolas',
    account_key=
    'b/qWJCuFxdUD4A9Y6erFvXwqMcUBNJz+MAHHADXWN4v+8JRMxMfIW+nqeGKfUFhP1xcb5GJzA2OSuVEs3rVr0Q=='
)

block_blob_service.create_blob_from_path(
    'addresses',
    'zoom_0.mp4',
    'zoom_0.mp4',
    content_settings=ContentSettings(content_type='video/mp4'))

#get url
# block_blob_service.set_container_acl("addresses",{"AccessPolicy": "abc"})
sas_token = block_blob_service.generate_blob_shared_access_signature(
    "addresses",
    "zoom_0.mp4",
    permission=BlobPermissions().READ,
Example #38
class DataSet:
    @classmethod
    def fromstrings(cls, start_date_string, end_date_string):
        start_date = date(int(start_date_string[0:4]),
                          int(start_date_string[4:6]),
                          int(start_date_string[6:8]))
        end_date = date(int(end_date_string[0:4]), int(end_date_string[4:6]),
                        int(end_date_string[6:8]))

        return cls(start_date, end_date)

    def __init__(self, start_date, end_date):
        self.start_date = start_date
        self.end_date = end_date

        self.config = configparser.ConfigParser()
        self.config.read('ds.config')
        self.ds = self.config['DecisionService']
        self.cache_folder = self.ds['CacheFolder']
        self.joined_examples_container = self.ds['JoinedExamplesContainer']
        self.experimental_unit_duration_days = self.ds[
            'ExperimentalUnitDurationDays']

        # https://azure-storage.readthedocs.io/en/latest/_modules/azure/storage/blob/models.html#BlobBlock
        self.block_blob_service = BlockBlobService(
            account_name=self.ds['AzureBlobStorageAccountName'],
            account_key=self.ds['AzureBlobStorageAccountKey'])

        # Lookback 'experimental_unit_duration_days' for events
        self.start_date_withlookback = start_date + timedelta(
            days=-int(self.experimental_unit_duration_days))

        self.ordered_joined_events_filename = os.path.join(
            self.cache_folder,
            'data_{0}-{1}.json'.format(start_date.strftime('%Y%m%d'),
                                       end_date.strftime('%Y%m%d')))

        # create scoring directories for [start_date, end_date] range
        self.scoring_dir = os.path.join(self.cache_folder, 'scoring')
        if not os.path.exists(self.scoring_dir):
            os.makedirs(self.scoring_dir)

    def download_events(self):
        temp = []

        for current_date in dates_in_range(self.start_date_withlookback,
                                           self.end_date):
            blob_prefix = current_date.strftime(
                '%Y/%m/%d/'
            )  #'{0}/{1}/{2}/'.format(current_date.year, current_date.month, current_date.day)
            temp += filter(
                lambda b: b.properties.content_length != 0,
                self.block_blob_service.list_blobs(
                    self.joined_examples_container, prefix=blob_prefix))

        self.joined = list(map(parse_name, temp))

        self.global_idx = {}
        self.global_model_idx = {}
        self.data = []

        def load_data(ts, blob):
            jd = JoinedData(self.block_blob_service, self.cache_folder,
                            self.joined_examples_container, ts, blob)
            jd.index()
            return jd

        print("Downloading & indexing events...")
        with Pool(processes=8) as p:
            self.data = p.map(lambda x: load_data(x[0], x[1]), self.joined)
            for jd in self.data:
                reader = jd.reader()
                for evt in jd.ids:
                    # print("'{0}' <- {1}" .format(evt.evt_id, reader))
                    self.global_idx[evt.evt_id] = reader

    def build_model_history(self):
        print('Found {0} events. Sorting data files by time...'.format(
            len(self.global_idx)))
        self.data.sort(key=lambda jd: jd.ts)

        # reproduce training, by using trackback files
        self.model_history = list(
            get_checkpoint_models(self.block_blob_service,
                                  self.start_date_withlookback, self.end_date))
        with Pool(5) as p:
            self.model_history = p.map(
                lambda x: CheckpointedModel(self.block_blob_service, x[
                    0], self.cache_folder, x[1], x[2]), self.model_history)
            for m in self.model_history:
                if m.model_id is not None:
                    self.global_model_idx[m.model_id] = m

        self.model_history.sort(key=lambda jd: jd.ts)

    def get_online_settings(self):
        online_settings_blob = CachedBlob(self.block_blob_service,
                                          self.cache_folder, 'mwt-settings',
                                          'client')
        return json.load(
            open(online_settings_blob.filename, 'r', encoding='utf8'))

    def create_files(self):
        for local_date in dates_in_range(self.start_date, self.end_date):
            scoring_dir_date = os.path.join(self.scoring_dir,
                                            local_date.strftime('%Y/%m/%d'))
            if os.path.exists(scoring_dir_date):
                rmtree(scoring_dir_date)
            os.makedirs(scoring_dir_date)

        ordered_joined_events = open(self.ordered_joined_events_filename,
                                     'w',
                                     encoding='utf8')
        num_events_counter = 0
        missing_events_counter = 0

        model_history_withindaterange = filter(
            lambda x: x.ts.date() >= self.start_date, self.model_history)
        print('Creating {0} scoring models...'.format(
            len(list(model_history_withindaterange))))

        for m in self.model_history:
            # for scoring and ips calculations, we only consider models within [start_date, end_date]
            if m.ts.date() < self.start_date:
                continue

            print('Creating scoring models {0}...'.format(
                m.ts.strftime('%Y/%m/%d %H:%M:%S')))
            num_valid_events = 0

            if m.model_id is None:
                # no modelid available, skipping scoring event creation
                for event_id in m.trackback_ids:
                    # print("'{0}'" .format(event_id))
                    if event_id in self.global_idx:
                        # print("found '{0}'" .format(event_id))
                        line = self.global_idx[event_id].read(event_id)
                        if line:
                            line = line.strip() + ('\n')
                            _ = ordered_joined_events.write(line)
                            num_events_counter += 1
                            num_valid_events += 1
                    else:
                        missing_events_counter += 1
            else:
                for event_id in m.trackback_ids:
                    if event_id in self.global_idx:
                        line = self.global_idx[event_id].read(event_id)
                        if line:
                            line = line.strip() + ('\n')

                            _ = ordered_joined_events.write(line)
                            num_events_counter += 1
                            num_valid_events += 1

                            scoring_model_id = json.loads(line)['_model_id']
                            if scoring_model_id is None:
                                continue  # this can happen at the very beginning if no model was available

                            if scoring_model_id not in self.global_model_idx:
                                continue  # this can happen if the event was scored using a model that lies outside our model history

                            scoring_model = self.global_model_idx[
                                scoring_model_id]
                            if scoring_model.ts.date() >= self.start_date:
                                #                           the event was scored using a model which was generated prior to start_date
                                #                           so we can exclude it from scoring
                                scoring_filename = os.path.join(
                                    self.scoring_dir,
                                    scoring_model.ts.strftime('%Y'),
                                    scoring_model.ts.strftime('%m'),
                                    scoring_model.ts.strftime('%d'),
                                    scoring_model_id + '.json')

                                # with open(scoring_filename, 'a', encoding='utf8') as scoring_file:
                                #     _ = scoring_file.write(line)

                    else:
                        missing_events_counter += 1

                if num_valid_events > 0:
                    scoring_model_filename = os.path.join(
                        self.scoring_dir, m.ts.strftime('%Y'),
                        m.ts.strftime('%m'), m.ts.strftime('%d'),
                        m.model_id + '.model')

                    _ = ordered_joined_events.write(
                        json.dumps({
                            '_tag':
                            'save_{0}'.format(scoring_model_filename)
                        }) + ('\n'))

        ordered_joined_events.close()

    def train_models(self):
        model_history_prestart = list(
            filter(lambda x: x.ts.date() < self.start_date,
                   self.model_history))
        model_init = max(model_history_prestart, key=lambda x: x.ts)
        model_init_name = model_init.trackback.filename.rsplit(
            '.trackback', 1)[0]

        print("Warm start model: '{0}'".format(model_init_name))

        # Download model_init (and make sure it works on windows)
        model_init_info = re.split('[/\\\\]+', model_init_name)[-4:]
        container = model_init_info[0]
        name = model_init_info[1] + '/' + model_init_info[
            2] + '/' + model_init_info[3]
        CachedBlob(self.block_blob_service, self.cache_folder, container, name)

        online_args = self.get_online_settings()['TrainArguments']

        vw_cmdline = 'vw ' + self.ordered_joined_events_filename + ' --json --save_resume --preserve_performance_counters -i ' + model_init_name + ' ' + online_args
        # vw_cmdline += ' --quiet'
        print(vw_cmdline)

        os.system(vw_cmdline)
Example #39
def get_file_list_from_container(container, account_name, account_key):
    block_blob_service = BlockBlobService(account_name=account_name,
                                          account_key=account_key)
    generator = block_blob_service.list_blobs(container)
    for blob in generator:
        print(blob.name, blob.properties.last_modified)
Example #40
 def get_conn(self) -> BlockBlobService:
     """Return the BlockBlobService object."""
     conn = self.get_connection(self.conn_id)
     service_options = conn.extra_dejson
     return BlockBlobService(account_name=conn.login, account_key=conn.password, **service_options)
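A hedged illustration of the pass-through of connection extras by get_conn above (which options a given connection actually stores is an assumption; they are simply forwarded as BlockBlobService keyword arguments):

# Illustrative only: extra options stored on the connection are forwarded verbatim
# as BlockBlobService keyword arguments by get_conn() above.
from azure.storage.blob import BlockBlobService

service_options = {'protocol': 'https', 'is_emulated': False}  # hypothetical extras
client = BlockBlobService(account_name='<account_name>', account_key='<account_key>',
                          **service_options)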
Example #41
import os
import boto3
import pymysql
import time
import MySQLdb
from azure.storage.blob import BlockBlobService
from azure.storage.blob import ContentSettings
from flask import Flask, request, send_from_directory, render_template

app = Flask(__name__)

block_blob_service = BlockBlobService(account_name='accountName',
                                      account_key='accountKey')

blobStore = "blobStorageURL"
localStore = "localPathToDefaultStorageOfFilesAndImages"

hostname = "azureHostName"
username = "******"
password = "******"
database = "azureDatabaseName"

mySQLCon = MySQLdb.connect(host="mysqlHostName",
                           user="******",
                           passwd="mysqlPassword",
                           db="mysqlDBName")


def doQuery(mySQLCon, cityName, fare1, fare2):
    cur = mySQLCon.cursor()
    cur.execute("Query to fetch description of file/image")
Example #42
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 8 2018

@author: "Anirban Das"
"""

import os
import getpass
import datetime , sys, time, csv
from azure.storage.blob import BlockBlobService, PublicAccess


block_blob_service = BlockBlobService(account_name='<Storage Account Name>', 
				account_key='<Key 1 of the Storage Account>')

container_name ='<Container to Upload Files>'
# Create the container
block_blob_service.create_container(container_name)

STATS_DIRECTORY = "."
# write local stats in a csv file
def write_local_stats(filename, stats_list):
    global STATS_DIRECTORY
    try:
        filepath = STATS_DIRECTORY.rstrip(os.sep) + os.sep + filename
        with open(filepath, 'w') as file:
            writer = csv.writer(file, delimiter=',')
            writer.writerows(stats_list)
    except :
Example #43
class AzureBlobWriter(BaseWriter):
    """
    Writes items to azure blob containers.

        - account_name (str)
            Public access name of the Azure account.

        - account_key (str)
            Public access key to the Azure account.

        - container (str)
            Blob container name.
    """
    supported_options = {
        'account_name': {
            'type': six.string_types,
            'env_fallback': 'EXPORTERS_AZUREWRITER_NAME'
        },
        'account_key': {
            'type': six.string_types,
            'env_fallback': 'EXPORTERS_AZUREWRITER_KEY'
        },
        'container': {
            'type': six.string_types
        }
    }
    hash_algorithm = 'md5'
    VALID_CONTAINER_NAME_RE = r'[a-zA-Z0-9-]{3,63}'

    def __init__(self, *args, **kw):
        from azure.storage.blob import BlockBlobService
        super(AzureBlobWriter, self).__init__(*args, **kw)
        account_name = self.read_option('account_name')
        account_key = self.read_option('account_key')

        self.container = self.read_option('container')
        if '--' in self.container or not re.match(self.VALID_CONTAINER_NAME_RE,
                                                  self.container):
            help_url = ('https://azure.microsoft.com/en-us/documentation'
                        '/articles/storage-python-how-to-use-blob-storage/')
            warnings.warn(
                "Container name %s doesn't conform with naming rules (see: %s)"
                % (self.container, help_url))

        self.azure_service = BlockBlobService(account_name, account_key)
        self.azure_service.create_container(self.container)
        self.logger.info('AzureBlobWriter has been initiated. '
                         'Writing to container {}'.format(self.container))
        self.set_metadata('files_counter', 0)
        self.set_metadata('blobs_written', [])

    def write(self, dump_path, group_key=None):
        self.logger.info('Start uploading {} to {}'.format(
            dump_path, self.container))
        self._write_blob(dump_path)
        self.set_metadata('files_counter',
                          self.get_metadata('files_counter') + 1)

    @retry_long
    def _write_blob(self, dump_path):
        blob_name = dump_path.split('/')[-1]
        self.azure_service.create_blob_from_path(
            self.read_option('container'),
            blob_name,
            dump_path,
            max_connections=5,
        )
        self.logger.info('Saved {}'.format(blob_name))
        self._update_metadata(dump_path, blob_name)

    def _update_metadata(self, dump_path, blob_name):
        buffer_info = self.write_buffer.metadata[dump_path]
        file_info = {
            'blob_name': blob_name,
            'size': buffer_info['size'],
            'hash': b64encode(unhexlify(buffer_info['file_hash'])),
            'number_of_records': buffer_info['number_of_records']
        }
        self.get_metadata('blobs_written').append(file_info)

    def _check_write_consistency(self):
        from azure.common import AzureMissingResourceHttpError
        for blob_info in self.get_metadata('blobs_written'):
            try:
                blob = self.azure_service.get_blob_properties(
                    self.read_option('container'), blob_info['blob_name'])
                blob_size = blob.properties.content_length
                blob_md5 = blob.properties.content_settings.content_md5
                if str(blob_size) != str(blob_info['size']):
                    raise InconsistentWriteState(
                        'File {} has unexpected size. (expected {} - got {})'.
                        format(blob_info['blob_name'], blob_info['size'],
                               blob_size))
                if str(blob_md5) != str(blob_info['hash']):
                    raise InconsistentWriteState(
                        'File {} has unexpected hash. (expected {} - got {})'.
                        format(blob_info['blob_name'], blob_info['hash'],
                               blob_md5))

            except AzureMissingResourceHttpError:
                raise InconsistentWriteState('Missing blob {}'.format(
                    blob_info['blob_name']))
        self.logger.info('Consistency check passed')
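A hypothetical options mapping shaped after supported_options above (how the surrounding exporters configuration embeds it is an assumption and may differ between deployments):

# Hypothetical writer options, shaped after supported_options above.
writer_options = {
    'account_name': 'myaccount',        # or via EXPORTERS_AZUREWRITER_NAME
    'account_key': '<account_key>',     # or via EXPORTERS_AZUREWRITER_KEY
    'container': 'exported-items',      # must match r'[a-zA-Z0-9-]{3,63}' and avoid '--'
}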
Example #44
# from predict import initialize, predict_image
from azure.storage.queue import QueueService, QueueMessageFormat
import base64
# from video2image import sampling
from concurrent import futures
from concurrent.futures import ThreadPoolExecutor


print("Azure Blob storage v12 - Python quickstart sample")
API_ENDPOINT = "http://127.0.0.1:5000/image"

account_name = "cowimagestorage"
account_key = ""

block_blob_service = BlockBlobService(
    account_name=account_name,
    account_key=account_key
)


queue = QueueService(connection_string="")
queue.encode_function = QueueMessageFormat.text_base64encode

def process_single_file(filename, blob, dirname=""):

    # image = {'imageData': open('../test_image/{filename}'.format(filename=filename), 'rb')}
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
    # with open('{dirname}/{filename}'.format(filename=filename, dirname=dirname), 'rb') as image:
Example #45
from datetime import datetime
from FlaskWebProject import app, db, login
from werkzeug.security import generate_password_hash, check_password_hash
from flask_login import UserMixin
from azure.storage.blob import BlockBlobService
import string
import random
from werkzeug import secure_filename
from flask import flash

blob_container = app.config['BLOB_CONTAINER']
blob_service = BlockBlobService(account_name=app.config['BLOB_ACCOUNT'],
                                account_key=app.config['BLOB_STORAGE_KEY'])


def id_generator(size=32, chars=string.ascii_uppercase + string.digits):
    return ''.join(random.choice(chars) for _ in range(size))


class User(UserMixin, db.Model):
    __tablename__ = 'users'
    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(64), index=True, unique=True)
    password_hash = db.Column(db.String(128))

    def __repr__(self):
        return '<User {}>'.format(self.username)

    def set_password(self, password):
        self.password_hash = generate_password_hash(password)
Example #46
0
import os, uuid, sys, configparser
from azure.storage.blob import BlockBlobService, PublicAccess

config = configparser.ConfigParser()
config.read('config.ini')

# Create the BlockBlobService that is used to call the Blob service for the storage account
block_blob_service = BlockBlobService(
    account_name=config['DEFAULT']['storage_account_name'],
    account_key=config['DEFAULT']['storage_account_key'],
    endpoint_suffix="core.usgovcloudapi.net")

# Create a container called 'texts'.
container_name = 'texts'
block_blob_service.create_container(container_name)
print('created container')

# Set the permission so the blobs are public.
block_blob_service.set_container_acl(container_name,
                                     public_access=PublicAccess.Container)
print('permissions set')

path = '/Users/ephraimsalhanick/Desktop/AzureMLSI2018/Random_Speeches/txt'

for filename in os.listdir(path):

    # Upload the file to storage
    block_blob_service.create_blob_from_path(container_name, filename,
                                             path + '/' + filename)
    print('uploaded: ' + filename)
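
# A minimal sketch of the config.ini the snippet above expects (section and key names are
# taken from the code; the values are placeholders, not real credentials):
import configparser

sample = configparser.ConfigParser()
sample['DEFAULT'] = {
    'storage_account_name': 'mystorageaccount',  # placeholder
    'storage_account_key': '<account-key>',      # placeholder
}
with open('config.ini', 'w') as sample_file:
    sample.write(sample_file)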
Example #47
0
def run_sample():
    try:
        # Create the BlockBlobService that is used to call the Blob service for the storage account
        block_blob_service = BlockBlobService(account_name='hackgt19', account_key='24wGa1RHd0BnemSDBbqRzvvTAB7Qy4IAN28E9de6OLR98wxnFljJXnKaBtzqJd2F53SmtNZP2NnZCPZkeL6wlQ==')

        # Create a container called 'quickstartblobs'.
        container_name ='quickstartblobs'
        block_blob_service.create_container(container_name)

        # Set the permission so the blobs are public.
        block_blob_service.set_container_acl(container_name, public_access=PublicAccess.Container)

        # Create a file in Documents to test the upload and download.
        local_path=os.path.expanduser("~/Documents")
        local_file_name ="QuickStart_" + str(uuid.uuid4()) + ".txt"
        full_path_to_file =os.path.join(local_path, local_file_name)

        # Write text to the file.
        file = open(full_path_to_file,  'w')
        file.write("Hello, World!")
        file.close()

        print("Temp file = " + full_path_to_file)
        print("\nUploading to Blob storage as blob " + local_file_name)

        # Upload the created file, use local_file_name for the blob name
        block_blob_service.create_blob_from_path(container_name, local_file_name, full_path_to_file)
        # List the blobs in the container
        print("\nList blobs in the container")
        generator = block_blob_service.list_blobs(container_name)
        for blob in generator:
            print("\t Blob name: " + blob.name)

        # Download the blob(s).
        # Add '_DOWNLOADED' as prefix to '.txt' so you can see both files in Documents.
        full_path_to_file2 = os.path.join(local_path, str.replace(local_file_name ,'.txt', '_DOWNLOADED.txt'))
        print("\nDownloading blob to " + full_path_to_file2)
        block_blob_service.get_blob_to_path(container_name, local_file_name, full_path_to_file2)

        sys.stdout.write("Sample finished running. When you hit <any key>, the sample will be deleted and the sample "
                         "application will exit.")
        sys.stdout.flush()
        input()

        # Clean up resources. This includes the container and the temp files
        block_blob_service.delete_container(container_name)
        os.remove(full_path_to_file)
        os.remove(full_path_to_file2)
    except Exception as e:
        print(e)
Example #48
0
class AzureClient(BaseClient):
    def __init__(self, operation_name, configuration, directory_persistent,
                 directory_work_list, poll_delay_time, poll_maximum_time):
        super(AzureClient,
              self).__init__(operation_name, configuration,
                             directory_persistent, directory_work_list,
                             poll_delay_time, poll_maximum_time)
        if configuration['credhub_url'] is None:
            self.__setCredentials(configuration['client_id'],
                                  configuration['client_secret'],
                                  configuration['tenant_id'])
            self.resource_group = configuration['resource_group']
            self.storage_account_name = configuration['storageAccount']
            self.storage_account_key = configuration['storageAccessKey']
            self.subscription_id = configuration['subscription_id']
        else:
            self.logger.info('fetching creds from credhub')
            azure_config = self._get_credentials_from_credhub(configuration)
            self.__setCredentials(azure_config['client_id'],
                                  azure_config['client_secret'],
                                  azure_config['tenant_id'])
            self.resource_group = azure_config['resource_group']
            self.storage_account_name = azure_config['storageAccount']
            self.storage_account_key = azure_config['storageAccessKey']
            self.subscription_id = azure_config['subscription_id']

        self.block_blob_service = BlockBlobService(
            account_name=self.storage_account_name,
            account_key=self.storage_account_key)
        self.compute_client = ComputeManagementClient(self.__azureCredentials,
                                                      self.subscription_id)

        # +-> Check whether the given container exists and is accessible
        if (not self.get_container()) or (not self.access_container()):
            msg = 'Could not find or access the given container.'
            self.last_operation(msg, 'failed')
            raise Exception(msg)

        # scsi_host_number would be used to determine lun to device mapping
        # scsi_host_number would be same for all data volumes/disks
        self.scsi_host_number = self.get_host_number_of_data_volumes()
        if not self.scsi_host_number:
            msg = 'Could not determine SCSI host number for data volume'
            self.last_operation(msg, 'failed')
            raise Exception(msg)
        self.instance_location = self.get_instance_location(
            configuration['instance_id'])
        if not self.instance_location:
            msg = 'Could not retrieve the location of the instance.'
            self.last_operation(msg, 'failed')
            raise Exception(msg)

        self.max_block_size = 100 * 1024 * 1024
        # List of regions where ZRS is supported
        self.zrs_supported_regions = [
            'westeurope', 'centralus', 'southeastasia', 'eastus2',
            'northeurope', 'francecentral'
        ]

        self.availability_zones = self._get_availability_zone_of_server(
            configuration['instance_id'])

    def __setCredentials(self, client_id, client_secret, tenant_id):
        self.__azureCredentials = ServicePrincipalCredentials(
            client_id=client_id, secret=client_secret, tenant=tenant_id)

    def get_container(self):
        try:
            container_props = self.block_blob_service.get_container_properties(
                self.CONTAINER)

            return container_props
        except Exception as error:
            self.logger.error(
                '[Azure] [STORAGE] ERROR: Unable to find container {}.\n{}'.
                format(self.CONTAINER, error))
            return None

    def access_container(self):
        # Test if the container is accessible
        try:
            key = '{}/{}'.format(self.GUID,
                                 'AccessTestByServiceFabrikPythonLibrary')
            self.block_blob_service.create_blob_from_text(
                self.CONTAINER, key, 'This is a sample text')
            self.block_blob_service.delete_blob(self.CONTAINER, key)
            return True
        except Exception as error:
            self.logger.error(
                '[Azure] [STORAGE] ERROR: Unable to access container {}.\n{}'.
                format(self.CONTAINER, error))
            return False

    def _get_availability_zone_of_server(self, instance_id):
        try:
            instance = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)
            return instance.zones
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to find or access attached volume for instance_id {}.{}'
                .format(instance_id, error))
            return None

    def get_snapshot(self, snapshot_name):
        try:
            snapshot = self.compute_client.snapshots.get(
                self.resource_group, snapshot_name)
            return Snapshot(snapshot.name, snapshot.disk_size_gb,
                            snapshot.provisioning_state)
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to find or access snapshot {}.\n{}'.
                format(snapshot_name, error))
            return None

    def get_volume(self, volume_name):
        try:
            volume = self.compute_client.disks.get(self.resource_group,
                                                   volume_name)
            return Volume(volume.name, volume.provisioning_state,
                          volume.disk_size_gb)
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to find or access volume/disk {}.\n{}'.
                format(volume_name, error))
            return None

    def get_host_number_of_data_volumes(self):
        '''
        This function is specific to Azure.
        It determines the SCSI host number for the persistent disk.
        The host number, together with the LUN, is needed to locate the device in a deterministic way.
        '''
        host_number = None
        try:
            device_persistent_volume = self.shell('cat {} | grep {}'.format(
                self.FILE_MOUNTS,
                self.DIRECTORY_PERSISTENT)).split(' ')[0][5:-1]
            device_paths = glob.glob(
                '/sys/bus/scsi/devices/*:*:*:*/block/{}'.format(
                    device_persistent_volume))
            if len(device_paths) > 1:
                raise Exception('Found more than one device path for device {}'.format(
                    device_persistent_volume))
            # Success: got exactly one device path
            host_number = device_paths[0][22:-len('/block/{}'.format(
                device_persistent_volume))].split(':')[0]
        except Exception as error:
            self.logger.error(
                '[ERROR] [SCSI HOST NUMBER] [DATA VOLUME] Error while determining SCSI host number '
                'of persistent volume directory {}.{}'.format(
                    self.DIRECTORY_PERSISTENT, error))
        return host_number
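
    # Illustrative sketch of the sysfs parsing above: a path such as
    # '/sys/bus/scsi/devices/5:0:0:1/block/sdc' encodes host:channel:target:lun, and the
    # prefix '/sys/bus/scsi/devices/' is 22 characters long, which is where the magic
    # number in the slice comes from. The concrete path and device name are examples only.
    #
    #   path = '/sys/bus/scsi/devices/5:0:0:1/block/sdc'
    #   hctl = path[22:-len('/block/sdc')]   # -> '5:0:0:1'
    #   host_number = hctl.split(':')[0]     # -> '5'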

    def get_instance_location(self, instance_id):
        try:
            instance = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)
            return instance.location
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to get location for instance_id {}.{}'.
                format(instance_id, error))
            return None

    def get_attached_volumes_for_instance(self, instance_id):
        try:
            instance = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)
            self.availability_zones = instance.zones
            volume_list = []
            for disk in instance.storage_profile.data_disks:
                device = None
                device_path = glob.glob(
                    self.DEVICE_PATH_TEMPLATE.format(self.scsi_host_number,
                                                     disk.lun))
                if len(device_path) != 1:
                    raise Exception(
                        'Expected exactly one device path but got {} for lun {}'
                        .format(len(device_path), disk.lun))
                device = '/dev/{}'.format(
                    self.shell('ls {}'.format(device_path[0])).rstrip())

                volume_list.append(
                    Volume(disk.name, 'none', disk.disk_size_gb, device))
            return volume_list
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to find or access attached volume for instance_id {}.{}'
                .format(instance_id, error))
            return []

    def get_persistent_volume_for_instance(self, instance_id):
        try:
            device = self.shell('cat {} | grep {}'.format(
                self.FILE_MOUNTS, self.DIRECTORY_PERSISTENT)).split(' ')[0][:8]
            for volume in self.get_attached_volumes_for_instance(instance_id):
                if volume.device == device:
                    self._add_volume_device(volume.id, device)
                    return volume
            return None
        except Exception as error:
            self.logger.error(
                '[ERROR] [GET PERSISTENT VOLUME] Unable to find persistent volume for instance {}.{}'
                .format(instance_id, error))
            return None

    def location_supports_zrs(self, location):
        return location in self.zrs_supported_regions

    def _create_snapshot(self, volume_id):
        log_prefix = '[SNAPSHOT] [CREATE]'
        snapshot = None
        self.logger.info('{} START for volume id {} with tags {}'.format(
            log_prefix, volume_id, self.tags))
        try:
            disk_info = self.compute_client.disks.get(self.resource_group,
                                                      volume_id)
            snapshot_name = self.generate_name_by_prefix(self.SNAPSHOT_PREFIX)
            if self.location_supports_zrs(disk_info.location):
                snapshot_creation_operation = self.compute_client.snapshots.create_or_update(
                    self.resource_group, snapshot_name, {
                        'location': disk_info.location,
                        'tags': self.tags,
                        'creation_data': {
                            'create_option': DiskCreateOption.copy,
                            'source_uri': disk_info.id
                        },
                        'sku': {
                            'name': 'Standard_ZRS'
                        }
                    })
            else:
                snapshot_creation_operation = self.compute_client.snapshots.create_or_update(
                    self.resource_group, snapshot_name, {
                        'location': disk_info.location,
                        'tags': self.tags,
                        'creation_data': {
                            'create_option': DiskCreateOption.copy,
                            'source_uri': disk_info.id
                        },
                        'sku': {
                            'name': 'Standard_LRS'
                        }
                    })

            self._wait(
                'Waiting for snapshot {} to get ready...'.format(
                    snapshot_name), lambda operation: operation.done() is True,
                None, snapshot_creation_operation)

            snapshot_info = snapshot_creation_operation.result()
            self.logger.info(
                'Snapshot creation response: {}'.format(snapshot_info))
            snapshot = Snapshot(snapshot_info.name, snapshot_info.disk_size_gb,
                                snapshot_info.provisioning_state)
            self._add_snapshot(snapshot.id)
            self.logger.info(
                '{} SUCCESS: snapshot-id={}, volume-id={} , tags={} '.format(
                    log_prefix, snapshot.id, volume_id, self.tags))
            self.output_json['snapshotId'] = snapshot.id
        except Exception as error:
            message = '{} ERROR: volume-id={}\n{}'.format(
                log_prefix, volume_id, error)
            self.logger.error(message)
            if snapshot:
                self.delete_snapshot(snapshot.id)
                snapshot = None
            raise Exception(message)

        return snapshot

    def _copy_snapshot(self, snapshot_id):
        return self.get_snapshot(snapshot_id)

    def _delete_snapshot(self, snapshot_id):
        log_prefix = '[SNAPSHOT] [DELETE]'

        try:
            snapshot_deletion_operation = self.compute_client.snapshots.delete(
                self.resource_group, snapshot_id)
            # TODO: the following wait could be implemented as 'operation.done() is True'
            self._wait(
                'Waiting for snapshot {} to be deleted...'.format(snapshot_id),
                lambda id: not self.get_snapshot(id), None, snapshot_id)
            snapshot_delete_response = snapshot_deletion_operation.result()
            self._remove_snapshot(snapshot_id)
            self.logger.info('{} SUCCESS: snapshot-id={}\n{}'.format(
                log_prefix, snapshot_id, snapshot_delete_response))
            return True
        except Exception as error:
            message = '{} ERROR: snapshot-id={}\n{}'.format(
                log_prefix, snapshot_id, error)
            self.logger.error(message)
            raise Exception(message)

    def _create_volume(self, size, snapshot_id=None):
        log_prefix = '[VOLUME] [CREATE]'
        volume = None

        try:
            disk_creation_operation = None
            disk_name = None
            if snapshot_id is not None:
                snapshot = self.compute_client.snapshots.get(
                    self.resource_group, snapshot_id)
                disk_name = self.generate_name_by_prefix(self.DISK_PREFIX)
                disk_creation_operation = self.compute_client.disks.create_or_update(
                    self.resource_group, disk_name, {
                        'location': self.instance_location,
                        'tags': self.tags,
                        'creation_data': {
                            'create_option': DiskCreateOption.copy,
                            'source_uri': snapshot.id
                        },
                        'zones': self.availability_zones
                    })
            else:
                disk_name = self.generate_name_by_prefix(self.DISK_PREFIX)
                disk_creation_operation = self.compute_client.disks.create_or_update(
                    self.resource_group, disk_name, {
                        'location': self.instance_location,
                        'tags': self.tags,
                        'disk_size_gb': size,
                        'creation_data': {
                            'create_option': DiskCreateOption.empty
                        },
                        'account_type': StorageAccountTypes.standard_lrs,
                        'zones': self.availability_zones
                    })

            self._wait(
                'Waiting for volume {} to get ready...'.format(disk_name),
                lambda operation: operation.done() is True, None,
                disk_creation_operation)

            disk = disk_creation_operation.result()
            volume = Volume(disk.name, 'none', disk.disk_size_gb)
            self._add_volume(volume.id)
            self.logger.info('{} SUCCESS: volume-id={} with tags={} '.format(
                log_prefix, volume.id, self.tags))
        except Exception as error:
            message = '{} ERROR: size={}\n{}'.format(log_prefix, size, error)
            self.logger.error(message)
            if volume:
                self.delete_volume(volume.id)
                volume = None
            raise Exception(message)

        return volume

    def _delete_volume(self, volume_id):
        log_prefix = '[VOLUME] [DELETE]'

        try:
            disk_deletion_operation = self.compute_client.disks.delete(
                self.resource_group, volume_id)

            self._wait(
                'Waiting for volume {} to be deleted...'.format(volume_id),
                lambda operation: operation.done() is True, None,
                disk_deletion_operation)
            delete_response = disk_deletion_operation.result()
            self._remove_volume(volume_id)
            self.logger.info(
                '{} SUCCESS: volume-id={} with tags={}\n{}'.format(
                    log_prefix, volume_id, self.tags, delete_response))
            return True
        except Exception as error:
            message = '{} ERROR: volume-id={}\n{}'.format(
                log_prefix, volume_id, error)
            self.logger.error(message)
            raise Exception(message)

    def _create_attachment(self, volume_id, instance_id):
        log_prefix = '[ATTACHMENT] [CREATE]'
        attachment = None

        try:
            virtual_machine = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)
            volume = self.compute_client.disks.get(self.resource_group,
                                                   volume_id)
            all_data_disks = virtual_machine.storage_profile.data_disks
            # traverse all disks and find the next blank lun
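            # note: this yields the first free lun only if data_disks is ordered by
            # ascending lun; with an unordered list, a set of used luns would be needed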
            next_lun = 0
            for disk in all_data_disks:
                if disk.lun == next_lun:
                    next_lun += 1

            existing_devices_path = glob.glob(
                self.DEVICE_PATH_TEMPLATE.format(self.scsi_host_number,
                                                 next_lun))
            virtual_machine.storage_profile.data_disks.append({
                'lun':
                next_lun,
                'name':
                volume.name,
                'create_option':
                DiskCreateOptionTypes.attach,
                'managed_disk': {
                    'id': volume.id
                }
            })

            disk_attach_operation = self.compute_client.virtual_machines.create_or_update(
                self.resource_group, virtual_machine.name, virtual_machine)

            self._wait(
                'Waiting for attachment of volume {} to get ready...'.format(
                    volume_id), lambda operation: operation.done() is True,
                None, disk_attach_operation)

            updated_vm = disk_attach_operation.result()
            all_devices_path = glob.glob(
                self.DEVICE_PATH_TEMPLATE.format(self.scsi_host_number,
                                                 next_lun))
            new_devices_path = list(
                set(all_devices_path) - set(existing_devices_path))
            if len(new_devices_path) > 1:
                raise Exception(
                    'Found more than one new devices while attaching volume!')
            device = '/dev/{}'.format(
                self.shell('ls {}'.format(new_devices_path[0])).rstrip())
            self._add_volume_device(volume_id, device)
            attachment = Attachment(0, volume_id, instance_id)
            self._add_attachment(volume_id, instance_id)
            self.logger.info(
                '{} SUCCESS: volume-id={}, instance-id={}\n Updated vm:{}'.
                format(log_prefix, volume_id, instance_id, updated_vm))
        except Exception as error:
            message = '{} ERROR: volume-id={}, instance-id={}\n{}'.format(
                log_prefix, volume_id, instance_id, error)
            self.logger.error(message)

            # The following lines are a workaround for an inconsistency:
            # the attachment process may end by throwing an exception, e.g.
            # 'list index out of range', even though the attachment actually succeeded.
            # Therefore we check whether the volume is attached and, if so, trigger the detachment.
            # TODO: on Azure there is no volume.status == 'in-use'; the managed_by field is checked instead
            volume = self.compute_client.disks.get(self.resource_group,
                                                   volume_id)
            if volume.managed_by is not None:
                self.logger.warning(
                    '[VOLUME] [DELETE] Volume is attached although the attaching process failed, '
                    'triggering detachment')
                attachment = True

            if attachment:
                self.delete_attachment(volume_id, instance_id)
                attachment = None
            raise Exception(message)

        return attachment

    def _delete_attachment(self, volume_id, instance_id):
        log_prefix = '[ATTACHMENT] [DELETE]'

        try:
            virtual_machine = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)

            data_disks = virtual_machine.storage_profile.data_disks
            data_disks[:] = [
                disk for disk in data_disks if disk.name != volume_id
            ]
            disk_detach_operation = self.compute_client.virtual_machines.create_or_update(
                self.resource_group, virtual_machine.name, virtual_machine)

            self._wait(
                'Waiting for attachment of volume {} to be removed...'.format(
                    volume_id), lambda operation: operation.done() is True,
                None, disk_detach_operation)

            updated_vm = disk_detach_operation.result()
            self._remove_volume_device(volume_id)
            self._remove_attachment(volume_id, instance_id)
            self.logger.info(
                '{} SUCCESS: volume-id={}, instance-id={}\n updated vm: {}'.
                format(log_prefix, volume_id, instance_id, updated_vm))
            return True
        except Exception as error:
            message = '{} ERROR: volume-id={}, instance-id={}\n{}'.format(
                log_prefix, volume_id, instance_id, error)
            self.logger.error(message)
            raise Exception(message)

    def _find_volume_device(self, volume_id):
        # Nothing to do for Azure as the device name is specified manually while attaching a volume and therefore known
        pass

    def get_mountpoint(self, volume_id, partition=None):
        device = self._get_device_of_volume(volume_id)
        if not device:
            return None
        if partition:
            device += partition
        return device

    def _upload_to_blobstore(self,
                             blob_to_upload_path,
                             blob_target_name,
                             max_connections=2):
        log_prefix = '[AZURE STORAGE CONTAINER] [UPLOAD]'
        self.logger.info(
            '{} Started to upload the tarball to the object storage.'.format(
                log_prefix))
        try:
            self.block_blob_service.MAX_BLOCK_SIZE = self.max_block_size
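            # MAX_BLOCK_SIZE is the chunk size the legacy SDK uses for block uploads
            # (blobs larger than MAX_SINGLE_PUT_SIZE), so raising it to 100 MB reduces
            # the number of put-block requests for large tarballs.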
            self.block_blob_service.create_blob_from_path(
                self.CONTAINER,
                blob_target_name,
                blob_to_upload_path,
                max_connections=max_connections)
            # TODO: need to check above 'blob_target_name'
            self.logger.info(
                '{} SUCCESS: blob_to_upload={}, blob_target_name={}, container={}'
                .format(log_prefix, blob_to_upload_path, blob_target_name,
                        self.CONTAINER))
            return True
        except Exception as error:
            message = '{} ERROR: blob_to_upload={}, blob_target_name={}, container={}\n{}'.format(
                log_prefix, blob_to_upload_path, blob_target_name,
                self.CONTAINER, error)
            self.logger.error(message)
            raise Exception(message)

    def _download_from_blobstore(self,
                                 blob_to_download_name,
                                 blob_download_target_path,
                                 max_connections=2):
        log_prefix = '[AZURE STORAGE CONTAINER] [DOWNLOAD]'
        self.logger.info(
            '{} Started to download the tarball to target {}.'.format(
                log_prefix, blob_download_target_path))
        try:
            self.block_blob_service.MAX_BLOCK_SIZE = self.max_block_size
            self.block_blob_service.get_blob_to_path(
                self.CONTAINER,
                blob_to_download_name,
                blob_download_target_path,
                max_connections=max_connections)
            self.logger.info(
                '{} SUCCESS: blob_to_download={}, blob_target_name={}, container={}'
                .format(log_prefix, blob_to_download_name,
                        blob_download_target_path, self.CONTAINER))
            return True
        except Exception as error:
            message = '{} ERROR: blob_to_download={}, blob_target_name={}, container={}\n{}'.format(
                log_prefix, blob_to_download_name, blob_download_target_path,
                self.CONTAINER, error)
            self.logger.error(message)
            raise Exception(message)

    def _download_from_blobstore_and_pipe_to_process(self, process,
                                                     blob_to_download_name,
                                                     segment_size):
        self.block_blob_service.get_blob_to_stream(self.CONTAINER,
                                                   blob_to_download_name,
                                                   process.stdin,
                                                   snapshot=None,
                                                   start_range=0,
                                                   end_range=segment_size - 1)
        return True
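
# A hedged usage sketch (illustrative, not part of the class above) of the streaming
# pattern used by _download_from_blobstore_and_pipe_to_process: get_blob_to_stream
# writes a byte range of a blob straight into a child process's stdin, so the payload
# can be consumed (e.g. unpacked by tar) without an intermediate file. The container,
# blob name and target directory below are placeholders.
import subprocess

def pipe_blob_segment_to_tar(block_blob_service, container, blob_name, segment_size):
    proc = subprocess.Popen(['tar', '-xzf', '-', '-C', '/tmp/restore'],
                            stdin=subprocess.PIPE)
    block_blob_service.get_blob_to_stream(container,
                                          blob_name,
                                          proc.stdin,
                                          start_range=0,
                                          end_range=segment_size - 1)
    proc.stdin.close()
    return proc.wait()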
Example #49
0
import os
import time
import src.camera_helper as ch

from dotenv import load_dotenv
load_dotenv()

from flask import Flask
from flask import request
from flask import jsonify
app = Flask(__name__)

from azure.storage.blob import BlockBlobService
block_blob_service = BlockBlobService(os.getenv("AZURE_BLOB_ACCOUNT_NAME"),
                                      os.getenv("AZURE_BLOB_ACCOUNT_KEY"))
container_name = os.getenv("AZURE_BLOB_CONTAINER_NAME")

have_camera = False
camera = 0
try:
    from picamera import PiCamera
    print("imported")
    camera = PiCamera(resolution=(3280, 2464))
    print("set camera - taking warmup")
    full_path = './pictures/startup.png'
    print("Taking the picture")
    camera.capture(full_path)
    have_camera = True
except:
    print("No camera module")
Example #50
0

if __name__ == '__main__':
    storage_account = sys.argv[1]
    storage_key = sys.argv[2]
    batch_account = sys.argv[3]
    batch_key = sys.argv[4]
    batch_url = sys.argv[5]
    table_name = sys.argv[6]
    job_id = sys.argv[7]
    entity_pk = sys.argv[8]
    entity_rk = sys.argv[9]

    table_service = TableService(account_name=storage_account,
                                 account_key=storage_key)
    blob_service = BlockBlobService(account_name=storage_account,
                                    account_key=storage_key)
    credentials = batchauth.SharedKeyCredentials(batch_account, batch_key)
    batch_client = batch.BatchServiceClient(credentials, base_url=batch_url)
    entity = table_service.get_entity(table_name, entity_pk, entity_rk)

    wait_for_tasks_to_complete(table_service, batch_client, table_name, entity,
                               job_id)

    if table_name == 'DatabaseEntity':
        container_name = sys.argv[10]
        files = 0
        total_size = 0
        db_type = 'Nucleotide'
        generator = blob_service.list_blobs(container_name,
                                            prefix=entity_rk + '.')
        for blob in generator:
Example #51
0
File: azure.py Project: vyloy/dvc
class RemoteAzure(RemoteBase):
    scheme = "azure"
    REGEX = (
        r"azure://((?P<path>[^=;]*)?|("
        # backward compatibility
        r"(ContainerName=(?P<container_name>[^;]+);?)?"
        r"(?P<connection_string>.+)?)?)$"
    )
    REQUIRES = {"azure-storage-blob": BlockBlobService}
    PARAM_CHECKSUM = "etag"
    COPY_POLL_SECONDS = 5

    def __init__(self, repo, config):
        super(RemoteAzure, self).__init__(repo, config)

        self.url = config.get(Config.SECTION_REMOTE_URL, "azure://")
        match = re.match(self.REGEX, self.url)  # backward compatibility

        path = match.group("path")
        self.bucket = (
            urlparse(self.url if path else "").netloc
            or match.group("container_name")  # backward compatibility
            or os.getenv("AZURE_STORAGE_CONTAINER_NAME")
        )

        self.prefix = urlparse(self.url).path.lstrip("/") if path else ""

        self.connection_string = (
            config.get(Config.SECTION_AZURE_CONNECTION_STRING)
            or match.group("connection_string")  # backward compatibility
            or os.getenv("AZURE_STORAGE_CONNECTION_STRING")
        )

        if not self.bucket:
            raise ValueError("azure storage container name missing")

        if not self.connection_string:
            raise ValueError("azure storage connection string missing")

        self.__blob_service = None

        self.path_info = {"scheme": self.scheme, "bucket": self.bucket}

    @property
    def blob_service(self):
        if self.__blob_service is None:
            logger.debug("URL {}".format(self.url))
            logger.debug("Connection string {}".format(self.connection_string))
            self.__blob_service = BlockBlobService(
                connection_string=self.connection_string
            )
            logger.debug("Container name {}".format(self.bucket))
            try:  # verify that container exists
                self.__blob_service.list_blobs(
                    self.bucket, delimiter="/", num_results=1
                )
            except AzureMissingResourceHttpError:
                self.__blob_service.create_container(self.bucket)
        return self.__blob_service

    def remove(self, path_info):
        if path_info["scheme"] != self.scheme:
            raise NotImplementedError

        logger.debug(
            "Removing azure://{}/{}".format(
                path_info["bucket"], path_info["path"]
            )
        )

        self.blob_service.delete_blob(path_info["bucket"], path_info["path"])

    def _list_paths(self, bucket, prefix):
        blob_service = self.blob_service
        next_marker = None
        while True:
            blobs = blob_service.list_blobs(
                bucket, prefix=prefix, marker=next_marker
            )

            for blob in blobs:
                yield blob.name

            if not blobs.next_marker:
                break

            next_marker = blobs.next_marker

    def list_cache_paths(self):
        return self._list_paths(self.bucket, self.prefix)

    def upload(self, from_infos, to_infos, names=None):
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info["scheme"] != self.scheme:
                raise NotImplementedError

            if from_info["scheme"] != "local":
                raise NotImplementedError

            bucket = to_info["bucket"]
            path = to_info["path"]

            logger.debug(
                "Uploading '{}' to '{}/{}'".format(
                    from_info["path"], bucket, path
                )
            )

            if not name:
                name = os.path.basename(from_info["path"])

            cb = Callback(name)

            try:
                self.blob_service.create_blob_from_path(
                    bucket, path, from_info["path"], progress_callback=cb
                )
            except Exception:
                msg = "failed to upload '{}'".format(from_info["path"])
                logger.warning(msg)
            else:
                progress.finish_target(name)

    def download(
        self,
        from_infos,
        to_infos,
        no_progress_bar=False,
        names=None,
        resume=False,
    ):
        names = self._verify_path_args(from_infos, to_infos, names)

        for to_info, from_info, name in zip(to_infos, from_infos, names):
            if from_info["scheme"] != self.scheme:
                raise NotImplementedError

            if to_info["scheme"] != "local":
                raise NotImplementedError

            bucket = from_info["bucket"]
            path = from_info["path"]

            logger.debug(
                "Downloading '{}/{}' to '{}'".format(
                    bucket, path, to_info["path"]
                )
            )

            tmp_file = tmp_fname(to_info["path"])
            if not name:
                name = os.path.basename(to_info["path"])

            cb = None if no_progress_bar else Callback(name)

            makedirs(os.path.dirname(to_info["path"]), exist_ok=True)

            try:
                self.blob_service.get_blob_to_path(
                    bucket, path, tmp_file, progress_callback=cb
                )
            except Exception:
                msg = "failed to download '{}/{}'".format(bucket, path)
                logger.warning(msg)
            else:
                move(tmp_file, to_info["path"])

                if not no_progress_bar:
                    progress.finish_target(name)
Example #52
0
if __name__ == '__main__':
    if len(sys.argv) < 4:
        print(
            "Three arguments are expected. Example: python {0} <joined_data> <start_model> <num_models>"
            .format(sys.argv[0]))
        sys.exit(1)

    joined_data = sys.argv[1]
    start_model = sys.argv[2]
    num_models = int(sys.argv[3])

    config = configparser.ConfigParser()
    config.read('ds.config')
    ds = config['DecisionService']
    cache_folder = ds['CacheFolder']
    block_blob_service = BlockBlobService(
        account_name=ds['AzureBlobStorageAccountName'],
        account_key=ds['AzureBlobStorageAccountKey'])

    joined_data_index = {}

    # index joined data
    with open(joined_data, 'r', encoding='utf8') as f:
        pos = f.tell()
        line = f.readline()
        while len(line) != 0:
            evt = json.loads(line)
            if '_eventid' in evt:
                joined_data_index[evt['_eventid']] = pos
            pos = f.tell()
            line = f.readline()
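
# A small illustrative sketch of how such a byte-offset index is used later: the stored
# f.tell() positions allow a single event to be re-read with seek() instead of rescanning
# the whole joined-data file. (read_event is a hypothetical helper, not from the original.)
import json

def read_event(joined_data_path, index, event_id):
    with open(joined_data_path, 'r', encoding='utf8') as f:
        f.seek(index[event_id])
        return json.loads(f.readline())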
Example #53
0
class Blob:
    """
    Blob is the implementation for azure blob storage.

    Args:
        app: The current Flask app. If not provided init_app must be called before using this object.

    Returns:
        A `Blob` object.
    """

    # The container name can only contain letters, chars or '-'
    PROFILE_PICTURE_CONTAINER = 'profile-picture'

    def __init__(self, app=None):
        if app:
            self.init_app(app)

    def init_app(self, app):
        """
        Initializes the Blob object.

        Args:
            app: The currently running Flask app.
        """
        self.app = app
        self._is_production = self.app.config['PRODUCTION']
        if self._is_production:
            blob_config = self.app.config['BLOBSTORE']
            self._service = BlockBlobService(account_name=blob_config['ACCOUNT'],
                                             account_key=blob_config['ACCOUNT_KEY'])
            if not self._service.exists(self.PROFILE_PICTURE_CONTAINER):
                self._service.create_container(self.PROFILE_PICTURE_CONTAINER)

    def create_blob_from_bytes(self, container, name, byte_array):
        """
        Wraps create blob from bytes service if in production. Otherwise saves the data to disk.

        Args:
            container: The azure blob service container.
            name: The name of the blob.
            byte_array: The data to be put in the blob.
        """
        name = str(name)
        if self._is_production:
            self._service.create_blob_from_bytes(container, name, bytes(byte_array))
        else:
            # this just saves the file locally, used for dev and testing
            file_name = make_file_name(container, name)
            if not os.path.exists(os.path.dirname(file_name)):
                os.makedirs(os.path.dirname(file_name))
            with open(file_name, "wb") as blob_file:
                blob_file.write(byte_array)

    def delete_blob(self, container, name):
        """
        Wraps delete blob service if in production. Otherwise deletes the data from disk.

        Args:
            container: The container which contains the blob.
            name: The name of the blob in the container.
        """
        name = str(name)
        if self._is_production:
            self._service.delete_blob(container, name)
        else:
            file_name = make_file_name(container, name)
            try:
                os.remove(file_name)
            except OSError:
                pass


    def exists(self, container, name):
        """
        Wraps the exists blob service if in production. Otherwise checks whether the file is on disk.

        Args:
            container: The container where the blob resides.
            name: The name of the blob.

        Returns:
            bool: Whether the blob exists.
        """
        name = str(name)
        if self._is_production:
            return self._service.exists(container, name)

        file_name = make_file_name(container, name)
        return os.path.isfile(file_name)

    def get_blob_to_bytes(self, container, name):
        """
        Wraps get blob to bytes service.

        Args:
            container: The container where the blob resides.
            name: The name of the blob.

        Returns:
            bytearray: The data contained in the blob.
        """
        name = str(name)
        if self._is_production:
            return bytearray(self._service.get_blob_to_bytes(container, name).content)
        else:
            if not self.exists(container, name):
                return None
            file_name = make_file_name(container, name)
            with open(file_name, "rb") as blob_file:
                file_contents = blob_file.read()
                file_bytes = bytearray(file_contents)
            return file_bytes
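
# A hedged usage sketch of the wrapper above. It relies on the module-level
# make_file_name helper that the class references (not shown in this excerpt), and the
# Flask config values are placeholders. With PRODUCTION=False the calls fall back to the
# local filesystem, so no Azure credentials are required.
from flask import Flask

_app = Flask(__name__)
_app.config['PRODUCTION'] = False
_app.config['BLOBSTORE'] = {'ACCOUNT': '<account>', 'ACCOUNT_KEY': '<key>'}

blob = Blob(_app)
blob.create_blob_from_bytes(Blob.PROFILE_PICTURE_CONTAINER, 'user-42', b'fake-image-bytes')
if blob.exists(Blob.PROFILE_PICTURE_CONTAINER, 'user-42'):
    picture = blob.get_blob_to_bytes(Blob.PROFILE_PICTURE_CONTAINER, 'user-42')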
Example #54
0
# quickstart.py: creates a container and blobs, writes a "Hello World" test file, and uploads it using this Python program.

import os
import uuid
import random
from azure.storage.blob import BlobServiceClient
from azure.storage.blob import BlockBlobService
from azure.storage.blob import BlobClient
from azure.storage.blob import ContainerClient

try:
    #Accessing Azure account using Account name and Account key

    block_blob_service = BlockBlobService(
        account_name='mcloudmesh',
        account_key=
        'yo86DzS1cZaV1DHzFyjpMkwIeW2a4LbSnQREJTRdTstjaLrOubU5iaDCmuiX7xsF5jcI1iNWFpLpquA6mu1T+w=='
    )
    print("Azure Blob storage v12 - Python quickstart sample")
    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    container_name = "quickstart3" + str(uuid.uuid4())
    container_client = blob_service_client.create_container(container_name)

    # Create a file in local Documents directory to upload and download

    local_path = "c:/users/hp/quickstart/data"
    local_file_name = "quickstart3" + str(uuid.uuid4()) + ".txt"
    upload_file_path = os.path.join(local_path, local_file_name)

    # Write text to the file
Example #55
0
def start_storage(first_run):

    logging.info("Azure Storage starting.")

    current_path = dirname(abspath(__file__))
    path = "{}/last_dates.json".format(current_path)

    storage_time = format_date(args.storage_time_offset)
    time_format = str(storage_time)
    length_time_format = len(time_format) - 7
    time_format = time_format[:length_time_format]
    time_format_storage = datetime.datetime.strptime(time_format,
                                                     '%Y-%m-%d %H:%M:%S')

    try:
        all_dates = json.load(open(path))
    except Exception as e:
        logging.error(
            "Error: The file with the last dates could not be loaded: '{}'.".
            format(e))

    try:
        # Authentication
        logging.info("Storage: Authenticating.")
        if args.storage_auth_path:
            auth_fields = read_auth_path(args.storage_auth_path)
            block_blob_service = BlockBlobService(
                account_name=auth_fields['id'], account_key=auth_fields['key'])
            logging.info("Storage: Authenticated.")
        elif args.account_name and args.account_key:
            block_blob_service = BlockBlobService(
                account_name=args.account_name, account_key=args.account_key)
            logging.info("Storage: Authenticated.")
        else:
            logging.error(
                "Storage: No parameters have been provided for authentication."
            )

        logging.info("Storage: Getting containers.")
        # Getting containers from the storage account
        if container_format == '*':
            try:
                containers = block_blob_service.list_containers()
            except Exception as e:
                logging.error(
                    "Storage: The containers could not be obtained. '{}'.".
                    format(e))

        # Getting containers from the configuration file
        else:
            try:
                containers = [container_format]
            except Exception as e:
                logging.error(
                    "Storage: The containers could not be obtained. '{}'.".
                    format(e))

        # Getting blobs
        get_blobs(containers, block_blob_service, time_format_storage,
                  first_run, all_dates, path)

    except Exception as e:
        logging.error("Storage account error: '{}'.".format(e))

    logging.info("Storage: End")
Example #56
0
svc_pr = ServicePrincipalAuthentication(
    tenant_id=tenant_id,
    service_principal_id=service_principal_id,
    service_principal_password=service_principal_password,
)

ws = Workspace(ws.subscription_id, ws.resource_group, ws.name, auth=svc_pr)
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep="\n")

def_blob_store = ws.get_default_datastore()

print("Blobstore's name: {}".format(def_blob_store.name))

# create a list of datasets stored in blob
print("Checking for new datasets")
blob_service = BlockBlobService(def_blob_store.account_name,
                                def_blob_store.account_key)
generator = blob_service.list_blobs(def_blob_store.container_name,
                                    prefix="prednet/data/raw_data")
datasets = []
for blob in generator:
    dataset = blob.name.split("/")[3]
    if (dataset not in datasets and dataset.startswith("UCSD")
            and not dataset.endswith("txt")):
        datasets.append(dataset)
        print("Found dataset:", dataset)

# Get all published pipeline objects in the workspace
all_pub_pipelines = PublishedPipeline.list(ws)

# Create a list of datasets for which we have (old) and don't have (new) a
# published pipeline
Example #57
0
 def __init__(self, account_name, account_key, container_name):
     self.container_name = container_name
     self.blob_service = BlockBlobService(account_name=account_name,
                                          account_key=account_key)
Example #58
0
def download_container(app_id,
                       log_dir,
                       container=None,
                       conn_string=None,
                       account_name=None,
                       sas_token=None,
                       start_date=None,
                       end_date=None,
                       overwrite_mode=0,
                       dry_run=False,
                       version=2,
                       verbose=False,
                       create_gzip_mode=-1,
                       delta_mod_t=3600,
                       max_connections=4,
                       confirm=False,
                       report_progress=True,
                       if_match=None,
                       keep_invalid_eof=False,
                       max_download_size=None):
    t_start = time.time()
    if not container:
        container = app_id

    print('------' * 10)
    print('Current UTC time: {}'.format(
        datetime.datetime.now(datetime.timezone.utc)))
    print('app_id: {}'.format(app_id))
    print('container: {}'.format(container))
    print('log_dir: {}'.format(log_dir))
    print('Start Date: {}'.format(start_date))
    print('End Date: {}'.format(end_date))
    print('Overwrite mode: {}'.format(overwrite_mode))
    print('dry_run: {}'.format(dry_run))
    print('version: {}'.format(version))
    print('create_gzip_mode: {}'.format(create_gzip_mode))
    print('------' * 10)

    if not dry_run:
        os.makedirs(os.path.join(log_dir, app_id), exist_ok=True)

    output_fp = None
    if version == 1:  # using C# api for uncooked logs
        output_fp = os.path.join(
            log_dir, app_id, app_id + '_' + start_date.strftime("%Y-%m-%d") +
            '_' + end_date.strftime("%Y-%m-%d") + '.json')
        print('Destination: {}'.format(output_fp))
        do_download = True
        if os.path.isfile(output_fp):
            if overwrite_mode in {0, 3, 4}:
                print('Output file {} already exists. Not downloading.'.format(
                    output_fp))
                do_download = False
            elif overwrite_mode == 1 and input(
                    'Output file {} already exists. Do you want to overwrite [Y/n]? '
                    .format(output_fp)) not in {'Y', 'y'}:
                do_download = False

        if do_download:
            if dry_run:
                print('--dry_run - Not downloading!')
            else:
                print('Downloading...', end='')
                try:
                    import requests
                    LogDownloaderURL = "https://cps-staging-exp-experimentation.azurewebsites.net/api/Log?account={ACCOUNT_NAME}&key={ACCOUNT_KEY}&start={START_DATE}&end={END_DATE}&container={CONTAINER}"
                    conn_string_dict = dict(
                        x.split('=', 1) for x in conn_string.split(';'))
                    if not conn_string_dict['AccountName'] or len(
                            conn_string_dict['AccountKey']) != 88:
                        print("Error: Invalid Azure Storage ConnectionString.")
                        sys.exit()
                    url = LogDownloaderURL.format(
                        ACCOUNT_NAME=conn_string_dict['AccountName'],
                        ACCOUNT_KEY=conn_string_dict['AccountKey'].replace(
                            '+', '%2b'),
                        CONTAINER=container,
                        START_DATE=start_date.strftime("%Y-%m-%d"),
                        END_DATE=(
                            end_date +
                            datetime.timedelta(days=1)).strftime("%Y-%m-%d"))
                    r = requests.post(url)
                    open(output_fp, 'wb').write(r.content)
                    print(' Done!\n')
                except Exception as e:
                    print('Error: {}'.format(e))

    else:  # using BlockBlobService python api for cooked logs
        try:
            print(
                'Establishing Azure Storage BlockBlobService connection using ',
                end='')
            if sas_token and account_name:
                print('sas token...')
                bbs = BlockBlobService(account_name=account_name,
                                       sas_token=sas_token)
            else:
                print('connection string...')
                bbs = BlockBlobService(connection_string=conn_string)
            # List all blobs and download them one by one
            print('Getting blobs list...')
            blobs = bbs.list_blobs(container)
        except Exception as e:
            if type(e.args[0]) == str and e.args[0].startswith(
                    'The specified container does not exist.'):
                print("Error: The specified container ({}) does not exist.".
                      format(container))
            else:
                print("Error:\nType: {}\nArgs: {}".format(
                    type(e).__name__, e.args))
            sys.exit()

        print('Iterating through blobs...\n')
        selected_fps = []
        for blob in blobs:
            if '/data/' not in blob.name:
                if verbose:
                    print('{} - Skip: Non-data blob\n'.format(blob.name))
                continue

            blob_day = datetime.datetime.strptime(
                blob.name.split('/data/', 1)[1].split('_', 1)[0], '%Y/%m/%d')
            if (start_date
                    and blob_day < start_date) or (end_date
                                                   and end_date < blob_day):
                if verbose:
                    print('{} - Skip: Outside of date range\n'.format(
                        blob.name))
                continue

            try:
                bp = bbs.get_blob_properties(container, blob.name)

                if confirm:
                    if input("{} - Do you want to download [Y/n]? ".format(
                            blob.name)) not in {'Y', 'y'}:
                        print()
                        continue

                fp = os.path.join(log_dir, app_id, blob.name.replace('/', '_'))
                selected_fps.append(fp)
                if os.path.isfile(fp):
                    file_size = os.path.getsize(fp)
                    if overwrite_mode == 0:
                        if verbose:
                            print('{} - Skip: Output file already exists\n'.
                                  format(blob.name))
                        continue
                    elif overwrite_mode in {1, 3, 4}:
                        if file_size == bp.properties.content_length:  # file size is the same, skip!
                            if verbose:
                                print(
                                    '{} - Skip: Output file already exists with same size\n'
                                    .format(blob.name))
                            continue
                        print(
                            'Output file already exists: {}\nLocal size: {:.3f} MB\nAzure size: {:.3f} MB'
                            .format(fp, file_size / (1024**2),
                                    bp.properties.content_length / (1024**2)))
                        if overwrite_mode in {
                                3, 4
                        } and file_size > bp.properties.content_length:  # local file size is larger, skip with warning!
                            print(
                                '{} - Skip: Output file already exists with larger size\n'
                                .format(blob.name))
                            continue
                        if overwrite_mode == 1 and input(
                                "Do you want to overwrite [Y/n]? ") not in {
                                    'Y', 'y'
                                }:
                            print()
                            continue
                else:
                    file_size = None

                print('Processing: {} (size: {:.3f}MB - Last modified: {})'.
                      format(blob.name,
                             bp.properties.content_length / (1024**2),
                             bp.properties.last_modified))
                # check if blob was modified in the last delta_mod_t sec
                if datetime.datetime.now(
                        datetime.timezone.utc
                ) - bp.properties.last_modified < datetime.timedelta(
                        0, delta_mod_t):
                    if overwrite_mode < 2:
                        if input(
                                "Azure blob currently in use (modified in the last delta_mod_t={} sec). Do you want to download anyway [Y/n]? "
                                .format(delta_mod_t)) not in {'Y', 'y'}:
                            print()
                            continue
                    elif overwrite_mode == 4:
                        print(
                            'Azure blob currently in use (modified in the last delta_mod_t={} sec). Skipping!\n'
                            .format(delta_mod_t))
                        continue
                    if if_match != '*':  # when if_match is not '*', reset max_connections to 1 to prevent crash if azure blob is modified during download
                        max_connections = 1

                if dry_run:
                    print('--dry_run - Not downloading!')
                else:
                    t0 = time.time()
                    process_checker = update_progress if report_progress else None
                    if overwrite_mode in {3, 4} and file_size:
                        if max_download_size is None or file_size < max_download_size:
                            print('Check validity of remote file... ', end='')
                            temp_fp = fp + '.temp'
                            cmpsize = min(file_size, 8 * 1024**2)
                            bbs.get_blob_to_path(
                                container,
                                blob.name,
                                temp_fp,
                                max_connections=max_connections,
                                start_range=file_size - cmpsize,
                                end_range=file_size - 1,
                                if_match=if_match)
                            if cmp_files(fp, temp_fp, -cmpsize):
                                print('Valid!')
                                print(
                                    'Resume downloading to temp file with max_connections = {}...'
                                    .format(max_connections))
                                bbs.get_blob_to_path(
                                    container,
                                    blob.name,
                                    temp_fp,
                                    progress_callback=process_checker,
                                    max_connections=max_connections,
                                    start_range=os.path.getsize(fp),
                                    if_match=if_match,
                                    end_range=max_download_size)
                                download_time = time.time() - t0
                                download_size_MB = os.path.getsize(temp_fp) / (
                                    1024**2)  # file size in MB
                                print('\nAppending to local file...')
                                with open(fp, 'ab') as f1, open(temp_fp,
                                                                'rb') as f2:
                                    shutil.copyfileobj(
                                        f2, f1, length=100 * 1024**2
                                    )  # writing chunks of 100MB to avoid consuming memory
                                print(
                                    'Appending completed. Deleting temp file...'
                                )
                                os.remove(temp_fp)
                            else:
                                os.remove(temp_fp)
                                print('Invalid! - Skip\n')
                                continue
                            print(
                                'Downloaded {:.3f} MB in {:.1f} sec. ({:.3f} MB/sec) - Total elapsed time: {:.1f} sec.'
                                .format(download_size_MB, download_time,
                                        download_size_MB / download_time,
                                        time.time() - t0))
                    else:
                        print(
                            'Downloading with max_connections = {}...'.format(
                                max_connections))
                        bbs.get_blob_to_path(container,
                                             blob.name,
                                             fp,
                                             progress_callback=process_checker,
                                             max_connections=max_connections,
                                             if_match=if_match,
                                             start_range=0,
                                             end_range=max_download_size)
                        download_time = time.time() - t0
                        download_size_MB = os.path.getsize(fp) / (
                            1024**2)  # file size in MB
                        print(
                            '\nDownloaded {:.3f} MB in {:.1f} sec. ({:.3f} MB/sec)'
                            .format(download_size_MB, download_time,
                                    download_size_MB / download_time))
                    if not keep_invalid_eof:
                        erase_invalid_end_line(fp)
                    print()
            except Exception as e:
                print('Error: {}'.format(e))

        if create_gzip_mode > -1:
            if selected_fps:
                selected_fps = [x for x in selected_fps if os.path.isfile(x)]
                if create_gzip_mode == 0:
                    models = {}
                    for fp in selected_fps:
                        models.setdefault(
                            os.path.basename(fp).split('_data_', 1)[0],
                            []).append(fp)
                    for model in models:
                        models[model].sort(key=lambda x: list(
                            map(int,
                                x.split('_data_')[1].split('_')[:3])))
                        start_date = '-'.join(
                            models[model][0].split('_data_')[1].split('_')[:3])
                        end_date = '-'.join(models[model][-1].split('_data_')
                                            [1].split('_')[:3])
                        output_fp = os.path.join(
                            log_dir, app_id, app_id + '_' + model + '_data_' +
                            start_date + '_' + end_date + '.json.gz')
                        print(
                            'Concat and zip files of LastConfigurationEditDate={} to: {}'
                            .format(model, output_fp))
                        if os.path.isfile(
                                output_fp
                        ) and __name__ == '__main__' and input(
                                'Output file already exists: {}. Do you want to overwrite [Y/n]? '
                                .format(output_fp)) not in {'Y', 'y'}:
                            continue
                        if dry_run:
                            print('--dry_run - Not creating gzip file!')
                        else:
                            with gzip.open(output_fp, 'wb') as f_out:
                                for fp in models[model]:
                                    print('Adding: {}'.format(fp))
                                    with open(fp, 'rb') as f_in:
                                        shutil.copyfileobj(
                                            f_in, f_out, length=100 * 1024**2
                                        )  # writing chunks of 100MB to avoid consuming memory
                elif create_gzip_mode == 1:
                    selected_fps.sort(key=lambda x: (list(
                        map(int,
                            x.split('_data_')[1].split('_')[:3])), -os.path.
                                                     getsize(x), x))
                    selected_fps_merged = []
                    last_fp_date = None
                    for fp in selected_fps:
                        fp_date = datetime.datetime.strptime(
                            '_'.join(fp.split('_data_')[1].split('_')[:3]),
                            "%Y_%m_%d")
                        if fp_date != last_fp_date:
                            selected_fps_merged.append(fp)
                            last_fp_date = fp_date

                    start_date = '-'.join(selected_fps_merged[0].split(
                        '_data_')[1].split('_')[:3])
                    end_date = '-'.join(selected_fps_merged[-1].split('_data_')
                                        [1].split('_')[:3])
                    output_fp = os.path.join(
                        log_dir, app_id, app_id + '_merged_data_' +
                        start_date + '_' + end_date + '.json.gz')
                    print(
                        'Merge and zip files of all LastConfigurationEditDate to: {}'
                        .format(output_fp))
                    if not os.path.isfile(
                            output_fp
                    ) or __name__ == '__main__' and input(
                            'Output file already exists: {}. Do you want to overwrite [Y/n]? '
                            .format(output_fp)) in {'Y', 'y'}:
                        if dry_run:
                            for fp in selected_fps_merged:
                                print('Adding: {}'.format(fp))
                            print('--dry_run - Not creating gzip file!')
                        else:
                            with gzip.open(output_fp, 'wb') as f_out:
                                for fp in selected_fps_merged:
                                    print('Adding: {}'.format(fp))
                                    with open(fp, 'rb') as f_in:
                                        shutil.copyfileobj(
                                            f_in, f_out, length=1024**3
                                        )  # writing chunks of 1GB to avoid consuming memory
                elif create_gzip_mode == 2:
                    selected_fps.sort(key=lambda x: (list(
                        map(int,
                            x.split('_data_')[1].split('_')[:3])), -os.path.
                                                     getsize(x), x))
                    start_date = '-'.join(
                        selected_fps[0].split('_data_')[1].split('_')[:3])
                    end_date = '-'.join(
                        selected_fps[-1].split('_data_')[1].split('_')[:3])
                    output_fp = os.path.join(
                        log_dir, app_id, app_id + '_deepmerged_data_' +
                        start_date + '_' + end_date + '.json.gz')
                    print(
                        'Merge, unique, sort, and zip files of all LastConfigurationEditDate to: {}'
                        .format(output_fp))
                    if not os.path.isfile(
                            output_fp
                    ) or __name__ == '__main__' and input(
                            'Output file already exists: {}. Do you want to overwrite [Y/n]? '
                            .format(output_fp)) in {'Y', 'y'}:
                        d = {}
                        for fn in selected_fps:
                            print('Parsing: {}'.format(fn), end='', flush=True)
                            if not dry_run:
                                for x in open(fn, 'rb'):
                                    if x.startswith(
                                            b'{"_label_cost'
                                    ) and x.strip().endswith(
                                            b'}'):  # reading only cooked lines
                                        data = ds_parse.json_cooked(x)
                                        if data is not None and (
                                                data['ei'] not in d
                                                or float(data['cost']) <
                                                d[data['ei']][1]
                                        ):  # taking line with best reward
                                            d[data['ei']] = (data['ts'],
                                                             float(
                                                                 data['cost']),
                                                             x)
                            print(' - len(d): {}'.format(len(d)))

                        print('Writing to output .gz file...')
                        if dry_run:
                            print('--dry_run - Not creating gzip file!')
                        else:
                            with gzip.open(output_fp, 'wb') as f:
                                i = 0
                                for x in sorted(d.values(), key=lambda x: x[
                                        0]):  # events are sorted by timestamp
                                    f.write(x[2])
                                    i += 1
                                    if i % 5000 == 0:
                                        update_progress(i, len(d))
                                update_progress(i, len(d))
                                print()
                else:
                    print(
                        'Unrecognized --create_gzip_mode: {}, skipping gzip file creation.'
                        .format(create_gzip_mode))
            else:
                print('No files downloaded, skipping gzip file creation.')

    print('Total elapsed time: {:.1f} sec.\n'.format(time.time() - t_start))
    return output_fp
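# The resume path above combines ranged reads (start_range/end_range on
# get_blob_to_path) with a tail comparison before appending new bytes. The
# helper below is a simplified, standalone sketch of that pattern; it assumes
# an existing BlockBlobService instance and a partially downloaded local file,
# and inlines the tail check that cmp_files performs in the script above.
import os
import shutil


def resume_blob_download(bbs, container, blob_name, local_path,
                         tail_bytes=8 * 1024**2):
    local_size = os.path.getsize(local_path)
    temp_path = local_path + '.temp'
    cmp_size = min(local_size, tail_bytes)

    # Re-download only the last cmp_size bytes of the range we already have.
    bbs.get_blob_to_path(container, blob_name, temp_path,
                         start_range=local_size - cmp_size,
                         end_range=local_size - 1)

    # Compare the local tail against the freshly downloaded bytes.
    with open(local_path, 'rb') as f_local, open(temp_path, 'rb') as f_remote:
        f_local.seek(-cmp_size, os.SEEK_END)
        if f_local.read(cmp_size) != f_remote.read(cmp_size):
            os.remove(temp_path)
            raise ValueError('Local file does not match the blob; '
                             'restart the download from scratch.')

    # Fetch the remaining bytes into the temp file, then append them locally.
    bbs.get_blob_to_path(container, blob_name, temp_path,
                         start_range=local_size)
    with open(local_path, 'ab') as f_out, open(temp_path, 'rb') as f_in:
        shutil.copyfileobj(f_in, f_out, length=100 * 1024**2)  # 100 MB chunks
    os.remove(temp_path)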
import uuid
from datetime import date

today = date.today()
coronaFileName = "WorldWideCovidNinty.csv"
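# df, BdDataFrame, and TotalCoronaDataFrame are pandas DataFrames assumed to be
# built earlier in the original scraping script (not shown in this excerpt).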
AllCountryCoronainfoCSV = df.to_csv(index_label="idx", encoding="utf-8")

BdCoronaInfo = "BangladeshCovidNinty.csv"
BdCoronaInfoCsv = BdDataFrame.to_csv(index_label="idx", encoding="utf-8")

TotalCoronaInfo = "TotalCovidNinty.csv"
TotalCoronaInfoCsv = TotalCoronaDataFrame.to_csv(index_label="idx",
                                                 encoding="utf-8")

#!pip install azure-storage-blob==0.37.1

#block_blob_service.create_container('mycontainer')

from azure.storage.blob import BlockBlobService
from azure.storage.blob import ContentSettings

block_blob_service = BlockBlobService(account_name='', account_key='')
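# NOTE: account_name and account_key are left blank in this snippet; supply
# real storage-account credentials before running.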

# Upload the CSV files to Azure Blob Storage
block_blob_service.create_blob_from_text('mycontainer', coronaFileName,
                                         AllCountryCoronainfoCSV)
block_blob_service.create_blob_from_text('mycontainer', BdCoronaInfo,
                                         BdCoronaInfoCsv)
block_blob_service.create_blob_from_text('mycontainer', TotalCoronaInfo,
                                         TotalCoronaInfoCsv)
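
# ContentSettings is imported above but unused in this snippet. A minimal
# sketch (reusing the same block_blob_service and container) of attaching an
# explicit content type to one of the uploads:
block_blob_service.create_blob_from_text(
    'mycontainer',
    coronaFileName,
    AllCountryCoronainfoCSV,
    content_settings=ContentSettings(content_type='text/csv'))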

print("Successfully end covitNinty Web Scraping...........")
Example #60
0
# Imports assumed by this sample but omitted from the snippet; the Include
# import path may vary slightly across legacy azure-storage-blob versions.
import os
import sys
import uuid

from azure.storage.blob import BlockBlobService, PublicAccess
from azure.storage.blob.models import Include


def run_sample(account_name, account_key, container_name):
    try:
        # Create the BlockBlobService that is used to call the Blob service for the storage account
        block_blob_service = BlockBlobService(
            account_name=account_name,
            account_key=account_key)

        # Create a container.
        # If it already exists, this call does nothing.
        block_blob_service.create_container(container_name)

        # Set the permission so the blobs are public.
        block_blob_service.set_container_acl(container_name, public_access=PublicAccess.Container)

        # Create a file in the current folder to test the upload and download.
        #local_path=os.path.expanduser("~/Documents")
        local_path=os.getcwd()
        local_file_name ="QuickStart_" + str(uuid.uuid4()) + ".txt"
        full_path_to_file =os.path.join(local_path, local_file_name)

        # Write text to the file.
        with open(full_path_to_file, 'w') as file:
            file.write("Hello, World!")

        print("Temp file = " + full_path_to_file)
        print("\nUploading to Blob storage as blob " + local_file_name)

        # Metadata to attach to the uploaded blob.
        # Values may contain whitespace, but a key that contains whitespace makes
        # the upload method raise an error; the same restriction applies when
        # editing blob metadata in the Azure portal.
        metadata = {'one': 'first first', 'two': 'second', 'three': 'third'}

        # Upload the created file, use local_file_name for the blob name
        block_blob_service.create_blob_from_path(container_name, local_file_name, full_path_to_file, metadata=metadata)

        # List the blobs in the container
        # Blob metadata is not returned by default, so request it with the include keyword argument.
        print("\nList blobs in the container")
        generator = block_blob_service.list_blobs(container_name, include=Include.METADATA)
        for blob in generator:
            # blob is an azure.storage.blob.models.Blob object.
            # blob.properties is an azure.storage.blob.models.BlobProperties object.
            print("\t Blob name: " + blob.name)
            print("\t Blob type: " + blob.properties.blob_type)
            print("\t Blob content length: " + str(blob.properties.content_length))
            print("\t Last modified: " + str(blob.properties.last_modified))

            # If the include keyword argument had not been passed to list_blobs,
            # the metadata would have to be fetched separately with get_blob_metadata:
            # metadata = block_blob_service.get_blob_metadata(container_name, blob.name)
            metadata = blob.metadata
            if metadata:
                print("\t Metadata:")
                for key, value in metadata.items():
                    print("\t\t key = " + key + ", value = " + value)
            print()

        # Download the blob(s).
        # Append '_DOWNLOADED' before the '.txt' extension so you can see both files in the current folder.
        full_path_to_file2 = os.path.join(local_path, str.replace(local_file_name ,'.txt', '_DOWNLOADED.txt'))
        print("\nDownloading blob to " + full_path_to_file2)
        block_blob_service.get_blob_to_path(container_name, local_file_name, full_path_to_file2)

        sys.stdout.write("Sample finished running. When you hit <any key>, the sample will be deleted and the sample "
                         "application will exit.")
        sys.stdout.flush()
        input()

        # Clean up resources. This includes the uploaded blob and the temp files but not the container for safety.
        #block_blob_service.delete_container(container_name)
        block_blob_service.delete_blob(container_name, local_file_name)
        os.remove(full_path_to_file)
        os.remove(full_path_to_file2)
    except Exception as e:
        print(e)
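

# Example invocation; the values below are placeholders, not real credentials.
if __name__ == '__main__':
    run_sample(
        account_name='<storage-account-name>',
        account_key='<storage-account-key>',
        container_name='quickstartblobs')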