def __init__(self, storage_config: Dict[str, Any],
                 storage_paths: List[str], local_dir: str):
        from azure.storage import blob

        connection_string = storage_config.get("connection_string")
        container = storage_config.get("container")
        account_url = storage_config.get("account_url")
        credential = storage_config.get("credential")

        if connection_string:
            self.client = blob.BlobServiceClient.from_connection_string(
                connection_string)
        elif account_url:
            self.client = blob.BlobServiceClient(account_url, credential)
        else:
            raise ValueError(
                "Either 'connection_string' or 'account_url' must be specified."
            )

        if container is None:
            raise ValueError("'container' must be specified.")

        self.container_name = container if not container.endswith(
            "/") else container[:-1]

        self.local_dir = local_dir
        self.storage_paths = storage_paths
        self._file_records = {}  # type: Dict[str, datetime.datetime]
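A minimal usage sketch for the snippet above (the enclosing class name, AzureBlobSyncer, is assumed here since the excerpt does not show it): the config dict selects the authentication path, either a connection string or an account URL plus credential.

# Hypothetical usage; AzureBlobSyncer is an assumed class name and the
# values below are placeholders.
storage_config = {
    "account_url": "https://myaccount.blob.core.windows.net",
    "credential": "<account-key-or-sas-token>",
    "container": "checkpoints/",
}
syncer = AzureBlobSyncer(storage_config,
                         storage_paths=["results/exp1"],
                         local_dir="/tmp/exp1")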
Example #2
    def __init__(self, resource, credential=None):
        self.resource = resource
        self.acc = urlsplit(resource).netloc.split('.')[0]

        if credential is None:
            # only import azure.identity when it is certain we want to use it
            # importing it takes several hundred milliseconds (and from a
            # glance seems to make some http requests)
            import azure.identity
            credential = azure.identity.DefaultAzureCredential()
        self.client = azblob.BlobServiceClient(
            resource,
            credential=credential,
        )
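The constructor above defers importing azure.identity until a credential is actually needed. A minimal sketch of using it (the enclosing class name, BlobStore, is assumed; it is not shown in the excerpt):

# Hypothetical usage; BlobStore is an assumed class name. With no credential
# the snippet falls back to azure.identity.DefaultAzureCredential (environment
# variables, managed identity, Azure CLI login, ...).
store = BlobStore("https://myaccount.blob.core.windows.net")
print(store.acc)  # "myaccount", parsed from the URL's netloc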
Example #3
    def __init__(self,
                 storage_account_name,
                 client_id,
                 tenant_id,
                 secret_key,
                 storage_account_token=None,
                 mq_username=None,
                 mq_password=None,
                 mq_ip=None,
                 mq_vhost=None):
        """
        abstraction around all azure blob related methods
        :param storage_account_name:
        :type storage_account_name:
        :param client_id: Azure AD client id (application id)
        :type client_id: str
        :param tenant_id: Azure AD tenant id
        :type tenant_id: str
        :param secret_key: secret key for the app
        :type secret_key: str
        :param mq_username: rabbitmq username
        :type mq_username: str
        :param mq_password: rabbitmq password
        :type mq_password: str
        :param mq_ip: rabbitmq IP address
        :type mq_ip: str
        :param mq_vhost: rabbitmq vhost
        :type mq_vhost: str
        """
        self.account_name = storage_account_name
        self.mq_username = mq_username
        self.mq_password = mq_password
        self.mq_ip = mq_ip
        self.mq_vhost = mq_vhost

        self.logger = self.__get_logger(add_azure_filter=True)

        if not storage_account_token:
            storage_account_token = self.__get_storage_account_token(
                client_id, secret_key, tenant_id)

        storage_account_url = self.__get_account_url(storage_account_name)

        self.blob_client = azureblob.BlobServiceClient(storage_account_url,
                                                       storage_account_token)
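The private __get_storage_account_token helper is not shown in the excerpt. A plausible implementation, sketched here only as an assumption, would wrap azure.identity.ClientSecretCredential, which BlobServiceClient accepts directly as a credential:

# Assumed implementation of the hidden helper; the original is not shown.
from azure.identity import ClientSecretCredential

def get_storage_account_token(client_id, secret_key, tenant_id):
    # ClientSecretCredential takes (tenant_id, client_id, client_secret).
    return ClientSecretCredential(tenant_id, client_id, secret_key)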
Example #4
def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    try:
        now = datetime.datetime.now()
        desired_hour = req.params.get('hour')
        if desired_hour:
            desired_hour = int(desired_hour)
        else:
            desired_hour = now.hour

        blob_prefix = f'{os.environ["EVENT_HUB_NAME"]}/02/{now.year:02d}/{now.month:02d}/{now.day:02d}/{desired_hour:02d}'

        service = blob.BlobServiceClient(account_url=os.environ["STORAGE_ACCOUNT_URL"])
        container_client = service.get_container_client('readings')
        blobs_iterator = container_client.list_blobs(name_starts_with=blob_prefix)

        retrieved_blobs = {}

        for i in blobs_iterator:
            index = int(i.name[-7:-5])
            retrieved_blobs[index] = i

        indexes = sorted(retrieved_blobs, reverse=True)

        blob_client = container_client.get_blob_client(retrieved_blobs[indexes[0]])
        stream_downloader = blob_client.download_blob()
        blob_contents = stream_downloader.content_as_text()
        blob_lines = blob_contents.split('\n')

        readings = []

        for line in blob_lines:
            if not line.strip():
                continue  # skip the empty string left by a trailing newline
            reading = json.loads(line)
            body = base64.b64decode(reading['Body'])
            jbody = json.loads(body)
            if 'temperature' in jbody or 'humidity' in jbody:
                jbody['EnqueuedTimeUtc'] = reading['EnqueuedTimeUtc']
                readings.append(jbody)

        return func.HttpResponse(json.dumps(readings))

    except Exception:
        logging.exception('Failed to retrieve readings')
        return func.HttpResponse("Something went wrong.", status_code=500)
Example #5
    def __init__(self, storage_account, storage_container, prefix):
        self.storage_account = storage_account
        self.storage_container = storage_container
        self.prefix = prefix

        client_id = os.environ["CLIENT_ID"]
        secret_key = os.environ["SECRET_KEY"]
        tenant_id = os.environ["TENANT_ID"]

        storage_account_token = ClientSecretCredential(tenant_id, client_id,
                                                       secret_key)

        storage_account_url = "https://{}.blob.core.windows.net".format(
            self.storage_account)

        # Use 64 MiB blocks for uploads; the v12 SDK takes this as a
        # constructor keyword rather than a client attribute.
        self.blob_service = azureblob.BlobServiceClient(
            storage_account_url, storage_account_token,
            max_block_size=64 * 1024 * 1024)
Example #6
    finally:
        output.close()
    raise RuntimeError('could not write data to stream or decode bytes')


if __name__ == '__main__':

    start_time = datetime.datetime.now().replace(microsecond=0)
    print('Sample start: {}'.format(start_time))
    print()

    # Create the blob client, for use in obtaining references to
    # blob storage containers and uploading files to containers.

    blob_client = azureblob.BlobServiceClient(
        account_url="https://{}.blob.core.windows.net".format(
            config._STORAGE_ACCOUNT_NAME),
        credential=config._STORAGE_ACCOUNT_KEY)

    # Use the blob client to create the containers in Azure Storage if they
    # don't yet exist.

    from azure.core.exceptions import ResourceExistsError

    input_container_name = 'input'
    try:
        container_client = blob_client.create_container(input_container_name)
    except ResourceExistsError:
        container_client = blob_client.get_container_client(input_container_name)

    # The collection of data files that are to be processed by the tasks.
    input_file_paths = [
        os.path.join(sys.path[0], 'taskdata0.txt'),
        os.path.join(sys.path[0], 'taskdata1.txt'),
        os.path.join(sys.path[0], 'taskdata2.txt')
    ]
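The excerpt stops before the files are used. A plausible continuation, sketched here as an assumption rather than the sample's actual code, would upload each file into the input container:

    # Hypothetical continuation: push each task data file to the 'input'
    # container so that Batch tasks can reference it later.
    for file_path in input_file_paths:
        blob_name = os.path.basename(file_path)
        target = blob_client.get_blob_client(input_container_name, blob_name)
        with open(file_path, 'rb') as data:
            target.upload_blob(data, overwrite=True)
        print('Uploaded {} to container [{}]'.format(blob_name,
                                                     input_container_name))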
Example #7
    def CreateDiskFromSnapshotURI(
            self,
            snapshot: 'AZComputeSnapshot',
            snapshot_uri: str,
            region: Optional[str] = None,
            disk_name: Optional[str] = None,
            disk_name_prefix: Optional[str] = None,
            disk_type: str = 'Standard_LRS') -> 'AZComputeDisk':
        """Create a new disk based on a SAS snapshot URI.

    This is useful if e.g. one wants to make a copy of a disk in a separate
    Azure account. This method will create a temporary Azure Storage account
    within the destination account, import the snapshot from a downloadable
    link (the source account needs to share the snapshot through a SAS link)
    and then create a disk from the VHD file saved in storage. The Azure
    storage account is then deleted.

    Args:
      snapshot (AZComputeSnapshot): Source snapshot to use.
      snapshot_uri (str): The URI of the snapshot to copy.
      region (str): Optional. The region in which to create the disk. If not
          provided, the disk will be created in the default_region associated to
          the AZAccount object.
      disk_name (str): Optional. String to use as new disk name.
      disk_name_prefix (str): Optional. String to prefix the disk name with.
      disk_type (str): Optional. The sku name for the disk to create. Can be
          Standard_LRS, Premium_LRS, StandardSSD_LRS, or UltraSSD_LRS.
          Default is Standard_LRS.

    Returns:
      AZComputeDisk: Azure Compute Disk.

    Raises:
      RuntimeError: If the disk could not be created.
    """

        if not region:
            region = self.az_account.default_region

        # Create a temporary Azure account storage to import the snapshot
        storage_account_name = hashlib.sha1(
            snapshot.resource_id.encode('utf-8')).hexdigest()[:23]
        storage_account_url = 'https://{0:s}.blob.core.windows.net'.format(
            storage_account_name)
        # pylint: disable=line-too-long
        storage_account_id, storage_account_access_key = self.az_account.storage.CreateStorageAccount(
            storage_account_name, region=region)
        # pylint: enable=line-too-long
        blob_service_client = blob.BlobServiceClient(
            account_url=storage_account_url,
            credential=storage_account_access_key)

        # Create a container within the Storage to receive the imported snapshot
        container_name = storage_account_name + '-container'
        snapshot_vhd_name = snapshot.name + '.vhd'
        container_client = blob_service_client.get_container_client(
            container_name)
        try:
            logger.info('Creating blob container {0:s}'.format(container_name))
            container_client.create_container()
            logger.info('Blob container {0:s} successfully created'.format(
                container_name))
        except exceptions.ResourceExistsError:
            # The container already exists, so we can re-use it
            logger.warning(
                'Reusing existing container: {0:s}'.format(container_name))

        # Download the snapshot from the URI to the storage
        copied_blob = blob_service_client.get_blob_client(
            container_name, snapshot_vhd_name)
        logger.info(
            'Importing snapshot to container from URI {0:s}. '
            'Depending on the size of the snapshot, this process is going '
            'to take a while.'.format(snapshot_uri))
        copied_blob.start_copy_from_url(snapshot_uri)
        copy_status = copied_blob.get_blob_properties().copy.status
        while copy_status != 'success':
            sleep(
                5
            )  # Wait for the vhd to be imported in the Azure storage container
            copy_status = copied_blob.get_blob_properties().copy.status
            if copy_status in ('aborted', 'failed'):
                raise RuntimeError('Could not import the snapshot from URI '
                                   '{0:s}'.format(snapshot_uri))
            logger.debug(
                'Importing snapshot from URI {0:s}'.format(snapshot_uri))
        logger.info('Snapshot successfully imported from URI {0:s}'.format(
            snapshot_uri))

        if not disk_name:
            disk_name = common.GenerateDiskName(
                snapshot, disk_name_prefix=disk_name_prefix)

        # Create a new disk from the imported snapshot
        creation_data = {
            'location': region,
            'creation_data': {
                'source_uri': copied_blob.url,
                'storage_account_id': storage_account_id,
                'create_option': models.DiskCreateOption.import_enum
            },
            'sku': {
                'name': disk_type
            }
        }

        try:
            logger.info('Creating disk: {0:s}'.format(disk_name))
            request = self.compute_client.disks.create_or_update(
                self.az_account.default_resource_group_name, disk_name,
                creation_data)
            while not request.done():
                sleep(5)  # Wait 5 seconds before checking disk status again
            disk = request.result()
            logger.info('Disk {0:s} successfully created'.format(disk_name))
        except azure_exceptions.CloudError as exception:
            raise RuntimeError(
                'Could not create disk from URI {0:s}: {1:s}'.format(
                    snapshot_uri, str(exception)))

        # Cleanup the temporary account storage
        self.az_account.storage.DeleteStorageAccount(storage_account_name)

        return AZComputeDisk(self.az_account, disk.id, disk.name,
                             disk.location, disk.zones)
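A hedged usage sketch (the object exposing CreateDiskFromSnapshotURI, the snapshot object, and the way the SAS URI was obtained are assumptions; none of them are defined in this excerpt):

# Hypothetical call site; 'az_compute', 'snapshot' and 'sas_uri' come from the
# surrounding project and are not defined here.
disk = az_compute.CreateDiskFromSnapshotURI(
    snapshot,
    sas_uri,
    region='eastus',
    disk_name_prefix='copy',
    disk_type='Standard_LRS')
print(disk.name)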
Example #8
def generate_tasks(job_id, dataset_list, production, flags):
    """
    Adds a task for each input file in the collection to the specified job.
    :param str job_id: The ID of the job to which to add the tasks.
    :param list dataset_list: A collection of dataset names. One task will be
     created for each dataset.
    :param bool production: True if in production mode
    :param str flags: extra single letter flags to pass in
    """

    LOGGER.info('Adding {} tasks to job [{}]...'.format(
        len(dataset_list), job_id))

    tasks = []

    # Ensure that the container for the JSON config files exists; the client
    # and the container are shared by every task, so create them once.
    blob_service_client = blob.BlobServiceClient(
        account_url=f"https://{STORAGE_ACCOUNT_NAME}.blob.core.windows.net",
        credential=SAS_TOKEN)
    try:
        blob_service_client.create_container(name=CONFIG_FOLDER)
    except azure_exception.ResourceExistsError:
        LOGGER.info(f"Container \"{CONFIG_FOLDER}\" already exists")

    for dataset in dataset_list:

        if production:
            create_json_config(dataset, blob_service_client)
        flag_list = [
            f"--log ${{AZ_BATCH_NODE_ROOT_DIR}}/fsmounts/{FILE_SHARE_NAME}/"
            f"logs/{DATETIME_NOWISH}/{dataset.name}/${{AZ_BATCH_TASK_ID}}.log"
        ]
        for flag in flags:
            # Stops the force flag getting sent to derivatives
            if flag == "f" and isinstance(dataset, Derivative):
                continue
            flag_list.append(f"-{flag}")

        dependencies = []

        if isinstance(dataset, Dataset):
            flag_list.append(f"-t {dataset.timeout}")
            flag_list.append(f"-i '{dataset.name}/*'")
        elif isinstance(dataset, Derivative):  # these are derivatives
            # Create dependency
            dependencies = [
                generate_task_name(dependency)
                for dependency in dataset.dependencies
            ]
            flag_list.append(f"-d '{dataset.name}'")
        elif isinstance(dataset, Publication) and dataset.is_derivative:
            flag_list.append("-c")
            flag_list.append(f"-i '{dataset.name}/*'")
        command = generate_full_command(
            f"{dataset.base_fn} {' '.join(flag_list)}", production, dataset)

        tasks.append(
            create_task(dataset, command, dependencies,
                        dataset.max_wall_clock_minutes, production))
    return tasks
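A hedged sketch of how generate_tasks might be wired up (the job, the dataset objects, and the Batch client are assumptions; they are not shown in the excerpt):

# Hypothetical call site; 'datasets' would be Dataset/Derivative instances
# built elsewhere, and batch_client is an azure.batch BatchServiceClient.
tasks = generate_tasks('statistics-job', datasets, production=True, flags='fv')
batch_client.task.add_collection('statistics-job', tasks)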