def test_find_bucket_found_with_prefix(self):
    """Tests that bucket is found when given prefix."""
    bucket_foo = storage.Bucket(self.gcs_client, self.foo_name)
    bucket_bar = storage.Bucket(self.gcs_client, self.bar_name)
    bucket_iterator = iter([bucket_foo, bucket_bar])
    actual_output = gcs_transcript_utils.find_bucket_with_prefix(
        bucket_iterator, self.foo_name)
    expected_output = self.foo_name
    self.assertEqual(actual_output, expected_output)
def publish_daily_cost(
    billing_bucket_name,
    target_bucket_name,
    target_object_name,
    kind="json",
    debug=False,
    dry_run=False,
):
    totals = {}
    client = storage.Client()
    bucket = storage.Bucket(client, billing_bucket_name)

    if kind == "csv":
        prefix = "report-"
    else:
        prefix = "billing-"

    blobs = bucket.list_blobs(prefix=prefix)
    for blob in blobs:
        buffer = io.StringIO(blob.download_as_string().decode())
        if kind == "csv":
            current_totals = totals_from_csv(buffer)
        else:
            current_totals = totals_from_json(buffer)
        for time_range, cost in current_totals.items():
            totals[time_range] = totals.get(time_range, 0) + cost

    # We want to push out sorted jsonl
    sorted_items = [{
        "version": 1,
        "start_time": start_time,
        "end_time": end_time,
        "cost": cost
    } for (start_time, end_time), cost in totals.items()]
    sorted_items.sort(key=lambda d: d["start_time"])

    if debug:
        for item in sorted_items:
            print(json.dumps(item))

    if not dry_run:
        target_bucket = storage.Bucket(client, target_bucket_name)
        blob = target_bucket.blob(target_object_name)
        target_buffer = io.StringIO()
        for item in sorted_items:
            target_buffer.write(json.dumps(item) + "\n")
        target_buffer.seek(0)
        blob.upload_from_file(target_buffer)

    return sorted_items
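# A minimal usage sketch for publish_daily_cost above; the bucket and object
# names are placeholders. With dry_run=True nothing is uploaded, but the
# aggregated, sorted items are still returned.
items = publish_daily_cost(
    billing_bucket_name='example-billing-export',
    target_bucket_name='example-public-bucket',
    target_object_name='daily-cost.jsonl',
    kind='json',
    dry_run=True,
)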
def archive_events(project, log_name, source_bucket, destination_bucket, date,
                   object_name_template='events-{date}.jsonl',
                   debug=False, dry_run=False):
    storage_client = storage.Client()
    src_bucket = storage.Bucket(storage_client, source_bucket)
    dest_bucket = storage.Bucket(storage_client, destination_bucket)

    prefix = log_name + '/' + date.strftime('%Y/%m/%d')
    print(f'Finding blobs with prefix {prefix}')
    src_blobs = src_bucket.list_blobs(prefix=prefix)

    count = 0
    all_events = []
    for src_blob in src_blobs:
        with tempfile.TemporaryFile(mode='wb+') as temp:
            src_blob.download_to_file(temp)
            temp.seek(0)
            for line in temp:
                event = json.loads(json.loads(line)['jsonPayload']['message'])
                # Account for time when 'event' was nested
                if 'event' in event:
                    event.update(event['event'])
                    del event['event']
                event = process_event(event)
                if debug:
                    print(event)
                if not dry_run:
                    all_events.append(event)
                count += 1

    if not dry_run:
        # Timestamp is ISO8601 in UTC, so can be sorted lexicographically
        all_events.sort(key=lambda event: event['timestamp'])
        with tempfile.TemporaryFile(mode='w+') as out:
            for event in all_events:
                out.write(json.dumps(event) + '\n')
            out.seek(0)
            blob_name = object_name_template.format(
                date=date.strftime('%Y-%m-%d'))
            blob = dest_bucket.blob(blob_name)
            # Set metadata on the object so we know when this archive is for
            # & how many events there are
            blob.metadata = {
                'Events-Date': date.strftime('%Y-%m-%d'),
                'Events-Count': len(all_events)
            }
            blob.upload_from_file(out)
            print(
                f'Uploaded {destination_bucket}/{blob_name} with {count} events'
            )
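# A hedged usage sketch for archive_events above; the project, log name, and
# bucket names are placeholders. dry_run=True walks the source blobs without
# writing an archive.
import datetime

archive_events(
    project='example-project',
    log_name='example-events-log',
    source_bucket='example-events-raw',
    destination_bucket='example-events-archive',
    date=datetime.date(2020, 1, 1),
    dry_run=True,
)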
def __init__(self, config):
    self.config = config
    self.client = storage.Client()
    self.input_bucket = storage.Bucket(self.client,
                                       self.config['input_bucket'])
    self.output_bucket = storage.Bucket(self.client,
                                        self.config['output_bucket'])
    self.reg = ProcessedRegister(self.output_bucket,
                                 self.config['symbol'],
                                 self.config['name'])
    self.old_processed_bnames = [p[0] for p in self.reg.processed]
    self.new_processed_bnames = []
    self.input_blobs = []
def download_blob(self, bucket_name, blob_name, local_path=None):
    """
    Gets a blob from a bucket

    `Args:`
        bucket_name: str
            The name of the bucket
        blob_name: str
            The name of the blob
        local_path: str
            The local path where the file will be downloaded. If not
            specified, a temporary file will be created and returned, and
            that file will be removed automatically when the script is
            done running.

    `Returns:`
        str
            The path of the downloaded file
    """
    if not local_path:
        local_path = files.create_temp_file_for_path('TEMPTHING')

    bucket = storage.Bucket(self.client, name=bucket_name)
    blob = storage.Blob(blob_name, bucket)

    logger.info(f'Downloading {blob_name} from {bucket_name} bucket.')
    with open(local_path, 'wb') as f:
        blob.download_to_file(f, client=self.client)
    logger.info(f'{blob_name} saved to {local_path}.')

    return local_path
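# A hedged usage sketch: `connector` is assumed to be an instance of the class
# defining download_blob above; the bucket and blob names are placeholders.
path = connector.download_blob('example-bucket', 'exports/contacts.csv')
print(f'Downloaded to {path}')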
def update(self):
    source_bucket = storage.Bucket(self.staging.client,
                                   self.config['source_bucket'])
    blobs = source_bucket.list_blobs(prefix=self.config['source_prefix'])
    blobs = {b.name: b for b in blobs}

    # check against blob names
    input_bnames = set(blobs)
    logging.debug('input blobs = %s', len(input_bnames))
    old_processed = set(p[0] for p in self.processed)
    logging.debug('blobs already processed = %s', len(old_processed))
    bnames_to_process = list(input_bnames - old_processed)
    logging.debug('non processed blobs = %s', len(bnames_to_process))

    # check against timestamps
    bnames_to_process2 = [b for b, t in self.processed
                          if blobs[b].time_created > t]
    logging.debug('outdated blobs to process = %s', len(bnames_to_process2))

    # select blobs to process
    bnames_to_process = set(bnames_to_process + bnames_to_process2)
    logging.debug('blobs to process = %s', len(bnames_to_process))

    procs = []
    for bname in bnames_to_process:
        blob = blobs[bname]
        fname = os.path.split(bname)[1]
        fname = os.path.join(self.data_dir, fname)
        with open(fname, 'wb+') as fp:
            blob.download_to_file(fp)
        procs.append((blob.name, dtfmt(blob.time_created)))

    if procs:
        logging.debug('updating processed %s', self.processed_fname)
        self.processed = self.processed + procs
        with open(self.processed_fname, 'w+') as fp:
            fp.write(json.dumps(self.processed))
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage
    if the path starts with gs:// or s3://
    '''
    if local_path.startswith(('s3://', 'gs://')):
        prefix = local_path.split(':')[0]
        remote_bucket_path = local_path[len("s3://"):]  # same length
        bp = remote_bucket_path.split("/")
        bucket = bp[0]
        path = remote_bucket_path[1 + len(bucket):]
        # s3://example/file becomes s3:/example/file in Linux
        local_path = prefix + ':/' + remote_bucket_path

        if prefix == 's3':
            import boto3
            s3 = boto3.client('s3',
                              endpoint_url=os.environ.get('S3_ENDPOINT'))
            s3.upload_file(local_path, bucket, path)
        elif prefix == 'gs':
            from google.cloud import storage
            client = storage.Client()
            Hbucket = storage.Bucket(client, bucket)
            blob = storage.Blob(path, Hbucket)
            blob.upload_from_filename(local_path)
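# A hedged usage sketch for maybe_upload_file above; the gs:// path is a
# placeholder. Paths without an s3:// or gs:// scheme are left untouched.
maybe_upload_file('gs://example-bucket/checkpoints/model.ckpt')
maybe_upload_file('/tmp/model.ckpt')  # no scheme prefix, so this is a no-op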
def get_configs(service_account_json_file,
                zip_filename='board_and_stream_cfg.zip',
                extract_dir='cfg'):
    '''Downloads and extracts board configurations from GCP'''
    with open(service_account_json_file, 'r') as sa_file:
        bucket_name = json.load(sa_file)['bucket']
    # Use the function's own parameter rather than the undefined `args`
    # referenced in the original.
    credentials = service_account.Credentials.from_service_account_file(
        service_account_json_file)
    client = storage.Client(project=PROJECT_ID, credentials=credentials)
    bucket = storage.Bucket(client, bucket_name)
    configs = bucket.get_blob(zip_filename)
    configs.download_to_filename(zip_filename)
    with ZipFile(zip_filename) as zip_file:
        try:
            for member in zip_file.namelist():
                path, file_name = os.path.split(member)
                if os.path.splitext(
                        file_name)[1] in EXTENSIONS and path == 'board_config':
                    file_loc = zip_file.extract(member, extract_dir)
                    shutil.move(
                        file_loc,
                        os.path.join(extract_dir,
                                     os.path.basename(file_loc)))
                else:
                    zip_file.extract(member, extract_dir)
        finally:
            shutil.rmtree(os.path.join(extract_dir, 'board_config'))
def handler(event, context):
    # Get secret key
    secret_id = os.environ['secret_id']
    print("Access AWS Secrets Manager")
    secretsmanager = boto3.client('secretsmanager')
    response = secretsmanager.get_secret_value(SecretId=secret_id)

    key_file_name = "/tmp/api.json"
    print("Write API key to temporary file")
    with open(key_file_name, "w") as key_file:
        key_file.write(response['SecretString'])

    print("Connect to Google Play GCP Storage service via API key")
    storage_client = storage.Client.from_service_account_json(key_file_name)
    cloud_storage_bucket = 'some-bucket'
    print("Connect to GCP bucket")
    source_bucket = storage.Bucket(storage_client, cloud_storage_bucket)

    print("Connect to AWS S3 service")
    s3 = boto3.resource('s3')
    target_bucket_name = os.environ['target_bucket_name']
    print("Connect to AWS bucket " + target_bucket_name)
    target_bucket = s3.Bucket(target_bucket_name)

    prefix = 'stats/installs/installs_uk.nhs.covid19.production_'
    suffix = '_overview.csv'

    # Determine dates to get Google reports
    months = get_months_covering_data_as_of_today()
    length = len(months)
    i = 0
    while i < length:
        object_name = prefix + months[i] + suffix
        print(str(i + 1) + "/" + str(length) + ": " + object_name)
        try:
            # Download Google reports
            print("Download from GCP bucket")
            blob = source_bucket.get_blob(object_name)
            blob_text = blob.download_as_text(encoding='utf16')
            blob_bytes_utf8 = blob_text.encode('utf-8')
            # Upload reports to our AWS S3 bucket
            print("Upload to AWS bucket")
            target_bucket.put_object(Key=object_name, Body=blob_bytes_utf8)
        except Exception as e:
            print(e)
        i += 1
def create_bucket(bucket, bucket_name):
    """Create and return a new bucket in the object store called
    'bucket_name'. This will raise an ObjectStoreError if this bucket
    already exists
    """
    new_bucket = _copy.copy(bucket)

    try:
        from google.cloud import storage as _storage
        client = new_bucket["client"]
        bucket_name = _sanitise_bucket_name(bucket_name,
                                            bucket["unique_suffix"])
        bucket_obj = _storage.Bucket(client, name=bucket_name)
        bucket_obj.location = bucket["bucket"].location
        bucket_obj.storage_class = "REGIONAL"
        new_bucket["bucket"] = client.create_bucket(bucket_obj)
        new_bucket["bucket_name"] = str(bucket_name)
    except Exception as e:
        # couldn't create the bucket - likely because it already
        # exists - try to connect to the existing bucket
        from Acquire.ObjectStore import ObjectStoreError
        raise ObjectStoreError(
            "Unable to create the bucket '%s', likely because it "
            "already exists: %s" % (bucket_name, str(e)))

    return new_bucket
def dump_features_to_gcs(ft_tables, dest, project, client):
    """Dump generated tables as files on Google Cloud Storage"""
    logger.info('Dumping {} tables to {}...'.format(len(ft_tables), dest))
    gs_client = gcs.Client(project)
    split_uri = dest.split('/')
    filepath = '/'.join(split_uri[3:])
    bucket_name = split_uri[2]
    bucket = gcs.Bucket(gs_client, bucket_name)
    jobs = []
    for ft_table in ft_tables:
        filename_shard = ft_table.name + '{0:012d}'.format(0)
        blob = gcs.Blob(name=os.path.join(filepath, filename_shard),
                        bucket=bucket)
        if blob.exists():
            count = 0
            while blob.exists():
                logger.info(' -- Removing blob {}'.format(blob.path))
                blob.delete()
                count += 1
                filename_shard = ft_table.name + '{0:012d}'.format(count)
                blob = gcs.Blob(name=os.path.join(filepath, filename_shard),
                                bucket=bucket)
        path = dest + '/' + ft_table.name + '*'
        jobname = 'features_dump_job_' + str(uuid.uuid4())
        job = client.extract_table_to_storage(jobname, ft_table, path)
        job.destination_format = 'NEWLINE_DELIMITED_JSON'
        job.begin()
        jobs.append(job)
    return jobs
def create_regional_bucket(bucketname, region):
    '''
    Creates a storage bucket in the current region.
    '''
    client = storage.Client()
    # Bucket's first positional argument is the client; the original passed
    # the bucket name there.
    b = storage.Bucket(client, name=bucketname)
    b.location = region
    try:
        final_bucket = client.create_bucket(b)
        return final_bucket
    except google.api_core.exceptions.Conflict as ex:
        message = '''
            An attempt was made to create a bucket at %s. However, the
            storage API indicated that this was an existing bucket.
            Exception reported: %s
        ''' % (bucketname, ex)
    except google.api_core.exceptions.BadRequest as ex:
        message = '''
            An attempt was made to create a bucket at %s. However, the
            storage API indicated that there was an error during creation.
            Exception reported: %s
        ''' % (bucketname, ex)
    except Exception as ex:
        message = '''
            An attempt was made to create a bucket at %s. However, there
            was an unexpected exception raised.
            Exception reported: %s
        ''' % (bucketname, ex)
    subject = 'Error with bucket creation'
    notify_admins(message, subject)
def create_gcs_bucket(self):
    bucket = storage.Bucket(self.storage_client, name=self.bucket_name)
    if not bucket.exists():
        bucket.create(location=self.bucket_region)
        print('GCS bucket created.')
    else:
        print('GCS bucket found.')
def _check_bucket_exists_and_delete(spinner, storage_client, bucket_name,
                                    project_name):
    """Checks if the bucket exists and deletes it.

    If it already exists, prompt the user to make sure they want to delete
    it and everything in it.

    Args:
        spinner: The spinner displayed in the console
        storage_client: The storage client object used to access GCS
        bucket_name: The name of the bucket to check if it exists
        project_name: The name of the project to check the bucket exists in

    Raises:
        SystemExit: If the bucket already exists and the user does not
            choose to delete it
    """
    bucket = storage.Bucket(
        client=storage_client, name=bucket_name, user_project=project_name)
    if bucket.exists():
        spinner.hide()
        answer = raw_input(
            '\nWARNING!!! Bucket {} already exists in project {}\nType YES'
            ' to confirm you want to delete it: '.format(
                bucket_name, project_name))
        spinner.show()
        if answer != 'YES':
            spinner.fail('X')
            raise SystemExit()
        spinner.write('')
        bucket.delete(force=True)
        spinner.write('{} TESTING: Bucket {} deleted from project {}'.format(
            _CHECKMARK, bucket_name, project_name))
def test_find_bucket_not_found_with_prefix(self):
    """Tests that exception is raised if bucket is not found."""
    bucket_foo = storage.Bucket(self.gcs_client, self.foo_name)
    bucket_iterator = iter([bucket_foo])
    self.assertRaises(NotFound,
                      gcs_transcript_utils.find_bucket_with_prefix,
                      bucket_iterator, self.bar_name)
def listAllFilesInFolder(self, folder, useCacheBucket=False):
    if self['data_file_storage_method'] == 'local':
        dir = os.path.join(self.configurationDirectory, folder)
        if os.path.exists(dir):
            return os.listdir(dir)
        else:
            return []
    elif self['data_file_storage_method'] == 'gcs':
        if 'applicationId' not in self or self.applicationId is None:
            raise RuntimeError(
                "Can't load object from google cloud storage without an applicationId, which is used to indicate the bucket."
            )
        storageClient = getSharedGCSStorageClient()
        bucketId = "kwola-testing-run-data-" + self.applicationId
        if useCacheBucket:
            bucketId += "-cache"
        applicationStorageBucket = storage.Bucket(storageClient, bucketId)
        blobs = applicationStorageBucket.list_blobs(prefix=folder,
                                                    delimiter="")
        return [blob.name[len(folder) + 1:] for blob in blobs]
    else:
        raise RuntimeError(
            f"Unexpected value {self['data_file_storage_method']} for configuration data_file_storage_method"
        )
def deleteKwolaFileData(self, folder, fileName, useCacheBucket=False):
    filePath = os.path.join(folder, fileName)
    if self['data_serialization_encryption_key']:
        filePath += ".enc"
    try:
        if self['data_file_storage_method'] == 'local':
            os.unlink(os.path.join(self.configurationDirectory, filePath))
        elif self['data_file_storage_method'] == 'gcs':
            if 'applicationId' not in self or self.applicationId is None:
                raise RuntimeError(
                    "Can't load object from google cloud storage without an applicationId, which is used to indicate the bucket."
                )
            storageClient = getSharedGCSStorageClient()
            bucketId = "kwola-testing-run-data-" + self.applicationId
            if useCacheBucket:
                bucketId += "-cache"
            applicationStorageBucket = storage.Bucket(
                storageClient, bucketId)
            objectBlob = storage.Blob(filePath, applicationStorageBucket)
            objectBlob.delete()
            return
        else:
            raise RuntimeError(
                f"Unexpected value {self['data_file_storage_method']} for configuration data_file_storage_method"
            )
    except FileNotFoundError:
        return
    except google.cloud.exceptions.NotFound:
        return
def is_updated_after(self, bucket, object, ts):
    """
    Checks if an object is updated in Google Cloud Storage.

    :param bucket: The Google cloud storage bucket where the object is.
    :type bucket: str
    :param object: The name of the object to check in the Google cloud
        storage bucket.
    :type object: str
    :param ts: The timestamp to check against.
    :type ts: datetime.datetime
    """
    client = self.get_conn()
    bucket = storage.Bucket(client=client, name=bucket)
    blob = bucket.get_blob(blob_name=object)
    blob.reload()

    blob_update_time = blob.updated

    if blob_update_time is not None:
        import dateutil.tz

        if not ts.tzinfo:
            ts = ts.replace(tzinfo=dateutil.tz.tzutc())

        self.log.info("Verify object date: %s > %s", blob_update_time, ts)

        if blob_update_time > ts:
            return True

    return False
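# A hedged usage sketch: `hook` is assumed to be an instance of the hook class
# defining is_updated_after above; the bucket and object names are
# placeholders.
import datetime

cutoff = datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)
if hook.is_updated_after('example-bucket', 'data/report.csv', cutoff):
    print('Object was modified after the cutoff.')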
def get_bucket(self):
    """Get the bucket defined by 'bucket_name' from the storage_client.

    Throws a ValueError when bucket_name is not set. If the bucket does
    not exist in GCS, a new bucket will be created.
    """
    if self._bucket:
        return self._bucket

    if not self.bucket_name:
        raise ValueError("The 'bucket_name' needs to be set.")

    try:
        self._bucket = self.storage_client.get_bucket(self.bucket_name)
    except (exceptions.NotFound, exceptions.Forbidden):
        bucket = storage.Bucket(self.storage_client, name=self.bucket_name)
        bucket.versioning_enabled = True
        bucket.lifecycle_rules = [{
            'action': {
                'type': 'SetStorageClass',
                'storageClass': 'NEARLINE'
            },
            'condition': {
                'numNewerVersions': 1,
                'matchesStorageClass': ['REGIONAL', 'STANDARD'],
                'age': 30
            }
        }]
        try:
            bucket.create(location='europe-west4')
        except exceptions.Conflict:
            raise
        self._bucket = self.storage_client.get_bucket(self.bucket_name)

    return self._bucket
def delete_blob(self, audioName: str):
    """Deletes a blob from the bucket."""
    # Assumes the target bucket name is stored on the instance; the
    # original referenced an undefined `bucket_name`.
    bucket = storage.Bucket(self.storage_client, name=self.bucket_name)
    blob = bucket.blob(audioName)
    blob.delete()
    print("Blob {} deleted.".format(audioName))
def _get_blob(self, bucket_name: str, blob_name: str) -> gcs.Blob:
    """Get a Blob object by name."""
    with aws.service_account_credentials():
        client = gcs.Client()
        bucket = gcs.Bucket(client, bucket_name)
        return bucket.get_blob(blob_name)
def create_bucket(client, bucket_name, location, storage_class):
    # Set properties on a plain resource object.
    bucket = storage.Bucket(client, name=bucket_name)
    bucket.location = location
    bucket.storage_class = storage_class
    bucket = client.create_bucket(bucket)
    return bucket
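# A minimal usage sketch for create_bucket above, assuming application
# default credentials are available; the bucket name, location, and storage
# class are placeholders.
from google.cloud import storage

client = storage.Client()
bucket = create_bucket(client, 'example-bucket-name', 'us-central1',
                       'STANDARD')
print(f'Created bucket {bucket.name} in {bucket.location}')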
def get_gcs_bucket(d_pl_options):
    path_segments = d_pl_options[fidscs_globals.OPT_NAME_WORK_DIR][5:].split('/')
    gcs_bucket = path_segments[0]
    beam_gcp_project = d_pl_options[fidscs_globals.OPT_NAME_PROJECT]
    return gcs.Bucket(get_gcs_client(),
                      name=gcs_bucket,
                      user_project=beam_gcp_project)
def create_bucket(self):
    """Creates a new bucket."""
    # Assumes the bucket name is stored on the instance; the original
    # referenced an undefined `bucket_name`.
    bucket = storage.Bucket(self.storage_client, name=self.bucket_name)
    bucket.location = "europe-west1"
    self.storage_client.create_bucket(bucket)
def list_bucket_contents():
    files = []
    client = storage.Client()
    bucket = storage.Bucket(client, 'broad-dsp-monster-hca-prod-lattice')
    all_blobs = list(client.list_blobs(bucket))
    for blob in all_blobs:
        files.append(blob.name)
    return files
def _uri_to_blob(creds, uri, conn=None):
    assert uri.startswith('gs://')
    url_tup = urlparse(uri)
    bucket_name = url_tup.netloc
    if conn is None:
        conn = calling_format.connect(creds)
    b = storage.Bucket(conn, name=bucket_name)
    return storage.Blob(url_tup.path.lstrip('/'), b)
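# A hedged usage sketch for _uri_to_blob above: passing an explicit client as
# `conn` bypasses calling_format.connect, so creds can be None here. The URI
# is a placeholder.
from google.cloud import storage

client = storage.Client()
blob = _uri_to_blob(None, 'gs://example-bucket/wal/000000010000000000000001',
                    conn=client)
print(blob.exists())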
def is_dir(self, cred_id, bucket, path):
    client = self.__get_client(cred_id)
    bucket = storage.Bucket(client, bucket)
    prefix = f"{path}/"
    for blob in bucket.list_blobs(prefix=prefix):
        if blob.name.startswith(prefix):
            return True
    return False
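# A hedged usage sketch: `store` is assumed to be an instance of the class
# defining is_dir above; the credential id, bucket, and path are placeholders.
if store.is_dir('example-cred-id', 'example-bucket', 'datasets/2020'):
    print('Prefix contains at least one object.')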
def _uri_to_blob(creds, uri, conn=None):
    assert uri.startswith('gs://')
    url_tup = urlparse(uri)
    bucket_name = url_tup.netloc
    if conn is None:
        conn = calling_format.connect(creds)
    b = storage.Bucket(conn, name=bucket_name)
    # Use 10MB chunk size
    return storage.Blob(url_tup.path, b, chunk_size=1048576 * 10)
def saveKwolaFileData(self, folder, fileName, fileData, useCacheBucket=False):
    filePath = os.path.join(folder, fileName)

    if self['data_serialization_encryption_key']:
        nonceData = os.urandom(16)
        keyHash = hashlib.sha256()
        keyHash.update(
            bytes(
                self['data_serialization_encryption_key'] + folder + fileName,
                "utf8"))
        cipher = Cipher(algorithms.AES(keyHash.digest()),
                        modes.CTR(nonceData))
        encryptor = cipher.encryptor()
        fileData = nonceData + encryptor.update(
            fileData) + encryptor.finalize()
        filePath += ".enc"
    else:
        cipher = None

    if self['data_file_storage_method'] == 'local':
        # Todo - we shouldn't be making these os.path.exists calls every
        # single time we save file data. Its inefficient.
        if not os.path.exists(
                os.path.join(self.configurationDirectory, folder)):
            try:
                os.mkdir(os.path.join(self.configurationDirectory, folder))
            except FileExistsError:
                # This just means there is a race condition and multiple
                # threads attempted to create this folder at the same time.
                pass
        with open(os.path.join(self.configurationDirectory, filePath),
                  'wb') as f:
            f.write(fileData)
    elif self['data_file_storage_method'] == 'gcs':
        if 'applicationId' not in self or self.applicationId is None:
            raise RuntimeError(
                "Can't load object from google cloud storage without an applicationId, which is used to indicate the bucket."
            )
        storageClient = getSharedGCSStorageClient()
        bucketId = "kwola-testing-run-data-" + self.applicationId
        if useCacheBucket:
            bucketId += "-cache"
        applicationStorageBucket = storage.Bucket(storageClient, bucketId)
        objectBlob = storage.Blob(filePath, applicationStorageBucket)
        objectBlob.upload_from_string(fileData)
    else:
        raise RuntimeError(
            f"Unexpected value {self['data_file_storage_method']} for configuration data_file_storage_method"
        )
def up(self):
    self.project = self.config.get('project', os.environ['GCLOUD_PROJECT'])
    self.gs = storage.Client(project=self.project)
    self.bucket_name = self.config['bucket']
    self.bucket = storage.Bucket(self.gs, name=self.bucket_name)
    self.ensure_bucket()
    super().up()