def test_blob():
    """Provides a pre-existing blob in the test bucket."""
    bucket = storage.Client().bucket(BUCKET)
    blob = Blob('encryption_test_sigil',
                bucket, encryption_key=TEST_ENCRYPTION_KEY_DECODED)
    content = 'Hello, is it me you\'re looking for?'
    blob.upload_from_string(content)
    return blob.name, content
def download_to_file(client, to_delete):
    # [START download_to_file]
    from google.cloud.storage import Blob

    client = storage.Client(project='my-project')
    bucket = client.get_bucket('my-bucket')
    encryption_key = 'c7f32af42e45e85b9848a6a14dd2a8f6'
    blob = Blob('secure-data', bucket, encryption_key=encryption_key)
    blob.upload_from_string('my secret message.')
    with open('/tmp/my-secure-file', 'wb') as file_obj:
        blob.download_to_file(file_obj)
    # [END download_to_file]

    to_delete.append(blob)
Example #4
def get_data():
    """
       Retrieve the data file from GCP Storage, and return
       the file as a dictionary.
       Create the file, with dummy data, if it don't exist.
    """
    # Introduce a delay here.
    do_delay()
    # Start of the actual function
    rtn = None
    storage_client = storage.Client()
    bucket_name = current_app.config.get('DATA_BUCKET_NAME')
    print('bucket_name=%s' % bucket_name)
    try:
        bucket = storage_client.get_bucket(bucket_name)
    except Exception:
        # The bucket doesn't exist yet; create it.
        bucket = storage_client.create_bucket(bucket_name)
    # Test if the data file is found in the bucket, and
    # create it if it doesn't exist.
    blob = Blob(current_app.config.get('DATA_FILE_NAME'), bucket)
    if not blob.exists():
        # Open the initial data file
        init_fname = current_app.config.get('INIT_DATA_FILE')
        with open(init_fname) as infile:
            init_data = json.load(infile)
        # Copy it to the storage bucket
        blob.upload_from_string(json.dumps(init_data, indent=4))
    data_str = blob.download_as_string()
    rtn = json.loads(data_str)
    print('GOT BAG DATA:')
    print(json.dumps(rtn, indent=4))
    return rtn, blob
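A minimal usage sketch, assuming a Flask app object (here a placeholder named app) whose config defines the DATA_BUCKET_NAME, DATA_FILE_NAME and INIT_DATA_FILE keys used above:

# Hypothetical caller; an application context is required because get_data() uses current_app.
with app.app_context():
    data, blob = get_data()
    data['visits'] = data.get('visits', 0) + 1            # modify the dictionary (placeholder field)
    blob.upload_from_string(json.dumps(data, indent=4))   # persist the change back to the bucket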
Example #5
    def write_data(self,
                   data: str,
                   bucket_name: str,
                   name: str,
                   content_type: str = None):
        blob = Blob(name, self.get_bucket(bucket_name))
        blob.upload_from_string(data, content_type=content_type)
Example #6
def handle_bq_lock(gcs_client: storage.Client, lock_blob: storage.Blob,
                   next_job_id: Optional[str]):
    """Reclaim the lock blob for the new job id (in-place) or delete the lock
    blob if next_job_id is None."""
    try:
        if next_job_id:
            if lock_blob.exists(client=gcs_client):
                lock_blob.upload_from_string(
                    next_job_id,
                    if_generation_match=lock_blob.generation,
                    client=gcs_client)
            else:  # This happens when submitting the first job in the backlog
                lock_blob.upload_from_string(next_job_id,
                                             if_generation_match=0,
                                             client=gcs_client)
        else:
            print("releasing lock at: "
                  f"gs://{lock_blob.bucket.name}/{lock_blob.name}")
            lock_blob.delete(
                if_generation_match=lock_blob.generation,
                client=gcs_client,
            )
    except google.api_core.exceptions.PreconditionFailed as err:
        raise exceptions.BacklogException(
            f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} "
            f"was changed by another process.") from err
Example #7
def test_blob():
    """Provides a pre-existing blob in the test bucket."""
    bucket = storage.Client().bucket(BUCKET)
    blob_name = "test_blob_{}".format(uuid.uuid4().hex)
    blob = Blob(
        blob_name,
        bucket,
        encryption_key=TEST_ENCRYPTION_KEY_DECODED,
    )
    content = "Hello, is it me you're looking for?"
    blob.upload_from_string(content)

    yield blob.name, content

    # To delete an encrypted blob, you have to provide the same key
    # used for the blob. When you provide a wrong key, you'll get
    # NotFound.
    try:
        # Clean up for the case that the rotation didn't occur.
        blob.delete()
    except NotFound as e:
        # For the case that the rotation succeeded.
        print("Ignoring 404, detail: {}".format(e))
        blob = Blob(blob_name,
                    bucket,
                    encryption_key=TEST_ENCRYPTION_KEY_2_DECODED)
        blob.delete()
Example #8
def upload(csvdata, bucketname, blobname):
    client = storage.Client()
    bucket = client.get_bucket(bucketname)
    blob = Blob(blobname, bucket)
    blob.upload_from_string(csvdata)
    gcslocation = 'gs://{}/{}'.format(bucketname, blobname)
    logging.info('Uploaded {} ...'.format(gcslocation))
    return gcslocation
Example #9
    def _set_data(self, **kwargs) -> dict:
        df = kwargs.get('data_frame')
        ts = time.time_ns()
        blob_name_parts = os.path.splitext(self.__blob_name)
        blob_name = blob_name_parts[0] + '_' + str(ts) + blob_name_parts[1]
        blob = Blob(blob_name, self.__bucket)
        blob.upload_from_string(df.to_csv(), content_type=self.__file_format)
        return dict(record_count=df.shape[0])
Example #11
def store_well(uwi):
    bucketName = request.get_json()['bucket']

    logging.info('Storing well {} to bucket {}.'.format(uwi, bucketName))

    # Fetch entity with id
    ds = datastore.Client(project=globalproject)
    kind = wellKind
    query = ds.query(kind=kind)
    query.order = ['uwi']
    result = get_fetch_results(query, uwi)
    if len(result) == 0:
        return make_response("Well not found", 404)
    entity = result[0]

    geolocation = {}
    geolocation['longitude'] = entity['longitude']
    geolocation['latitude'] = entity['latitude']

    wellObj = {}
    wellObj['md'] = entity['md']
    wellObj['uwi'] = entity['uwi']
    wellObj['name'] = entity['name']
    wellObj['location'] = geolocation
    wellObj['spuddate'] = entity['spuddate']

    jsonObj = json.dumps(wellObj)

    gcs = storage.Client(project=globalproject)

    try:
        # Check if the bucket exists
        bucket = gcs.get_bucket(bucketName)

        # Store the JSON document in the bucket
        filename = str(uwi)
        blob = Blob(filename, bucket)
        try:
            data = jsonObj.encode('utf-8')
            blob.upload_from_string(data, content_type='text/plain')
            logging.info("File " + filename + " stored in bucket " +
                         bucketName)
            return make_response("Successfully stored in GCS", 200)
        except Exception:
            return make_response('Error: Cannot store json object', 404)
    except exceptions.NotFound:
        return make_response(
            'Error: Bucket {} does not exist.'.format(bucketName), 404)
    except exceptions.BadRequest:
        return make_response(
            'Error: Invalid bucket name {}'.format(bucketName), 400)
    except exceptions.Forbidden:
        return make_response(
            'Error: Forbidden, Access denied for bucket {}'.format(bucketName),
            403)
Example #12
def add(x):
    time.sleep(1)
    bucket = client.get_bucket('testdownload12')
    blob = Blob("%s" % x, bucket)
    x = x.encode("UTF8")
    whoisdata = whois.whois(x)

    whoisdata = json.dumps(whoisdata, indent=4, sort_keys=True, default=str)

    blob.upload_from_string(whoisdata)
    return x
    def get_container(self):
        try:
            container = self.storage_client.get_bucket(self.CONTAINER)
            # Test if the container is accessible
            blob = Blob('AccessTestByServiceFabrikPythonLibrary', container)
            blob.upload_from_string(
                'Sample Message for AccessTestByServiceFabrikPythonLibrary', content_type='text/plain')
            blob.delete()
            return container
        except Exception as error:
            self.logger.error('[GCP] [STORAGE] ERROR: Unable to find or access container {}.\n{}'.format(
                self.CONTAINER, error))
            return None
Example #15
def download_to_file(to_delete):
    # [START download_to_file]
    from google.cloud.storage import Blob

    client = storage.Client(project="my-project")
    bucket = client.get_bucket("my-bucket")
    encryption_key = "c7f32af42e45e85b9848a6a14dd2a8f6"
    blob = Blob("secure-data", bucket, encryption_key=encryption_key)
    blob.upload_from_string("my secret message.")
    with open("/tmp/my-secure-file", "wb") as file_obj:
        client.download_blob_to_file(blob, file_obj)
    # [END download_to_file]

    to_delete.append(blob)
Example #16
def upload_blob_string(bucket_name, csvString, destination_blob_name):
    client = storage.Client()
    bucket = client.get_bucket(bucket_name)
    blob = Blob(destination_blob_name, bucket)
    return blob.upload_from_string(
        data=csvString,
        content_type='text/csv')
Example #17
    def upload_data(self, data, dest, contentType=None):
        if self.use_gcs:
            args = {}
            if contentType is not None:
                args = {'content_type': contentType}
            blob = Blob(dest, self.gcs_bucket)
            blob.upload_from_string(data, **args)
        else:
            args = {
                'Body': data,
                'Bucket': self.bucket,
                'Key': dest,
            }
            if contentType is not None:
                args['ContentType'] = contentType
            self.s3.put_object(**args)
    def store_file_to_gcs(self, bucket_name, datastr, filename):

        if not self.check_bucket(bucket_name):
            self.create_bucket(bucket_name)
            print('bucket created!')

        bucket = self.gcs.get_bucket(bucket_name)
        blob = Blob(filename, bucket)

        try:
            blob.upload_from_string(datastr)
            print('blob stored ok')
            return True
        except Exception:
            print('Error: Cannot upload the file {}'.format(filename))
            return False
Example #19
    def store_file_to_gcs(self, bucket_name, filename, content):

        if self.check_bucket(bucket_name):
            try:
                print("content: ", content)
                print("inside store_file_to_gcs")
                bucket = self.gcs.get_bucket(bucket_name)
                print(bucket)
                blob = Blob(filename, bucket)
                print("blob", blob)
                blob.upload_from_string(content, content_type='application/json')
                print("upload successful")
                return True
            except IOError as e:
                print(e)
                # print('Error: Cannot find the file {}'.format(filename))
        return False
Example #20
def handle_bq_lock(gcs_client: storage.Client,
                   lock_blob: storage.Blob,
                   next_job_id: Optional[str],
                   table: bigquery.TableReference,
                   retry_attempt_cnt: Optional[int] = None):
    """Reclaim the lock blob for the new job id (in-place) or delete the lock
    blob if next_job_id is None."""
    try:
        if next_job_id:
            lock_blob_contents = json.dumps(
                dict(job_id=next_job_id,
                     table=table.to_api_repr(),
                     retry_attempt_cnt=retry_attempt_cnt))
            logging.log_with_table(
                table,
                f"Writing the following content to lock_blob {lock_blob.name}:"
                f" {dict(job_id=next_job_id, table=table.to_api_repr(), retry_attempt_cnt=retry_attempt_cnt)}"
            )
            if lock_blob.exists(client=gcs_client):
                lock_blob.upload_from_string(
                    lock_blob_contents,
                    if_generation_match=lock_blob.generation,
                    client=gcs_client)
            else:  # This happens when submitting the first job in the backlog
                lock_blob.upload_from_string(
                    lock_blob_contents,
                    if_generation_match=0,  # noqa: E126
                    client=gcs_client)
        else:
            logging.log_with_table(
                table, "releasing lock at: "
                f"gs://{lock_blob.bucket.name}/{lock_blob.name}")
            lock_blob.delete(
                if_generation_match=lock_blob.generation,
                client=gcs_client,
            )
    except (google.api_core.exceptions.PreconditionFailed,
            google.api_core.exceptions.NotFound) as err:
        if isinstance(err, google.api_core.exceptions.PreconditionFailed):
            raise exceptions.BacklogException(
                f"The lock at gs://{lock_blob.bucket.name}/{lock_blob.name} "
                f"was changed by another process.") from err
        logging.log_with_table(
            table,
            "Tried deleting a lock blob that was either already deleted "
            "or never existed.")
Example #21
def master(event, context):
    try:
        pubsub_message = base64.b64decode(event['data']).decode('utf-8')           
        msg = json.loads(pubsub_message)

        logging.debug("Image: " + msg['id'])
        
        ##1. Get image details
        response = requests.get("http://interview.agileengine.com/images/" + msg['id'],headers={'Authorization': 'Bearer ' + msg['token']})
        image_detail = json.loads(response.text)

        ##2. Save metadata into Firestore 
        db = firestore.Client()
        
        data_tag = []
        tag_values = image_detail['tags'].strip().split(" ")
        for tag in tag_values:
            data_tag.append({ u'tag': tag })

        data = {
                u'id': msg['id'],
                u'author': image_detail['author'],
                u'camera': image_detail['camera'],
                u'tags': data_tag,
                u'full_picture': image_detail['full_picture'],
                u'cropped_picture': image_detail['cropped_picture'],
                u'updated': datetime.today()
            }
            
        db.collection(u'Photos').document(msg['id']).set(data)
        
        ##3. Download image from URL
        datatoupload = requests.get(image_detail['full_picture'], stream=True)
        datatoupload.raw.decode_content = True

        ##4. Save image into Google Bucket
        file_name = msg['id'] + ".jpg"
        _google_client = storage.Client()
        bucket = _google_client.get_bucket('photos_cache')
        blob = Blob(file_name, bucket)
        blob.upload_from_string(datatoupload.raw.data)

    except Exception as e:
        logging.error(str(e))
Example #22
def deidentify_with_mask(data, done):

    # Convert the project id into a full resource id.
    parent = dlp.project_path(PROJECT_ID)

    # Construct inspect configuration dictionary
    inspect_config = {
        'info_types': [{'name': info_type} for info_type in INFO_TYPES]
    }

    # Construct deidentify configuration dictionary
    deidentify_config = {
        'info_type_transformations': {
            'transformations': [
                {
                    'primitive_transformation': {
                        'character_mask_config': {
                            'masking_character': 'X',
                            'number_to_mask': 0
                        }
                    }
                }
            ]
        }
    }


    storage_client = storage.Client()
    bucket = storage_client.get_bucket(SENSITIVE_BUCKET)

    blobs = bucket.list_blobs()

    for blob in blobs:
        gcs_file = blob.download_as_string()
        #contents = gcs_file.readline()
        item = {'value': gcs_file}
        # Call the API
        response = dlp.deidentify_content(
            parent, inspect_config=inspect_config,
            deidentify_config=deidentify_config, item=item)
        masked_item = response.item.value
        destination_bucket = storage_client.get_bucket(MASKED_BUCKET)
        masked_blob = Blob(blob.name, destination_bucket)
        masked_blob.upload_from_string(masked_item)
Example #23
def upload():
    """Process the uploaded file and upload it to Google Cloud Storage."""
    uploaded_file = request.files['file']
    bucket_name = other_bucket
    size = len(uploaded_file.read())
    extension = os.path.splitext(uploaded_file.filename)[1]
    jpg = ".jpg"

    #    if size > 2000:
    #    	return "Cannot allow file larger than 20 kb"

    if extension == jpg:
        bucket_name = pic_bucket

    client = storage.Client()

    bucket = client.get_bucket(bucket_name)

    #encoded_key = base64.b64encode(key).decode('utf-8')

    #encryption_key = base64.b64decode(encoded_key)
    blob = Blob(uploaded_file.filename, bucket)
    uploaded_file.seek(0)  # rewind: the stream was already consumed by the size check above
    blob.upload_from_string(uploaded_file.read())
    #blob.make_public()

    #    if blob.exists():
    #    	return "already exists"
    #    else:
    #    	return "doesnt exist"

    #    blob.upload_from_file(uploaded_file)
    #    #Print filesize
    #    object_list=[]
    #    total_size = 0
    #    blobs = bucket.list_blobs()
    #    for blob in blobs:
    #        object_list.append(blob.name+' '+str(blob.size))
    #        object_list.append("<br>")
    #        total_size = total_size + blob.size
    #    str1 = ''.join(object_list)

    #    return str1 + 'Total Size:' +str(total_size) + '<br>' + 'Time Created' + blob.metadata
    return str(blob.time_created)
Example #24
def sendBucket(id):
    try:
        ds = datastore.Client(project='hackathon-team-011')
        key = ds.key('capitals', int(id))
        entity = ds.get(ds.key('capitals', int(id)))
        if entity is None:
            response = {'code': 404, 'message': 'Capital not found'}
            return jsonify(response), 404

        gcs = storage.Client(project='hackathon-team-011')

        obj = request.get_json()
        bucketname = obj['bucket']

        bucket = gcs.get_bucket(bucketname)
        filename = str(id)

        blob = Blob(filename, bucket)

        #fs = open(filename, 'w')
        #fs.write(json.dumps(parse_capital(entity)))
        #fs.close()
        #fs = open(filename, 'r')
        blob.upload_from_string(json.dumps(parse_capital(entity)))
        #fs.close()

        response = {
            'code': 200,
            'message':
            'Capital successfully stored in GCS in file: ' + filename
        }
        return jsonify(response), 200

    except exceptions.NotFound:
        response = {
            'code': 404,
            'message': 'Error: Bucket {} does not exist.'.format(bucketname)
        }
        return jsonify(response), 404
    except Exception as e:
        response = {'code': 0, 'message': 'Unexpected error: ' + str(e)}
        return jsonify(response)
Example #25
    def store(self, bucket_name, id, entity):
        bucket_exists = self.check_bucket(bucket_name)

        if bucket_exists is not None and not bucket_exists:
            try:
                print('creating bucket {}'.format(bucket_name))
                bucket = self.gcs.create_bucket(bucket_name)
                blob = Blob(id, bucket)
                js = json.dumps(entity)
                resp = Response(js, status=200, mimetype='application/json')
                blob.upload_from_string(js)
            except Exception as e:
                print "Error: Create bucket Exception"
                print e
                return None
        else:
            bucket = self.gcs.get_bucket(bucket_name)
            blob = Blob(id, bucket)
            js = json.dumps(entity)
            resp = Response(js, status=200, mimetype='application/json')
            blob.upload_from_string(js)

        return 200
Example #26
def diarize(data, context):

    speech_file = data['name']
    bucket = data['bucket']
    print('Bucket {}'.format(bucket))
    print('File {}'.format(speech_file))
    filename_uri = "gs://" + bucket + "/" + speech_file
    print('File name uri {}'.format(filename_uri))
    dest_file = speech_file + ".txt"

    audio = speech.types.RecognitionAudio(uri=filename_uri)
    config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code='en-US',
        enable_speaker_diarization=True,
        diarization_speaker_count=2)

    operation = client.long_running_recognize(config, audio)
    print('Waiting for operation to complete...')

    # The transcript within each result is separate and sequential per result.
    # However, the words list within an alternative includes all the words
    # from all the results thus far. Thus, to get all the words with speaker
    # tags, you only have to take the words list from the last result:
    #print(response.results)
    response = operation.result(timeout=300)
    result = response.results[-1]
    words_info = result.alternatives[0].words
    writestring = ''
    for word_info in words_info:
        writestring += "Word: {} Speaker Tag: {}\n".format(
            word_info.word, word_info.speaker_tag)
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket)
    blob = Blob(dest_file, bucket)
    blob.upload_from_string(writestring)
Example #27
def upload_blob_string(bucket_name, data_string, destination_blob_name,
                       metadata, content_type):
    logging.debug(
        'upload_blob_string(...) \n\tbucket_name={}\n\tlen data_string={}\n\t'
        'destination_blob_name={}\n\tmetadata={}\n\tcontent_type={}'.format(
            bucket_name, len(data_string), destination_blob_name, metadata,
            content_type))
    client = storage.Client()
    bucket = client.get_bucket(bucket_name)
    blob = Blob(destination_blob_name, bucket)
    if blob.metadata is None:
        blob.metadata = metadata
    else:
        blob.metadata.update(metadata)
    return blob.upload_from_string(data=data_string, content_type=content_type)
def execute(request):
    # Check payload
    if not request.form or 'external_ref' not in request.form:
        return ('Bad request: External Ref is required', 400)
    if not request.form or 'external_key' not in request.form:
        return ('Bad request: External Key is required', 400)
    if not request.files or 'file' not in request.files:
        return ('Bad request: File is required', 400)
           
    # Get external ref
    external_ref = request.form.get('external_ref')
    # Get external key
    external_key = request.form.get('external_key')
    
    # External ref uuid name
    external_ref_uuid_name = 'extref=%s%s' % (slugify.slugify(external_ref), external_key)
    # Generate UUID external ref
    external_ref_uuid = uuid.uuid5(uuid.NAMESPACE_X500, external_ref_uuid_name)
    # Get external ref uuid str
    external_ref_uuid_str = str(uuid.uuid5(uuid.NAMESPACE_X500, external_ref_uuid_name))
    # Get unique key external ref
    external_ref_key = hashlib.md5(external_ref_uuid.bytes).hexdigest()

    try: # Get/Create bucket
        client_storage = storage.Client()
        bucket = client_storage.create_bucket(BUCKET_NAME)
    except google.api_core.exceptions.Conflict: # Bucket already exists
        bucket = client_storage.get_bucket(BUCKET_NAME)
    except Exception as e: # Any exception
        return (u'Error: %s' % e, 500) 
    
    # Bucket file uuid name
    encryption_key_name = 'extrefkey=%s' % external_ref_key
    # Generate UUID Bucket file
    encryption_key_uuid = uuid.uuid5(uuid.NAMESPACE_X500, encryption_key_name)
    # Generate encryption key Bucket file
    encryption_key = hashlib.md5(encryption_key_uuid.bytes).hexdigest()

    try: # Get file
        file = request.files.get('file')
        # Generate path file
        path_file = "%s/%s/%s" % (str(external_ref_uuid), str(encryption_key_uuid), file.filename)
        # Create encrypted Blob
        blob = Blob(name=path_file, bucket=bucket, encryption_key=encryption_key)
    except Exception as e:
        return (u'Error: %s' % e, 500)
    
    try: # Read and upload file into bucket
        buff = file.read()
        blob.upload_from_string(buff, content_type=file.content_type)
    except Exception as e: # Any exception
        return (u'Error: %s' % e, 500)

    # Generate File hash
    file_hash = hashlib.md5(buff).hexdigest()

    try: # Create entity
        client_datastore = datastore.Client()
        # Generate Datastore Key
        item_key = client_datastore.key(DS_KIND, "%s-%s" % (external_ref_key, file_hash))
        # Entity
        item = datastore.Entity(key=item_key,) # Insert user key
        item['external_ref_key'] = external_ref_key
        item['file_hash'] = file_hash
        item['file_path'] = path_file
        item['file_content_type'] = file.content_type
        client_datastore.put(item) 
    except Exception as e: # Any exception
        return (u'Error: %s' % e, 500)

    # Data return
    data = json.dumps({'filename': file.filename,'hash' : file_hash})
    # Response
    return Response(data, mimetype='application/json')
Example #29
def upload_document(document, uid):
    """
        Uploads given document to given user's folder

        If a file like "documentname.ext" already exists,
        find the next best name as follows: "documentname_({i})" where {i} is a number
        between 1 and the number of similarly-named files + 1

        Args:
            document - File blob given by user
            uid - firestore user's document id

    """
    try:
        new_file = None
        flags = re.U | re.I
        file_no_ext = re.sub(r"\.[a-z]+$", "", document.filename, flags=flags)
        file_ext = re.search(r"\.[a-z]+$", document.filename, flags).group(0)

        # Lists current files in the database
        blobs = storage_client.list_blobs(bucket,
                                          prefix=get_cloud_folder(uid) +
                                          file_no_ext)

        # Grabs all files with the same basic file name
        files = [
            re.search(r"[^\\\/:*?\"<>]+\.[a-z]+$", blob.name, flags).group(0)
            for blob in blobs
        ]
        next_file_name = None

        # If no similarly-named files or the first one is not an exact match
        if len(files) == 0 or files[0] != document.filename:
            # New file keeps its original name
            new_file = Blob(
                get_cloud_folder(uid) + document.filename,
                bucket,
                encryption_key=encryption_key,
            )

        if new_file is None:
            for i, blob in enumerate(files):
                # Tries to find an open similar file name between 1 and the number of files
                next_file_name = str.format("{}_({}){}", file_no_ext, i + 1,
                                            file_ext)
                if next_file_name not in files:
                    print(next_file_name, blob)
                    new_file = Blob(
                        get_cloud_folder(uid) + next_file_name,
                        bucket,
                        encryption_key=encryption_key,
                    )
                    break

        # If no open filename
        if new_file is None:
            # New file name is the number of files + 1
            next_file_name = str.format("{}_({}){}", file_no_ext, i + 1,
                                        file_ext)
            new_file = Blob(
                get_cloud_folder(uid) + next_file_name,
                bucket,
                encryption_key=encryption_key,
            )

        # Creates file from the given blob and uploads it to Google Cloud Storage
        new_file.upload_from_string(document.read(),
                                    content_type=document.content_type)

    except Exception as e:
        print(e)
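A minimal illustration of the collision-renaming rule described in the docstring above; _next_name is a hypothetical helper written only for this sketch and is not part of the original module:

import os

def _next_name(filename, existing):
    # Hypothetical helper mirroring the "documentname_({i})" renaming rule above.
    base, ext = os.path.splitext(filename)
    if filename not in existing:
        return filename
    for i in range(1, len(existing) + 2):
        candidate = "{}_({}){}".format(base, i, ext)
        if candidate not in existing:
            return candidate

# e.g. _next_name("notes.pdf", ["notes.pdf", "notes_(1).pdf"]) -> "notes_(2).pdf"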
Example #30
    def _write_string(self, bucket_name, file_name, data):
        log.info(f"Storing data in {bucket_name}:{file_name}")
        blob = Blob(file_name, self.get_or_create_bucket(bucket_name))
        blob.upload_from_string(data)
    def __create_new_timestamp_blob(self):
        blob = Blob(self.last_timestamp_filename, self.bucket)
        blob.upload_from_string('0')
        return blob
Example #32
def sync_box_to_gcs(box: BoxClient, bucket: Bucket,
                    cache: dict) -> List[Future]:
    """Sync Box account files to a GCS bucket.

    For versioned Box files, the latest version will always be synced back to the GCS bucket. 
    Non-current versions will not be deliberately preserved, though syncing to a versioned 
    bucket will have this effect.
    
    Arguments:
        box {BoxClient} -- [description]
        bucket {Bucket} -- [description]
        cache {dict} -- A dictionary that will opportunistically be filled with Box item paths/objects.
    
    Returns:
        List[Future] -- [description]
    """
    # construct an executor for copy tasks
    executor = ThreadPoolExecutor(max_workers=cpu_count())
    futures = []
    # sync box files back to GCS
    for path, item in box_walk(box.root_folder()):
        LOG.debug("Box directory walk found: {}".format(path))
        # cache the Box item in module scope
        cache[path] = item

        # get the blob to overwrite, or make a new one
        blob_exists = True
        blob = bucket.get_blob(path)
        if not blob:
            blob_exists = False
            blob = Blob(path, bucket)

        # branch depending on whether file or folder
        if item.type == "folder":
            if not blob_exists:
                blob.metadata = {
                    # Not an important value.
                    BOX_MTIME_KEY: datetime.now().isoformat()
                }
                # create directory marker, used by UI and FUSE
                LOG.info("Creating directory marker in GCS: {}".format(
                    blob.name))
                blob.upload_from_string(b'')

        elif item.type == "file":
            box_file = box.file(item.id)
            box_mtime = box_file.get().modified_at
            if should_copy_box_to_gcs(box_file, box_mtime, blob, blob_exists):
                LOG.info(
                    "Box file {} is not found in GCS or updated since last sync. Copying to {}."
                    .format(item.name, blob.name))
                blob.metadata = {
                    BOX_MTIME_KEY: box_mtime
                }  # This change will "follow" the upload into GCS
                temp_file = BytesIO()
                reader = box_file.download_to
                writer = blob.upload_from_file

                future = executor.submit(concurrent_upload, reader, writer,
                                         temp_file)
                futures.append(future)

        else:
            LOG.info("Ignoring item of type {}".format(item.type))

    return futures
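A minimal usage sketch for the sync above; the Box client and bucket name are placeholders, not from the source:

# Hypothetical driver: mirror a Box account into GCS and wait for all copy tasks to finish.
cache = {}
futures = sync_box_to_gcs(box_client, storage.Client().bucket("box-mirror-bucket"), cache)
for future in futures:
    future.result()  # surface any upload errors raised in the worker threads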
    def _create_folder_file():
        folder_key = path.join(root, '{0}_$folder$'.format(folder_name))
        blob = Blob(folder_key, gcp_bucket)
        if not blob.exists():
            blob.upload_from_string(data='')