def add_thumbnail(file_name):
    '''Add a thumbnail for the specified file.'''
    src_uri = boto.storage_uri(
        '{}/{}'.format(conf.photos_bucket_name, file_name), 'gs')
    dest_uri = boto.storage_uri(
        '{}/{}'.format(conf.thumbnails_bucket_name, file_name), 'gs')
    try:
        new_key = dest_uri.new_key()
    except boto.exception.NoAuthHandlerFound as e:
        logging.error(e)
        return None

    # Create a file-like object for holding the photo contents.
    photo = StringIO.StringIO()
    src_uri.get_key().get_file(photo)
    # Rewind before handing the buffer to PIL (same pattern as upload_file below).
    photo.seek(0)

    thumbnail = StringIO.StringIO()
    im = Image.open(photo)
    im.thumbnail((260, 260))
    im.save(thumbnail, 'JPEG')
    thumbnail.seek(0)

    # Save the thumbnail.
    try:
        new_key.set_contents_from_file(thumbnail)
        new_key.make_public()
    except boto.exception.GSResponseError as e:
        logging.error(e)
        # Do we have the credentials file set up?
        boto_cred_file = os.path.expanduser('~') + '/.boto'
        if not os.path.exists(boto_cred_file):
            logging.error(
                'Credentials file {} was not found.'.format(boto_cred_file))
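# A minimal, hypothetical usage sketch for add_thumbnail(): the photo name is
# made up; it assumes conf.photos_bucket_name and conf.thumbnails_bucket_name
# are configured and that a ~/.boto credentials file exists.
add_thumbnail('vacation.jpg')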
def x_test_basic(self):
    from ambry.client.bigquery import BigQuery
    bg = BigQuery()

    import StringIO
    import os
    import shutil
    import tempfile
    import time
    from gslib.third_party.oauth2_plugin import oauth2_plugin
    import boto

    # URI scheme for Google Cloud Storage.
    GOOGLE_STORAGE = 'gs'
    # URI scheme for accessing local files.
    LOCAL_FILE = 'file'

    project_id = 128975330021
    header_values = {
        "x-goog-api-version": "2",
        "x-goog-project-id": project_id
    }

    uri = boto.storage_uri('', GOOGLE_STORAGE)
    for bucket in uri.get_all_buckets():
        print bucket.name
        bucket_uri = boto.storage_uri(bucket.name, GOOGLE_STORAGE)
        for obj in bucket_uri.get_bucket():
            print '%s://%s/%s' % (bucket_uri.scheme, bucket_uri.bucket_name, obj.name)
def read_file(file_name, bucket_name):
    if DEBUG:
        print "Reading File: " + file_name

    # Create a file-like object for holding the object contents.
    object_contents = StringIO.StringIO()
    src_uri = boto.storage_uri(bucket_name + '/' + file_name, GOOGLE_STORAGE)

    # get_file() doesn't return the file contents;
    # it writes the file contents to "object_contents" instead.
    src_uri.get_key(headers=HEADER_VALUES).get_file(object_contents)

    local_dst_uri = boto.storage_uri(file_name, LOCAL_FILE)
    bucket_dst_uri = boto.storage_uri(bucket_name + '/' + file_name,
                                      GOOGLE_STORAGE)
    for dst_uri in (local_dst_uri, bucket_dst_uri):
        object_contents.seek(0)
        dst_uri.new_key(headers=HEADER_VALUES).set_contents_from_file(
            object_contents)
    object_contents.close()

    if DEBUG:
        print "Read data from GCS to file: " + file_name
    return True
def _SetStatus(self, status):
    if self.base:
        uri_string = 'gs://%s/output/stitch.status' % self.base
        logger.info('Setting status %s to %s', status, uri_string)
        uri = boto.storage_uri(uri_string)
        uri.connect(config.gs_access_key, config.gs_secret_key)
        uri.new_key().set_contents_from_string(
            status, policy='public-read',
            headers={'Content-Type': 'text/plain'})

        # Construct a state object to upload.
        state = {}
        state['status'] = status
        state['update_time'] = time.time()
        state['output_base'] = '%s/output' % self.base
        state['input'] = [{'full': os.path.basename(i),
                           'thumb': os.path.basename(t)}
                          for (i, t) in zip(self.input_files, self.input_thumbs)]
        if status == 'DONE':
            state['output'] = {
                'full': 'stitch.jpg',
                'thumb': 'stitch-thumb.jpg'
            }
        if status in ('DONE', 'FAILED'):
            state['log'] = 'stitch.log'

        uri_string = 'gs://%s/output/stitch.state' % self.base
        state_json = json.dumps(state, indent=2)
        logger.info('Uploading state to %s\n%s', uri_string, state_json)
        uri = boto.storage_uri(uri_string)
        uri.connect(config.gs_access_key, config.gs_secret_key)
        uri.new_key().set_contents_from_string(
            state_json, policy='public-read',
            headers={'Content-Type': 'text/plain'})
    else:
        logger.error('No upload path for status %s', status)
def _Check1():
    listing1 = self.RunGsUtil(
        ['ls', '-la', suri(bucket1_uri)], return_stdout=True).split('\n')
    listing2 = self.RunGsUtil(
        ['ls', '-la', suri(bucket2_uri)], return_stdout=True).split('\n')
    # 2 lines of listing output, 1 summary line, 1 empty line from \n split.
    self.assertEquals(len(listing1), 4)
    self.assertEquals(len(listing2), 4)
    # First object in each bucket should match in size and version-less name.
    size1, _, uri_str1, _ = listing1[0].split()
    self.assertEquals(size1, str(len('data0')))
    self.assertEquals(storage_uri(uri_str1).object_name, 'k')
    size2, _, uri_str2, _ = listing2[0].split()
    self.assertEquals(size2, str(len('data0')))
    self.assertEquals(storage_uri(uri_str2).object_name, 'k')
    # Similarly for second object in each bucket.
    size1, _, uri_str1, _ = listing1[1].split()
    self.assertEquals(size1, str(len('longer_data1')))
    self.assertEquals(storage_uri(uri_str1).object_name, 'k')
    size2, _, uri_str2, _ = listing2[1].split()
    self.assertEquals(size2, str(len('longer_data1')))
    self.assertEquals(storage_uri(uri_str2).object_name, 'k')
def upload_to_gc(fileobj):
    filename = helpers.generate_filename(fileobj.name)
    ext = os.path.splitext(fileobj.name)[-1].lower()
    # ext = save_tmp_file(fileobj, filename, ext)
    dst_uri = boto.storage_uri(
        settings.GS_STORAGE_BUCKET + '/' + filename + ext,
        settings.GS_URI_SCHEME)
    dst_uri.new_key().set_contents_from_filename("/tmp/" + filename + ext)
    gcloud_path = dst_uri.object_name
    os.remove("/tmp/" + filename + ext)

    if ext in ['.jpg']:
        # Upload thumbnail.
        dst_uri = boto.storage_uri(
            settings.GS_STORAGE_BUCKET + '/' + filename + ext + ".thumbnail.jpg",
            settings.GS_URI_SCHEME)
        dst_uri.new_key().set_contents_from_filename(
            "/tmp/" + filename + ext + '.thumbnail.jpg')
        os.remove("/tmp/" + filename + ext + ".thumbnail.jpg")
    elif ext in ['.dcm', '.dicom']:
        # Upload thumbnail.
        dst_uri = boto.storage_uri(
            settings.GS_STORAGE_BUCKET + '/' + filename + ext + ".thumbnail.jpg",
            settings.GS_URI_SCHEME)
        dst_uri.new_key().set_contents_from_filename(
            "/tmp/" + filename + ext + '.thumbnail.png')
        os.remove("/tmp/" + filename + ext + ".thumbnail.png")

    return (gcloud_path, ext)
def open(path, mode="rb", **kw):
    uri = urlparse(path)
    if uri.scheme in ['file', 's3', '']:
        return smart_open(path, mode=mode, **kw)
    elif uri.scheme in ['gs']:
        if mode in ('r', 'rb'):
            storage_uri = boto.storage_uri(uri.netloc, uri.scheme)
            bucket = storage_uri.get_bucket(uri.netloc)
            key = bucket.get_key(uri.path)
            if key is None:
                raise KeyError(uri.path)
            return GSOpenRead(key, **kw)
        elif mode in ('w', 'wb'):
            storage_uri = boto.storage_uri(uri.netloc + '/' + uri.path,
                                           uri.scheme)
            key = storage_uri.new_key()
            if key is None:
                raise KeyError(uri.path)
            return GSOpenWrite(key, **kw)
        else:
            raise NotImplementedError(
                "file mode %s not supported for %r scheme" % (mode, uri.scheme))
    else:
        raise NotImplementedError("scheme %r is not supported" % uri.scheme)
def test_invalid_scheme(self):
    uri_str = 'mars://bucket/object'
    try:
        boto.storage_uri(uri_str, validate=False,
                         suppress_consec_slashes=False)
        self.fail('Did not get expected InvalidUriError')
    except InvalidUriError as e:
        self.assertIn('Unrecognized scheme', e.message)
def download(srcName, dstName):
    "Download the files."
    src_uri = boto.storage_uri(bucketName + "/" + srcName, "gs")
    dst_uri = boto.storage_uri(dstName, "file")

    # Append the object name to the directory name.
    dst_key_name = dst_uri.object_name  # + os.sep + src_uri.object_name

    # Use the new destination key name to create a new destination URI.
    new_dst_uri = dst_uri.clone_replace_name(dst_key_name)
    print new_dst_uri

    # Create a new destination key object.
    dst_key = new_dst_uri.new_key()

    # Retrieve the source key and create a source key object.
    src_key = src_uri.get_key()

    # Create a temporary file to hold our copy operation.
    tmp = tempfile.TemporaryFile()
    src_key.get_file(tmp)
    tmp.seek(0)

    # Download the object.
    dst_key.set_contents_from_file(tmp)
    return
def get_all_files(self):
    try:
        uri = boto.storage_uri('', GOOGLE_STORAGE)
        files = []
        bucketmap = dict()
        for b in uri.get_all_buckets():
            bucket_dic, map_path = self.get_files_from_bucket(b)
            if map_path != '':
                # print map_path
                cipher = AES.new(process_cipherkey('password'),
                                 AES.MODE_ECB, CYPHER_IV)
                map_uri = boto.storage_uri(b.name + '/' + map_path,
                                           GOOGLE_STORAGE)
                data = cipher.decrypt(map_uri.get_key().get_contents_as_string())
                for line in data.split('\n'):
                    # print line
                    if line == 'bucket to dir':
                        continue
                    elif line == 'dir to bucket':
                        break
                    else:
                        dirs = line.split()
                        bucketmap[dirs[0]] = eval(dirs[1])
            for prefix in bucket_dic:
                filename = (b.name + '/' + prefix + '_' +
                            bucket_dic[prefix][0] + '_' +
                            str(bucket_dic[prefix][1]))
                files.append(filename)
        return bucketmap, files
    except IOError:
        print 'Cannot get google files'
def get_userobjects(user=None, buckets=[]): """ retrieves objects matching the pattern with user. default generic case provided to return all files, but not currently used in the scope of project """ logging.debug('get userobject bucket=%s user=%s %d' % (str(buckets), str(user), len(buckets))) objects = [] error_str = '' pattern = None if user != None: pattern = config.Status_log_pattern % (user) else: pattern = None try: ## get all the buckets under the storage with the given key id ## if len(buckets) == 0: logging.debug('querying all buckets') uri = boto.storage_uri('', config.Google_storage) buckets = [bucket.name for bucket in uri.get_all_buckets()] ## list of objects ## for bucket in buckets: uri = boto.storage_uri(bucket, config.Google_storage) for obj_uri in uri.get_bucket(): if pattern != None: m = re.match(pattern, obj_uri.name) if m != None: objects.append(Bunch(obj_uri=obj_uri, pid=m.group(2))) else: #Note this case is currently not used objects.append(Bunch(obj_uri=obj_uri, pid=None)) except AttributeError, e: error_str = 'GSCloud::get_userlist Attribute Error %s' % (e) logging.error(error_str)
def download_object(bucket_name, object_name, debug_level):
    '''
    Performs a GET Object operation to download an object from the
    specified bucket.

    Input parameters:
    @param bucket_name: The name of the bucket that contains the object
        to download.
    @param object_name: The name of the object to download.
    @param debug_level: The level of debug messages to be printed.
    '''
    try:
        # Destination directory.
        # Replace the directory with one of your choice.
        dest_dir = os.getenv("HOME") + "/tmp/downloads/"

        # Define the object URI.
        uri = boto.storage_uri(bucket_name + "/" + object_name,
                               GOOGLE_STORAGE, debug_level)

        # Create a file-like object to hold the object contents.
        object_contents = StringIO.StringIO()

        # Get the object contents.
        uri.get_key().get_file(object_contents)

        # Set the local destination path.
        local_dest_uri = boto.storage_uri(
            os.path.join(dest_dir, object_name), LOCAL_FILE)

        # Download the object to the local destination.
        object_contents.seek(0)
        local_dest_uri.new_key().set_contents_from_file(object_contents)
        object_contents.close()
    except boto.exception.GSResponseError as e:
        logging.error("download_object, error occurred: %s", e)
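# Hedged usage sketch for download_object(): the bucket and object names are
# placeholders, and it assumes GOOGLE_STORAGE = 'gs', LOCAL_FILE = 'file', and
# valid ~/.boto credentials. Fetches 'report.csv' from 'my-test-bucket' with
# debug level 0, writing it under $HOME/tmp/downloads/.
download_object('my-test-bucket', 'report.csv', 0)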
def download_object(bucket, filename, dest_dir):
    print 'FUNCTION: DOWNLOAD OBJECT'
    print ''
    # Call list_object first.
    bucket_name = bucket.name
    # self.list_object()
    # filename = raw_input("Which file to download: ")
    # dest_dir = raw_input("Input the downloading directory: ")
    src_uri = boto.storage_uri(bucket_name + '/' + filename, GOOGLE_STORAGE)

    # Create a file-like object for holding the object contents.
    object_contents = StringIO.StringIO()

    # The unintuitively-named get_file() doesn't return the object
    # contents; instead, it actually writes the contents to
    # object_contents.
    try:
        src_uri.get_key().get_file(object_contents)
        dst_uri = boto.storage_uri(os.path.join(dest_dir, filename),
                                   LOCAL_FILE)
        object_contents.seek(0)
        dst_uri.new_key().set_contents_from_file(object_contents)
        object_contents.close()
    except:
        msg = 'Sorry, but failed to download file: "%s".' % filename
        return msg
    else:
        msg = 'Successfully downloaded "%s"!' % filename
        return msg
def get_userobjects(user=None, buckets=[]): """ retrieves objects matching the pattern with user. default generic case provided to return all files, but not currently used in the scope of project """ logging.debug('get userobject bucket=%s user=%s %d' %(str(buckets), str(user), len(buckets))) objects = [] error_str = '' pattern = None if user != None: pattern=config.Status_log_pattern%(user) else: pattern=None try: ## get all the buckets under the storage with the given key id ## if len(buckets) == 0: logging.debug('querying all buckets') uri = boto.storage_uri('', config.Google_storage) buckets = [bucket.name for bucket in uri.get_all_buckets()] ## list of objects ## for bucket in buckets: uri = boto.storage_uri(bucket, config.Google_storage) for obj_uri in uri.get_bucket(): if pattern != None: m = re.match(pattern, obj_uri.name) if m != None: objects.append(Bunch(obj_uri=obj_uri, pid=m.group(2))) else: #Note this case is currently not used objects.append(Bunch(obj_uri=obj_uri,pid=None)) except AttributeError, e: error_str = 'GSCloud::get_userlist Attribute Error %s'% (e) logging.error(error_str)
def test_roundtrip_versioned_gs_object_uri_parsed(self):
    uri_str = 'gs://bucket/obj#1359908801674000'
    uri = boto.storage_uri(uri_str, validate=False,
                           suppress_consec_slashes=False)
    roundtrip_uri = boto.storage_uri(uri.uri, validate=False,
                                     suppress_consec_slashes=False)
    self.assertEqual(uri.uri, roundtrip_uri.uri)
    self.assertEqual(uri.is_version_specific, True)
def _prepareTestFile(self, bucket, size=None):
    import boto
    fileName = 'testfile_%s' % uuid.uuid4()
    uri = 'gs://%s/%s' % (bucket.name, fileName)
    if size:
        with open('/dev/urandom', 'r') as readable:
            boto.storage_uri(uri).set_contents_from_string(readable.read(size))
    return uri
def upload_logical_node(logical_id, executable_path, parameter_file_path):
    executable_file = open(executable_path, 'r')
    executable_uri = boto.storage_uri(
        configs.bucket + '/' + logical_id + '/executable', GOOGLE_STORAGE)
    executable_uri.new_key().set_contents_from_file(executable_file)

    parameter_file = open(parameter_file_path, 'r')
    parameter_file_uri = boto.storage_uri(
        configs.bucket + '/' + logical_id + '/parameters.default',
        GOOGLE_STORAGE)
    # Upload the parameter file alongside the executable.
    parameter_file_uri.new_key().set_contents_from_file(parameter_file)
def remove_metadata(logical_id):
    # Delete each metadata object for this logical node.
    uri = boto.storage_uri(configs.bucket + "/" + logical_id + "/instances.txt",
                           GOOGLE_STORAGE)
    uri.delete_key()
    uri = boto.storage_uri(configs.bucket + "/" + logical_id + "/executable",
                           GOOGLE_STORAGE)
    uri.delete_key()
    uri = boto.storage_uri(configs.bucket + "/" + logical_id + "/params.default",
                           GOOGLE_STORAGE)
    uri.delete_key()
def CreateBucket(self, bucket_name=None, test_objects=0, storage_class=None,
                 provider=None, prefer_json_api=False):
    """Creates a test bucket.

    The bucket and all of its contents will be deleted after the test.

    Args:
      bucket_name: Create the bucket with this name. If not provided, a
          temporary test bucket name is constructed.
      test_objects: The number of objects that should be placed in the bucket.
          Defaults to 0.
      storage_class: Storage class to use. If not provided we use standard.
      provider: Provider to use - either "gs" (the default) or "s3".
      prefer_json_api: If true, use the JSON creation functions where possible.

    Returns:
      StorageUri for the created bucket.
    """
    if not provider:
        provider = self.default_provider

    if prefer_json_api and provider == 'gs':
        json_bucket = self.CreateBucketJson(bucket_name=bucket_name,
                                            test_objects=test_objects,
                                            storage_class=storage_class)
        bucket_uri = boto.storage_uri(
            'gs://%s' % json_bucket.name.encode(UTF8).lower(),
            suppress_consec_slashes=False)
        self.bucket_uris.append(bucket_uri)
        return bucket_uri

    bucket_name = bucket_name or self.MakeTempName('bucket')
    bucket_uri = boto.storage_uri('%s://%s' % (provider, bucket_name.lower()),
                                  suppress_consec_slashes=False)
    if provider == 'gs':
        # Apply API version and project ID headers if necessary.
        headers = {'x-goog-api-version': self.api_version}
        headers[GOOG_PROJ_ID_HDR] = PopulateProjectId()
    else:
        headers = {}

    # Parallel tests can easily run into bucket creation quotas.
    # Retry with exponential backoff so that we create them as fast as we
    # reasonably can.
    @Retry(StorageResponseError, tries=7, timeout_secs=1)
    def _CreateBucketWithExponentialBackoff():
        bucket_uri.create_bucket(storage_class=storage_class, headers=headers)

    _CreateBucketWithExponentialBackoff()
    self.bucket_uris.append(bucket_uri)
    for i in range(test_objects):
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name=self.MakeTempName('obj'),
                          contents='test %d' % i)
    return bucket_uri
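# Hypothetical test method showing how the CreateBucket() helper above might
# be used; the test name and object count are illustrative only.
def test_bucket_round_trip(self):
    # Create a temporary gs:// bucket pre-populated with two objects.
    bucket_uri = self.CreateBucket(test_objects=2)
    # The returned StorageUri can be used directly with boto.
    names = [key.name for key in bucket_uri.get_bucket()]
    self.assertEqual(len(names), 2)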
def _getUrlForTestFile(cls, size=None):
    import boto
    fileName = 'testfile_%s' % uuid.uuid4()
    bucket = cls._createExternalStore()
    uri = 'gs://%s/%s' % (bucket.name, fileName)
    if size:
        with open('/dev/urandom', 'r') as readable:
            boto.storage_uri(uri).set_contents_from_string(readable.read(size))
    return uri
def upload_file(file_path, bucket, new_filename=None, thumb_bucket=None):
    '''
    Upload the file object to a bucket.
    Add the original file path to its object metadata.
    '''
    if not new_filename:
        fn = get_basename(file_path)
    else:
        fn = new_filename

    dest_uri = boto.storage_uri(bucket + '/' + fn, 'gs')
    try:
        new_key = dest_uri.new_key()
    except boto.exception.NoAuthHandlerFound as e:
        logging.error(e)
        return None

    try:
        file_obj = open(file_path)
    except IOError as e:
        logging.error(e)
        return None

    new_key.update_metadata({'path': file_path})
    try:
        new_key.set_contents_from_file(file_obj)
        new_key.make_public()
    except boto.exception.GSResponseError as e:
        logging.error(e)
        # Do we have the credentials file set up?
        boto_cred_file = os.path.expanduser('~') + '/.boto'
        if not os.path.exists(boto_cred_file):
            logging.error(
                'Credentials file {} was not found.'.format(boto_cred_file))
        return None

    if thumb_bucket:
        file_obj.seek(0)
        im = Image.open(file_obj)
        im.thumbnail((260, 260))
        thumbnail = StringIO.StringIO()
        im.save(thumbnail, 'JPEG')
        thumb_uri = boto.storage_uri('{}/{}'.format(thumb_bucket, fn), 'gs')
        new_key = thumb_uri.new_key()
        # Save the thumbnail.
        thumbnail.seek(0)
        new_key.set_contents_from_file(thumbnail)

    file_obj.close()
    return str(dest_uri)
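# Hedged usage sketch for upload_file(): the bucket names and local path are
# placeholders, not values from the original code.
photo_uri = upload_file('/tmp/photos/cat.jpg', 'my-photos-bucket',
                        thumb_bucket='my-thumbnails-bucket')
if photo_uri is None:
    logging.error('Upload failed; check the ~/.boto credentials file.')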
def downloadGCImages():
    # http://127.0.0.1:53000/downloadGCImages
    print("inside downloadGCImages2")
    imagelist = []
    DOGS_BUCKET = "rajpics"
    CATS_BUCKET = "rajpics"

    # Listing objects.
    uri = boto.storage_uri(DOGS_BUCKET, GOOGLE_STORAGE)
    for obj in uri.get_bucket():
        print('%s://%s/%s' % (uri.scheme, uri.bucket_name, obj.name))
        # print(' "%s"' % obj.get_contents_as_string())
        # , "image": obj.get_contents_as_string()
        imagelist.append({"bucket_name": uri.bucket_name, "name": obj.name})
        print("image added to json list")

        src_uri = boto.storage_uri(DOGS_BUCKET + '/' + obj.name, GOOGLE_STORAGE)
        # Create a file-like object for holding the object contents.
        object_contents = io.BytesIO()
        # object_contents = obj.get_contents_as_string()

        # The unintuitively-named get_file() doesn't return the object
        # contents; instead, it actually writes the contents to
        # object_contents.
        src_uri.get_key().get_file(object_contents)
        print("after get file")

        local_dst_uri = boto.storage_uri(os.path.join(tmplocaldir, obj.name),
                                         LOCAL_FILE)
        bucket_dst_uri = boto.storage_uri(CATS_BUCKET + '/' + obj.name,
                                          GOOGLE_STORAGE)
        print("before writing to local file")
        # for dst_uri in (local_dst_uri, bucket_dst_uri):
        #     print("before seek")
        #     object_contents.seek(0)
        #     print("after seek")
        #     dst_uri.new_key().set_contents_from_file(object_contents)
        #     print("after write")
        print("before seek")
        object_contents.seek(0)
        print("after seek")
        local_dst_uri.new_key().set_contents_from_file(object_contents)
        print("after write")
        object_contents.close()

    print("Images written to dir:" + tmplocaldir)
    return app.response_class(json.dumps(imagelist),
                              content_type='application/json')
def TestGetPathBeforeFinalDir(self):
    """Tests _GetPathBeforeFinalDir() (unit test)."""
    self.assertEqual('gs://',
                     cp._GetPathBeforeFinalDir(storage_uri('gs://bucket/')))
    self.assertEqual('gs://bucket',
                     cp._GetPathBeforeFinalDir(storage_uri('gs://bucket/dir/')))
    self.assertEqual('gs://bucket',
                     cp._GetPathBeforeFinalDir(storage_uri('gs://bucket/dir')))
    self.assertEqual('gs://bucket/dir',
                     cp._GetPathBeforeFinalDir(
                         storage_uri('gs://bucket/dir/obj')))
    self.assertEqual('file://%s' % self.src_dir_root.rstrip('/'),
                     cp._GetPathBeforeFinalDir(storage_uri(
                         'file://%sdir0/' % self.src_dir_root)))
def delete(self, widget):
    if self.remote_treeview.get_selection().count_selected_rows() == 0:
        print 'Please select at least one file'
        return 0
    row = self.remote_treeview.get_selection().get_selected_rows()[1][0]
    remote_name = self.remote_liststore[row][0]
    print remote_name
    if self.list_status == 'bucket':
        remote_uri = boto.storage_uri(remote_name, 'gs')
        remote_uri.delete_bucket()
        self.refresh(self)
    elif self.list_status == 'object':
        remote_uri = boto.storage_uri(self.bucket + os.sep + remote_name, 'gs')
        remote_uri.delete_key()
        self.refresh(self)
def create_bucket():
    import datetime
    now = time.time()
    bucket_name = 'lighthouse-%d' % now

    # Your project ID can be found at https://console.cloud.google.com/
    # If there is no domain for your project, then project_id = 'YOUR_PROJECT'
    project_id = project_name

    # Instantiate a BucketStorageUri object.
    if bucket_name:
        # Try to create the bucket.
        try:
            uri = boto.storage_uri(bucket_name, GOOGLE_STORAGE)
            # If the default project is defined, you do not need the headers.
            # Just call: uri.create_bucket()
            header_values = {"x-goog-project-id": project_id}
            uri.create_bucket(headers=header_values)
        except boto.exception.StorageCreateError as e:
            print('Failed to create bucket: %s' % e)
            return None
        print('Successfully created bucket "%s"' % bucket_name)
    return bucket_name
def _test_storage_uri(uri_str, default_scheme='file', debug=0, validate=True):
    """Convenience method for instantiating a testing instance of StorageUri.

    This makes it unnecessary to specify
    bucket_storage_uri_class=mock_storage_service.MockBucketStorageUri.
    Also, naming the factory method this way makes it clearer in the test
    code that StorageUri needs to be set up for testing.

    Args, Returns, and Raises are the same as for boto.storage_uri(), except
    there's no bucket_storage_uri_class arg.

    Args:
      uri_str: URI string to create the StorageUri for.
      default_scheme: Default scheme for the StorageUri.
      debug: Debug level to pass to the underlying connection (0..3).
      validate: If True, validate the resource that the StorageUri refers to.

    Returns:
      StorageUri based on the arguments.
    """
    return boto.storage_uri(uri_str, default_scheme, debug, validate,
                            util.GSMockBucketStorageUri)
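# Short, hypothetical example of how a unit test might use this factory; the
# URI is illustrative. The returned StorageUri is backed by the mock storage
# service rather than a real GCS connection.
mock_uri = _test_storage_uri('gs://test-bucket/test-object', validate=False)
assert mock_uri.object_name == 'test-object'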
def cleanup(self, id, name, stats):
    gslocation = "gs://%s/%s" % (self.bucket, id)
    logging.info("Clearing down older '%s' backups from GS (%s)..." %
                 (name, gslocation))
    uri = boto.storage_uri(self.bucket, 'gs')
    bucket = uri.get_bucket()

    # Gather list of potentials first.
    candidates = []
    for key in bucket.list(prefix="%s/" % id):
        parsed_date = parser.parse(key.last_modified)
        candidates.append([parsed_date, key.name])
    candidates.sort()

    # Loop and purge unretainable copies.
    removable_names = []
    if self.retention_copies > 0:
        names = [name for d, name in candidates]
        if len(names) > self.retention_copies:
            removable_names = names[0:(len(names) - self.retention_copies)]
    if self.retention_days > 0:
        for d, name in candidates:
            days = (datetime.datetime.now(tz.tzutc()) - d).days
            if days > self.retention_days:
                removable_names.append(name)
    for name in removable_names:
        logging.info("Removing '%s'..." % name)
        bucket.get_key(name).delete()

    # Return number of copies left.
    stats.retained_copies = len(candidates) - len(removable_names)
def _hashTestFile(self, url):
    import boto
    from toil.jobStores.googleJobStore import GoogleJobStore
    projectID, uri = GoogleJobStore._getResources(urlparse.urlparse(url))
    uri = boto.storage_uri(uri)
    contents = uri.get_contents_as_string(headers=self.headers)
    return hashlib.md5(contents).hexdigest()
def __getMatchingKeys(self, dataPath, filename=None, includeDirectories=False,
                      recursive=False):
    parse = _BotoClient.parseQuery(dataPath)
    storageScheme = parse[0]
    bucketName = parse[1]
    keyName = parse[2]

    if storageScheme == 's3' or storageScheme == 's3n':
        conn = S3ConnectionWithAnon(*self.awsCredentialsOverride.credentials)
        bucket = conn.get_bucket(bucketName)
    elif storageScheme == 'gs':
        conn = boto.storage_uri(bucketName, 'gs')
        bucket = conn.get_bucket()
    else:
        raise NotImplementedError("No file reader implementation for URL scheme "
                                  + storageScheme)

    if filename:
        # Check whether the last section of dataPath refers to a directory.
        if not keyName.endswith("/"):
            if self.checkPrefix(bucket, keyName + "/"):
                # keyName is a directory, but we've omitted the trailing "/".
                keyName += "/"
            else:
                # Assume keyName refers to an object other than a directory;
                # look for filename in the same directory as keyName.
                slashIdx = keyName.rfind("/")
                if slashIdx >= 0:
                    keyName = keyName[:(slashIdx + 1)]
                else:
                    # No directory separators, so our object is in the top
                    # level of the bucket.
                    keyName = ""
        keyName += filename

    return (storageScheme,
            _BotoClient.retrieveKeys(bucket, keyName, prefix=parse[3],
                                     postfix=parse[4],
                                     includeDirectories=includeDirectories,
                                     recursive=recursive))
def _ListObjects(storage_schema, bucket, prefix, host_to_connect=None):
    """List objects under a bucket given a prefix.

    Args:
      storage_schema: The address schema identifying a storage. e.g., "gs"
      bucket: Name of the bucket.
      prefix: A prefix to list from.
      host_to_connect: An optional endpoint string to connect to.

    Returns:
      A list of object names.
    """
    bucket_list_result = None
    if _useBotoApi(storage_schema):
        bucket_uri = boto.storage_uri(bucket, storage_schema)
        if host_to_connect is not None:
            bucket_uri.connect(host=host_to_connect)
        bucket_list_result = bucket_uri.list_bucket(prefix=prefix)
    else:
        bucket_list_result = _AZURE_BLOB_SERVICE.list_blobs(bucket,
                                                            prefix=prefix)

    list_result = []
    for k in bucket_list_result:
        list_result.append(k.name)
    return list_result
def ReadObjects(storage_schema, bucket, objects_to_read, latency_results=None,
                bandwidth_results=None, object_size=None,
                host_to_connect=None):
    """Read a bunch of objects.

    Args:
      storage_schema: The address schema identifying a storage. e.g., "gs"
      bucket: Name of the bucket.
      objects_to_read: A list of names of objects to read.
      latency_results: An optional list to receive latency results.
      bandwidth_results: An optional list to receive bandwidth results.
      object_size: Size of the object that will be read, used to calculate
          bandwidth.
      host_to_connect: An optional endpoint string to connect to.
    """
    for object_name in objects_to_read:
        start_time = time.time()
        try:
            if _useBotoApi(storage_schema):
                object_path = '%s/%s' % (bucket, object_name)
                object_uri = boto.storage_uri(object_path, storage_schema)
                object_uri.connect(host=host_to_connect)
                object_uri.new_key().get_contents_as_string()
            else:
                _AZURE_BLOB_SERVICE.get_blob_to_bytes(bucket, object_name)

            latency = time.time() - start_time
            if latency_results is not None:
                latency_results.append(latency)
            if (bandwidth_results is not None and
                    object_size is not None and latency > 0.0):
                bandwidth_results.append(object_size / latency)
        except:
            logging.exception('Failed to read object %s', object_name)
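# Hedged sketch of driving the listing and reading helpers from a benchmark
# loop: the bucket name and prefix are placeholders, and it assumes the
# module-level _useBotoApi()/_AZURE_BLOB_SERVICE plumbing above.
latencies = []
object_names = _ListObjects('gs', 'pkb-test-bucket', prefix='run-0/')
ReadObjects('gs', 'pkb-test-bucket', object_names,
            latency_results=latencies, object_size=1024 * 1024)
if latencies:
    latencies.sort()
    print 'median read latency: %.3f s' % latencies[len(latencies) // 2]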
def setUp(self):
    """Initializes for each test."""
    # Create the test bucket.
    hostname = socket.gethostname().split('.')[0]
    uri_base_str = 'gs://res-download-test-%s-%s-%s' % (
        hostname, os.getpid(), int(time.time()))
    self.src_bucket_uri = storage_uri('%s-dst' % uri_base_str)
    self.src_bucket_uri.create_bucket()

    # Create test source objects.
    self.empty_src_key_size = 0
    (self.empty_src_key_as_string, self.empty_src_key) = (
        self.build_input_object('empty', self.empty_src_key_size))
    self.small_src_key_size = 2 * 1024  # 2 KB.
    (self.small_src_key_as_string, self.small_src_key) = (
        self.build_input_object('small', self.small_src_key_size))
    self.larger_src_key_size = 500 * 1024  # 500 KB.
    (self.larger_src_key_as_string, self.larger_src_key) = (
        self.build_input_object('larger', self.larger_src_key_size))

    # Use a designated tmpdir prefix to make it easy to find the end of
    # the tmp path.
    self.tmpdir_prefix = 'tmp_resumable_download_test'

    # Create temp dir and name for download file.
    self.tmp_dir = tempfile.mkdtemp(prefix=self.tmpdir_prefix)
    self.dst_file_name = '%s%sdst_file' % (self.tmp_dir, os.sep)
    self.tracker_file_name = '%s%stracker' % (self.tmp_dir, os.sep)

    # Create a file-like object for the destination of each download test.
    self.dst_fp = open(self.dst_file_name, 'w')
    self.created_test_data = True
def test_upload_with_file_content_change_during_upload(self):
    """
    Tests resumable upload on a file that changes one byte of content
    (so, size stays the same) while the upload is in progress.
    """
    test_file_size = 500 * 1024  # 500 KB.
    test_file = self.build_input_file(test_file_size)[1]
    harness = CallbackTestHarness(fail_after_n_bytes=test_file_size / 2,
                                  fp_to_change=test_file,
                                  # Write to byte 1, as the CallbackTestHarness
                                  # writes 3 bytes. This will result in the
                                  # data on the server being different than
                                  # the local file.
                                  fp_change_pos=1)
    res_upload_handler = ResumableUploadHandler(num_retries=1)
    dst_key = self._MakeKey(set_contents=False)
    bucket_uri = storage_uri('gs://' + dst_key.bucket.name)
    dst_key_uri = bucket_uri.clone_replace_name(dst_key.name)
    try:
        dst_key.set_contents_from_file(
            test_file, cb=harness.call,
            res_upload_handler=res_upload_handler)
        self.fail('Did not get expected ResumableUploadException')
    except ResumableUploadException as e:
        self.assertEqual(e.disposition, ResumableTransferDisposition.ABORT)
        # Ensure the file size didn't change.
        test_file.seek(0, os.SEEK_END)
        self.assertEqual(test_file_size, test_file.tell())
        self.assertNotEqual(
            e.message.find('md5 signature doesn\'t match etag'), -1)
        # Ensure the bad data wasn't left around.
        try:
            dst_key_uri.get_key()
            self.fail('Did not get expected InvalidUriError')
        except InvalidUriError as e:
            pass
def test_default_object_acls_storage_uri(self):
    """Test default object ACLs using storage_uri."""
    # Create a new bucket.
    bucket = self._MakeBucket()
    bucket_name = bucket.name
    uri = storage_uri('gs://' + bucket_name)

    # Get the default ACL and make sure it's project-private.
    acl = uri.get_def_acl()
    self.assertIsNotNone(
        re.search(PROJECT_PRIVATE_RE, acl.to_xml()),
        'PROJECT_PRIVATE_RE not found in ACL XML:\n' + acl.to_xml())

    # Set the default ACL to a canned ACL and verify it gets set.
    uri.set_def_acl('public-read')
    acl = uri.get_def_acl()
    # Save the public-read ACL for a later test.
    public_read_acl = acl
    self.assertEqual(acl.to_xml(),
                     ('<AccessControlList><Entries><Entry>'
                      '<Scope type="AllUsers"></Scope><Permission>READ</Permission>'
                      '</Entry></Entries></AccessControlList>'))

    # Back to private ACL.
    uri.set_def_acl('private')
    acl = uri.get_def_acl()
    self.assertEqual(acl.to_xml(),
                     '<AccessControlList></AccessControlList>')

    # Set the default ACL to an XML ACL and verify it gets set.
    uri.set_def_acl(public_read_acl)
    acl = uri.get_def_acl()
    self.assertEqual(acl.to_xml(),
                     ('<AccessControlList><Entries><Entry>'
                      '<Scope type="AllUsers"></Scope><Permission>READ</Permission>'
                      '</Entry></Entries></AccessControlList>'))

    # Back to private ACL.
    uri.set_def_acl('private')
    acl = uri.get_def_acl()
    self.assertEqual(acl.to_xml(),
                     '<AccessControlList></AccessControlList>')
def store_executable(bucket, executable_name):
    try:
        executable_file = open(executable_name, 'r')
        uri = boto.storage_uri(bucket + '/executable', GOOGLE_STORAGE)
        uri.new_key().set_contents_from_file(executable_file)
    except IOError as e:
        print "can't open the executable file to read: %s" % e
def store_param_file(bucket, param_file_name):
    try:
        param_file = open(param_file_name, 'r')
        uri = boto.storage_uri(bucket + '/params.default', GOOGLE_STORAGE)
        uri.new_key().set_contents_from_file(param_file)
    except IOError as e:
        print "can't open the parameter file to read: %s" % e
def test_cp_v_option(self):
    # Tests that the cp -v option returns the created object's
    # version-specific URI.
    bucket_uri = self.CreateVersionedBucket()
    k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data1')
    k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
    g1 = k1_uri.generation

    # Case 1: Upload file to object using one-shot PUT.
    tmpdir = self.CreateTempDir()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
    self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)

    # Case 2: Upload file to object using resumable upload.
    size_threshold = boto.config.get('GSUtil', 'resumable_threshold', TWO_MB)
    file_as_string = os.urandom(size_threshold)
    tmpdir = self.CreateTempDir()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=file_as_string)
    self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)

    # Case 3: Upload stream to object.
    self._run_cp_minus_v_test('-v', '-', k2_uri.uri)

    # Case 4: Download object to file. For this case we just expect output of
    # gsutil cp -v to be the URI of the file.
    tmpdir = self.CreateTempDir()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir)
    dst_uri = storage_uri(fpath1)
    stderr = self.RunGsUtil(['cp', '-v', suri(k1_uri), suri(dst_uri)],
                            return_stderr=True)
    self.assertIn('Created: %s' % dst_uri.uri, stderr.split('\n')[-2])

    # Case 5: Daisy-chain from object to object.
    self._run_cp_minus_v_test('-Dv', k1_uri.uri, k2_uri.uri)
def DeleteObjects(storage_schema, bucket, objects_to_delete,
                  host_to_connect=None, objects_deleted=None):
    """Delete a bunch of objects.

    Args:
      storage_schema: The address schema identifying a storage. e.g., "gs"
      bucket: Name of the bucket.
      objects_to_delete: A list of names of objects to delete.
      host_to_connect: An optional endpoint string to connect to.
      objects_deleted: An optional list to record the objects that have been
          successfully deleted.
    """
    for object_name in objects_to_delete:
        try:
            if _useBotoApi(storage_schema):
                object_path = '%s/%s' % (bucket, object_name)
                object_uri = boto.storage_uri(object_path, storage_schema)
                if host_to_connect is not None:
                    object_uri.connect(host=host_to_connect)
                object_uri.delete_key()
            else:
                _AZURE_BLOB_SERVICE.delete_blob(bucket, object_name)
            if objects_deleted is not None:
                objects_deleted.append(object_name)
        except:
            logging.exception('Caught exception while deleting object %s.',
                              object_name)
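# Hypothetical cleanup call mirroring the read sketch above; the bucket name
# is again a placeholder.
deleted = []
DeleteObjects('gs', 'pkb-test-bucket', object_names, objects_deleted=deleted)
logging.info('Deleted %d of %d objects.', len(deleted), len(object_names))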
def get_path(path):
    prefix = ""
    if path.endswith(".gz") and ".tar.gz" not in path:
        prefix = "/vsigzip/"

    uri = parse_uri(path)
    if uri.scheme == "file":
        path = uri.uri_path if os.path.exists(uri.uri_path) else None
    elif uri.scheme == "s3":
        conn = connect_s3()
        bucket = conn.get_bucket(uri.bucket_id)
        key = bucket.lookup(uri.key_id)
        if prefix == "":
            prefix = "/"
        prefix += os.path.join(prefix, "vsicurl")
        path = key.generate_url(60 * 60) if key is not None else key
    elif uri.scheme == 'gs':
        storage_uri = boto.storage_uri(uri.bucket_id, uri.scheme)
        bucket = storage_uri.get_bucket(uri.bucket_id)
        key = bucket.lookup(uri.key_id)
        if prefix == "":
            prefix = "/"
        prefix += os.path.join(prefix, "vsicurl/")
        path = key.generate_url(60 * 60) if key is not None else key
    return prefix + path
def CreateBucket(self, bucket_name=None, test_objects=0, storage_class=None):
    """Creates a test bucket.

    The bucket and all of its contents will be deleted after the test.

    Args:
      bucket_name: Create the bucket with this name. If not provided, a
          temporary test bucket name is constructed.
      test_objects: The number of objects that should be placed in the bucket.
          Defaults to 0.
      storage_class: Storage class to use. If not provided we use standard.

    Returns:
      StorageUri for the created bucket.
    """
    bucket_name = bucket_name or self.MakeTempName('bucket')
    bucket_uri = boto.storage_uri('gs://%s' % bucket_name.lower(),
                                  suppress_consec_slashes=False)
    # Apply API version and project ID headers if necessary.
    headers = {'x-goog-api-version': self.api_version}
    self.proj_id_handler.FillInProjectHeaderIfNeeded('test', bucket_uri,
                                                     headers)
    bucket_uri.create_bucket(storage_class=storage_class, headers=headers)
    self.bucket_uris.append(bucket_uri)
    for i in range(test_objects):
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name=self.MakeTempName('obj'),
                          contents='test %d' % i)
    return bucket_uri
def test_create_with_retention_months(self):
    bucket_name = self.MakeTempName('bucket')
    bucket_uri = boto.storage_uri('gs://%s' % (bucket_name.lower()),
                                  suppress_consec_slashes=False)
    self.RunGsUtil(['mb', '--retention', '1m', suri(bucket_uri)])
    self.VerifyRetentionPolicy(
        bucket_uri, expected_retention_period_in_seconds=SECONDS_IN_MONTH)
def connection_with_gs(name):
    """Connect to GS."""
    import boto
    conn = boto.storage_uri(name, 'gs')
    return conn
def upload(self, obj):
    sys.stdout.write('Uploading %s/%s %s... ' %
                     (obj.parent.name, obj.name, format_size(obj.size)))
    sys.stdout.flush()
    with open(obj.full_path, 'r') as localfile:
        dest_uri = boto.storage_uri(CONFIG['bucket_id'] + obj.full_path, 'gs')
        dest_uri.new_key().set_contents_from_file(localfile)
    sys.stdout.write('Done!\n')
def OutputBenchExpectations(bench_type, rev_min, rev_max, representation_alg):
    """Reads bench data from Google Storage and outputs expectations.

    Ignores data with revisions outside the [rev_min, rev_max] integer range.
    For bench data with multiple revisions, we use higher revisions to
    calculate expected bench values.
    bench_type is either 'micro' or 'skp', according to the flag '-b'.
    Uses the provided representation_alg for calculating bench representations.
    """
    if bench_type not in BENCH_TYPES:
        raise Exception('Not valid bench_type! (%s)' % BENCH_TYPES)
    expectation_dic = {}
    uri = boto.storage_uri(URI_BUCKET, GOOGLE_STORAGE_URI_SCHEME)
    for obj in uri.get_bucket():
        # Filters out non-bench files.
        if ((not obj.name.startswith('perfdata/%s' % BENCH_BUILDER_PREFIX) and
             not obj.name.startswith(
                 'playback/perfdata/%s' % BENCH_BUILDER_PREFIX)) or
                obj.name.find('_data') < 0):
            continue
        if ((bench_type == 'micro' and obj.name.find('_data_skp_') > 0) or
                (bench_type == 'skp' and obj.name.find('_skp_') < 0)):
            # Skips wrong bench type.
            continue
        # Ignores uninteresting platforms.
        platform = obj.name.split('/')[1]
        if not platform.startswith(BENCH_BUILDER_PREFIX):
            platform = obj.name.split('/')[2]
        if not platform.startswith(BENCH_BUILDER_PREFIX):
            continue  # Ignores non-platform object.
        if platform not in PLATFORMS:
            continue
        # Filters by revision.
        to_filter = True
        for rev in range(rev_min, rev_max + 1):
            if '_r%s_' % rev in obj.name:
                to_filter = False
                break
        if to_filter:
            continue
        contents = cStringIO.StringIO()
        obj.get_file(contents)
        for point in bench_util.parse('', contents.getvalue().split('\n'),
                                      representation_alg):
            if point.config in CONFIGS_TO_FILTER:
                continue
            key = '%s_%s_%s,%s-%s' % (point.bench, point.config,
                                      point.time_type, platform,
                                      representation_alg)
            # It is fine to have later revisions overwrite earlier benches,
            # since we only use the latest bench within the revision range to
            # set expectations.
            expectation_dic[key] = point.time
    keys = expectation_dic.keys()
    keys.sort()
    for key in keys:
        bench_val = expectation_dic[key]
        # Prints out expectation lines.
        print '%s,%.3f,%.3f,%.3f' % (
            key, bench_val,
            bench_val * BENCH_LB - BENCH_ALLOWED_NOISE,
            bench_val * BENCH_UB + BENCH_ALLOWED_NOISE)
def testCompose(self):
    data1 = 'hello '
    data2 = 'world!'
    expected_crc = 1238062967

    b = self._MakeBucket()
    bucket_uri = storage_uri("gs://%s" % b.name)
    key_uri1 = bucket_uri.clone_replace_name("component1")
    key_uri1.set_contents_from_string(data1)
    key_uri2 = bucket_uri.clone_replace_name("component2")
    key_uri2.set_contents_from_string(data2)

    # Simple compose.
    key_uri_composite = bucket_uri.clone_replace_name("composite")
    components = [key_uri1, key_uri2]
    key_uri_composite.compose(components, content_type='text/plain')
    self.assertEquals(key_uri_composite.get_contents_as_string(),
                      data1 + data2)
    composite_key = key_uri_composite.get_key()
    cloud_crc32c = binascii.hexlify(composite_key.cloud_hashes['crc32c'])
    self.assertEquals(cloud_crc32c, hex(expected_crc)[2:])
    self.assertEquals(composite_key.content_type, 'text/plain')

    # Compose disallowed between buckets.
    key_uri1.bucket_name += '2'
    try:
        key_uri_composite.compose(components)
        self.fail('Composing between buckets didn\'t fail as expected.')
    except BotoClientError as err:
        self.assertEquals(err.reason,
                          'GCS does not support inter-bucket composing')
def move_object(bucket_from, bucket_to, file):
    conn = connection.connect()
    bucket_from_name = bucket_from.name
    # self.list_object()
    # src_uri = boto.storage_uri(bucket_from_name + '/' + file.name, 'gs')

    # Create a file-like object for holding the object contents.
    object_contents = StringIO.StringIO()
    try:
        from boto.s3.key import Key
        k = Key(bucket_from)
        k.key = file.name
        # k.key.get_file(object_contents)
        k.get_contents_to_file(object_contents)

        dst_uri = boto.storage_uri(bucket_to.name + '/' + file.name, 'gs')
        object_contents.seek(0)
        dst_uri.new_key().set_contents_from_file(object_contents)
        # bucket = conn.get_bucket(bucket_to.name)
        # key = bucket.new_key(file.name)
        # key.set_contents_from_file(object_contents)
        object_contents.close()
    except:
        msg = 'Sorry, but failed to move file: "%s".' % file.name
        return msg
    else:
        msg = 'Successfully moved "%s"!' % file.name
        return msg