def check(key):
    try:
        gcs.stat(key, retry_params=write_retry_params)
        return True
    except Exception:
        return False
def migrate(image):
    """Copies blobs stored in Blobstore over to a GCS bucket.

    Args:
        image: main.Image instance representing a single entity in the
            Datastore.

    This does not delete migrated (old) blobs so it is safe to run the job
    multiple times.
    """
    if image.blob_key and not image.gs_key:
        blob_info = blobstore.get(image.blob_key)
        if not blob_info:
            image.blob_key = None
        else:
            gs_key = '/'.join(['', BUCKET, blob_info.filename])
            try:
                gcs.stat(gs_key)
            except gcs.NotFoundError:
                reader = blobstore.BlobReader(blob_info)
                with gcs.open(gs_key, 'w',
                              content_type=blob_info.content_type) as f:
                    while True:
                        data = reader.read(1024 ** 2)
                        if not data:
                            break
                        f.write(data)
            blob_gs_key = blobstore.create_gs_key('/gs' + gs_key)
            image.url = images_api.get_serving_url(blob_gs_key, secure_url=True)
            image.gs_key = gs_key
        yield op.db.Put(image)
    if image.gs_key:
        yield op.counters.Increment('Migrated')
def _is_gs_file_present(self, gs_path):
    """True if given GS file exists."""
    try:
        cloudstorage.stat(filename=gs_path, retry_params=self._retry_params)
    except cloudstorage.NotFoundError:
        return False
    return True
def check_backup_exists(self, backup_bucket, file_name):
    try:
        cloudstorage.stat("/{}/{}".format(backup_bucket, file_name))
    except Exception as e:
        logging.info("Unable to find backup {} in GCS bucket {} - {}".format(
            file_name, backup_bucket, e.message))
        self.abort(400, "Unable to check backup exists")
def exists(bucket, filename):
    """Return True if the filename exists in cloudstorage. False otherwise."""
    destination = get_cloudstorage_filename(bucket, filename)
    try:
        cloudstorage.stat(destination)
        return True
    except cloudstorage.NotFoundError:
        return False
def _get_bucket_file_handle(date):
    bucket = '/rogerthat-protocol-logs'
    counter = 0
    while True:
        counter += 1
        file_name = "%s/protocol-logs-%04d%02d%02d.%d" % (
            bucket, date.year, date.month, date.day, counter)
        try:
            cloudstorage.stat(file_name)
        except cloudstorage.NotFoundError:
            break
    return cloudstorage.open(file_name, 'w', content_type='text/plain')
def serveProxyCached(self, url):
    # does the url not exist in the bucket?
    try:
        gcs.stat(url)
    except gcs.NotFoundError:
        # download the data and store it into the bucket.
        result = urlfetch.fetch(url)
        if result.status_code == 200:
            # todo: store it
            pass
        else:
            return webapp2.abort(510)
def _file_exists(destination):
    """Checks if a file exists.

    Tries to do a stat check on the file. If it succeeds returns True,
    otherwise False.

    Args:
        destination: Full path to the file (ie. /bucket/object) w/ leading slash

    Returns:
        True if the file is accessible, otherwise False
    """
    try:
        cloudstorage.stat(destination)
        return True
    except cloud_errors.NotFoundError:
        return False
def render_gcs(self, path, fallback=None):
    try:
        content_type = gcs.stat(path).content_type
        gcs_file = gcs.open(path)
        self.response.headers.add_header("Content-Type", content_type)
        self.response.write(gcs_file.readline())
        stat = gcs.stat(path)
        self.response.write(gcs_file.read())
        gcs_file.close()
    except gcs.NotFoundError:
        logging.warning(fallback)
        if fallback is not None:
            self.render_gcs(fallback)
        else:
            self.error(404)
def get(self):
    try:
        ch = 0
        # getting Download File from Client
        dlFileName = self.request.get("action")
        cacheFlag = self.request.get("cacheFlag")
        # cacheFlag = False; Fetch list from GCS
        if str(cacheFlag) == "false":
            filecontent = memcache.get(str(dlFileName))
            if filecontent is not None:
                self.response.write("Memcache File Content:")
                self.response.write("<br>" + filecontent + "")
            stat = gcs.stat(bucket_name + "/" + str(dlFileName))
            gcs_file = gcs.open(bucket_name + "/" + str(dlFileName), 'r')
            filecontent = gcs_file.read()
            fileName = str(dlFileName)
            self.response.write(filecontent)
            gcs_file.close()
            stat = None
        # cacheFlag = False; Fetch list from Memcache
        else:
            filecontent = memcache.get(str(dlFileName))
            print("printing memcache filecontent to response")
            self.response.write(filecontent)
    except Exception:
        print "server exception"
        self.response.write("")
def AuthenticatedGet(self, org, event):
    filename = all_event_timeless_filename(event)
    bucket_path = BUCKET_NAME + '/' + filename
    try:
        file_stat = cloudstorage.stat(bucket_path)
    except cloudstorage.NotFoundError:
        self.abort(404)

    # rewrite filename to include timestamp
    custom_timestamp = timestamp(
        datetime.datetime.utcfromtimestamp(file_stat.st_ctime))
    filename_to_serve = file_stat.filename.replace(
        '.csv', '-%s.csv' % custom_timestamp
    )

    # serve the file as an attachment, forcing download
    gcs_fd = cloudstorage.open(bucket_path)
    if file_stat.content_type:
        self.response.headers['Content-Type'] = file_stat.content_type
    self.response.headers['Content-Disposition'] = (
        str('attachment; filename="%s"' % filename_to_serve)
    )
    self.response.write(gcs_fd.read())
def AuthenticatedGet(self, org, event):
    filename = self.request.get('filename')
    if not filename:
        self.abort(404)

    # check we are allowed to get this file, by filename
    allowed_filename_substrs = [
        inc.filename_friendly_name for inc in org.incidents]
    allowed_to_access = (
        org.is_global_admin or
        any(s in filename for s in allowed_filename_substrs)
    )
    if not allowed_to_access:
        self.abort(403)

    # find file in GCS
    bucket_path = BUCKET_NAME + '/' + filename
    try:
        file_stat = cloudstorage.stat(bucket_path)
    except cloudstorage.NotFoundError:
        # say not ready yet (HTTP 202)
        self.response.set_status(202)
        return

    # send the file contents & force download
    gcs_fd = cloudstorage.open(bucket_path)
    if file_stat.content_type:
        self.response.headers['Content-Type'] = file_stat.content_type
    self.response.headers['Content-Disposition'] = (
        str('attachment; filename="%s"' % filename)
    )
    self.response.write(gcs_fd.read())
def get(self, *args, **kwargs):
    from urllib import unquote
    uri = self.request.path
    uri = unquote(uri.encode('ascii')).decode('utf-8')
    try:
        gcs_object = cloudstorage.open(
            uri,
            mode="r",
        )
    except cloudstorage.NotFoundError:
        if uri == "/dres/directory":
            self.get_listing()
            return  # awk :-/
        else:
            self.response.set_status(404)
    else:
        gcs_object_info = cloudstorage.stat(
            uri,
        )
        self.response.headers['Content-Type'] = gcs_object_info.content_type
        self.response.write(gcs_object.read())
        gcs_object.close()
def size(self, name):
    try:
        info = cloudstorage.stat(self._add_bucket(name))
    except cloudstorage.NotFoundError:
        return None
    else:
        return info.st_size
def update_response_headers(self, request_headers, response_headers):
    try:
        # cloudstorage.stat doesn't use "/gs" prefix.
        gs_object_name = self.gs_object_name[3:]
        stat = cloudstorage.stat(gs_object_name)
    except cloudstorage.errors.NotFoundError as e:
        raise FileNotFoundError(str(e))

    headers = {}
    time_obj = datetime.fromtimestamp(stat.st_ctime).timetuple()
    headers['Last-Modified'] = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                             time_obj)
    headers['ETag'] = '"{}"'.format(stat.etag)

    if stat.content_type:
        headers['Content-Type'] = stat.content_type

    # The presence of "X-AppEngine-BlobKey" tells App Engine that we want to
    # serve the GCS blob directly to the user. This avoids reading the blob
    # data into the App Engine application. If the user has the file cached
    # already, omit the X-AppEngine-BlobKey header since we want to serve an
    # empty response with a 304 status code.
    request_etag = request_headers.get('If-None-Match')
    if request_etag != headers['ETag']:
        key = blobstore.create_gs_key(self.gs_object_name)
        headers['X-AppEngine-BlobKey'] = key

    response_headers.update(headers)
def post(self):
    """Copy uploaded files to provided destination

    Returns:
        string: path to uploaded path
    """
    if not self.get_file_infos():
        self.abort(400, "No file has been uploaded")

    fileinfo = self.get_file_infos()[0]
    try:
        import cloudstorage as gcs
    except ImportError:
        self.abort(
            500, 'GoogleAppEngineCloudStorageClient module is required')

    stat = gcs.stat(fileinfo.gs_object_name[3:])
    destpath = "/".join(stat.filename.split("/")[:-1])
    gcs.copy2(fileinfo.gs_object_name[3:], destpath)
    gcs.delete(fileinfo.gs_object_name[3:])

    if spiner.env.is_local_env():
        url = '/_ah/gcs{}'.format(destpath)
    else:
        url = 'https://storage.googleapis.com{}'.format(destpath)
    self.response.write(url)
def testRecoverSomethingWrittenInFailedInstance(self):
    mr_spec = self.mr_state.mapreduce_spec
    shard_state = self.create_shard_state(0)
    ctx = context.Context(mr_spec, shard_state)
    context.Context._set(ctx)

    writer = self.WRITER_CLS.create(mr_spec, 0, 0)
    writer.write("123")
    writer = self.WRITER_CLS.from_json(writer.to_json())
    writer.write("4")

    new_writer = writer._recover(mr_spec, 0, 0)

    # Old instance is finalized and valid offset saved.
    old_stat = cloudstorage.stat(writer._streaming_buffer.name)
    self.assertEqual(
        len("123"),
        int(old_stat.metadata[self.WRITER_CLS._VALID_LENGTH]))
    # New instance is created with an incremented seg index.
    self.assertEqual(writer._seg_index + 1, new_writer._seg_index)

    # Verify filenames.
    self.assertTrue(
        writer._streaming_buffer.name.endswith(str(writer._seg_index)))
    self.assertTrue(
        new_writer._streaming_buffer.name.endswith(str(new_writer._seg_index)))
def get(self, bucket, obj):
    gcs_path = '/%s/%s' % (bucket, urllib.quote(obj))
    logging.info('Opening %s using BLOCK_SIZE=%d', gcs_path, self.BLOCK_SIZE)
    try:
        gcs_file = gcs.open(gcs_path, retry_params=self.RETRY_PARAMS)
        gcs_stat = gcs.stat(gcs_path, retry_params=self.RETRY_PARAMS)
    except gcs.ForbiddenError:
        logging.exception("ForbiddenError accessing path %s", gcs_path)
        self.abort(httplib.FORBIDDEN)
    except gcs.AuthorizationError:
        logging.exception("AuthorizationError accessing path %s", gcs_path)
        self.abort(httplib.UNAUTHORIZED)

    self.response.headers["Content-Type"] = gcs_stat.content_type
    content_size = 0L
    block_num = 0
    while True:
        block = gcs_file.read(self.BLOCK_SIZE)
        if not block:
            break
        self.response.write(block)
        content_size += len(block)
        block_num += 1
    logging.info("Wrote content from [%s]: %s blocks, %s bytes",
                 gcs_path, block_num, content_size)
def serve_file(request, blob_key_or_info, as_download=False, content_type=None,
               filename=None, offset=None, size=None):
    """
    Serves a file from the blobstore, reads most of the data from the blobinfo
    by default but you can override stuff by passing kwargs.

    You can also pass a Google Cloud Storage filename as `blob_key_or_info` to
    use Blobstore API to serve the file:
    https://cloud.google.com/appengine/docs/python/blobstore/#Python_Using_the_Blobstore_API_with_Google_Cloud_Storage
    """
    if isinstance(blob_key_or_info, BlobKey):
        info = BlobInfo.get(blob_key_or_info)
        blob_key = blob_key_or_info
    elif isinstance(blob_key_or_info, basestring):
        info = BlobInfo.get(BlobKey(blob_key_or_info))
        blob_key = BlobKey(blob_key_or_info)
    elif isinstance(blob_key_or_info, BlobInfo):
        info = blob_key_or_info
        blob_key = info.key()
    else:
        raise ValueError("Invalid type %s" % blob_key_or_info.__class__)

    if info is None:
        # Lack of blobstore_info means this is a Google Cloud Storage file
        if has_cloudstorage:
            info = cloudstorage.stat(blob_key_or_info)
            info.size = info.st_size
            blob_key = create_gs_key('/gs{0}'.format(blob_key_or_info))
        else:
            raise ImportError(
                "To serve a Cloud Storage file you need to install cloudstorage")

    response = HttpResponse(content_type=content_type or info.content_type)
    response[BLOB_KEY_HEADER] = str(blob_key)
    response['Accept-Ranges'] = 'bytes'
    http_range = request.META.get('HTTP_RANGE')

    if offset or size:
        # Looks a little bonkers, but basically create the HTTP range string;
        # we cast to int first to make sure nothing funky gets into the headers
        http_range = "{}-{}".format(
            str(int(offset)) if offset else "",
            str(int(offset or 0) + size) if size else ""
        )

    if http_range is not None:
        response[BLOB_RANGE_HEADER] = http_range

    if as_download:
        response['Content-Disposition'] = smart_str(
            u'attachment; filename="%s"' % (filename or info.filename)
        )
    elif filename:
        raise ValueError(
            "You can't specify a filename without also specifying as_download")

    if info.size is not None:
        response['Content-Length'] = info.size

    return response
def testShuffleNoFile(self):
    bucket_name = "testbucket"
    p = shuffler.ShufflePipeline("testjob", {"bucket_name": bucket_name}, [])
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    p = shuffler.ShufflePipeline.from_id(p.pipeline_id)
    for filename in p.outputs.default.value:
        self.assertEqual(0, cloudstorage.stat(filename).st_size)
    self.assertEquals(1, len(self.emails))
def testCopy2(self):
    with cloudstorage.open(TESTFILE, 'w', 'text/foo',
                           {'x-goog-meta-foo': 'foo'}) as f:
        f.write('abcde')

    dst = TESTFILE + 'copy'
    self.assertRaises(cloudstorage.NotFoundError, cloudstorage.stat, dst)
    cloudstorage_api.copy2(TESTFILE, dst)

    src_stat = cloudstorage.stat(TESTFILE)
    dst_stat = cloudstorage.stat(dst)
    self.assertEqual(src_stat.st_ctime, dst_stat.st_ctime)
    self.assertEqual(src_stat.st_size, dst_stat.st_size)
    self.assertEqual(src_stat.etag, dst_stat.etag)
    self.assertEqual(src_stat.content_type, dst_stat.content_type)
    self.assertEqual(src_stat.metadata, dst_stat.metadata)

    with cloudstorage.open(dst) as f:
        self.assertEqual('abcde', f.read())
def validate_gsc_filename(filename):
    if not filename:
        return None
    else:
        filename = _prepare_gcs_filename(filename)
        try:
            stat = gcs.stat(filename)
        except:
            filename = None
        return filename
def _statFile_(self, name):
    """
    Primitive, private method. Returns a Google Stat object,
    raises cloudstorage.NotFoundError
    """
    filename = self.location + "/" + name
    # if settings.GOOGLE_CLOUD_STORAGE_LOGGING:
    #     logging.info("GoogleCloudStorage-stat %s", filename)
    return cloudstorage.stat(filename)
def _get_or_create_cached_blob_key_and_info(blob_key_or_info):
    cached_value = _get_from_cache(blob_key_or_info)
    if cached_value:
        blob_key, info = cached_value
    else:
        info = cloudstorage.stat(blob_key_or_info)
        info.size = info.st_size
        blob_key = create_gs_key('/gs{0}'.format(blob_key_or_info))
        _add_to_cache(blob_key_or_info, blob_key, info)
    return (blob_key, info)
def testFilenameEscaping(self):
    name = BUCKET + '/a b/c d/*%$'
    with cloudstorage.open(name, 'w') as f:
        f.write('foo')

    with cloudstorage.open(name) as f:
        self.assertEqual('foo', f.read())

    self.assertEqual(name, cloudstorage.stat(name).filename)

    bucket = cloudstorage.listbucket(BUCKET)
    for stat in bucket:
        self.assertEqual(name, stat.filename)

    cloudstorage.delete(name)
def get(self):
    """
    GET

    'filename' is required.
    """
    filename = self.request.GET['filename'].strip()
    stat = cloudstorage.stat(filename)
    gcs_file = cloudstorage.open(filename)
    self.response.headers['Content-type'] = stat.content_type
    self.response.out.write(gcs_file.read())
    gcs_file.close()
def stat(file_name, retry_params=None):
    try:
        # forward retry_params so callers can control retry behaviour
        gcs_stat = gcs.stat(file_name, retry_params=retry_params)
    except gcs.NotFoundError:
        return None
    except gcs.AuthorizationError:
        return None
    else:
        file_stat = __gcs_file_stat_conversion__(gcs_stat)
        return file_stat
def view_video():
    for _file in cloudstorage.listbucket(BUCKET_NAME):
        if "microscope_video" in _file.filename:
            _file = cloudstorage.stat(_file.filename)
            logging.info(_file.filename)
            logging.info(_file.content_type)
            cloud_file = cloudstorage.open(_file.filename, mode='r')
            response = make_response(cloud_file.read())
            cloud_file.close()
            response.mimetype = _file.content_type
            return response
    return "No file found"
def testCopy2ReplacesMetadata(self):
    with cloudstorage.open(TESTFILE, 'w', 'text/foo',
                           {'x-goog-meta-foo': 'foo'}) as f:
        f.write('abcde')
    src_stat = cloudstorage.stat(TESTFILE)

    cloudstorage_api.copy2(TESTFILE, TESTFILE,
                           metadata={'x-goog-meta-foo': 'bar',
                                     'content-type': 'text/bar'})

    dst_stat = cloudstorage.stat(TESTFILE)
    self.assertEqual(src_stat.st_size, dst_stat.st_size)
    self.assertEqual(src_stat.etag, dst_stat.etag)
    self.assertEqual(src_stat.st_ctime, dst_stat.st_ctime)
    self.assertEqual('text/foo', src_stat.content_type)
    self.assertEqual('text/bar', dst_stat.content_type)
    self.assertEqual('foo', src_stat.metadata['x-goog-meta-foo'])
    self.assertEqual('bar', dst_stat.metadata['x-goog-meta-foo'])

    with cloudstorage.open(TESTFILE) as f:
        self.assertEqual('abcde', f.read())
def StatObject(self, url=None, bucket=None, obj=None):
    """Reads some information about an object in Gcs.

    Args:
        url: Full URL of the object. Use either this or bucket and object.
        bucket: Bucket name. Use either this and object or url.
        obj: Object name. Use either this and bucket or url.

    Returns:
        A dict with size, md5, contentType and metadata keys.
    """
    stat = None
    if url:
        stat = cloudstorage.stat(Gcs.UrlToBucketAndNamePath(url))
    else:
        stat = cloudstorage.stat(Gcs.MakeBucketAndNamePath(bucket, obj))
    return {
        'size': stat.st_size,
        'md5Hash': stat.etag,
        'contentType': stat.content_type,
        'metadata': stat.metadata
    }
def get_file_info(bucket, filename):
    """Returns information about stored file.

    Arguments:
        bucket: a bucket that contains the file.
        filename: path to a file relative to bucket root.

    Returns:
        FileInfo object or None if no such file.
    """
    try:
        stat = cloudstorage.stat(
            '/%s/%s' % (bucket, filename),
            retry_params=_make_retry_params())
        return FileInfo(size=stat.st_size)
    except cloudstorage.errors.NotFoundError:
        return None
def testStat(self):
    self.CreateFile(TESTFILE)
    filestat = cloudstorage.stat(TESTFILE)
    content = ''.join(DEFAULT_CONTENT)
    self.assertEqual(len(content), filestat.st_size)
    self.assertEqual('text/plain', filestat.content_type)
    self.assertEqual('foo', filestat.metadata['x-goog-meta-foo'])
    self.assertEqual('bar', filestat.metadata['x-goog-meta-bar'])
    self.assertEqual('public, max-age=6000',
                     filestat.metadata['cache-control'])
    self.assertEqual('attachment; filename=f.txt',
                     filestat.metadata['content-disposition'])
    self.assertEqual(TESTFILE, filestat.filename)
    self.assertEqual(hashlib.md5(content).hexdigest(), filestat.etag)
    self.assertTrue(math.floor(self.start_time) <= filestat.st_ctime)
    self.assertTrue(filestat.st_ctime <= time.time())
def isfile(self, filepath):
    """Checks if the file with the given filepath exists in the GCS.

    Args:
        filepath: str. The path to the relevant file within the entity's
            assets folder.

    Returns:
        bool. Whether the file exists in GCS.
    """
    try:
        return bool(
            cloudstorage.stat(
                self._get_gcs_file_url(filepath), retry_params=None))
    except cloudstorage.NotFoundError:
        return False
def _stat_file(file_path):
    """
    :param file_path: full path
    :return: dict of basic stat attributes for the file
    """
    st = gcs.stat(file_path)
    return {
        'filename': st.filename,
        'is_dir': st.is_dir,
        'st_size': st.st_size,
        'st_ctime': st.st_ctime,
        'etag': st.etag,
        'content_type': st.content_type,
        'metadata': st.metadata,
    }
def testAppendAndFlush(self):
    self.pool.append("a")
    self.assertRaises(cloudstorage.errors.NotFoundError, cloudstorage.open,
                      self.filename)
    self.pool.append("b")
    self.assertRaises(cloudstorage.errors.NotFoundError, cloudstorage.open,
                      self.filename)
    self.pool.flush()
    self.assertRaises(cloudstorage.errors.NotFoundError, cloudstorage.open,
                      self.filename)
    # File handle does need to be explicitly closed.
    self.filehandle.close()
    self.assertEquals(32 * 1024, cloudstorage.stat(self.filename).st_size)
    self.assertEquals(
        ["a", "b"],
        list(records.RecordsReader(cloudstorage.open(self.filename))))
def get(self, *args, **kwargs):
    bucket_name = os.environ.get(
        'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
    bucket = '/' + bucket_name
    # omit first '/'
    object_name = self.request.path[1:]
    try:
        stat = gcs.stat(bucket + '/' + object_name)
        logging.info(repr(stat))
    except gcs.errors.NotFoundError:
        self.abort(404)
    except gcs.errors.AuthorizationError:
        self.abort(404)
    except Exception as e:
        logging.exception(e)
        self.response.write('There was an error: {}'.format(e))
def testShuffleNoData(self):
    bucket_name = "testbucket"
    test_filename = "testfile"
    full_filename = "/%s/%s" % (bucket_name, test_filename)

    gcs_file = cloudstorage.open(full_filename, mode="w")
    gcs_file.close()

    p = shuffler.ShufflePipeline(
        "testjob", {"bucket_name": bucket_name},
        [full_filename, full_filename, full_filename])
    p.start()
    test_support.execute_until_empty(self.taskqueue)

    p = shuffler.ShufflePipeline.from_id(p.pipeline_id)
    for filename in p.outputs.default.value:
        self.assertEqual(0, cloudstorage.stat(filename).st_size)
    self.assertEquals(1, len(self.emails))
def get(self):
    try:
        file_name = self.request.get('filename')
        assert file_name
        info = gcs.stat('/%s/%s' % (bucket_name, file_name))
        assert info is not None
        blob_key = blobstore.create_gs_key(
            '/gs/%s/%s' % (bucket_name, file_name))
        assert blob_key is not None
    except Exception as e:
        self.response.set_status(400)
    else:
        self.response.headers['X-AppEngine-BlobKey'] = blob_key
        self.response.headers['Content-Type'] = info.content_type.encode()
        self.response.headers['Content-Disposition'] = (
            'attachment; filename="%s"' % file_name).encode()
def get_database_file():
    bucket = '/' + constants.GEOLOCATION_MAXMIND_GCS_BUCKET
    bucket_path = bucket + '/' + constants.GEOLOCATION_MAXMIND_BUCKET_PATH
    filename = bucket_path + '/' + constants.GEOLOCATION_MAXMIND_CITY_FILE
    logging.info('MaxMind database GCS path is: %s', filename)

    try:
        file_stat = gcs.stat(filename)
        database_file = gcs.open(filename)
        logging.info(
            'MaxMind database file creation time is: %s',
            datetime.datetime.fromtimestamp(
                file_stat.st_ctime).strftime('%Y-%m-%d %H:%M:%S'))
    except gcs.NotFoundError:
        logging.error('MaxMind database file not found in GCS: %s', filename)
        raise

    return database_file
def split_input(cls, mapper_spec):
    """Returns a list of shard_count input_spec_shards for input_spec.

    Args:
        mapper_spec: The mapper specification to split from. Must contain
            'file_paths' parameter with one or more file paths.

    Returns:
        A list of GoogleStorageLineInputReader corresponding to the specified
        shards.
    """
    params = _get_params(mapper_spec)
    file_paths = params[cls.FILE_PATHS_PARAM]

    if isinstance(file_paths, basestring):
        # This is a mechanism to allow multiple file paths (which do not
        # contain commas) in a single string. It may go away.
        file_paths = file_paths.split(",")

    file_sizes = {}
    for file_path in file_paths:
        fstat = cloudstorage.stat(file_path)
        file_sizes[file_path] = fstat.st_size

    shard_count = min(cls._MAX_SHARD_COUNT, mapper_spec.shard_count)
    shards_per_file = shard_count // len(file_paths)
    if shards_per_file == 0:
        shards_per_file = 1

    chunks = []
    for file_path, file_size in file_sizes.items():
        file_chunk_size = file_size // shards_per_file
        for i in xrange(shards_per_file - 1):
            chunks.append(GoogleStorageLineInputReader.from_json(
                {cls.FILE_PATH_PARAM: file_path,
                 cls.INITIAL_POSITION_PARAM: file_chunk_size * i,
                 cls.END_POSITION_PARAM: file_chunk_size * (i + 1)}))
        chunks.append(GoogleStorageLineInputReader.from_json(
            {cls.FILE_PATH_PARAM: file_path,
             cls.INITIAL_POSITION_PARAM: file_chunk_size * (shards_per_file - 1),
             cls.END_POSITION_PARAM: file_size}))

    return chunks
def futuregcsfileshardedpagemap(pagemapf=None, gcspath=None, pagesize=100,
                                onsuccessf=None, onfailuref=None,
                                onprogressf=None, onallchildsuccessf=None,
                                initialresult=None, oncombineresultsf=None,
                                weight=None, parentkey=None, **taskkwargs):
    def MapOverRange(futurekey, startbyte, endbyte, weight, **kwargs):
        logdebug("Enter MapOverRange: %s, %s, %s" % (startbyte, endbyte, weight))

        linitialresult = initialresult if not initialresult is None else 0
        loncombineresultsf = oncombineresultsf if oncombineresultsf else lambda a, b: a + b

        try:
            # open file at gcspath for read
            with gcs.open(gcspath) as gcsfile:
                page, ranges = hwalk(gcsfile, pagesize, 2, startbyte, endbyte)

            if pagemapf:
                lonallchildsuccessf = GenerateOnAllChildSuccess(
                    futurekey, linitialresult, loncombineresultsf)
                taskkwargs["futurename"] = "pagemap %s of %s,%s" % (
                    len(page), startbyte, endbyte)
                future(pagemapf, parentkey=futurekey,
                       onallchildsuccessf=lonallchildsuccessf,
                       weight=len(page), **taskkwargs)(page)
            else:
                setlocalprogress(futurekey, len(page))

            if ranges:
                newweight = (weight - len(page)) / len(ranges) if not weight is None else None
                for arange in ranges:
                    taskkwargs["futurename"] = "shard %s" % (arange)

                    lonallchildsuccessf = GenerateOnAllChildSuccess(
                        futurekey,
                        linitialresult if pagemapf else len(page),
                        loncombineresultsf)

                    future(MapOverRange, parentkey=futurekey,
                           onallchildsuccessf=lonallchildsuccessf,
                           weight=newweight,
                           **taskkwargs)(arange[0], arange[1], weight=newweight)

            if ranges or pagemapf:
                raise FutureReadyForResult("still going")
            else:
                return len(page)
        finally:
            logdebug("Leave MapOverRange: %s, %s, %s" % (startbyte, endbyte, weight))

    # get length of file in bytes
    filestat = gcs.stat(gcspath)
    filesizebytes = filestat.st_size

    futurename = "top level 0 to %s" % (filesizebytes)
    taskkwargscopy = dict(taskkwargs)
    taskkwargscopy["futurename"] = taskkwargscopy.get("futurename", futurename)

    return future(MapOverRange, onsuccessf=onsuccessf, onfailuref=onfailuref,
                  onprogressf=onprogressf, onallchildsuccessf=onallchildsuccessf,
                  parentkey=parentkey, weight=weight,
                  **taskkwargscopy)(0, filesizebytes, weight)
def detectBillFileType(self, request):
    accountId = int(request.accountId)
    accountKey = Key(Account, accountId)
    billId = request.billId
    billKey = Key(Bill, billId, parent=accountKey)
    billfileId = request.billfileToDetect
    billFile = Key(BillFile, billfileId, parent=billKey).get()

    # Build file path
    filepath = getFilepath(str(accountId), billId, billfileId, billFile.name)
    filestat = gcs.stat('/' + settings.FILE_BUCKET + filepath)
    billFile.file_type = filestat.content_type
    billFile.put()

    return StringMessage(data='Detected:' + billFile.file_type)
def testGzip(self):
    with cloudstorage.open(TESTFILE, 'w', 'text/plain',
                           {'content-encoding': 'gzip'}) as f:
        gz = gzip.GzipFile('', 'wb', 9, f)
        gz.write('a' * 1024)
        gz.write('b' * 1024)
        gz.close()

    stat = cloudstorage.stat(TESTFILE)
    self.assertEqual('text/plain', stat.content_type)
    self.assertEqual('gzip', stat.metadata['content-encoding'])
    self.assertEqual(37, stat.st_size)

    with cloudstorage.open(TESTFILE) as f:
        gz = gzip.GzipFile('', 'rb', 9, f)
        result = gz.read(10)
        self.assertEqual('a' * 10, result)
        self.assertEqual('a' * 1014 + 'b' * 1024, gz.read())
def _copy_gcs_file(m):
    if not m.gcs_filename:
        return
    if (m.gcs_filename.endswith(".jpeg") or m.gcs_filename.endswith(".png")
            or m.gcs_filename.endswith(".gif")):
        return

    old_gcs_filename = m.gcs_filename
    old_gcs_stats = cloudstorage.stat(old_gcs_filename)
    content_type = old_gcs_stats.content_type
    filename = '%s.%s' % (old_gcs_filename,
                          get_extension_for_content_type(content_type))
    with cloudstorage.open(old_gcs_filename, 'r') as gcs_file:
        with cloudstorage.open(filename, 'w', content_type) as f:
            f.write(gcs_file.read())
    m.gcs_filename = filename
    m.put()
def isfile(self, filepath):
    """Checks if the file with the given filepath exists in the GCS.

    Args:
        filepath: str. The path to the relevant file within the exploration.

    Returns:
        bool. Whether the file exists in GCS.
    """
    bucket_name = app_identity_services.get_gcs_resource_bucket_name()

    # Upload to GCS bucket with filepath
    # "<bucket>/<exploration-id>/assets/<filepath>".
    gcs_file_url = (
        '/%s/%s/assets/%s' % (bucket_name, self._exploration_id, filepath))
    try:
        return cloudstorage.stat(gcs_file_url, retry_params=None)
    except cloudstorage.NotFoundError:
        return False
def post(self):
    if self.request.get('_delete'):
        self.delete()
        return

    tags = clean_tags(self.request.get('tags'))
    if not tags:
        api.write_error(self.response, 400, 'Missing required parameter: tags')
        return
    tags.append(api.get_geo_name(self.request))

    try:
        uploaded_file = self.request.POST['file']
        if not uploaded_file.type:
            api.write_error(self.response, 400, 'Missing content type')
            return
    except:
        uploaded_file = None
    if uploaded_file is None:
        api.write_error(self.response, 400, 'Missing content')
        return

    recording_id = str(uuid.uuid4())
    filename = config.RECORDINGS_BUCKET + recording_id
    gcs_file = gcs.open(filename, mode='w', content_type=uploaded_file.type)
    gcs_file.write(uploaded_file.file.read())
    gcs_file.close()

    stat = gcs.stat(filename)
    recording = Recording(uuid=recording_id, tags=tags,
                          duration=stat.st_size / config.SAMPLES_PER_SEC)
    recording.put_async()
    api.write_message(self.response, 'success')
def _next_seg(self):
    """Get next seg."""
    if self._seg:
        self._seg.close()
    self._seg_index += 1
    if self._seg_index > self._last_seg_index:
        self._seg = None
        return

    filename = self._seg_prefix + str(self._seg_index)
    stat = cloudstorage.stat(filename)
    writer = output_writers._GoogleCloudStorageOutputWriter
    if writer._VALID_LENGTH not in stat.metadata:
        raise ValueError("Expect %s in metadata for file %s." %
                         (writer._VALID_LENGTH, filename))
    self._seg_valid_length = int(stat.metadata[writer._VALID_LENGTH])
    if self._seg_valid_length > stat.st_size:
        raise ValueError(
            "Valid length %s is too big for file %s of length %s" %
            (self._seg_valid_length, filename, stat.st_size))
    self._seg = cloudstorage.open(filename)
def read_photo_from_storage(photo, label, response):
    bucket_name = os.environ.get('BUCKET_NAME',
                                 app_identity.get_default_gcs_bucket_name())
    filename = format_photo_file_name(
        bucket_name, photo.created_by_user_id, photo.sha256, label)
    try:
        file_stat = gcs.stat(filename)
        gcs_file = gcs.open(filename)
        response.headers['Content-Type'] = file_stat.content_type
        # cache for up to 1 year
        response.headers['Cache-Control'] = 'private, max-age=31536000'
        response.headers['ETag'] = file_stat.etag
        response.write(gcs_file.read())
        gcs_file.close()
    except gcs.NotFoundError:
        logging.exception("Failed to read photo file")
        response.status = 404
        response.write('photo file not found')
def gcsfileshardedpagemap(pagemapf=None, gcspath=None, initialshards=10,
                          pagesize=100, **taskkwargs):
    @task(**taskkwargs)
    def MapOverRange(startpos, endpos, **kwargs):
        logdebug("Enter MapOverRange: %s, %s" % (startpos, endpos))

        # open file at gcspath for read
        with gcs.open(gcspath) as gcsfile:
            page, ranges = hwalk(gcsfile, pagesize, initialshards,
                                 startpos, endpos)

        if ranges:
            for arange in ranges:
                MapOverRange(arange[0], arange[1])

        if pagemapf:
            pagemapf(page)

        logdebug("Leave MapOverRange: %s, %s" % (startpos, endpos))

    # get length of file in bytes
    filestat = gcs.stat(gcspath)

    MapOverRange(0, filestat.st_size)
def testWriterMetadata(self):
    test_acl = "test-acl"
    test_content_type = "test-mime"
    mapreduce_state = self.create_mapreduce_state(
        output_params={
            self.WRITER_CLS.BUCKET_NAME_PARAM: "test",
            self.WRITER_CLS.ACL_PARAM: test_acl,
            self.WRITER_CLS.CONTENT_TYPE_PARAM: test_content_type
        })
    shard_state = self.create_shard_state(0)
    ctx = context.Context(mapreduce_state.mapreduce_spec, shard_state)
    context.Context._set(ctx)

    writer = self.WRITER_CLS.create(mapreduce_state.mapreduce_spec,
                                    shard_state.shard_number, 0)
    writer = self.WRITER_CLS.from_json(writer.to_json())

    writer.finalize(ctx, shard_state)

    filename = self.WRITER_CLS._get_filename(shard_state)
    file_stat = cloudstorage.stat(filename)
    self.assertEqual(test_content_type, file_stat.content_type)
def _stat_file(self, bucketFile, ftype=None, cache=True):
    # log.info("_stat_file(%s,%s,%s)" % (bucketFile, ftype, cache))
    ret = None
    if cache:
        item = self.readCache(bucketFile, ftype)
        if item:
            ret = item.stat
            log.info("Got from readCache")
    if not ret:
        # log.info('Stating file {}'.format(bucketFile))
        try:
            ret = cloudstorage.stat(bucketFile)
        except cloudstorage.NotFoundError:
            log.info("File not found: %s" % bucketFile)
        except Exception as e:
            log.info("Stat error(%s): %s" % (bucketFile, e))
        if ret:
            log.info("Stat {}".format(ret))
            itm = bucketCacheItem(ret, None)
            self.writeCache(bucketFile, itm, ftype)
    return ret
def AuthenticatedGet(self, org, event):
    filename = all_event_timeless_filename(event)
    bucket_path = BUCKET_NAME + '/' + filename
    try:
        file_stat = cloudstorage.stat(bucket_path)
    except cloudstorage.NotFoundError:
        self.abort(404)

    # rewrite filename to include timestamp
    custom_timestamp = timestamp(
        datetime.datetime.utcfromtimestamp(file_stat.st_ctime))
    filename_to_serve = file_stat.filename.replace(
        '.csv', '-%s.csv' % custom_timestamp)

    # serve the file as an attachment, forcing download
    gcs_fd = cloudstorage.open(bucket_path)
    if file_stat.content_type:
        self.response.headers['Content-Type'] = file_stat.content_type
    self.response.headers['Content-Disposition'] = (str(
        'attachment; filename="%s"' % filename_to_serve))
    self.response.write(gcs_fd.read())
def gcs_stat(filename, *args, **kwargs):
    return gcs.stat(gcs_filename(filename), *args, **kwargs)
def exists(filename):
    try:
        cloudstorage.stat(filename)
        return True
    except cloudstorage.NotFoundError:
        return False
def stat(filename):
    try:
        return cloudstorage.stat(filename)
    except cloudstorage.NotFoundError:
        raise IOError('File {} not found.'.format(filename))
def exists(self, name):
    try:
        cloudstorage.stat(self.path(name))
        return True
    except cloudstorage.NotFoundError:
        return False
def StatGCS(fullurl):
    try:
        return gcs.stat(_remove_gcs_prefix(fullurl))
    except gcs.NotFoundError:
        return None
def serve_file(request, blob_key_or_info, as_download=False, content_type=None,
               filename=None, offset=None, size=None):
    """
    Serves a file from the blobstore, reads most of the data from the blobinfo
    by default but you can override stuff by passing kwargs.

    You can also pass a Google Cloud Storage filename as `blob_key_or_info` to
    use Blobstore API to serve the file:
    https://cloud.google.com/appengine/docs/python/blobstore/#Python_Using_the_Blobstore_API_with_Google_Cloud_Storage
    """
    if isinstance(blob_key_or_info, BlobKey):
        info = BlobInfo.get(blob_key_or_info)
        blob_key = blob_key_or_info
    elif isinstance(blob_key_or_info, basestring):
        info = BlobInfo.get(BlobKey(blob_key_or_info))
        blob_key = BlobKey(blob_key_or_info)
    elif isinstance(blob_key_or_info, BlobInfo):
        info = blob_key_or_info
        blob_key = info.key()
    else:
        raise ValueError("Invalid type %s" % blob_key_or_info.__class__)

    if info is None:
        # Lack of blobstore_info means this is a Google Cloud Storage file
        if has_cloudstorage:
            cached_value = _get_from_cache(blob_key_or_info)
            if cached_value:
                blob_key, info = cached_value
            else:
                info = cloudstorage.stat(blob_key_or_info)
                info.size = info.st_size
                blob_key = create_gs_key('/gs{0}'.format(blob_key_or_info))
                _add_to_cache(blob_key_or_info, blob_key, info)
        else:
            raise ImportError(
                "To serve a Cloud Storage file you need to install cloudstorage"
            )

    response = HttpResponse(content_type=content_type or info.content_type)
    response[BLOB_KEY_HEADER] = str(blob_key)
    response['Accept-Ranges'] = 'bytes'
    http_range = request.META.get('HTTP_RANGE')

    if offset or size:
        # Looks a little bonkers, but basically create the HTTP range string;
        # we cast to int first to make sure nothing funky gets into the headers
        http_range = "{}-{}".format(
            str(int(offset)) if offset else "",
            str(int(offset or 0) + size) if size else "")

    if http_range is not None:
        response[BLOB_RANGE_HEADER] = http_range

    if as_download:
        response['Content-Disposition'] = smart_str(
            u'attachment; filename="%s"' % (filename or info.filename))
    elif filename:
        raise ValueError(
            "You can't specify a filename without also specifying as_download")

    if info.size is not None:
        response['Content-Length'] = info.size

    return response
def size(self, name):
    filestat = cloudstorage.stat(self.path(name))
    return filestat.st_size
def created_time(self, name):
    filestat = cloudstorage.stat(self.path(name))
    creation_date = timezone.datetime.fromtimestamp(filestat.st_ctime)
    return timezone.make_aware(creation_date, timezone.get_current_timezone())