def get(self):
  namespace = self.request.get('namespace', 'default-gzip')
  digest = self.request.get('digest', '')
  content = None

  if digest and namespace:
    try:
      raw_data, entity = model.get_content(namespace, digest)
    except ValueError:
      self.abort(400, 'Invalid key')
    except LookupError:
      self.abort(404, 'Unable to retrieve the entry')

    if not raw_data:
      stream = gcs.read_file(config.settings().gs_bucket, entity.key.id())
    else:
      stream = [raw_data]
    content = ''.join(model.expand_content(namespace, stream))

  self.response.headers['X-Frame-Options'] = 'SAMEORIGIN'
  # We delete Content-Type before storing to it to avoid having two (yes,
  # two) Content-Type headers.
  del self.response.headers['Content-Type']
  # Apparently, setting the content type to text/plain encourages the
  # browser (Chrome, at least) to sniff the mime type and display
  # things like images. Images are autowrapped in <img> and text is
  # wrapped in <pre>.
  self.response.headers['Content-Type'] = 'text/plain; charset=utf-8'
  self.response.headers['Content-Disposition'] = str('filename=%s' % digest)

  if content.startswith('{'):
    # Try to format as JSON.
    try:
      content = json.dumps(
          json.loads(content), sort_keys=True, indent=2,
          separators=(',', ': '))
      # If we don't wrap this in html, browsers will put content in a pre
      # tag which is also styled with monospace/pre-wrap. We can't use
      # anchor tags in <pre>, so we force it to be a <div>, which happily
      # accepts links.
      content = (
          '<div style="font-family:monospace;white-space:pre-wrap;">%s</div>'
          % content)
      # Linkify things that look like hashes.
      content = re.sub(
          r'([0-9a-f]{40})',
          r'<a target="_blank" href="/browse?namespace=%s' % namespace +
              r'&digest=\1">\1</a>',
          content)
      self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
    except ValueError:
      pass

  self.response.write(content)
def post(self, namespace, hash_key):
  entry = model.entry_key(namespace, hash_key).get()
  if not entry:
    logging.error('Failed to find entity')
    return
  if entry.is_verified:
    logging.warning('Was already verified')
    return
  if entry.content is not None:
    logging.error('Should not be called with inline content')
    return

  # Get the GS file size.
  gs_bucket = config.settings().gs_bucket
  gs_file_info = gcs.get_file_info(gs_bucket, entry.key.id())

  # It's None if the file is missing.
  if not gs_file_info:
    # According to the docs, GS is read-after-write consistent, so a file is
    # missing only if it wasn't stored at all or it was deleted; in any case
    # it's not a valid ContentEntry.
    self.purge_entry(entry, 'No such GS file')
    return

  # Expected stored length and actual length should match.
  if gs_file_info.size != entry.compressed_size:
    self.purge_entry(
        entry,
        'Bad GS file: expected size is %d, actual size is %d',
        entry.compressed_size, gs_file_info.size)
    return

  save_to_memcache = (
      entry.compressed_size <= model.MAX_MEMCACHE_ISOLATED and
      entry.is_isolated)
  expanded_size = 0
  digest = model.get_hash_algo(namespace)
  data = None

  try:
    # Start a loop that reads the data in blocks.
    stream = gcs.read_file(gs_bucket, entry.key.id())
    if save_to_memcache:
      # Wrap the stream with a generator that accumulates the data.
      stream = Accumulator(stream)

    for data in model.expand_content(namespace, stream):
      expanded_size += len(data)
      digest.update(data)
      # Make sure the data is GC'ed.
      del data

    # Hashes should match.
    if digest.hexdigest() != hash_key:
      self.purge_entry(
          entry,
          'SHA-1 do not match data (%d bytes, %d bytes expanded)',
          entry.compressed_size, expanded_size)
      return
  except gcs.NotFoundError as e:
    # Somebody deleted the file between the get_file_info and read_file calls.
    self.purge_entry(entry, 'File was unexpectedly deleted')
    return
  except (gcs.ForbiddenError, gcs.AuthorizationError) as e:
    # Misconfiguration in Google Storage ACLs. Don't delete the entry, it may
    # be fine. Maybe the ACL problems will be fixed before the next retry.
    logging.warning(
        'CloudStorage auth issues (%s): %s', e.__class__.__name__, e)
    # Abort so the job is retried automatically.
    return self.abort(500)
  except (gcs.FatalError, zlib.error, IOError) as e:
    # ForbiddenError and AuthorizationError inherit FatalError, so this
    # except block should be last.
    # It's broken or unreadable.
    self.purge_entry(
        entry,
        'Failed to read the file (%s): %s', e.__class__.__name__, e)
    return

  # Verified. Data matches the hash.
  entry.expanded_size = expanded_size
  entry.is_verified = True
  future = entry.put_async()
  logging.info(
      '%d bytes (%d bytes expanded) verified',
      entry.compressed_size, expanded_size)
  if save_to_memcache:
    model.save_in_memcache(namespace, hash_key, ''.join(stream.accumulated))
  future.wait()
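# The verification handler above wraps the GCS read stream in an Accumulator
# when the entry is small enough to cache, then stores
# ''.join(stream.accumulated) in memcache. The class itself is not shown in
# this excerpt; the sketch below is an assumption based only on how the
# handler uses it (iterate, record every chunk, expose .accumulated), not
# necessarily the project's actual implementation.
class Accumulator(object):
  """Iterates over a source of chunks while recording each chunk yielded."""

  def __init__(self, source):
    self.accumulated = []
    self._source = source

  def __iter__(self):
    for chunk in self._source:
      # Keep every chunk so the caller can later reassemble the full (still
      # compressed) payload with ''.join(self.accumulated).
      self.accumulated.append(chunk)
      yield chunk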
def get(self):
  namespace = self.request.get('namespace', 'default-gzip')
  digest = self.request.get('digest', '')
  content = None

  if not digest:
    self.abort(400, 'Missing digest')
  if not namespace:
    self.abort(400, 'Missing namespace')

  try:
    raw_data, entity = model.get_content(namespace, digest)
  except ValueError:
    self.abort(400, 'Invalid key')
  except LookupError:
    self.abort(404, 'Unable to retrieve the entry')

  logging.info('%s', entity)
  if not raw_data:
    try:
      stream = gcs.read_file(config.settings().gs_bucket, entity.key.id())
      content = ''.join(model.expand_content(namespace, stream))
    except cloudstorage.NotFoundError:
      logging.error('Entity in DB but not in GCS: deleting entity in DB')
      entity.key.delete()
      self.abort(404, 'Unable to retrieve the file from GCS')
  else:
    content = ''.join(model.expand_content(namespace, [raw_data]))

  self.response.headers['X-Frame-Options'] = 'SAMEORIGIN'
  # We delete Content-Type before storing to it to avoid having two (yes,
  # two) Content-Type headers.
  del self.response.headers['Content-Type']
  # Apparently, setting the content type to text/plain encourages the
  # browser (Chrome, at least) to sniff the mime type and display
  # things like images. Images are autowrapped in <img> and text is
  # wrapped in <pre>.
  self.response.headers['Content-Type'] = 'text/plain; charset=utf-8'

  # App Engine puts a limit of 33554432 bytes on a response, which includes
  # headers. Headers are ~150 bytes. If the content plus headers might exceed
  # that limit, give the user a command to work around it and download the
  # file directly.
  if len(content) > 33554000:
    host = modules.get_hostname(module='default', version='default')
    # host is something like default.default.myisolateserver.appspot.com.
    host = host.replace('default.default.', '')
    sizeInMib = len(content) / (1024.0 * 1024.0)
    content = (
        'Sorry, your file is %1.1f MiB big, which exceeds the 32 MiB'
        ' App Engine limit.\n'
        'To work around this, run the following command:\n'
        '  python isolateserver.py download -I %s --namespace %s -f %s %s'
        % (sizeInMib, host, namespace, digest, digest))
  else:
    self.response.headers['Content-Disposition'] = str(
        'filename=%s' % (self.request.get('as') or digest))
    try:
      json_data = json.loads(content)
      if self._is_isolated_format(json_data):
        self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
        json_data['files'] = collections.OrderedDict(
            sorted(
                json_data['files'].items(),
                key=lambda (filepath, data): filepath))
        params = {
          'namespace': namespace,
          'isolated': json_data,
        }
        content = template.render('isolate/isolated.html', params)
    except ValueError:
      pass

  self.response.write(content)
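# The handler above calls self._is_isolated_format() to decide whether the
# parsed JSON should be rendered with the isolate/isolated.html template. That
# helper is not included in this excerpt; a plausible minimal check, assuming
# a .isolated manifest is a JSON object carrying a 'files' mapping, could look
# like the sketch below. This is an illustration, not the project's actual
# implementation.
@staticmethod
def _is_isolated_format(json_data):
  """Returns True if the parsed JSON looks like a .isolated manifest."""
  return (
      isinstance(json_data, dict) and
      isinstance(json_data.get('files'), dict))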
def post(self, namespace, hash_key):
  original_request = self.request.get('req')
  entry = model.get_entry_key(namespace, hash_key).get()
  if not entry:
    logging.error('Failed to find entity\n%s', original_request)
    return
  if entry.is_verified:
    logging.warning('Was already verified\n%s', original_request)
    return
  if entry.content is not None:
    logging.error('Should not be called with inline content\n%s',
                  original_request)
    return

  # Get the GS file size.
  gs_bucket = config.settings().gs_bucket
  gs_file_info = gcs.get_file_info(gs_bucket, entry.key.id())

  # It's None if the file is missing.
  if not gs_file_info:
    # According to the docs, GS is read-after-write consistent, so a file is
    # missing only if it wasn't stored at all or it was deleted; in any case
    # it's not a valid ContentEntry.
    self.purge_entry(entry, 'No such GS file\n%s', original_request)
    return

  # Expected stored length and actual length should match.
  if gs_file_info.size != entry.compressed_size:
    self.purge_entry(
        entry,
        'Bad GS file: expected size is %d, actual size is %d\n%s',
        entry.compressed_size, gs_file_info.size, original_request)
    return

  save_to_memcache = (
      entry.compressed_size <= model.MAX_MEMCACHE_ISOLATED and
      entry.is_isolated)
  expanded_size = 0
  digest = hashlib.sha1()
  data = None

  try:
    # Start a loop that reads the data in blocks.
    stream = gcs.read_file(gs_bucket, entry.key.id())
    if save_to_memcache:
      # Wrap the stream with a generator that accumulates the data.
      stream = Accumulator(stream)

    for data in model.expand_content(namespace, stream):
      expanded_size += len(data)
      digest.update(data)
      # Make sure the data is GC'ed.
      del data

    # Hashes should match.
    if digest.hexdigest() != hash_key:
      self.purge_entry(
          entry,
          'SHA-1 do not match data\n'
          '%d bytes, %d bytes expanded, expected %d bytes\n%s',
          entry.compressed_size, expanded_size, entry.expanded_size,
          original_request)
      return
  except gcs.NotFoundError as e:
    # Somebody deleted the file between the get_file_info and read_file calls.
    self.purge_entry(entry, 'File was unexpectedly deleted\n%s',
                     original_request)
    return
  except (gcs.ForbiddenError, gcs.AuthorizationError) as e:
    # Misconfiguration in Google Storage ACLs. Don't delete the entry, it may
    # be fine. Maybe the ACL problems will be fixed before the next retry.
    logging.warning(
        'CloudStorage auth issues (%s): %s', e.__class__.__name__, e)
    # Abort so the job is retried automatically.
    return self.abort(500)
  except (gcs.FatalError, zlib.error, IOError) as e:
    # ForbiddenError and AuthorizationError inherit FatalError, so this
    # except block should be last.
    # It's broken or unreadable.
    self.purge_entry(
        entry,
        'Failed to read the file (%s): %s\n%s',
        e.__class__.__name__, e, original_request)
    return

  # Verified. Data matches the hash.
  entry.expanded_size = expanded_size
  entry.is_verified = True
  future = entry.put_async()
  logging.info(
      '%d bytes (%d bytes expanded) verified\n%s',
      entry.compressed_size, expanded_size, original_request)
  if save_to_memcache:
    model.save_in_memcache(namespace, hash_key, ''.join(stream.accumulated))
  future.wait()
def get(self):
  namespace = self.request.get('namespace', 'default-gzip')
  digest = self.request.get('digest', '')
  content = None

  if digest and namespace:
    try:
      raw_data, entity = model.get_content(namespace, digest)
    except ValueError:
      self.abort(400, 'Invalid key')
    except LookupError:
      self.abort(404, 'Unable to retrieve the entry')

    if not raw_data:
      stream = gcs.read_file(config.settings().gs_bucket, entity.key.id())
    else:
      stream = [raw_data]
    content = ''.join(model.expand_content(namespace, stream))

  self.response.headers['X-Frame-Options'] = 'SAMEORIGIN'
  # We delete Content-Type before storing to it to avoid having two (yes,
  # two) Content-Type headers.
  del self.response.headers['Content-Type']
  # Apparently, setting the content type to text/plain encourages the
  # browser (Chrome, at least) to sniff the mime type and display
  # things like images. Images are autowrapped in <img> and text is
  # wrapped in <pre>.
  self.response.headers['Content-Type'] = 'text/plain; charset=utf-8'

  # App Engine puts a limit of 33554432 bytes on a response, which includes
  # headers. Headers are ~150 bytes. If the content plus headers might exceed
  # that limit, give the user a command to work around it and download the
  # file directly.
  if len(content) > 33554000:
    host = modules.get_hostname(module='default', version='default')
    # host is something like default.default.myisolateserver.appspot.com.
    host = host.replace('default.default.', '')
    sizeInMib = len(content) / (1024.0 * 1024.0)
    content = (
        'Sorry, your file is %1.1f MiB big, which exceeds the 32 MiB'
        ' App Engine limit.\n'
        'To work around this, run the following command:\n'
        '  python isolateserver.py download -I %s --namespace %s -f %s %s'
        % (sizeInMib, host, namespace, digest, digest))
  else:
    self.response.headers['Content-Disposition'] = str(
        'filename=%s' % digest)

    if content.startswith('{'):
      # Try to format as JSON.
      try:
        content = json.dumps(
            json.loads(content), sort_keys=True, indent=2,
            separators=(',', ': '))
        # If we don't wrap this in html, browsers will put content in a pre
        # tag which is also styled with monospace/pre-wrap. We can't use
        # anchor tags in <pre>, so we force it to be a <div>, which happily
        # accepts links.
        content = (
            '<div style="font-family:monospace;white-space:pre-wrap;">%s'
            '</div>' % content)
        # Linkify things that look like hashes.
        content = re.sub(
            r'([0-9a-f]{40})',
            r'<a target="_blank" href="/browse?namespace=%s' % namespace +
                r'&digest=\1">\1</a>',
            content)
        self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
      except ValueError:
        pass

  self.response.write(content)