def Get(self):
    '''Routes the request to the servlet selected by its path.

    A path starting with '_' has the form '_<servlet_name>/<servlet_path>'
    and selects an internal servlet; any other path is handed unchanged to
    _DEFAULT_SERVLET. Two names are handled here rather than looked up in
    _SERVLETS: _FORCE_CRON_TARGET purges the default task queue and enqueues
    '/_cron', and 'enqueue' enqueues a task for '/<servlet_path>'.

    Returns:
      The Response of the selected servlet's Get(), an Ok Response for the
      two inline commands, or a NotFound Response when |servlet_name| has no
      entry in _SERVLETS.
    '''
    path = self._request.path

    if not path.startswith('_'):
        servlet = _DEFAULT_SERVLET
        servlet_path = path
    else:
        # partition() leaves an empty |servlet_path| when there is no '/',
        # which is what appending '/' before split('/', 1) used to achieve.
        servlet_name, _, servlet_path = path[1:].partition('/')

        if servlet_name == _FORCE_CRON_TARGET:
            queue = taskqueue.Queue()
            queue.purge()
            # Leave a gap between purging the queue and adding to it.
            time.sleep(2)
            queue.add(taskqueue.Task(url='/_cron'))
            return Response.Ok('Cron job started.')

        if servlet_name == 'enqueue':
            queue = taskqueue.Queue()
            queue.add(taskqueue.Task(url='/%s' % servlet_path))
            return Response.Ok('Task enqueued.')

        servlet = _SERVLETS.get(servlet_name)
        if servlet is None:
            # Report the name that failed the lookup, not the remaining path.
            return Response.NotFound(
                '"%s" servlet not found' % servlet_name)

    return servlet(Request(servlet_path,
                           self._request.host,
                           self._request.headers,
                           self._request.arguments)).Get()
def Get(self):
    '''Runs the link test named by the request path.

    Recognised names are 'broken_links', 'orphaned_pages' and 'link_errors'
    (which runs both of the others and concatenates their results).

    Returns:
      NotFound for an unrecognised test name, InternalError carrying the
      tester's content when any errors were reported, otherwise Ok.
    '''
    test_name = self._request.path
    link_error_tests = ('broken_links', 'orphaned_pages', 'link_errors')
    if test_name not in link_error_tests:
        return Response.NotFound(
            'Test %s not found. Available tests are: %s' %
            (test_name, ','.join(link_error_tests)))

    constructor = InstanceServlet.GetConstructor(self._delegate)

    def renderer(path):
        # Renders |path| through a fresh servlet with an empty host.
        return constructor(Request(path, '', self._request.headers)).Get()

    render_delegate = InstanceServletRenderServletDelegate(self._delegate)
    link_tester = BrokenLinkTester(render_delegate.CreateServerInstance(),
                                   renderer)

    if test_name == 'broken_links':
        errors, content = link_tester.TestBrokenLinks()
    elif test_name == 'orphaned_pages':
        errors, content = link_tester.TestOrphanedPages()
    else:
        # 'link_errors': run both tests, broken links first.
        link_errors, link_content = link_tester.TestBrokenLinks()
        orphaned_errors, orphaned_content = link_tester.TestOrphanedPages()
        errors = link_errors + orphaned_errors
        content = "%s\n%s" % (link_content, orphaned_content)

    if not errors:
        return Response.Ok(content="%s test passed." % test_name)
    return Response.InternalError(content=content)
def _GetSuccessResponse(self, path, server_instance):
    '''Renders |path| using |server_instance| and returns the Response.

    The only responses returned from here are Ok and Redirect; a |path| that
    cannot be found surfaces as a FileNotFoundError instead.
    '''
    providers = server_instance.content_providers
    content_provider, path = providers.GetByServeFrom(path)
    assert content_provider, 'No ContentProvider found for %s' % path

    redirector = Redirector(server_instance.compiled_fs_factory,
                            content_provider.file_system)
    redirect = redirector.Redirect(self._request.host, path)
    if redirect is not None:
        return Response.Redirect(redirect, permanent=False)

    content_future = content_provider.GetContentAndType(
        self._request.host, path)
    content_and_type = content_future.Get()
    if not content_and_type.content:
        logging.error('%s had empty content' % path)

    # Templates are rendered in place on the fetched object.
    if isinstance(content_and_type.content, Handlebar):
        rendered = server_instance.template_renderer.Render(
            content_and_type.content, self._request)
        content_and_type.content = rendered

    return Response.Ok(
        content_and_type.content,
        headers=_MakeHeaders(content_and_type.content_type))
def _GetSuccessResponse(self, request_path, server_instance):
    '''Renders |request_path| using |server_instance|; returns the Response.

    The only responses returned from here are Ok and Redirect; a path that
    cannot be found surfaces as a FileNotFoundError instead.
    '''
    providers = server_instance.content_providers
    content_provider, serve_from, path = providers.GetByServeFrom(
        request_path)
    assert content_provider, 'No ContentProvider found for %s' % path

    redirector = Redirector(server_instance.compiled_fs_factory,
                            content_provider.file_system)
    redirect = redirector.Redirect(self._request.host, path)
    if redirect is not None:
        # Absolute redirects are kept as they are; relative ones are resolved
        # against |serve_from|, because every redirect must be served as an
        # absolute path.
        if not redirect.startswith('/'):
            redirect = '/' + posixpath.join(serve_from, redirect)
        return Response.Redirect(redirect, permanent=False)

    canonical_path = content_provider.GetCanonicalPath(path)
    if canonical_path != path:
        return Response.Redirect(
            '/' + posixpath.join(serve_from, canonical_path),
            permanent=False)

    if request_path.endswith('/'):
        # A directory request that nothing above redirected: by default treat
        # it as a request for a file of the same name.
        return Response.Redirect('/' + request_path.rstrip('/'),
                                 permanent=False)

    if not path:
        # An empty path that nothing above redirected does not exist.
        raise FileNotFoundError('Empty path')

    content_and_type = content_provider.GetContentAndType(path).Get()
    content = content_and_type.content
    if not content:
        logging.error('%s had empty content' % path)

    if isinstance(content, Handlebar):
        template_content, template_warnings = (
            server_instance.template_renderer.Render(content, self._request))
        # HACK: the site verification file (google2ed...) doesn't have a
        # title.
        content, doc_warnings = server_instance.document_renderer.Render(
            template_content,
            path,
            render_title=path != SITE_VERIFICATION_FILE)
        warnings = template_warnings + doc_warnings
        if warnings:
            sep = '\n - '
            logging.warning(
                'Rendering %s:%s%s' % (path, sep, sep.join(warnings)))

    content_type = content_and_type.content_type
    if isinstance(content, unicode):
        # Unicode content is served as UTF-8 bytes and labelled as such.
        content = content.encode('utf-8')
        content_type += '; charset=utf-8'

    return Response.Ok(content, headers=_MakeHeaders(content_type))
def Get(self):
    '''Flushes all of memcache on behalf of an authorized caller.

    Returns:
      Unauthorized when the request carries no access token, Forbidden when
      the delegate rejects the token, otherwise Ok reporting the result of
      memcache.flush_all().
    '''
    token = self.GetAccessToken()
    if not token:
        return Response.Unauthorized('Unauthorized', 'Bearer', 'update')

    if self._delegate.IsAuthorized(token):
        return Response.Ok('Flushed: %s' % memcache.flush_all())

    return Response.Forbidden('Forbidden')
def _GetImpl(self):
    '''Purges the default task queue and enqueues one refresh task per path.

    Every data source, the content providers and the platform bundle are
    asked for their refresh paths; each path becomes a
    '/_refresh/<name>/<path>' task tagged with the master commit ID.

    Returns:
      An Ok Response once all tasks have been enqueued. Any exception raised
      while enqueueing is logged and re-raised.
    '''
    # Cron strategy:
    #
    # Collect all DataSources, the PlatformBundle, the ContentProviders, and
    # any other statically renderered contents (e.g. examples content),
    # and spin up taskqueue tasks which will refresh any cached data relevant
    # to these assets.
    #
    # TODO(rockot/kalman): At the moment examples are not actually refreshed
    # because they're too slow.
    _log.info('starting')

    server_instance = self._GetSafeServerInstance()
    master_commit = (server_instance.host_file_system_provider
                     .GetMaster().GetCommitID().Get())

    # This is the guy that would be responsible for refreshing the cache of
    # examples. Here for posterity, hopefully it will be added to the targets
    # below someday.
    render_refresher = RenderRefresher(server_instance, self._request)

    # The default taskqueue. GAE documentation specifies that it's bad to add
    # tasks to a queue within one second of purging; wait 2 seconds to go the
    # extra mile.
    queue = taskqueue.Queue()
    queue.purge()
    time.sleep(2)

    success = True
    try:
        extra_targets = [
            ('content_providers', server_instance.content_providers),
            ('platform_bundle', server_instance.platform_bundle),
        ]
        targets = CreateDataSources(server_instance).items() + extra_targets

        title = 'initializing %s parallel targets' % len(targets)
        _log.info(title)
        timer = Timer()
        for name, target in targets:
            for path in target.GetRefreshPaths():
                task = taskqueue.Task(url='/_refresh/%s/%s' % (name, path),
                                      params={'commit': master_commit})
                queue.add(task)
        _log.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
    except:
        # This should never actually happen (each cron step does its own
        # conservative error checking), so re-raise no matter what it is.
        _log.error('uncaught error: %s' % traceback.format_exc())
        success = False
        raise
    finally:
        _log.info('finished (%s)', 'success' if success else 'FAILED')

    # Reached only when nothing escaped the try block above.
    if success:
        return Response.Ok('Success')
    return Response.InternalError('Failure')
def Get(self):
    '''Points a named commit at one of its previously cached commit IDs.

    The request path has the form '<commit_name>/<commit_id>'.

    Returns:
      BadRequest when the path has no '/' separator or when |commit_id| does
      not appear in the tracker's history for |commit_name|; otherwise Ok
      after the tracker has been updated.
    '''
    # partition() never raises, so a path without '/' becomes a 400 response
    # instead of an unpacking ValueError.
    commit_name, separator, commit_id = self._request.path.partition('/')
    if not separator:
        return Response.BadRequest(
            'Expected a path of the form <commit_name>/<commit_id>.')

    commit_tracker = self._delegate.CreateCommitTracker()
    history = commit_tracker.GetHistory(commit_name).Get()
    if not any(entry.commit_id == commit_id for entry in history):
        return Response.BadRequest('Commit %s not cached.' % commit_id)

    commit_tracker.Set(commit_name, commit_id).Get()
    return Response.Ok(
        'Commit "%s" updated to %s' % (commit_name, commit_id))
def _GetImpl(self):
    '''Refreshes the single data source named by the request path.

    The path has the form '<source_name>' or '<source_name>/<source_path>'.
    'platform_bundle' and 'content_providers' select the corresponding
    members of the server instance; any other name goes through
    CreateDataSource(). The optional 'commit' request argument selects the
    commit the server instance is created at.

    Returns:
      Ok once the refresh Future has resolved, or a ThrottledError Response
      when the refresh fails with a file-system throttling error. Any other
      exception is logged and re-raised.
    '''
    # NOTE: the name/path split is taken from the raw request path, so a
    # trailing '/' yields an empty-string |source_path| rather than None.
    parts = self._request.path.split('/', 1)
    source_name = parts[0]
    source_path = parts[1] if len(parts) == 2 else None

    _log.info('starting refresh of %s DataSource %s' %
              (source_name,
               '' if source_path is None else '[%s]' % source_path))

    if 'commit' in self._request.arguments:
        commit = self._request.arguments['commit']
    else:
        _log.warning('No commit given; refreshing from master. '
                     'This is probably NOT what you want.')
        commit = None

    server_instance = self._CreateServerInstance(commit)

    success = True
    try:
        if source_name == 'platform_bundle':
            data_source = server_instance.platform_bundle
        elif source_name == 'content_providers':
            data_source = server_instance.content_providers
        else:
            data_source = CreateDataSource(source_name, server_instance)

        class_name = data_source.__class__.__name__
        refresh_future = data_source.Refresh(source_path)
        assert isinstance(refresh_future, Future), (
            '%s.Refresh() did not return a Future' % class_name)

        timer = Timer()
        try:
            refresh_future.Get()
        except Exception as e:
            _log.error('%s: error %s' % (class_name, traceback.format_exc()))
            success = False
            if IsFileSystemThrottledError(e):
                return Response.ThrottledError('Throttled')
            raise
        finally:
            _log.info('Refreshing %s took %s' %
                      (class_name, timer.Stop().FormatElapsed()))
    except:
        success = False
        # This should never actually happen.
        _log.error('uncaught error: %s' % traceback.format_exc())
        raise
    finally:
        _log.info('finished (%s)', 'success' if success else 'FAILED')

    return (Response.Ok('Success') if success else
            Response.InternalError('Failure'))
def Get(self):
    '''Loads pickled object-store contents from a local file (dev only).

    The request path is used as a file name. The file holds a pickled dict
    mapping namespace -> pickled dict of key/value pairs; every pair is
    written to the PersistentObjectStoreAppengine for its namespace.

    Returns:
      BadRequest when not running on the dev server, otherwise Ok. Entries
      whose write fails are logged and skipped.
    '''
    if not IsDevServer():
        return Response.BadRequest('')

    import cPickle
    from persistent_object_store_appengine import (
        PersistentObjectStoreAppengine)

    # SECURITY: unpickling can execute arbitrary code. This is only reachable
    # on the dev server; never point it at a file from an untrusted source.
    with open(self._request.path, 'r') as f:
        data = cPickle.load(f)

    for namespace, contents in data.iteritems():
        store = PersistentObjectStoreAppengine(namespace)
        for k, v in cPickle.loads(contents).iteritems():
            try:
                store.Set(k, v).Get()
            except Exception:
                # Best effort: skip entries that fail to store, but let
                # KeyboardInterrupt/SystemExit stop the import.
                logging.warning('Skipping entry %s because of errors.' % k)

    return Response.Ok('Data pushed!')
def Get(self):
    '''Lists the refresh tasks of a commit that have no recorded completion.

    The request path is the commit ID. Task names are '<commit_id>@<task>'
    for every task in the commit's work order.

    Returns:
      An Ok Response whose body is an HTML fragment naming each task that is
      absent from the tracker's completion store.
    '''
    commit_id = self._request.path
    refresh_tracker = RefreshTracker(ObjectStoreCreator(start_empty=False))

    work_order = refresh_tracker._GetWorkOrder(commit_id).Get()
    task_names = ['%s@%s' % (commit_id, task) for task in work_order.tasks]
    completions = refresh_tracker._task_completions.GetMulti(
        task_names).Get()

    missing = [name for name in task_names if name not in completions]
    body = 'Missing:<br>%s' % ''.join('%s<br>' % name for name in missing)
    return Response.Ok(body)
def _GetSuccessResponse(self, path, server_instance):
    '''Renders |path| using |server_instance| and returns the Response.

    The only responses returned from here are Ok and Redirect; a |path| that
    cannot be found surfaces as a FileNotFoundError instead.
    '''
    providers = server_instance.content_providers
    content_provider, serve_from, path = providers.GetByServeFrom(path)
    assert content_provider, 'No ContentProvider found for %s' % path

    redirector = Redirector(server_instance.compiled_fs_factory,
                            content_provider.file_system)
    redirect = redirector.Redirect(self._request.host, path)
    if redirect is not None:
        return Response.Redirect(redirect, permanent=False)

    canonical_path = content_provider.GetCanonicalPath(path)
    if canonical_path != path:
        return Response.Redirect(
            '/' + posixpath.join(serve_from, canonical_path),
            permanent=False)

    content_and_type = content_provider.GetContentAndType(path).Get()
    content = content_and_type.content
    if not content:
        logging.error('%s had empty content' % path)

    if isinstance(content, Handlebar):
        template_content, template_warnings = (
            server_instance.template_renderer.Render(content, self._request))
        # HACK: the site verification file (google2ed...) doesn't have a
        # title.
        content, doc_warnings = server_instance.document_renderer.Render(
            template_content,
            path,
            render_title=path != SITE_VERIFICATION_FILE)
        warnings = template_warnings + doc_warnings
        if warnings:
            sep = '\n - '
            logging.warning(
                'Rendering %s:%s%s' % (path, sep, sep.join(warnings)))

    content_type = content_and_type.content_type
    if isinstance(content, unicode):
        # Unicode content is served as UTF-8 bytes and labelled as such.
        content = content.encode('utf-8')
        content_type += '; charset=utf-8'

    return Response.Ok(content, headers=_MakeHeaders(content_type))
def Get(self):
    '''Reports the current commit ID and history of a named commit.

    The request path is the commit name. The current ID and the history are
    requested together and formatted once both are available.

    Returns:
      An Ok Response whose body is an HTML fragment: the current commit ID
      followed by the history entries, last entry first.
    '''
    commit_name = self._request.path
    commit_tracker = CommitTracker(ObjectStoreCreator(start_empty=False))

    def generate_response(result):
        # |result| is the (commit ID, history) pair produced by All().
        commit_id, history = result
        history_lines = ['%s: %s<br>' % (entry.datetime, entry.commit_id)
                         for entry in reversed(history)]
        return 'Current commit: %s<br><br>Most recent commits:<br>%s' % (
            commit_id, ''.join(history_lines))

    id_future = commit_tracker.Get(commit_name)
    history_future = commit_tracker.GetHistory(commit_name)
    combined = All((id_future, history_future)).Then(generate_response)
    return Response.Ok(combined.Get())
def Get(self):
    '''Serves the page, example file, zip or static file for the request.

    The request path may start with a channel name. Extension-less
    single-segment paths and directory paths redirect to their index.html, a
    path naming the default channel redirects to the channel-less path, and
    non-canonical paths redirect to their canonical form.

    Returns:
      A Redirect, an Ok Response carrying the content with its content-type
      and cache headers, or a NotFound Response carrying the rendered 404
      page of the path's top-level directory (falling back to
      'extensions/404').
    '''
    path_with_channel = self._request.path

    # Redirect "extensions" and "extensions/" to "extensions/index.html", etc.
    if (os.path.splitext(path_with_channel)[1] == '' and
            path_with_channel.find('/') == -1):
        path_with_channel += '/'
    if path_with_channel.endswith('/'):
        return Response.Redirect('/%sindex.html' % path_with_channel)

    channel, path = BranchUtility.SplitChannelNameFromPath(
        path_with_channel)
    if channel == self._default_channel:
        return Response.Redirect('/%s' % path)

    # Record whether the URL named a channel *before* defaulting it, so the
    # canonical redirect below only re-adds a channel that was in the URL.
    url_has_channel = channel is not None
    if not url_has_channel:
        channel = self._default_channel

    server_instance = self._delegate.CreateServerInstanceForChannel(channel)

    canonical_path = (
        server_instance.path_canonicalizer.Canonicalize(path).lstrip('/'))
    if path != canonical_path:
        redirect_path = ('%s/%s' % (channel, canonical_path)
                         if url_has_channel else canonical_path)
        return Response.Redirect('/%s' % redirect_path)

    templates = server_instance.template_data_source_factory.Create(
        self._request, path)

    content = None
    content_type = None

    try:
        if fnmatch(path, 'extensions/examples/*.zip'):
            content = server_instance.example_zipper.Create(
                path[len('extensions/'):-len('.zip')])
            content_type = 'application/zip'
        elif path.startswith('extensions/examples/'):
            mimetype = mimetypes.guess_type(path)[0] or 'text/plain'
            content = server_instance.content_cache.GetFromFile(
                '%s/%s' % (svn_constants.DOCS_PATH, path[len('extensions/'):]),
                binary=_IsBinaryMimetype(mimetype))
            content_type = mimetype
        elif path.startswith('static/'):
            mimetype = mimetypes.guess_type(path)[0] or 'text/plain'
            content = server_instance.content_cache.GetFromFile(
                ('%s/%s' % (svn_constants.DOCS_PATH, path)),
                binary=_IsBinaryMimetype(mimetype))
            content_type = mimetype
        elif path.endswith('.html'):
            content = templates.Render(path)
            content_type = 'text/html'
    except FileNotFoundError:
        logging.warning(traceback.format_exc())
        content = None

    headers = {'x-frame-options': 'sameorigin'}
    if content is None:
        # Use the 404 page of the top-level directory, e.g. 'apps/404'.
        doc_class = path.split('/', 1)[0]
        content = templates.Render('%s/404' % doc_class)
        if not content:
            content = templates.Render('extensions/404')
        return Response.NotFound(content, headers=headers)

    if not content:
        logging.error('%s had empty content' % path)

    headers.update({
        'content-type': content_type,
        'cache-control': 'max-age=300',
    })
    return Response.Ok(content, headers=headers)
def Get(self):
    ''' Render the page for a request.

    Returns a Redirect when the redirector or the path canonicalizer maps
    the path elsewhere, an Ok Response with content-type and cache headers
    when content is found, and otherwise a NotFound Response carrying the
    404 page of the path's top-level directory (falling back to
    'extensions/404'). Requests for any 'redirects.json' get an empty Ok.
    '''
    # TODO(kalman): a consistent path syntax (even a Path class?) so that we
    # can stop being so conservative with stripping and adding back the '/'s.
    path = self._request.path.lstrip('/')

    if path.rsplit('/', 1)[-1] == 'redirects.json':
        return Response.Ok('')

    server_instance = self._delegate.CreateServerInstance()

    target = server_instance.redirector.Redirect(self._request.host, path)
    if target is not None:
        return Response.Redirect(target)

    canonical_result = server_instance.path_canonicalizer.Canonicalize(path)
    target = canonical_result.path.lstrip('/')
    if target != path:
        return Response.Redirect('/' + target,
                                 permanent=canonical_result.permanent)

    templates = server_instance.template_data_source_factory.Create(
        self._request, path)

    content = None
    content_type = None

    try:
        # At this point, any valid paths ending with '/' have been redirected.
        # Therefore, the response should be a 404 Not Found.
        if not path.endswith('/'):
            if fnmatch(path, 'extensions/examples/*.zip'):
                example_name = path[len('extensions/'):-len('.zip')]
                content = server_instance.example_zipper.Create(example_name)
                content_type = 'application/zip'
            elif path.startswith('extensions/examples/'):
                mimetype = mimetypes.guess_type(path)[0] or 'text/plain'
                docs_file = '%s/%s' % (svn_constants.DOCS_PATH,
                                       path[len('extensions/'):])
                content = server_instance.host_file_system.ReadSingle(
                    docs_file, binary=_IsBinaryMimetype(mimetype))
                content_type = mimetype
            elif path.startswith('static/'):
                mimetype = mimetypes.guess_type(path)[0] or 'text/plain'
                docs_file = '%s/%s' % (svn_constants.DOCS_PATH, path)
                content = server_instance.host_file_system.ReadSingle(
                    docs_file, binary=_IsBinaryMimetype(mimetype))
                content_type = mimetype
            elif path.endswith('.html'):
                content = templates.Render(path)
                content_type = 'text/html'
    except FileNotFoundError:
        logging.warning(traceback.format_exc())
        content = None

    if content is None:
        # Prefer the 404 page of the top-level directory, e.g. 'apps/404'.
        doc_class = path.split('/', 1)[0]
        not_found_page = (templates.Render('%s/404' % doc_class) or
                          templates.Render('extensions/404'))
        return Response.NotFound(not_found_page,
                                 headers={'x-frame-options': 'sameorigin'})

    if not content:
        logging.error('%s had empty content' % path)

    headers = {
        'x-frame-options': 'sameorigin',
        'content-type': content_type,
        'cache-control': 'max-age=300',
    }
    return Response.Ok(content, headers=headers)
def _GetImpl(self):
    '''Renders every public template, static file and example for a channel.

    The channel is the request path with surrounding '/' removed. Each file
    is rendered through RenderServlet against an online ServerInstance; any
    non-200 response or rendering exception marks the run as failed without
    stopping it. A DeadlineExceededError stops the run.

    Returns:
      Ok when every file rendered with status 200, otherwise InternalError.
    '''
    # Cron strategy:
    #
    # Find all public template files and static files, and render them. Most
    # of the time these won't have changed since the last cron run, so it's a
    # little wasteful, but hopefully rendering is really fast (if it isn't we
    # have a problem).
    channel = self._request.path.strip('/')
    logging.info('cron/%s: starting' % channel)

    server_instance = ServerInstance.CreateOnline(channel)

    def run_cron_for_dir(d, path_prefix=''):
        # Renders every file listed under |d|; True iff all returned 200.
        success = True
        start_time = time.time()
        files = [f for f in server_instance.content_cache.GetFromFileListing(d)
                 if not f.endswith('/')]
        logging.info('cron/%s: rendering %s files from %s...' %
                     (channel, len(files), d))
        try:
            for i, f in enumerate(files):
                error = None
                path = '%s%s' % (path_prefix, f)
                try:
                    response = RenderServlet(
                        Request(path, self._request.headers)).Get(
                            server_instance=server_instance)
                    if response.status != 200:
                        error = 'Got %s response' % response.status
                except DeadlineExceededError:
                    logging.error(
                        'cron/%s: deadline exceeded rendering %s '
                        '(%s of %s): %s' %
                        (channel, path, i + 1, len(files),
                         traceback.format_exc()))
                    raise
                except Exception:
                    # Record the failure and keep going with the other files.
                    # (`except error:` named the local None, so it never
                    # caught anything.)
                    error = traceback.format_exc()
                if error:
                    logging.error('cron/%s: error rendering %s: %s' %
                                  (channel, path, error))
                    success = False
        finally:
            logging.info(
                'cron/%s: rendering %s files from %s took %s seconds' %
                (channel, len(files), d, time.time() - start_time))
        return success

    success = True
    for path, path_prefix in (
            # Note: rendering the public templates will pull in all of the
            # private templates.
            (svn_constants.PUBLIC_TEMPLATE_PATH, ''),
            # Note: rendering the public templates will have pulled in the .js
            # and manifest.json files (for listing examples on the API
            # reference pages), but there are still images, CSS, etc.
            (svn_constants.STATIC_PATH, 'static/'),
            (svn_constants.EXAMPLES_PATH, 'extensions/examples/')):
        try:
            # Note: don't try to short circuit any of this stuff. We want to
            # run the cron for all the directories regardless of intermediate
            # failures.
            success = run_cron_for_dir(path, path_prefix=path_prefix) and success
        except DeadlineExceededError:
            success = False
            break

    logging.info('cron/%s: finished' % channel)

    return (Response.Ok('Success') if success else
            Response.InternalError('Failure'))
def Get(self, server_instance=None):
    '''Serves the page, example file, zip or static file for the request.

    Args:
      server_instance: the ServerInstance to render with. When None, one is
        created for the request's channel: online if _ALWAYS_ONLINE is set,
        otherwise offline.

    Returns:
      A Redirect (to index.html, to the channel-less path when the default
      channel is named, or to the canonical path), an Ok Response carrying
      the content with content-type and cache headers, or a NotFound
      Response carrying the rendered '404' template.
    '''
    path_with_channel = self._request.path.lstrip('/')

    # Redirect "extensions" and "extensions/" to "extensions/index.html", etc.
    if (os.path.splitext(path_with_channel)[1] == '' and
            path_with_channel.find('/') == -1):
        path_with_channel += '/'
    if path_with_channel.endswith('/'):
        return Response.Redirect(path_with_channel + 'index.html')

    channel, path = BranchUtility.SplitChannelNameFromPath(path_with_channel)
    if channel == _DEFAULT_CHANNEL:
        return Response.Redirect('/%s' % path)

    # Record whether the URL named a channel *before* defaulting it, so the
    # canonical redirect below only re-adds a channel that was in the URL.
    url_has_channel = channel is not None
    if not url_has_channel:
        channel = _DEFAULT_CHANNEL

    # AppEngine instances should never need to call out to SVN. That should
    # only ever be done by the cronjobs, which then write the result into
    # DataStore, which is as far as instances look. To enable this, crons can
    # pass a custom (presumably online) ServerInstance into Get().
    #
    # Why? SVN is slow and a bit flaky. Cronjobs failing is annoying but
    # temporary. Instances failing affects users, and is really bad.
    #
    # Anyway - to enforce this, we actually don't give instances access to
    # SVN. If anything is missing from datastore, it'll be a 404. If the
    # cronjobs don't manage to catch everything - uhoh. On the other hand,
    # we'll figure it out pretty soon, and it also means that legitimate 404s
    # are caught before a round trip to SVN.
    if server_instance is None:
        # The ALWAYS_ONLINE thing is for tests and preview.py that shouldn't
        # need to run the cron before rendering things.
        constructor = (ServerInstance.CreateOnline if _ALWAYS_ONLINE else
                       ServerInstance.GetOrCreateOffline)
        server_instance = constructor(channel)

    canonical_path = server_instance.path_canonicalizer.Canonicalize(path)
    if path != canonical_path:
        return Response.Redirect(
            '%s/%s' % (channel, canonical_path) if url_has_channel
            else canonical_path)

    templates = server_instance.template_data_source_factory.Create(
        self._request, path)

    content = None
    content_type = None

    try:
        if fnmatch(path, 'extensions/examples/*.zip'):
            content = server_instance.example_zipper.Create(
                path[len('extensions/'):-len('.zip')])
            content_type = 'application/zip'
        elif path.startswith('extensions/examples/'):
            mimetype = mimetypes.guess_type(path)[0] or 'text/plain'
            content = server_instance.content_cache.GetFromFile(
                '%s/%s' % (svn_constants.DOCS_PATH, path[len('extensions/'):]),
                binary=_IsBinaryMimetype(mimetype))
            content_type = mimetype
        elif path.startswith('static/'):
            mimetype = mimetypes.guess_type(path)[0] or 'text/plain'
            content = server_instance.content_cache.GetFromFile(
                ('%s/%s' % (svn_constants.DOCS_PATH, path)),
                binary=_IsBinaryMimetype(mimetype))
            content_type = mimetype
        elif path.endswith('.html'):
            content = templates.Render(path)
            content_type = 'text/html'
    except FileNotFoundError:
        logging.warning(traceback.format_exc())
        content = None

    headers = {'x-frame-options': 'sameorigin'}
    if content is None:
        return Response.NotFound(templates.Render('404'), headers=headers)

    if not content:
        logging.error('%s had empty content' % path)

    headers.update({
        'content-type': content_type,
        'cache-control': 'max-age=300',
    })
    return Response.Ok(content, headers=headers)
def Get(self):
    '''Enqueues a task on the default queue for the requested path.

    The task URL is '/<request path>' and the request's arguments are
    forwarded as the task's params.

    Returns:
      An Ok Response once the task has been added.
    '''
    default_queue = taskqueue.Queue()
    task = taskqueue.Task(url='/%s' % self._request.path,
                          params=self._request.arguments)
    default_queue.add(task)
    return Response.Ok('Task enqueued.')
def Get(self):
    ''' Render the page for a request.

    The request path may start with a channel name. A path naming the
    default channel redirects to the channel-less path; redirects produced
    by the redirector or the path canonicalizer are prefixed with the
    channel only when the URL named a non-default channel. Requests for any
    'redirects.json' get an empty Ok.

    Returns:
      A Redirect, an Ok Response carrying the content with content-type and
      cache headers, or a NotFound Response carrying the 404 page of the
      path's top-level directory (falling back to 'extensions/404').
    '''
    channel, path = BranchUtility.SplitChannelNameFromPath(
        self._request.path)

    if path.split('/')[-1] == 'redirects.json':
        return Response.Ok('')

    if channel == self._default_channel:
        return Response.Redirect('/' + path)

    if channel is None:
        channel = self._default_channel

    # From here on |channel| equals the default exactly when the URL named no
    # channel (URLs naming the default were redirected above).
    url_has_channel = channel != self._default_channel

    server_instance = self._delegate.CreateServerInstanceForChannel(channel)

    redirect = server_instance.redirector.Redirect(self._request.host, path)
    if redirect is not None:
        # Keep site-relative redirects on the requested channel; absolute
        # http(s) URLs are left alone.
        if (url_has_channel and
                urlsplit(redirect).scheme not in ('http', 'https')):
            redirect = '/%s%s' % (channel, redirect)
        return Response.Redirect(redirect)

    canonical_path = server_instance.path_canonicalizer.Canonicalize(path)
    redirect = canonical_path.lstrip('/')
    if path != redirect:
        # Use the stripped form so no '//' appears after the channel.
        if url_has_channel:
            redirect = '%s/%s' % (channel, redirect)
        return Response.Redirect('/' + redirect)

    templates = server_instance.template_data_source_factory.Create(
        self._request, path)

    content = None
    content_type = None

    try:
        if fnmatch(path, 'extensions/examples/*.zip'):
            content = server_instance.example_zipper.Create(
                path[len('extensions/'):-len('.zip')])
            content_type = 'application/zip'
        elif path.startswith('extensions/examples/'):
            mimetype = mimetypes.guess_type(path)[0] or 'text/plain'
            content = server_instance.content_cache.GetFromFile(
                '%s/%s' % (svn_constants.DOCS_PATH, path[len('extensions/'):]),
                binary=_IsBinaryMimetype(mimetype))
            content_type = mimetype
        elif path.startswith('static/'):
            mimetype = mimetypes.guess_type(path)[0] or 'text/plain'
            content = server_instance.content_cache.GetFromFile(
                ('%s/%s' % (svn_constants.DOCS_PATH, path)),
                binary=_IsBinaryMimetype(mimetype))
            content_type = mimetype
        elif path.endswith('.html'):
            content = templates.Render(path)
            content_type = 'text/html'
    except FileNotFoundError:
        logging.warning(traceback.format_exc())
        content = None

    headers = {'x-frame-options': 'sameorigin'}
    if content is None:
        # Use the 404 page of the top-level directory, e.g. 'apps/404'.
        doc_class = path.split('/', 1)[0]
        content = templates.Render('%s/404' % doc_class)
        if not content:
            content = templates.Render('extensions/404')
        return Response.NotFound(content, headers=headers)

    if not content:
        logging.error('%s had empty content' % path)

    headers.update({
        'content-type': content_type,
        'cache-control': 'max-age=300',
    })
    return Response.Ok(content, headers=headers)
def Get(self):
    '''Reports the commit ID currently tracked under the requested name.

    The request path is the commit name.

    Returns:
      An Ok Response whose body is the value the CommitTracker holds for
      that name.
    '''
    commit_name = self._request.path
    commit_tracker = CommitTracker(ObjectStoreCreator(start_empty=False))
    current_commit = commit_tracker.Get(commit_name).Get()
    return Response.Ok(current_commit)
def _GetImpl(self):
    '''Renders every public template, static file and example zip.

    Each file is rendered through RenderServlet against the safe server
    instance; any non-200 response or rendering exception marks the run as
    failed without stopping it. Outside the dev server the examples and
    their zips are rendered too. Finally 'storage.html' is requested and is
    expected to answer with a 302. A DeadlineExceededError stops the run.

    Returns:
      Ok when every step succeeded, otherwise InternalError.
    '''
    # Cron strategy:
    #
    # Find all public template files and static files, and render them. Most
    # of the time these won't have changed since the last cron run, so it's a
    # little wasteful, but hopefully rendering is really fast (if it isn't we
    # have a problem).
    channel = self._channel
    logging.info('cron/%s: starting' % channel)

    # This is returned every time RenderServlet wants to create a new
    # ServerInstance.
    server_instance = self._GetSafeServerInstance()

    def get_via_render_servlet(path):
        return RenderServlet(
            Request(path, self._request.host, self._request.headers),
            _SingletonRenderServletDelegate(server_instance)).Get()

    def run_cron_for_dir(d, path_prefix=''):
        # Renders every file listed under |d|; True iff all returned 200.
        success = True
        start_time = time.time()
        files = [f for f in server_instance.content_cache.GetFromFileListing(d)
                 if not f.endswith('/')]
        logging.info('cron/%s: rendering %s files from %s...' % (
            channel, len(files), d))
        try:
            for i, f in enumerate(files):
                error = None
                path = '%s%s' % (path_prefix, f)
                try:
                    response = get_via_render_servlet(path)
                    if response.status != 200:
                        error = 'Got %s response' % response.status
                except DeadlineExceededError:
                    logging.error(
                        'cron/%s: deadline exceeded rendering %s '
                        '(%s of %s): %s' % (
                            channel, path, i + 1, len(files),
                            traceback.format_exc()))
                    raise
                except Exception:
                    # Record the failure and keep going with the other files.
                    # (`except error:` named the local None, so it never
                    # caught anything.)
                    error = traceback.format_exc()
                if error:
                    logging.error('cron/%s: error rendering %s: %s' % (
                        channel, path, error))
                    success = False
        finally:
            logging.info(
                'cron/%s: rendering %s files from %s took %s seconds' % (
                    channel, len(files), d, time.time() - start_time))
        return success

    success = True
    try:
        # Render all of the publicly accessible files.
        cron_runs = [
            # Note: rendering the public templates will pull in all of the
            # private templates.
            (svn_constants.PUBLIC_TEMPLATE_PATH, ''),
            # Note: rendering the public templates will have pulled in the .js
            # and manifest.json files (for listing examples on the API
            # reference pages), but there are still images, CSS, etc.
            (svn_constants.STATIC_PATH, 'static/'),
        ]
        if not IsDevServer():
            cron_runs.append(
                (svn_constants.EXAMPLES_PATH, 'extensions/examples/'))

        # Note: don't try to short circuit any of this stuff. We want to run
        # the cron for all the directories regardless of intermediate
        # failures.
        for path, path_prefix in cron_runs:
            success = run_cron_for_dir(path, path_prefix=path_prefix) and success

        # TODO(kalman): Generic way for classes to request cron access. The
        # next two special cases are ugly. It would potentially greatly speed
        # up cron runs, too.

        # Extension examples have zip files too. Well, so do apps, but the app
        # file system doesn't get the Offline treatment so they don't need
        # cron.
        if not IsDevServer():
            manifest_json = '/manifest.json'
            example_zips = [
                '%s.zip' % filename[:-len(manifest_json)]
                for filename in server_instance.content_cache.GetFromFileListing(
                    svn_constants.EXAMPLES_PATH)
                if filename.endswith(manifest_json)]
            logging.info('cron/%s: rendering %s example zips...' % (
                channel, len(example_zips)))
            start_time = time.time()
            try:
                success = success and all(
                    get_via_render_servlet(
                        'extensions/examples/%s' % z).status == 200
                    for z in example_zips)
            finally:
                logging.info(
                    'cron/%s: rendering %s example zips took %s seconds' % (
                        channel, len(example_zips), time.time() - start_time))

        # Also trigger a redirect so that PathCanonicalizer has an opportunity
        # to cache file listings.
        logging.info('cron/%s: triggering a redirect...' % channel)
        redirect_response = get_via_render_servlet('storage.html')
        success = success and redirect_response.status == 302
    except DeadlineExceededError:
        success = False

    logging.info('cron/%s: finished' % channel)

    return (Response.Ok('Success') if success else
            Response.InternalError('Failure'))
def _GetImpl(self):
    '''Refreshes one data source and records the task as completed.

    The request path has the form '<source_name>' or
    '<source_name>/<source_path>'. After a successful refresh the
    (commit, path) pair is marked complete in the RefreshTracker; if that
    completes the whole refresh, the 'master' commit is advanced.

    Returns:
      Ok once the refresh and bookkeeping have finished, or a ThrottledError
      Response when they fail with a file-system throttling error. Any other
      exception is logged and re-raised.
    '''
    path = self._request.path.strip('/')
    # NOTE: the name/path split is taken from the raw request path, not from
    # the stripped |path| above.
    pieces = self._request.path.split('/', 1)
    source_name = pieces[0]
    source_path = pieces[1] if len(pieces) == 2 else None

    if source_path is None:
        path_label = ''
    else:
        path_label = '[%s]' % source_path
    _log.info('starting refresh of %s DataSource %s' %
              (source_name, path_label))

    if 'commit' in self._request.arguments:
        commit = self._request.arguments['commit']
    else:
        _log.warning('No commit given; refreshing from master. '
                     'This is probably NOT what you want.')
        commit = None

    server_instance = self._CreateServerInstance(commit)
    commit_tracker = CommitTracker(server_instance.object_store_creator)
    refresh_tracker = RefreshTracker(server_instance.object_store_creator)

    # If no commit was given, use the ID of the last cached master commit.
    # This allows sources external to the chromium repository to be updated
    # independently from individual refresh cycles.
    if commit is None:
        commit = commit_tracker.Get('master').Get()

    def check_refresh_complete(_):
        return refresh_tracker.GetRefreshComplete(commit)

    def advance_master_if_complete(is_complete):
        if is_complete:
            return commit_tracker.Set('master', commit)
        return None

    success = True
    try:
        if source_name == 'platform_bundle':
            data_source = server_instance.platform_bundle
        elif source_name == 'content_providers':
            data_source = server_instance.content_providers
        else:
            data_source = CreateDataSource(source_name, server_instance)

        class_name = data_source.__class__.__name__
        refresh_future = data_source.Refresh(source_path)
        assert isinstance(refresh_future, Future), (
            '%s.Refresh() did not return a Future' % class_name)

        timer = Timer()
        try:
            refresh_future.Get()

            # Mark this (commit, task) pair as completed and then see if this
            # concludes the full cache refresh. The list of tasks required to
            # complete a cache refresh is registered (and keyed on commit ID)
            # by the CronServlet before kicking off all the refresh tasks.
            bookkeeping = refresh_tracker.MarkTaskComplete(commit, path)
            bookkeeping = bookkeeping.Then(check_refresh_complete)
            bookkeeping = bookkeeping.Then(advance_master_if_complete)
            bookkeeping.Get()
        except Exception as e:
            _log.error('%s: error %s' % (class_name, traceback.format_exc()))
            success = False
            if IsFileSystemThrottledError(e):
                return Response.ThrottledError('Throttled')
            raise
        finally:
            _log.info('Refreshing %s took %s' %
                      (class_name, timer.Stop().FormatElapsed()))
    except:
        success = False
        # This should never actually happen.
        _log.error('uncaught error: %s' % traceback.format_exc())
        raise
    finally:
        _log.info('finished (%s)', 'success' if success else 'FAILED')

    if success:
        return Response.Ok('Success')
    return Response.InternalError('Failure')
def _GetImpl(self):
  '''Renders every public file through a RenderServlet and reports the result.

  Cron strategy:

  Find all public template files and static files, and render them. Most of
  the time these won't have changed since the last cron run, so it's a
  little wasteful, but hopefully rendering is really fast (if it isn't we
  have a problem).

  Returns Response.Ok if every render returned a 200 and no deadline was hit,
  otherwise Response.InternalError.
  '''
  logging.info('cron: starting')

  # This is returned every time RenderServlet wants to create a new
  # ServerInstance.
  server_instance = self._GetSafeServerInstance()

  def get_via_render_servlet(path):
    '''Renders |path| through a RenderServlet and returns its Response.'''
    request = Request(path, self._request.host, self._request.headers)
    delegate = _SingletonRenderServletDelegate(server_instance)
    return RenderServlet(request, delegate).Get()

  def run_cron_for_dir(d, path_prefix=''):
    '''Renders every file under directory |d|, requesting each one with
    |path_prefix|. Returns True only if every file rendered with status 200.
    '''
    success = True
    start_time = time.time()
    files = dict(
        CreateURLsFromPaths(server_instance.host_file_system, d, path_prefix))
    logging.info('cron: rendering %s files from %s...' % (len(files), d))
    try:
      for i, path in enumerate(files):
        error = None
        try:
          response = get_via_render_servlet(path)
          if response.status != 200:
            error = 'Got %s response' % response.status
        except DeadlineExceededError:
          logging.error(
              'cron: deadline exceeded rendering %s (%s of %s): %s' %
              (path, i + 1, len(files), traceback.format_exc()))
          raise
        except Exception:
          # Record the failure and keep going so that one broken file doesn't
          # stop the remaining files from being rendered. This used to be
          # "except error:", which (with |error| always None at this point)
          # could never catch anything.
          error = traceback.format_exc()
        if error:
          logging.error('cron: error rendering %s: %s' % (path, error))
          success = False
    finally:
      logging.info('cron: rendering %s files from %s took %s seconds' %
                   (len(files), d, time.time() - start_time))
    return success

  success = True
  try:
    # Render all of the publicly accessible files.
    cron_runs = [
      # Note: rendering the public templates will pull in all of the private
      # templates.
      (svn_constants.PUBLIC_TEMPLATE_PATH, ''),

      # Note: rendering the public templates will have pulled in the .js
      # and manifest.json files (for listing examples on the API reference
      # pages), but there are still images, CSS, etc.
      (svn_constants.STATIC_PATH, 'static/'),
    ]
    if not IsDevServer():
      cron_runs.append(
          (svn_constants.EXAMPLES_PATH, 'extensions/examples/'))

    # Note: don't try to short circuit any of this stuff. We want to run
    # the cron for all the directories regardless of intermediate
    # failures.
    for path, path_prefix in cron_runs:
      success = run_cron_for_dir(path, path_prefix=path_prefix) and success

    # TODO(kalman): Generic way for classes to request cron access. The next
    # two special cases are ugly. It would potentially greatly speed up cron
    # runs, too.

    # Extension examples have zip files too. Well, so do apps, but the app
    # file system doesn't get the Offline treatment so they don't need cron.
    if not IsDevServer():
      manifest_json = 'manifest.json'
      example_zips = []
      for root, _, files in server_instance.host_file_system.Walk(
          svn_constants.EXAMPLES_PATH):
        # A directory containing a manifest.json is served as "<dir>.zip".
        example_zips.extend(
            root + '.zip' for name in files if name == manifest_json)
      logging.info('cron: rendering %s example zips...' % len(example_zips))
      start_time = time.time()
      try:
        success = success and all(
            get_via_render_servlet('extensions/examples/%s' % z).status == 200
            for z in example_zips)
      finally:
        logging.info('cron: rendering %s example zips took %s seconds' %
                     (len(example_zips), time.time() - start_time))
  except DeadlineExceededError:
    # Already logged where it was raised; still run the Redirector cron below.
    success = False

  logging.info('cron: running Redirector cron...')
  server_instance.redirector.Cron()

  logging.info('cron: finished (%s)' % ('success' if success else 'failure', ))

  return (Response.Ok('Success') if success else
          Response.InternalError('Failure'))
def _GetSuccessResponse(self, request_path, server_instance):
  '''Renders |request_path| with |server_instance| and returns the Response.

  The responses built here are Redirect, NotModified and Ok. An empty path
  that nothing redirected raises FileNotFoundError.
  '''
  providers = server_instance.content_providers
  content_provider, serve_from, path = providers.GetByServeFrom(request_path)
  assert content_provider, 'No ContentProvider found for %s' % path

  redirector = Redirector(server_instance.compiled_fs_factory,
                          content_provider.file_system)
  redirect = redirector.Redirect(self._request.host, path)
  if redirect is not None:
    # An absolute redirect is served untouched; a relative one is resolved
    # against |serve_from| because every redirect is served as absolute.
    if redirect.startswith('/'):
      target = redirect
    else:
      target = '/' + posixpath.join(serve_from, redirect)
    return Response.Redirect(target, permanent=False)

  canonical_path = content_provider.GetCanonicalPath(path)
  if canonical_path != path:
    canonical_target = '/' + posixpath.join(serve_from, canonical_path)
    return Response.Redirect(canonical_target, permanent=False)

  if request_path.endswith('/'):
    # Nothing has redirected this directory request yet, so fall back to
    # redirecting as though it named a file.
    file_like_target = '/' + request_path.rstrip('/')
    return Response.Redirect(file_like_target, permanent=False)

  if not path:
    # Nothing has redirected the empty path either, so it does not exist.
    raise FileNotFoundError('Empty path')

  content_and_type = content_provider.GetContentAndType(path).Get()
  content = content_and_type.content
  if not content:
    logging.error('%s had empty content' % path)

  # |etag| stays None when it has to be derived from the final content, which
  # is the case for rendered templates and for unversioned static content
  # (for example .zip files).
  etag = None
  if isinstance(content, Handlebar):
    rendered_template = server_instance.template_renderer.Render(
        content, self._request)
    template_content, template_warnings = rendered_template
    # HACK: the site verification file (google2ed...) doesn't have a title.
    wants_title = path != SITE_VERIFICATION_FILE
    content, doc_warnings = server_instance.document_renderer.Render(
        template_content, path, render_title=wants_title)
    warnings = template_warnings + doc_warnings
    if warnings:
      sep = '\n - '
      logging.warning('Rendering %s:%s%s' % (path, sep, sep.join(warnings)))
  elif content_and_type.version is not None:
    # Static, versioned content: the etag is the hashed version, hashing
    # being what guarantees a valid etag value.
    version_digest = hashlib.md5(str(content_and_type.version)).hexdigest()
    etag = '"%s"' % version_digest

  content_type = content_and_type.content_type
  if isinstance(content, unicode):
    content = content.encode('utf-8')
    content_type = content_type + '; charset=utf-8'

  if etag is None:
    # md5 is only a convenient, fast-enough content identifier here; it is
    # not meant to provide any cryptographic guarantee.
    etag = '"%s"' % hashlib.md5(content).hexdigest()

  headers = _MakeHeaders(content_type, etag=etag)
  if etag == self._request.headers.get('If-None-Match'):
    return Response.NotModified('Not Modified', headers=headers)
  return Response.Ok(content, headers=headers)
def _GetImpl(self):
  '''Runs one full cron pass and reports whether every step succeeded.

  Cron strategy: every cron target (each DataSource plus the content
  providers) has its Cron() started up front. While those are pending, all
  public templates, static docs and (off the dev server) example files and
  zips are requested through a RenderServlet. The Cron() futures are resolved
  last. Most files won't have changed since the last run, so this is a little
  wasteful, but rendering is expected to be fast.

  Returns Response.Ok if every step reported success, otherwise
  Response.InternalError. Unexpected exceptions are logged and re-raised.
  '''
  _cronlog.info('starting')

  # This is returned every time RenderServlet wants to create a new
  # ServerInstance.
  #
  # TODO(kalman): IMPORTANT. This sometimes throws an exception, breaking
  # everything. Need retry logic at the fetcher level.
  server_instance = self._GetSafeServerInstance()
  trunk_fs = server_instance.host_file_system_provider.GetTrunk()

  # One boolean per cron step; the run succeeds only if all of them are true.
  results = []

  def render(path):
    '''Requests |path| from a RenderServlet and returns its Response.'''
    return RenderServlet(
        Request(path, self._request.host, self._request.headers),
        _SingletonRenderServletDelegate(server_instance)).Get()

  def request_files_in_dir(path, prefix='', strip_ext=None):
    '''Requests every file found under |path| in this host file system, with
    a request prefix of |prefix|. |strip_ext| is an optional list of file
    extensions that should be stripped from paths before requesting.
    '''
    def request_name_for(name):
      # The site verification file always keeps its extension.
      if name != SITE_VERIFICATION_FILE and strip_ext:
        base, ext = posixpath.splitext(name)
        if ext in strip_ext:
          return base
      return name

    urls = CreateURLsFromPaths(trunk_fs, path, prefix)
    files = [request_name_for(url_name) for url_name, _ in urls]
    return _RequestEachItem(path, files, render)

  def run_cron_for_future(target):
    '''Starts |target|.Cron() now and returns a Future which, when resolved,
    waits for it and logs how long initializing and resolving took.
    '''
    target_name = target.__class__.__name__
    future, init_timer = TimerClosure(target.Cron)
    assert isinstance(future, Future), (
        '%s.Cron() did not return a Future' % target_name)

    def resolve():
      resolve_timer = Timer()
      try:
        future.Get()
      except Exception as e:
        _cronlog.error('%s: error %s' % (target_name, traceback.format_exc()))
        results.append(False)
        # Only deadline errors propagate; other failures are just recorded.
        if IsDeadlineExceededError(e):
          raise
      finally:
        resolve_timer.Stop()
        _cronlog.info(
            '%s took %s: %s to initialize and %s to resolve' %
            (target_name,
             init_timer.With(resolve_timer).FormatElapsed(),
             init_timer.FormatElapsed(),
             resolve_timer.FormatElapsed()))

    return Future(delegate=Gettable(resolve))

  try:
    # Start running the hand-written Cron methods first; they can be run in
    # parallel. They are resolved at the end.
    targets = (CreateDataSources(server_instance).values() +
               [server_instance.content_providers])
    title = 'initializing %s parallel Cron targets' % len(targets)
    _cronlog.info(title)
    timer = Timer()
    try:
      cron_futures = []
      for target in targets:
        cron_futures.append(run_cron_for_future(target))
    finally:
      _cronlog.info('%s took %s' % (title, timer.Stop().FormatElapsed()))

    # Rendering the public templates will also pull in all of the private
    # templates.
    templates_ok = request_files_in_dir(PUBLIC_TEMPLATES,
                                        strip_ext=('.html', '.md'))
    results.append(templates_ok)

    # Rendering the public templates will have pulled in the .js and
    # manifest.json files (for listing examples on the API reference pages),
    # but there are still images, CSS, etc.
    static_ok = request_files_in_dir(STATIC_DOCS, prefix='static')
    results.append(static_ok)

    # Samples are too expensive to run on the dev server, where there is no
    # parallel fetch.
    if not IsDevServer():
      # Fetch each individual sample file.
      samples_ok = request_files_in_dir(EXAMPLES,
                                        prefix='extensions/examples')
      results.append(samples_ok)

      # Fetch the zip file of each example (contains all the individual
      # files). A directory holding a manifest.json is requested as
      # "<dir>.zip".
      example_zips = [
          root + '.zip'
          for root, _, files in trunk_fs.Walk(EXAMPLES)
          for name in files
          if name == 'manifest.json'
      ]
      zips_ok = _RequestEachItem(
          'example zips',
          example_zips,
          lambda path: render('extensions/examples/' + path))
      results.append(zips_ok)

    # Resolve the hand-written Cron method futures.
    title = 'resolving %s parallel Cron targets' % len(targets)
    _cronlog.info(title)
    timer = Timer()
    try:
      for pending in cron_futures:
        pending.Get()
    finally:
      _cronlog.info('%s took %s' % (title, timer.Stop().FormatElapsed()))

  except:
    results.append(False)
    # This should never actually happen (each cron step does its own
    # conservative error checking), so re-raise no matter what it is.
    _cronlog.error('uncaught error: %s' % traceback.format_exc())
    raise
  finally:
    success = all(results)
    _cronlog.info('finished (%s)', 'success' if success else 'FAILED')

  if success:
    return Response.Ok('Success')
  return Response.InternalError('Failure')