def Get(self):
  """Runs one of the link-error integrity tests and reports the result.

  The test name is taken from the request path and must be one of
  'broken_links', 'orphaned_pages', or 'link_errors' (which runs both).

  Returns:
    Response.Ok when the selected test passes, Response.InternalError with
    the test output when it finds errors, or Response.NotFound for an
    unknown test name.
  """
  link_error_tests = ('broken_links', 'orphaned_pages', 'link_errors')
  # Idiomatic membership test ('x not in y' rather than 'not x in y').
  if self._request.path not in link_error_tests:
    return Response.NotFound('Test %s not found. Available tests are: %s' %
                             (self._request.path, ','.join(link_error_tests)))
  constructor = InstanceServlet.GetConstructor(self._delegate)
  # Render |path| through a fresh InstanceServlet so the link tester sees
  # exactly what a real request would see.
  def renderer(path):
    return constructor(Request(path, '', self._request.headers)).Get()
  link_tester = BrokenLinkTester(
      InstanceServletRenderServletDelegate(
          self._delegate).CreateServerInstance(),
      renderer)
  if self._request.path == 'broken_links':
    errors, content = link_tester.TestBrokenLinks()
  elif self._request.path == 'orphaned_pages':
    errors, content = link_tester.TestOrphanedPages()
  else:
    # 'link_errors' runs both tests and combines their error counts/output.
    link_errors, link_content = link_tester.TestBrokenLinks()
    orphaned_errors, orphaned_content = link_tester.TestOrphanedPages()
    errors = link_errors + orphaned_errors
    content = "%s\n%s" % (link_content, orphaned_content)
  if errors:
    return Response.InternalError(content=content)
  return Response.Ok(content="%s test passed." % self._request.path)
def _GetImpl(self):
  """Kicks off a full cache refresh by enqueuing one taskqueue task per
  refresh path of every refresh target (DataSources, PlatformBundle,
  ContentProviders), all pinned to the current master commit.
  """
  # Cron strategy:
  #
  # Collect all DataSources, the PlatformBundle, the ContentProviders, and
  # any other statically renderered contents (e.g. examples content),
  # and spin up taskqueue tasks which will refresh any cached data relevant
  # to these assets.
  #
  # TODO(rockot/kalman): At the moment examples are not actually refreshed
  # because they're too slow.
  _log.info('starting')
  server_instance = self._GetSafeServerInstance()
  master_fs = server_instance.host_file_system_provider.GetMaster()
  # Capture the commit ID once so every enqueued task refreshes the same
  # snapshot of the repository.
  master_commit = master_fs.GetCommitID().Get()
  # This is the guy that would be responsible for refreshing the cache of
  # examples. Here for posterity, hopefully it will be added to the targets
  # below someday.
  render_refresher = RenderRefresher(server_instance, self._request)
  # Get the default taskqueue
  queue = taskqueue.Queue()
  # GAE documentation specifies that it's bad to add tasks to a queue
  # within one second of purging. We wait 2 seconds, because we like
  # to go the extra mile.
  queue.purge()
  time.sleep(2)
  success = True
  try:
    data_sources = CreateDataSources(server_instance)
    # NOTE: dict.items() + list concatenation implies Python 2 semantics
    # (items() returns a list there).
    targets = (data_sources.items() +
               [('content_providers', server_instance.content_providers),
                ('platform_bundle', server_instance.platform_bundle)])
    title = 'initializing %s parallel targets' % len(targets)
    _log.info(title)
    timer = Timer()
    for name, target in targets:
      refresh_paths = target.GetRefreshPaths()
      # One task per (target, path); the task handler is routed by the
      # '/_refresh/<name>/<path>' URL and told which commit to refresh.
      for path in refresh_paths:
        queue.add(taskqueue.Task(url='/_refresh/%s/%s' % (name, path),
                                 params={'commit': master_commit}))
    _log.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
  except:
    # This should never actually happen (each cron step does its own
    # conservative error checking), so re-raise no matter what it is.
    _log.error('uncaught error: %s' % traceback.format_exc())
    success = False
    raise
  finally:
    _log.info('finished (%s)', 'success' if success else 'FAILED')
  # Only reached on the non-exception path (the except clause re-raises).
  return (Response.Ok('Success') if success
          else Response.InternalError('Failure'))
def _GetImpl(self):
  """Refreshes the cache of a single data source.

  The request path has the form '<source_name>[/<source_path>]'. The
  optional 'commit' request argument pins the refresh to that commit;
  without it the refresh runs against master (with a warning).

  Returns:
    Response.Ok on success, Response.ThrottledError if the underlying file
    system throttled the refresh, Response.InternalError otherwise.
  """
  path = self._request.path.strip('/')
  # Fix: split the *stripped* path. Splitting self._request.path directly
  # (as before) made |path| dead code and produced an empty source name
  # whenever the request path carried a leading '/'.
  parts = path.split('/', 1)
  source_name = parts[0]
  if len(parts) == 2:
    source_path = parts[1]
  else:
    source_path = None
  _log.info('starting refresh of %s DataSource %s' %
            (source_name,
             '' if source_path is None else '[%s]' % source_path))
  if 'commit' in self._request.arguments:
    commit = self._request.arguments['commit']
  else:
    _log.warning('No commit given; refreshing from master. '
                 'This is probably NOT what you want.')
    commit = None
  server_instance = self._CreateServerInstance(commit)
  success = True
  try:
    # 'platform_bundle' and 'content_providers' are special targets that
    # live on the server instance; everything else is a named DataSource.
    if source_name == 'platform_bundle':
      data_source = server_instance.platform_bundle
    elif source_name == 'content_providers':
      data_source = server_instance.content_providers
    else:
      data_source = CreateDataSource(source_name, server_instance)
    class_name = data_source.__class__.__name__
    refresh_future = data_source.Refresh(source_path)
    assert isinstance(refresh_future, Future), (
        '%s.Refresh() did not return a Future' % class_name)
    timer = Timer()
    try:
      refresh_future.Get()
    except Exception as e:
      _log.error('%s: error %s' % (class_name, traceback.format_exc()))
      success = False
      # Throttling is an expected, retryable condition; report it as such.
      if IsFileSystemThrottledError(e):
        return Response.ThrottledError('Throttled')
      raise
    finally:
      _log.info('Refreshing %s took %s' %
                (class_name, timer.Stop().FormatElapsed()))
  except:
    success = False  # This should never actually happen.
    _log.error('uncaught error: %s' % traceback.format_exc())
    raise
  finally:
    _log.info('finished (%s)', 'success' if success else 'FAILED')
  # Only reached when no exception escaped the try above.
  return (Response.Ok('Success') if success
          else Response.InternalError('Failure'))
def _GetImpl(self):
  """Renders every public template, static file, and (outside the dev
  server) example file and example zip, to warm the render caches.

  Returns Response.Ok if every render succeeded, Response.InternalError
  otherwise.
  """
  # Cron strategy:
  #
  # Find all public template files and static files, and render them. Most of
  # the time these won't have changed since the last cron run, so it's a
  # little wasteful, but hopefully rendering is really fast (if it isn't we
  # have a problem).
  logging.info('cron: starting')
  # This is returned every time RenderServlet wants to create a new
  # ServerInstance.
  server_instance = self._GetSafeServerInstance()

  def get_via_render_servlet(path):
    # Render |path| exactly as a user-facing request would be rendered.
    request = Request(path, self._request.host, self._request.headers)
    delegate = _SingletonRenderServletDelegate(server_instance)
    return RenderServlet(request, delegate).Get()

  def run_cron_for_dir(d, path_prefix=''):
    # Renders every file under |d| (URLs prefixed with |path_prefix|),
    # logging failures. Returns False if any render failed.
    success = True
    start_time = time.time()
    files = dict(
        CreateURLsFromPaths(server_instance.host_file_system, d, path_prefix))
    logging.info('cron: rendering %s files from %s...' % (len(files), d))
    try:
      for i, path in enumerate(files):
        error = None
        try:
          response = get_via_render_servlet(path)
          if response.status != 200:
            error = 'Got %s response' % response.status
        except DeadlineExceededError:
          logging.error(
              'cron: deadline exceeded rendering %s (%s of %s): %s' %
              (path, i + 1, len(files), traceback.format_exc()))
          raise
        except Exception:
          # Fix: this was 'except error:', but |error| is None here, which
          # is not a valid exception class — any non-deadline failure was
          # never handled. Record it so the run is marked failed and the
          # remaining files still get rendered.
          error = 'Caught exception: %s' % traceback.format_exc()
        if error:
          logging.error('cron: error rendering %s: %s' % (path, error))
          success = False
    finally:
      logging.info('cron: rendering %s files from %s took %s seconds' %
                   (len(files), d, time.time() - start_time))
    return success

  success = True
  try:
    # Render all of the publicly accessible files.
    cron_runs = [
      # Note: rendering the public templates will pull in all of the private
      # templates.
      (svn_constants.PUBLIC_TEMPLATE_PATH, ''),
      # Note: rendering the public templates will have pulled in the .js
      # and manifest.json files (for listing examples on the API reference
      # pages), but there are still images, CSS, etc.
      (svn_constants.STATIC_PATH, 'static/'),
    ]
    if not IsDevServer():
      cron_runs.append(
          (svn_constants.EXAMPLES_PATH, 'extensions/examples/'))
    # Note: don't try to short circuit any of this stuff. We want to run
    # the cron for all the directories regardless of intermediate
    # failures.
    for path, path_prefix in cron_runs:
      success = run_cron_for_dir(path, path_prefix=path_prefix) and success
    # TODO(kalman): Generic way for classes to request cron access. The next
    # two special cases are ugly. It would potentially greatly speed up cron
    # runs, too.
    # Extension examples have zip files too. Well, so do apps, but the app
    # file system doesn't get the Offline treatment so they don't need cron.
    if not IsDevServer():
      manifest_json = 'manifest.json'
      example_zips = []
      # Every directory containing a manifest.json is an example; its zip
      # lives at '<dir>.zip'.
      for root, _, files in server_instance.host_file_system.Walk(
          svn_constants.EXAMPLES_PATH):
        example_zips.extend(root + '.zip' for name in files
                            if name == manifest_json)
      logging.info('cron: rendering %s example zips...' % len(example_zips))
      start_time = time.time()
      try:
        # Fix: evaluate all(...) first so earlier failures don't
        # short-circuit zip rendering (matching the 'don't short circuit'
        # policy above).
        success = all(
            get_via_render_servlet('extensions/examples/%s' % z).status == 200
            for z in example_zips) and success
      finally:
        logging.info('cron: rendering %s example zips took %s seconds' %
                     (len(example_zips), time.time() - start_time))
  except DeadlineExceededError:
    success = False
  logging.info('cron: running Redirector cron...')
  server_instance.redirector.Cron()
  logging.info('cron: finished (%s)' %
               ('success' if success else 'failure',))
  return (Response.Ok('Success') if success
          else Response.InternalError('Failure'))
def _GetImpl(self):
  """Warms the render caches: kicks off every target's Cron() future in
  parallel, renders all public templates, static files, and (outside the
  dev server) example files and zips, then resolves the Cron() futures.

  Returns Response.Ok only if every step reported success.
  """
  # Cron strategy:
  #
  # Find all public template files and static files, and render them. Most of
  # the time these won't have changed since the last cron run, so it's a
  # little wasteful, but hopefully rendering is really fast (if it isn't we
  # have a problem).
  _cronlog.info('starting')
  # This is returned every time RenderServlet wants to create a new
  # ServerInstance.
  #
  # TODO(kalman): IMPORTANT. This sometimes throws an exception, breaking
  # everything. Need retry logic at the fetcher level.
  server_instance = self._GetSafeServerInstance()
  trunk_fs = server_instance.host_file_system_provider.GetTrunk()

  def render(path):
    # Render |path| exactly as a user-facing request would be rendered.
    request = Request(path, self._request.host, self._request.headers)
    delegate = _SingletonRenderServletDelegate(server_instance)
    return RenderServlet(request, delegate).Get()

  def request_files_in_dir(path, prefix='', strip_ext=None):
    '''Requests every file found under |path| in this host file system, with
    a request prefix of |prefix|. |strip_ext| is an optional list of file
    extensions that should be stripped from paths before requesting.
    '''
    def maybe_strip_ext(name):
      # The site verification file must keep its exact name to be found.
      if name == SITE_VERIFICATION_FILE or not strip_ext:
        return name
      base, ext = posixpath.splitext(name)
      return base if ext in strip_ext else name
    files = [maybe_strip_ext(name)
             for name, _ in CreateURLsFromPaths(trunk_fs, path, prefix)]
    return _RequestEachItem(path, files, render)

  # Each step appends its boolean outcome here; overall success is all().
  results = []
  try:
    # Start running the hand-written Cron methods first; they can be run in
    # parallel. They are resolved at the end.
    def run_cron_for_future(target):
      # Wraps target.Cron() in a Future whose Get() logs timing and
      # records failure in |results| instead of (usually) propagating.
      title = target.__class__.__name__
      future, init_timer = TimerClosure(target.Cron)
      assert isinstance(future, Future), (
          '%s.Cron() did not return a Future' % title)
      def resolve():
        resolve_timer = Timer()
        try:
          future.Get()
        except Exception as e:
          _cronlog.error('%s: error %s' % (title, traceback.format_exc()))
          results.append(False)
          # Deadline exhaustion must abort the whole cron run.
          if IsDeadlineExceededError(e):
            raise
        finally:
          resolve_timer.Stop()
          _cronlog.info('%s took %s: %s to initialize and %s to resolve' %
              (title,
               init_timer.With(resolve_timer).FormatElapsed(),
               init_timer.FormatElapsed(),
               resolve_timer.FormatElapsed()))
      return Future(delegate=Gettable(resolve))

    # NOTE: dict.values() + list concatenation implies Python 2 semantics.
    targets = (CreateDataSources(server_instance).values() +
               [server_instance.content_providers])
    title = 'initializing %s parallel Cron targets' % len(targets)
    _cronlog.info(title)
    timer = Timer()
    try:
      cron_futures = [run_cron_for_future(target) for target in targets]
    finally:
      _cronlog.info('%s took %s' % (title, timer.Stop().FormatElapsed()))

    # Rendering the public templates will also pull in all of the private
    # templates.
    results.append(request_files_in_dir(PUBLIC_TEMPLATES,
                                        strip_ext=('.html', '.md')))
    # Rendering the public templates will have pulled in the .js and
    # manifest.json files (for listing examples on the API reference pages),
    # but there are still images, CSS, etc.
    results.append(request_files_in_dir(STATIC_DOCS, prefix='static'))

    # Samples are too expensive to run on the dev server, where there is no
    # parallel fetch.
    if not IsDevServer():
      # Fetch each individual sample file.
      results.append(request_files_in_dir(EXAMPLES,
                                          prefix='extensions/examples'))
      # Fetch the zip file of each example (contains all the individual
      # files).
      example_zips = []
      # A directory containing manifest.json is an example; its zip lives
      # at '<dir>.zip'.
      for root, _, files in trunk_fs.Walk(EXAMPLES):
        example_zips.extend(
            root + '.zip' for name in files if name == 'manifest.json')
      results.append(_RequestEachItem(
          'example zips',
          example_zips,
          lambda path: render('extensions/examples/' + path)))

    # Resolve the hand-written Cron method futures.
    title = 'resolving %s parallel Cron targets' % len(targets)
    _cronlog.info(title)
    timer = Timer()
    try:
      for future in cron_futures:
        future.Get()
    finally:
      _cronlog.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
  except:
    results.append(False)
    # This should never actually happen (each cron step does its own
    # conservative error checking), so re-raise no matter what it is.
    _cronlog.error('uncaught error: %s' % traceback.format_exc())
    raise
  finally:
    success = all(results)
    _cronlog.info('finished (%s)', 'success' if success else 'FAILED')
  # Only reached on the non-exception path (the except clause re-raises).
  return (Response.Ok('Success') if success
          else Response.InternalError('Failure'))
def _GetImpl(self):
  """Refreshes the cache of a single data source and records progress.

  The request path has the form '<source_name>[/<source_path>]'. The
  optional 'commit' request argument pins the refresh to that commit;
  without it the last cached master commit is used. On success the
  (commit, task) pair is marked complete, and when all registered tasks
  for the commit are complete the 'master' commit pointer is advanced.

  Returns:
    Response.Ok on success, Response.ThrottledError if the underlying file
    system throttled the refresh, Response.InternalError otherwise.
  """
  path = self._request.path.strip('/')
  # Fix: split the *stripped* path. Splitting self._request.path directly
  # (as before) could yield an empty source name for a leading '/', and
  # disagreed with the stripped |path| used as the task key below.
  parts = path.split('/', 1)
  source_name = parts[0]
  if len(parts) == 2:
    source_path = parts[1]
  else:
    source_path = None
  _log.info(
      'starting refresh of %s DataSource %s' %
      (source_name, '' if source_path is None else '[%s]' % source_path))
  if 'commit' in self._request.arguments:
    commit = self._request.arguments['commit']
  else:
    _log.warning('No commit given; refreshing from master. '
                 'This is probably NOT what you want.')
    commit = None
  server_instance = self._CreateServerInstance(commit)
  commit_tracker = CommitTracker(server_instance.object_store_creator)
  refresh_tracker = RefreshTracker(server_instance.object_store_creator)
  # If no commit was given, use the ID of the last cached master commit.
  # This allows sources external to the chromium repository to be updated
  # independently from individual refresh cycles.
  if commit is None:
    commit = commit_tracker.Get('master').Get()
  success = True
  try:
    # 'platform_bundle' and 'content_providers' are special targets that
    # live on the server instance; everything else is a named DataSource.
    if source_name == 'platform_bundle':
      data_source = server_instance.platform_bundle
    elif source_name == 'content_providers':
      data_source = server_instance.content_providers
    else:
      data_source = CreateDataSource(source_name, server_instance)
    class_name = data_source.__class__.__name__
    refresh_future = data_source.Refresh(source_path)
    assert isinstance(refresh_future, Future), (
        '%s.Refresh() did not return a Future' % class_name)
    timer = Timer()
    try:
      refresh_future.Get()
      # Mark this (commit, task) pair as completed and then see if this
      # concludes the full cache refresh. The list of tasks required to
      # complete a cache refresh is registered (and keyed on commit ID) by
      # the CronServlet before kicking off all the refresh tasks.
      (refresh_tracker.MarkTaskComplete(commit, path)
           .Then(lambda _: refresh_tracker.GetRefreshComplete(commit))
           .Then(lambda is_complete:
                     commit_tracker.Set('master', commit)
                     if is_complete else None)
           .Get())
    except Exception as e:
      _log.error('%s: error %s' % (class_name, traceback.format_exc()))
      success = False
      # Throttling is an expected, retryable condition; report it as such.
      if IsFileSystemThrottledError(e):
        return Response.ThrottledError('Throttled')
      raise
    finally:
      _log.info('Refreshing %s took %s' %
                (class_name, timer.Stop().FormatElapsed()))
  except:
    success = False  # This should never actually happen.
    _log.error('uncaught error: %s' % traceback.format_exc())
    raise
  finally:
    _log.info('finished (%s)', 'success' if success else 'FAILED')
  # Only reached when no exception escaped the try above.
  return (Response.Ok('Success') if success
          else Response.InternalError('Failure'))
def _GetImpl(self):
  """Renders every public template, static file, and (outside the dev
  server) example file and example zip for |self._channel|, then triggers
  one redirect so PathCanonicalizer can cache file listings.

  Returns Response.Ok if every render succeeded, Response.InternalError
  otherwise.
  """
  # Cron strategy:
  #
  # Find all public template files and static files, and render them. Most of
  # the time these won't have changed since the last cron run, so it's a
  # little wasteful, but hopefully rendering is really fast (if it isn't we
  # have a problem).
  channel = self._channel
  logging.info('cron/%s: starting' % channel)
  # This is returned every time RenderServlet wants to create a new
  # ServerInstance.
  server_instance = self._GetSafeServerInstance()

  def get_via_render_servlet(path):
    # Render |path| exactly as a user-facing request would be rendered.
    return RenderServlet(
        Request(path, self._request.host, self._request.headers),
        _SingletonRenderServletDelegate(server_instance)).Get()

  def run_cron_for_dir(d, path_prefix=''):
    # Renders every non-directory file under |d| (URLs prefixed with
    # |path_prefix|), logging failures. Returns False if any render failed.
    success = True
    start_time = time.time()
    files = [f for f in server_instance.content_cache.GetFromFileListing(d)
             if not f.endswith('/')]
    logging.info('cron/%s: rendering %s files from %s...' % (
        channel, len(files), d))
    try:
      for i, f in enumerate(files):
        error = None
        path = '%s%s' % (path_prefix, f)
        try:
          response = get_via_render_servlet(path)
          if response.status != 200:
            error = 'Got %s response' % response.status
        except DeadlineExceededError:
          logging.error(
              'cron/%s: deadline exceeded rendering %s (%s of %s): %s' % (
                  channel, path, i + 1, len(files), traceback.format_exc()))
          raise
        except Exception:
          # Fix: this was 'except error:', but |error| is None here, which
          # is not a valid exception class — any non-deadline failure was
          # never handled. Record it so the run is marked failed and the
          # remaining files still get rendered.
          error = 'Caught exception: %s' % traceback.format_exc()
        if error:
          logging.error('cron/%s: error rendering %s: %s' % (
              channel, path, error))
          success = False
    finally:
      logging.info('cron/%s: rendering %s files from %s took %s seconds' % (
          channel, len(files), d, time.time() - start_time))
    return success

  success = True
  try:
    # Render all of the publicly accessible files.
    cron_runs = [
      # Note: rendering the public templates will pull in all of the private
      # templates.
      (svn_constants.PUBLIC_TEMPLATE_PATH, ''),
      # Note: rendering the public templates will have pulled in the .js
      # and manifest.json files (for listing examples on the API reference
      # pages), but there are still images, CSS, etc.
      (svn_constants.STATIC_PATH, 'static/'),
    ]
    if not IsDevServer():
      cron_runs.append(
          (svn_constants.EXAMPLES_PATH, 'extensions/examples/'))
    # Note: don't try to short circuit any of this stuff. We want to run
    # the cron for all the directories regardless of intermediate
    # failures.
    for path, path_prefix in cron_runs:
      success = run_cron_for_dir(path, path_prefix=path_prefix) and success
    # TODO(kalman): Generic way for classes to request cron access. The next
    # two special cases are ugly. It would potentially greatly speed up cron
    # runs, too.
    # Extension examples have zip files too. Well, so do apps, but the app
    # file system doesn't get the Offline treatment so they don't need cron.
    if not IsDevServer():
      manifest_json = '/manifest.json'
      # Every listing entry ending in /manifest.json is an example; its
      # zip lives at '<dir>.zip'.
      example_zips = [
          '%s.zip' % filename[:-len(manifest_json)]
          for filename in server_instance.content_cache.GetFromFileListing(
              svn_constants.EXAMPLES_PATH)
          if filename.endswith(manifest_json)]
      logging.info('cron/%s: rendering %s example zips...' % (
          channel, len(example_zips)))
      start_time = time.time()
      try:
        # Fix: evaluate all(...) first so earlier failures don't
        # short-circuit zip rendering (matching the 'don't short circuit'
        # policy above).
        success = all(
            get_via_render_servlet('extensions/examples/%s' % z).status == 200
            for z in example_zips) and success
      finally:
        logging.info('cron/%s: rendering %s example zips took %s seconds' % (
            channel, len(example_zips), time.time() - start_time))
    # Also trigger a redirect so that PathCanonicalizer has an opportunity to
    # cache file listings.
    logging.info('cron/%s: triggering a redirect...' % channel)
    redirect_response = get_via_render_servlet('storage.html')
    success = success and redirect_response.status == 302
  except DeadlineExceededError:
    success = False
  logging.info('cron/%s: finished' % channel)
  return (Response.Ok('Success') if success
          else Response.InternalError('Failure'))
def _GetImpl(self):
  """Renders every public template, static file, and example file for the
  channel named by the request path, to warm the render caches.

  Returns Response.Ok if every render succeeded, Response.InternalError
  otherwise.
  """
  # Cron strategy:
  #
  # Find all public template files and static files, and render them. Most of
  # the time these won't have changed since the last cron run, so it's a
  # little wasteful, but hopefully rendering is really fast (if it isn't we
  # have a problem).
  channel = self._request.path.strip('/')
  logging.info('cron/%s: starting' % channel)
  server_instance = ServerInstance.CreateOnline(channel)

  def run_cron_for_dir(d, path_prefix=''):
    # Renders every non-directory file under |d| (URLs prefixed with
    # |path_prefix|), logging failures. Returns False if any render failed.
    success = True
    start_time = time.time()
    files = [f for f in server_instance.content_cache.GetFromFileListing(d)
             if not f.endswith('/')]
    logging.info('cron/%s: rendering %s files from %s...' %
                 (channel, len(files), d))
    try:
      for i, f in enumerate(files):
        error = None
        path = '%s%s' % (path_prefix, f)
        try:
          response = RenderServlet(Request(path, self._request.headers)).Get(
              server_instance=server_instance)
          if response.status != 200:
            error = 'Got %s response' % response.status
        except DeadlineExceededError:
          logging.error(
              'cron/%s: deadline exceeded rendering %s (%s of %s): %s' %
              (channel, path, i + 1, len(files), traceback.format_exc()))
          raise
        except Exception:
          # Fix: this was 'except error:', but |error| is None here, which
          # is not a valid exception class — any non-deadline failure was
          # never handled. Record it so the run is marked failed and the
          # remaining files still get rendered.
          error = 'Caught exception: %s' % traceback.format_exc()
        if error:
          logging.error('cron/%s: error rendering %s: %s' %
                        (channel, path, error))
          success = False
    finally:
      logging.info('cron/%s: rendering %s files from %s took %s seconds' %
                   (channel, len(files), d, time.time() - start_time))
    return success

  success = True
  for path, path_prefix in (
      # Note: rendering the public templates will pull in all of the private
      # templates.
      (svn_constants.PUBLIC_TEMPLATE_PATH, ''),
      # Note: rendering the public templates will have pulled in the .js and
      # manifest.json files (for listing examples on the API reference pages),
      # but there are still images, CSS, etc.
      (svn_constants.STATIC_PATH, 'static/'),
      (svn_constants.EXAMPLES_PATH, 'extensions/examples/')):
    try:
      # Note: don't try to short circuit any of this stuff. We want to run
      # the cron for all the directories regardless of intermediate failures.
      success = run_cron_for_dir(path, path_prefix=path_prefix) and success
    except DeadlineExceededError:
      success = False
      break
  logging.info('cron/%s: finished' % channel)
  return (Response.Ok('Success') if success
          else Response.InternalError('Failure'))