def Get(self):
        """Runs the link-error test named by the request path.

        Supported tests are 'broken_links', 'orphaned_pages', and
        'link_errors' (which runs both). Returns a 404 response for an
        unknown test name, a 500 response containing the error report if
        the test finds problems, and a 200 response otherwise.
        """
        link_error_tests = ('broken_links', 'orphaned_pages', 'link_errors')

        # Use the 'not in' operator rather than 'not x in y'.
        if self._request.path not in link_error_tests:
            return Response.NotFound(
                'Test %s not found. Available tests are: %s' %
                (self._request.path, ','.join(link_error_tests)))

        constructor = InstanceServlet.GetConstructor(self._delegate)

        # Renders |path| through a fresh InstanceServlet so the link tester
        # sees exactly what a user-facing request would see.
        def renderer(path):
            return constructor(Request(path, '', self._request.headers)).Get()

        link_tester = BrokenLinkTester(
            InstanceServletRenderServletDelegate(
                self._delegate).CreateServerInstance(), renderer)
        if self._request.path == 'broken_links':
            errors, content = link_tester.TestBrokenLinks()
        elif self._request.path == 'orphaned_pages':
            errors, content = link_tester.TestOrphanedPages()
        else:
            # 'link_errors' runs both tests and concatenates their reports.
            link_errors, link_content = link_tester.TestBrokenLinks()
            orphaned_errors, orphaned_content = link_tester.TestOrphanedPages()
            errors = link_errors + orphaned_errors
            content = "%s\n%s" % (link_content, orphaned_content)

        if errors:
            return Response.InternalError(content=content)

        return Response.Ok(content="%s test passed." % self._request.path)
Esempio n. 2
0
    def _GetImpl(self):
        """Enqueues taskqueue tasks that refresh every cacheable target.

        Cron strategy:

        Collect all DataSources, the PlatformBundle, the ContentProviders, and
        any other statically renderered contents (e.g. examples content),
        and spin up taskqueue tasks which will refresh any cached data
        relevant to these assets.

        TODO(rockot/kalman): At the moment examples are not actually refreshed
        because they're too slow.
        """
        _log.info('starting')

        server_instance = self._GetSafeServerInstance()
        master_fs = server_instance.host_file_system_provider.GetMaster()
        master_commit = master_fs.GetCommitID().Get()

        # This is the guy that would be responsible for refreshing the cache
        # of examples. Here for posterity, hopefully it will be added to the
        # targets below someday.
        render_refresher = RenderRefresher(server_instance, self._request)

        # Get the default taskqueue.
        queue = taskqueue.Queue()

        # GAE documentation specifies that it's bad to add tasks to a queue
        # within one second of purging. We wait 2 seconds, because we like
        # to go the extra mile.
        queue.purge()
        time.sleep(2)

        success = True
        try:
            data_sources = CreateDataSources(server_instance)
            targets = (
                data_sources.items() +
                [('content_providers', server_instance.content_providers),
                 ('platform_bundle', server_instance.platform_bundle)])
            title = 'initializing %s parallel targets' % len(targets)
            _log.info(title)
            timer = Timer()
            for name, target in targets:
                refresh_paths = target.GetRefreshPaths()
                for path in refresh_paths:
                    queue.add(
                        taskqueue.Task(url='/_refresh/%s/%s' % (name, path),
                                       params={'commit': master_commit}))
            _log.info('%s took %s' % (title, timer.Stop().FormatElapsed()))
        except:
            # This should never actually happen (each cron step does its own
            # conservative error checking), so re-raise no matter what it is.
            _log.error('uncaught error: %s' % traceback.format_exc())
            success = False
            raise
        finally:
            _log.info('finished (%s)', 'success' if success else 'FAILED')
        # BUG FIX: this return used to live inside the finally clause, where
        # it silently swallowed the exception re-raised above and returned
        # InternalError instead. Returning here lets the re-raise propagate,
        # as the comment in the except clause intends.
        return (Response.Ok('Success')
                if success else Response.InternalError('Failure'))
  def _GetImpl(self):
    """Refreshes a single DataSource named by the request path.

    The path has the form <source_name>[/<source_path>]. An optional 'commit'
    request argument pins the refresh to that commit; otherwise master is
    refreshed (with a warning, since that is usually not what you want).
    """
    # NOTE: a stripped copy of the path used to be computed here but was
    # never read; the split below intentionally uses the raw request path.
    parts = self._request.path.split('/', 1)
    source_name = parts[0]
    if len(parts) == 2:
      source_path = parts[1]
    else:
      source_path = None

    _log.info('starting refresh of %s DataSource %s' %
        (source_name, '' if source_path is None else '[%s]' % source_path))

    if 'commit' in self._request.arguments:
      commit = self._request.arguments['commit']
    else:
      _log.warning('No commit given; refreshing from master. '
                   'This is probably NOT what you want.')
      commit = None

    server_instance = self._CreateServerInstance(commit)
    success = True
    try:
      # platform_bundle and content_providers are not regular DataSources;
      # they are looked up directly on the server instance.
      if source_name == 'platform_bundle':
        data_source = server_instance.platform_bundle
      elif source_name == 'content_providers':
        data_source = server_instance.content_providers
      else:
        data_source = CreateDataSource(source_name, server_instance)

      class_name = data_source.__class__.__name__
      refresh_future = data_source.Refresh(source_path)
      assert isinstance(refresh_future, Future), (
          '%s.Refresh() did not return a Future' % class_name)
      timer = Timer()
      try:
        refresh_future.Get()
      except Exception as e:
        _log.error('%s: error %s' % (class_name, traceback.format_exc()))
        success = False
        if IsFileSystemThrottledError(e):
          return Response.ThrottledError('Throttled')
        raise
      finally:
        _log.info('Refreshing %s took %s' %
            (class_name, timer.Stop().FormatElapsed()))

    except:
      success = False
      # This should never actually happen.
      _log.error('uncaught error: %s' % traceback.format_exc())
      raise
    finally:
      _log.info('finished (%s)', 'success' if success else 'FAILED')
    # BUG FIX: this return used to live inside the finally clause, which
    # silently swallowed both the re-raise above and the ThrottledError
    # response, replacing them with InternalError. Return outside it.
    return (Response.Ok('Success') if success else
            Response.InternalError('Failure'))
    def _GetImpl(self):
        """Renders every public template, static file and (in prod) example.

        Cron strategy:

        Find all public template files and static files, and render them. Most
        of the time these won't have changed since the last cron run, so it's
        a little wasteful, but hopefully rendering is really fast (if it isn't
        we have a problem).
        """
        logging.info('cron: starting')

        # This is returned every time RenderServlet wants to create a new
        # ServerInstance.
        server_instance = self._GetSafeServerInstance()

        def get_via_render_servlet(path):
            # Render |path| exactly as a user-facing request would be served.
            request = Request(path, self._request.host, self._request.headers)
            delegate = _SingletonRenderServletDelegate(server_instance)
            return RenderServlet(request, delegate).Get()

        def run_cron_for_dir(d, path_prefix=''):
            # Renders every file under |d| (URLs prefixed by |path_prefix|).
            # Returns True iff all renders succeeded. DeadlineExceededError is
            # re-raised so the caller can abort the whole run.
            success = True
            start_time = time.time()
            files = dict(
                CreateURLsFromPaths(server_instance.host_file_system, d,
                                    path_prefix))
            logging.info('cron: rendering %s files from %s...' %
                         (len(files), d))
            try:
                for i, path in enumerate(files):
                    error = None
                    try:
                        response = get_via_render_servlet(path)
                        if response.status != 200:
                            error = 'Got %s response' % response.status
                    except DeadlineExceededError:
                        logging.error(
                            'cron: deadline exceeded rendering %s (%s of %s): %s'
                            %
                            (path, i + 1, len(files), traceback.format_exc()))
                        raise
                    except Exception:
                        # BUG FIX: this was 'except error:' where |error| is
                        # always None, which never matches, so any rendering
                        # exception escaped and aborted the run. Record the
                        # traceback and keep rendering instead.
                        error = traceback.format_exc()
                    if error:
                        logging.error('cron: error rendering %s: %s' %
                                      (path, error))
                        success = False
            finally:
                logging.info(
                    'cron: rendering %s files from %s took %s seconds' %
                    (len(files), d, time.time() - start_time))
            return success

        success = True
        try:
            # Render all of the publicly accessible files.
            cron_runs = [
                # Note: rendering the public templates will pull in all of the
                # private templates.
                (svn_constants.PUBLIC_TEMPLATE_PATH, ''),
                # Note: rendering the public templates will have pulled in the
                # .js and manifest.json files (for listing examples on the API
                # reference pages), but there are still images, CSS, etc.
                (svn_constants.STATIC_PATH, 'static/'),
            ]
            if not IsDevServer():
                cron_runs.append(
                    (svn_constants.EXAMPLES_PATH, 'extensions/examples/'))

            # Note: don't try to short circuit any of this stuff. We want to
            # run the cron for all the directories regardless of intermediate
            # failures.
            for path, path_prefix in cron_runs:
                success = run_cron_for_dir(path,
                                           path_prefix=path_prefix) and success

            # TODO(kalman): Generic way for classes to request cron access.
            # The next two special cases are ugly. It would potentially
            # greatly speed up cron runs, too.

            # Extension examples have zip files too. Well, so do apps, but the
            # app file system doesn't get the Offline treatment so they don't
            # need cron.
            if not IsDevServer():
                manifest_json = 'manifest.json'
                example_zips = []
                for root, _, files in server_instance.host_file_system.Walk(
                        svn_constants.EXAMPLES_PATH):
                    example_zips.extend(root + '.zip' for name in files
                                        if name == manifest_json)
                logging.info('cron: rendering %s example zips...' %
                             len(example_zips))
                start_time = time.time()
                try:
                    success = success and all(
                        get_via_render_servlet('extensions/examples/%s' %
                                               z).status == 200
                        for z in example_zips)
                finally:
                    logging.info(
                        'cron: rendering %s example zips took %s seconds' %
                        (len(example_zips), time.time() - start_time))

        except DeadlineExceededError:
            success = False

        logging.info('cron: running Redirector cron...')
        server_instance.redirector.Cron()

        logging.info('cron: finished (%s)' %
                     ('success' if success else 'failure', ))

        return (Response.Ok('Success')
                if success else Response.InternalError('Failure'))
Esempio n. 5
0
    def _GetImpl(self):
        """Runs the full cron: per-source Cron methods plus content rendering.

        Cron strategy:

        Find all public template files and static files, and render them. Most
        of the time these won't have changed since the last cron run, so it's
        a little wasteful, but hopefully rendering is really fast (if it isn't
        we have a problem).
        """
        _cronlog.info('starting')

        # This is returned every time RenderServlet wants to create a new
        # ServerInstance.
        #
        # TODO(kalman): IMPORTANT. This sometimes throws an exception, breaking
        # everything. Need retry logic at the fetcher level.
        server_instance = self._GetSafeServerInstance()
        trunk_fs = server_instance.host_file_system_provider.GetTrunk()

        def render(path):
            # Render |path| exactly as a user-facing request would be served.
            request = Request(path, self._request.host, self._request.headers)
            delegate = _SingletonRenderServletDelegate(server_instance)
            return RenderServlet(request, delegate).Get()

        def request_files_in_dir(path, prefix='', strip_ext=None):
            '''Requests every file found under |path| in this host file
            system, with a request prefix of |prefix|. |strip_ext| is an
            optional list of file extensions that should be stripped from
            paths before requesting.
            '''
            def maybe_strip_ext(name):
                # The site verification file must keep its extension;
                # everything else loses any extension listed in |strip_ext|.
                if name == SITE_VERIFICATION_FILE or not strip_ext:
                    return name
                base, ext = posixpath.splitext(name)
                return base if ext in strip_ext else name

            files = [
                maybe_strip_ext(name)
                for name, _ in CreateURLsFromPaths(trunk_fs, path, prefix)
            ]
            return _RequestEachItem(path, files, render)

        results = []

        try:
            # Start running the hand-written Cron methods first; they can be
            # run in parallel. They are resolved at the end.
            def run_cron_for_future(target):
                # Starts |target|.Cron() now and returns a Future whose Get()
                # resolves it, recording failures into |results|.
                title = target.__class__.__name__
                future, init_timer = TimerClosure(target.Cron)
                assert isinstance(
                    future,
                    Future), ('%s.Cron() did not return a Future' % title)

                def resolve():
                    resolve_timer = Timer()
                    try:
                        future.Get()
                    except Exception as e:
                        _cronlog.error('%s: error %s' %
                                       (title, traceback.format_exc()))
                        results.append(False)
                        if IsDeadlineExceededError(e): raise
                    finally:
                        resolve_timer.Stop()
                        _cronlog.info(
                            '%s took %s: %s to initialize and %s to resolve' %
                            (title,
                             init_timer.With(resolve_timer).FormatElapsed(),
                             init_timer.FormatElapsed(),
                             resolve_timer.FormatElapsed()))

                return Future(delegate=Gettable(resolve))

            targets = (CreateDataSources(server_instance).values() +
                       [server_instance.content_providers])
            title = 'initializing %s parallel Cron targets' % len(targets)
            _cronlog.info(title)
            timer = Timer()
            try:
                cron_futures = [
                    run_cron_for_future(target) for target in targets
                ]
            finally:
                _cronlog.info('%s took %s' %
                              (title, timer.Stop().FormatElapsed()))

            # Rendering the public templates will also pull in all of the
            # private templates.
            results.append(
                request_files_in_dir(PUBLIC_TEMPLATES,
                                     strip_ext=('.html', '.md')))

            # Rendering the public templates will have pulled in the .js and
            # manifest.json files (for listing examples on the API reference
            # pages), but there are still images, CSS, etc.
            results.append(request_files_in_dir(STATIC_DOCS, prefix='static'))

            # Samples are too expensive to run on the dev server, where there
            # is no parallel fetch.
            if not IsDevServer():
                # Fetch each individual sample file.
                results.append(
                    request_files_in_dir(EXAMPLES,
                                         prefix='extensions/examples'))

                # Fetch the zip file of each example (contains all the
                # individual files).
                example_zips = []
                for root, _, files in trunk_fs.Walk(EXAMPLES):
                    example_zips.extend(root + '.zip' for name in files
                                        if name == 'manifest.json')
                results.append(
                    _RequestEachItem(
                        'example zips', example_zips,
                        lambda path: render('extensions/examples/' + path)))

            # Resolve the hand-written Cron method futures.
            title = 'resolving %s parallel Cron targets' % len(targets)
            _cronlog.info(title)
            timer = Timer()
            try:
                for future in cron_futures:
                    future.Get()
            finally:
                _cronlog.info('%s took %s' %
                              (title, timer.Stop().FormatElapsed()))

        except:
            results.append(False)
            # This should never actually happen (each cron step does its own
            # conservative error checking), so re-raise no matter what it is.
            _cronlog.error('uncaught error: %s' % traceback.format_exc())
            raise
        finally:
            success = all(results)
            _cronlog.info('finished (%s)', 'success' if success else 'FAILED')
        # BUG FIX: this return used to live inside the finally clause, which
        # silently swallowed the re-raise above. Returning here lets uncaught
        # errors actually propagate, as the except clause intends.
        return (Response.Ok('Success')
                if success else Response.InternalError('Failure'))
Esempio n. 6
0
    def _GetImpl(self):
        """Refreshes a single DataSource and tracks cache-refresh completion.

        The request path has the form <source_name>[/<source_path>]. An
        optional 'commit' argument pins the refresh to that commit; when
        absent, the last cached master commit ID is used so sources external
        to the chromium repository can update independently of individual
        refresh cycles.
        """
        path = self._request.path.strip('/')
        parts = self._request.path.split('/', 1)
        source_name = parts[0]
        if len(parts) == 2:
            source_path = parts[1]
        else:
            source_path = None

        _log.info(
            'starting refresh of %s DataSource %s' %
            (source_name, '' if source_path is None else '[%s]' % source_path))

        if 'commit' in self._request.arguments:
            commit = self._request.arguments['commit']
        else:
            _log.warning('No commit given; refreshing from master. '
                         'This is probably NOT what you want.')
            commit = None

        server_instance = self._CreateServerInstance(commit)
        commit_tracker = CommitTracker(server_instance.object_store_creator)
        refresh_tracker = RefreshTracker(server_instance.object_store_creator)

        # If no commit was given, use the ID of the last cached master commit.
        # This allows sources external to the chromium repository to be
        # updated independently from individual refresh cycles.
        if commit is None:
            commit = commit_tracker.Get('master').Get()

        success = True
        try:
            # platform_bundle and content_providers are not regular
            # DataSources; they are looked up directly on the server instance.
            if source_name == 'platform_bundle':
                data_source = server_instance.platform_bundle
            elif source_name == 'content_providers':
                data_source = server_instance.content_providers
            else:
                data_source = CreateDataSource(source_name, server_instance)

            class_name = data_source.__class__.__name__
            refresh_future = data_source.Refresh(source_path)
            assert isinstance(
                refresh_future,
                Future), ('%s.Refresh() did not return a Future' % class_name)
            timer = Timer()
            try:
                refresh_future.Get()

                # Mark this (commit, task) pair as completed and then see if
                # this concludes the full cache refresh. The list of tasks
                # required to complete a cache refresh is registered (and
                # keyed on commit ID) by the CronServlet before kicking off
                # all the refresh tasks.
                (refresh_tracker.MarkTaskComplete(
                    commit,
                    path).Then(lambda _: refresh_tracker.GetRefreshComplete(
                        commit)).Then(lambda is_complete: commit_tracker.Set(
                            'master', commit) if is_complete else None).Get())
            except Exception as e:
                _log.error('%s: error %s' %
                           (class_name, traceback.format_exc()))
                success = False
                if IsFileSystemThrottledError(e):
                    return Response.ThrottledError('Throttled')
                raise
            finally:
                _log.info('Refreshing %s took %s' %
                          (class_name, timer.Stop().FormatElapsed()))

        except:
            success = False
            # This should never actually happen.
            _log.error('uncaught error: %s' % traceback.format_exc())
            raise
        finally:
            _log.info('finished (%s)', 'success' if success else 'FAILED')
        # BUG FIX: this return used to live inside the finally clause, which
        # silently swallowed both the re-raise above and the ThrottledError
        # response, replacing them with InternalError. Return outside it.
        return (Response.Ok('Success')
                if success else Response.InternalError('Failure'))
Esempio n. 7
0
  def _GetImpl(self):
    """Renders public templates, static files and examples for |channel|.

    Cron strategy:

    Find all public template files and static files, and render them. Most of
    the time these won't have changed since the last cron run, so it's a
    little wasteful, but hopefully rendering is really fast (if it isn't we
    have a problem).
    """
    channel = self._channel
    logging.info('cron/%s: starting' % channel)

    # This is returned every time RenderServlet wants to create a new
    # ServerInstance.
    server_instance = self._GetSafeServerInstance()

    def get_via_render_servlet(path):
      # Render |path| exactly as a user-facing request would be served.
      return RenderServlet(
          Request(path, self._request.host, self._request.headers),
          _SingletonRenderServletDelegate(server_instance)).Get()

    def run_cron_for_dir(d, path_prefix=''):
      # Renders every file in |d| (URLs prefixed by |path_prefix|); returns
      # True iff all renders succeeded. DeadlineExceededError is re-raised
      # so the caller can abort the whole run.
      success = True
      start_time = time.time()
      files = [f for f in server_instance.content_cache.GetFromFileListing(d)
               if not f.endswith('/')]
      logging.info('cron/%s: rendering %s files from %s...' % (
          channel, len(files), d))
      try:
        for i, f in enumerate(files):
          error = None
          path = '%s%s' % (path_prefix, f)
          try:
            response = get_via_render_servlet(path)
            if response.status != 200:
              error = 'Got %s response' % response.status
          except DeadlineExceededError:
            logging.error(
                'cron/%s: deadline exceeded rendering %s (%s of %s): %s' % (
                    channel, path, i + 1, len(files), traceback.format_exc()))
            raise
          except Exception:
            # BUG FIX: this was 'except error:' where |error| is always None,
            # which never matches, so any rendering exception escaped and
            # aborted the run. Record the traceback and keep rendering.
            error = traceback.format_exc()
          if error:
            logging.error('cron/%s: error rendering %s: %s' % (
                channel, path, error))
            success = False
      finally:
        logging.info('cron/%s: rendering %s files from %s took %s seconds' % (
            channel, len(files), d, time.time() - start_time))
      return success

    success = True
    try:
      # Render all of the publicly accessible files.
      cron_runs = [
        # Note: rendering the public templates will pull in all of the private
        # templates.
        (svn_constants.PUBLIC_TEMPLATE_PATH, ''),
        # Note: rendering the public templates will have pulled in the .js
        # and manifest.json files (for listing examples on the API reference
        # pages), but there are still images, CSS, etc.
        (svn_constants.STATIC_PATH, 'static/'),
      ]
      if not IsDevServer():
        cron_runs.append(
            (svn_constants.EXAMPLES_PATH, 'extensions/examples/'))

      # Note: don't try to short circuit any of this stuff. We want to run
      # the cron for all the directories regardless of intermediate
      # failures.
      for path, path_prefix in cron_runs:
        success = run_cron_for_dir(path, path_prefix=path_prefix) and success

      # TODO(kalman): Generic way for classes to request cron access. The next
      # two special cases are ugly. It would potentially greatly speed up cron
      # runs, too.

      # Extension examples have zip files too. Well, so do apps, but the app
      # file system doesn't get the Offline treatment so they don't need cron.
      if not IsDevServer():
        manifest_json = '/manifest.json'
        example_zips = [
            '%s.zip' % filename[:-len(manifest_json)]
            for filename in server_instance.content_cache.GetFromFileListing(
                svn_constants.EXAMPLES_PATH)
            if filename.endswith(manifest_json)]
        logging.info('cron/%s: rendering %s example zips...' % (
            channel, len(example_zips)))
        start_time = time.time()
        try:
          success = success and all(
              get_via_render_servlet('extensions/examples/%s' % z).status == 200
              for z in example_zips)
        finally:
          logging.info('cron/%s: rendering %s example zips took %s seconds' % (
              channel, len(example_zips), time.time() - start_time))

      # Also trigger a redirect so that PathCanonicalizer has an opportunity to
      # cache file listings.
      logging.info('cron/%s: triggering a redirect...' % channel)
      redirect_response = get_via_render_servlet('storage.html')
      success = success and redirect_response.status == 302
    except DeadlineExceededError:
      success = False

    logging.info('cron/%s: finished' % channel)

    return (Response.Ok('Success') if success else
            Response.InternalError('Failure'))
Esempio n. 8
0
    def _GetImpl(self):
        """Renders public templates, static files and examples per channel.

        Cron strategy:

        Find all public template files and static files, and render them. Most
        of the time these won't have changed since the last cron run, so it's
        a little wasteful, but hopefully rendering is really fast (if it isn't
        we have a problem).
        """
        channel = self._request.path.strip('/')
        logging.info('cron/%s: starting' % channel)

        server_instance = ServerInstance.CreateOnline(channel)

        def run_cron_for_dir(d, path_prefix=''):
            # Renders every file in |d| (URLs prefixed by |path_prefix|);
            # returns True iff all renders succeeded. DeadlineExceededError
            # is re-raised so the caller can abort this directory.
            success = True
            start_time = time.time()
            files = [
                f for f in server_instance.content_cache.GetFromFileListing(d)
                if not f.endswith('/')
            ]
            logging.info('cron/%s: rendering %s files from %s...' %
                         (channel, len(files), d))
            try:
                for i, f in enumerate(files):
                    error = None
                    path = '%s%s' % (path_prefix, f)
                    try:
                        response = RenderServlet(
                            Request(path, self._request.headers)).Get(
                                server_instance=server_instance)
                        if response.status != 200:
                            error = 'Got %s response' % response.status
                    except DeadlineExceededError:
                        logging.error(
                            'cron/%s: deadline exceeded rendering %s (%s of %s): %s'
                            % (channel, path, i + 1, len(files),
                               traceback.format_exc()))
                        raise
                    except Exception:
                        # BUG FIX: this was 'except error:' where |error| is
                        # always None, which never matches, so any rendering
                        # exception escaped and aborted the run. Record the
                        # traceback and keep rendering.
                        error = traceback.format_exc()
                    if error:
                        logging.error('cron/%s: error rendering %s: %s' %
                                      (channel, path, error))
                        success = False
            finally:
                logging.info(
                    'cron/%s: rendering %s files from %s took %s seconds' %
                    (channel, len(files), d, time.time() - start_time))
            return success

        success = True
        for path, path_prefix in (
                # Note: rendering the public templates will pull in all of the
                # private templates.
            (svn_constants.PUBLIC_TEMPLATE_PATH, ''),
                # Note: rendering the public templates will have pulled in the
                # .js and manifest.json files (for listing examples on the API
                # reference pages), but there are still images, CSS, etc.
            (svn_constants.STATIC_PATH, 'static/'),
            (svn_constants.EXAMPLES_PATH, 'extensions/examples/')):
            try:
                # Note: don't try to short circuit any of this stuff. We want
                # to run the cron for all the directories regardless of
                # intermediate failures.
                success = run_cron_for_dir(path,
                                           path_prefix=path_prefix) and success
            except DeadlineExceededError:
                success = False
                break

        logging.info('cron/%s: finished' % channel)

        return (Response.Ok('Success')
                if success else Response.InternalError('Failure'))