Example #1
class Rebaseliner(object):
    """Class to produce new baselines for a given platform."""

    REVISION_REGEX = r"<a href=\"(\d+)/\">"
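    # REVISION_REGEX matches links to numbered revision directories in the
    # buildbot results listing, e.g. <a href="12345/">.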

    def __init__(
        self, running_port, target_port, platform, options, url_fetcher, zip_factory, scm, logged_before=False
    ):
        """
        Args:
            running_port: the Port the script is running on.
            target_port: the Port the script uses to find port-specific
                configuration information like the test_expectations.txt
                file location and the list of test platforms.
            platform: the test platform to rebaseline.
            options: the command-line options object.
            url_fetcher: object that can fetch objects from URLs.
            zip_factory: optional object that can fetch zip files from URLs.
            scm: scm object for adding new baselines.
            logged_before: whether the previous running port logged anything.
        """
        self._platform = platform
        self._options = options
        self._port = running_port
        self._filesystem = running_port._filesystem
        self._target_port = target_port

        # FIXME: This should get its PortFactory from a Host object.
        # Note: using running_port.executive, running_port.user since we can't get them from a host.
        self._rebaseline_port = PortFactory().get(
            platform, options, filesystem=self._filesystem, executive=running_port.executive, user=running_port.user
        )
        self._rebaselining_tests = set()
        self._rebaselined_tests = []
        self._logged_before = logged_before
        self.did_log = False

        # Create the test expectations helper, which is used to:
        #   - compile the list of tests that need rebaselining.
        #   - update those tests in the test_expectations file after the
        #     rebaseline is done.
        expectations_str = self._rebaseline_port.test_expectations()
        self._test_expectations = test_expectations.TestExpectations(
            self._rebaseline_port, None, expectations_str, self._rebaseline_port.test_configuration(), False
        )
        self._url_fetcher = url_fetcher
        self._zip_factory = zip_factory
        self._scm = scm

    def run(self):
        """Run rebaseline process."""

        log_dashed_string("Compiling rebaselining tests", self._platform, logging.DEBUG)
        if not self._compile_rebaselining_tests():
            return False
        if not self._rebaselining_tests:
            return True

        self.did_log = True
        log_dashed_string("Downloading archive", self._platform, logging.DEBUG)
        archive_file = self._download_buildbot_archive()
        _log.debug("")
        if not archive_file:
            _log.error("No archive found.")
            return False

        log_dashed_string("Extracting and adding new baselines", self._platform, logging.DEBUG)
        self._extract_and_add_new_baselines(archive_file)
        archive_file.close()

        log_dashed_string("Updating rebaselined tests in file", self._platform)

        if len(self._rebaselining_tests) != len(self._rebaselined_tests):
            _log.debug("")
            _log.debug("NOT ALL TESTS WERE REBASELINED.")
            _log.debug("  Number marked for rebaselining: %d", len(self._rebaselining_tests))
            _log.debug("  Number actually rebaselined: %d", len(self._rebaselined_tests))
            _log.info("")
            return False

        _log.debug("  All tests needing rebaselining were successfully rebaselined.")
        _log.info("")
        return True

    def remove_rebaselining_expectations(self, tests, backup):
        """if backup is True, we backup the original test expectations file."""
        new_expectations = self._test_expectations.remove_rebaselined_tests(tests)
        path = self._target_port.path_to_test_expectations_file()
        if backup:
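            # Back up the original expectations file with a timestamp suffix,
            # e.g. "test_expectations.txt.orig.20110620153000".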
            date_suffix = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
            backup_file = "%s.orig.%s" % (path, date_suffix)
            if self._filesystem.exists(backup_file):
                self._filesystem.remove(backup_file)
            _log.debug('Saving original file to "%s"', backup_file)
            self._filesystem.move(path, backup_file)

        self._filesystem.write_text_file(path, new_expectations)
        # self._scm.add(path)

    def get_rebaselined_tests(self):
        return self._rebaselined_tests

    def _compile_rebaselining_tests(self):
        """Compile list of tests that need rebaselining for the platform.

        Returns:
          False if reftests are wrongly marked as 'needs rebaselining' or True
        """

        self._rebaselining_tests = self._test_expectations.get_rebaselining_failures()
        if not self._rebaselining_tests:
            _log.info("%s: No tests to rebaseline.", self._platform)
            return True

        fs = self._target_port._filesystem
        for test in self._rebaselining_tests:
            test_abspath = self._target_port.abspath_for_test(test)
            if fs.exists(self._target_port.reftest_expected_filename(test_abspath)) or fs.exists(
                self._target_port.reftest_expected_mismatch_filename(test_abspath)
            ):
                _log.error("%s seems to be a reftest. We can not rebase for reftests.", test)
                self._rebaselining_tests = set()
                return False

        if not self._logged_before:
            _log.info("")
        _log.info("%s: Rebaselining %d tests:", self._platform, len(self._rebaselining_tests))
        for test_no, test in enumerate(self._rebaselining_tests, 1):
            _log.debug("  %d: %s", test_no, test)

        return True

    def _get_latest_revision(self, url):
        """Get the latest layout test revision number from buildbot.

        Args:
          url: Url to retrieve layout test revision numbers.

        Returns:
          The latest revision number (as a string of digits), or
          None on failure.
        """

        _log.debug('Url to retrieve revision: "%s"', url)

        content = self._url_fetcher.fetch(url)

        revisions = re.findall(self.REVISION_REGEX, content)
        if not revisions:
            _log.error('Failed to find revision, content: "%s"', content)
            return None

        revisions.sort(key=int)
        _log.debug("  Latest revision: %s", revisions[len(revisions) - 1])
        return revisions[len(revisions) - 1]

    def _get_archive_dir_name(self, platform):
        """Get name of the layout test archive directory.

        Returns:
          Directory name or
          None on failure
        """

        if platform in ARCHIVE_DIR_NAME_DICT:
            return ARCHIVE_DIR_NAME_DICT[platform]

        _log.error("Cannot find platform key %s in archive directory name dictionary", platform)
        return None

    def _get_archive_url(self):
        """Generate the url to download latest layout test archive.

        Returns:
          Url to download archive or
          None on failure
        """

        if self._options.force_archive_url:
            return self._options.force_archive_url

        dir_name = self._get_archive_dir_name(self._platform)
        if not dir_name:
            return None

        _log.debug('Buildbot platform dir name: "%s"', dir_name)

        url_base = "%s/%s/" % (self._options.archive_url, dir_name)
        latest_revision = self._get_latest_revision(url_base)
        # _get_latest_revision returns the revision as a string of digits.
        if latest_revision is None or int(latest_revision) <= 0:
            return None
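        # The resulting URL has the form
        # "<options.archive_url>/<dir_name>/<latest_revision>/layout-test-results.zip".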
        archive_url = "%s%s/layout-test-results.zip" % (url_base, latest_revision)
        _log.info("  Using %s", archive_url)
        return archive_url

    def _download_buildbot_archive(self):
        """Download layout test archive file from buildbot and return a handle to it."""
        url = self._get_archive_url()
        if url is None:
            return None

        archive_file = zipfileset.ZipFileSet(url, filesystem=self._filesystem, zip_factory=self._zip_factory)
        _log.debug("Archive downloaded")
        return archive_file

    def _extract_and_add_new_baselines(self, zip_file):
        """Extract new baselines from the zip file and add them to SVN repository.

        Returns:
          List of tests that have been rebaselined or None on failure."""
        zip_namelist = zip_file.namelist()

        _log.debug("zip file namelist:")
        for name in zip_namelist:
            _log.debug("  " + name)

        _log.debug('Platform dir: "%s"', self._platform)

        self._rebaselined_tests = []
        for test_no, test in enumerate(self._rebaselining_tests):
            _log.debug("Test %d: %s", test_no + 1, test)
            self._extract_and_add_new_baseline(test, zip_file)

    def _extract_and_add_new_baseline(self, test, zip_file):
        found = False
        scm_error = False
        test_basename = self._filesystem.splitext(test)[0]
        for suffix in BASELINE_SUFFIXES:
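            # Actual results live under layout-test-results/ in the archive;
            # e.g. a (hypothetical) test fast/dom/foo.html with suffix ".txt"
            # maps to "layout-test-results/fast/dom/foo-actual.txt".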
            archive_test_name = "layout-test-results/%s-actual%s" % (test_basename, suffix)
            _log.debug('  Archive test file name: "%s"', archive_test_name)
            if archive_test_name not in zip_file.namelist():
                _log.debug("  %s file not in archive.", suffix)
                continue

            found = True
            _log.debug("  %s file found in archive.", suffix)

            temp_name = self._extract_from_zip_to_tempfile(zip_file, archive_test_name)

            expected_filename = "%s-expected%s" % (test_basename, suffix)
            expected_fullpath = self._filesystem.join(self._rebaseline_port.baseline_path(), expected_filename)
            expected_fullpath = self._filesystem.normpath(expected_fullpath)
            _log.debug('  Expected file full path: "%s"', expected_fullpath)

            relpath = self._filesystem.relpath(expected_fullpath, self._target_port.layout_tests_dir())

            # TODO(victorw): for now, the rebaselining tool checks whether
            # or not THIS baseline is duplicate and should be skipped.
            # We could improve the tool to check all baselines in upper
            # and lower levels and remove all duplicated baselines.
            if self._is_dup_baseline(temp_name, expected_fullpath, test, suffix, self._platform):
                self._filesystem.remove(temp_name)
                if self._filesystem.exists(expected_fullpath):
                    _log.info("  Removing %s" % relpath)
                    self._delete_baseline(expected_fullpath)
                _log.debug("  %s is a duplicate" % relpath)

                # FIXME: We consider a duplicate baseline a success in the normal case.
                # FIXME: This may not be what you want sometimes; should this be
                # FIXME: controllable?
                self._rebaselined_tests.append(test)
                continue

            if suffix == ".checksum" and self._png_has_same_checksum(temp_name, test, expected_fullpath):
                self._filesystem.remove(temp_name)
                # If an old checksum exists, delete it.
                self._delete_baseline(expected_fullpath)
                continue

            self._filesystem.maybe_make_directory(self._filesystem.dirname(expected_fullpath))
            self._filesystem.move(temp_name, expected_fullpath)

            path_from_base = self._filesystem.relpath(expected_fullpath)
            if self._scm.exists(path_from_base):
                _log.info("  Updating %s" % relpath)
            else:
                _log.info("  Adding %s" % relpath)

            if self._scm.add(expected_fullpath, return_exit_code=True):
                # FIXME: print detailed diagnose messages
                scm_error = True
            elif suffix != ".checksum":
                self._create_html_baseline_files(expected_fullpath)

        if not found:
            _log.warn("No results in archive for %s" % test)
        elif scm_error:
            _log.warn("Failed to add baselines to your repository.")
        else:
            _log.debug("  Rebaseline succeeded.")
            self._rebaselined_tests.append(test)

    def _extract_from_zip_to_tempfile(self, zip_file, filename):
        """Extracts |filename| from |zip_file|, a ZipFileSet. Returns the full
           path name to the extracted file."""
        data = zip_file.read(filename)
        suffix = self._filesystem.splitext(filename)[1]
        tempfile, temp_name = self._filesystem.open_binary_tempfile(suffix)
        tempfile.write(data)
        tempfile.close()
        return temp_name

    def _png_has_same_checksum(self, checksum_path, test, checksum_expected_fullpath):
        """Returns True if the fallback png for |checksum_expected_fullpath|
        contains the same checksum."""
        fs = self._filesystem
        png_fullpath = self._first_fallback_png_for_test(test)

        if not fs.exists(png_fullpath):
            _log.error("  Checksum without png file found! Expected %s to exist." % png_fullpath)
            return False

        with fs.open_binary_file_for_reading(png_fullpath) as filehandle:
            checksum_in_png = read_checksum_from_png.read_checksum(filehandle)
            checksum_in_text_file = fs.read_text_file(checksum_path)
            if checksum_in_png and checksum_in_png != checksum_in_text_file:
                _log.error(
                    "  checksum in %s and %s don't match!  Continuing"
                    " to copy but please investigate." % (checksum_expected_fullpath, png_fullpath)
                )
            return checksum_in_text_file == checksum_in_png

    def _first_fallback_png_for_test(self, test):
        all_baselines = self._rebaseline_port.expected_baselines(test, ".png", True)
        return self._filesystem.join(all_baselines[0][0], all_baselines[0][1])

    def _is_dup_baseline(self, new_baseline, baseline_path, test, suffix, platform):
        """Check whether a baseline is duplicate and can fallback to same
           baseline for another platform. For example, if a test has same
           baseline on linux and windows, then we only store windows
           baseline and linux baseline will fallback to the windows version.

        Args:
          new_baseline: temp filename containing the new baseline results
          baseline_path: baseline expectation file name.
          test: test name.
          suffix: file suffix of the expected results, including dot;
                  e.g. '.txt' or '.png'.
          platform: baseline platform 'mac', 'win' or 'linux'.

        Returns:
          True if the baseline is unnecessary.
          False otherwise.
        """
        all_baselines = self._rebaseline_port.expected_baselines(test, suffix, True)

        for fallback_dir, fallback_file in all_baselines:
            if not fallback_dir or not fallback_file:
                continue

            fallback_fullpath = self._filesystem.normpath(self._filesystem.join(fallback_dir, fallback_file))
            if fallback_fullpath.lower() == baseline_path.lower():
                continue
            fallback_dir_relpath = self._filesystem.relpath(fallback_dir, self._target_port.layout_tests_dir())
            if fallback_dir_relpath == "":
                fallback_dir_relpath = "<generic>"

            new_output = self._filesystem.read_binary_file(new_baseline)
            fallback_output = self._filesystem.read_binary_file(fallback_fullpath)
            is_image = baseline_path.lower().endswith(".png")
            if not self._diff_baselines(new_output, fallback_output, is_image):
                _log.info("  Skipping %s (matches %s)", test, fallback_dir_relpath)
                return True
            return False

        return False

    def _diff_baselines(self, output1, output2, is_image):
        """Check whether two baselines are different.

        Args:
          output1, output2: contents of the baselines to compare.
          is_image: whether the contents are PNG images rather than text.

        Returns:
          True if the two baselines differ.
          False otherwise.
        """

        if is_image:
            return self._port.diff_image(output1, output2)[0]

        return self._port.compare_text(output1, output2)

    def _delete_baseline(self, filename):
        """Remove the file from repository and delete it from disk.

        Args:
          filename: full path of the file to delete.
        """

        if not filename or not self._filesystem.isfile(filename):
            return
        self._scm.delete(filename)

    def _create_html_baseline_files(self, baseline_fullpath):
        """Create baseline files (old, new and diff) in html directory.

           The files are used to compare the rebaselining results.

        Args:
          baseline_fullpath: full path of the expected baseline file.
        """

        baseline_relpath = self._filesystem.relpath(baseline_fullpath)
        _log.debug('  Html: create baselines for "%s"', baseline_relpath)

        if not baseline_fullpath or not self._filesystem.exists(baseline_fullpath):
            _log.debug('  Html: Does not exist: "%s"', baseline_fullpath)
            return

        if not self._scm.exists(baseline_relpath):
            _log.debug('  Html: Does not exist in scm: "%s"', baseline_relpath)
            return

        # Copy the new baseline to html directory for result comparison.
        baseline_filename = self._filesystem.basename(baseline_fullpath)
        new_file = get_result_file_fullpath(
            self._filesystem, self._options.html_directory, baseline_filename, self._platform, "new"
        )
        self._filesystem.copyfile(baseline_fullpath, new_file)
        _log.debug('  Html: copied new baseline file from "%s" to "%s".', baseline_fullpath, new_file)

        # Get the old baseline from the repository and save to the html directory.
        try:
            output = self._scm.show_head(baseline_relpath)
        except ScriptError as e:
            _log.warning(e)
            output = ""

        if (not output) or (output.upper().rstrip().endswith("NO SUCH FILE OR DIRECTORY")):
            _log.warning('  No base file: "%s"', baseline_fullpath)
            return
        base_file = get_result_file_fullpath(
            self._filesystem, self._options.html_directory, baseline_filename, self._platform, "old"
        )
        if base_file.upper().endswith(".PNG"):
            self._filesystem.write_binary_file(base_file, output)
        else:
            self._filesystem.write_text_file(base_file, output)
        _log.debug('  Html: created old baseline file: "%s".', base_file)

        # Get the diff between old and new baselines and save to the html dir.
        diff_file = get_result_file_fullpath(
            self._filesystem, self._options.html_directory, baseline_filename, self._platform, "diff"
        )
        has_diff = False
        if baseline_filename.upper().endswith(".TXT"):
            output = self._scm.diff_for_file(baseline_relpath, log=_log)
            if output:
                self._filesystem.write_text_file(diff_file, output)
                has_diff = True
        elif baseline_filename.upper().endswith(".PNG"):
            old_file = get_result_file_fullpath(
                self._filesystem, self._options.html_directory, baseline_filename, self._platform, "old"
            )
            new_file = get_result_file_fullpath(
                self._filesystem, self._options.html_directory, baseline_filename, self._platform, "new"
            )
            _log.debug(' Html: diffing "%s" and "%s"', old_file, new_file)
            old_output = self._filesystem.read_binary_file(old_file)
            new_output = self._filesystem.read_binary_file(new_file)
            image_diff = self._port.diff_image(old_output, new_output)[0]
            if image_diff:
                self._filesystem.write_binary_file(diff_file, image_diff)
                has_diff = True

        if has_diff:
            _log.debug('  Html: created baseline diff file: "%s".', diff_file)
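

# A minimal, hypothetical usage sketch. The concrete platform name and the
# options attribute below ("chromium-win", options.backup) are illustrative
# and not defined in this file:
#
#   rebaseliner = Rebaseliner(running_port, target_port, "chromium-win",
#                             options, url_fetcher, zip_factory, scm)
#   if rebaseliner.run():
#       rebaseliner.remove_rebaselining_expectations(
#           rebaseliner.get_rebaselined_tests(), backup=options.backup)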