Example #1
def test_closest_report(mock_cache, mock_hgmo):
    """
    Test algo to find the closest report for any changeset
    """
    # Build revision for push 992
    revision = "992{}".format(uuid.uuid4().hex[3:])

    # No data at first
    assert mock_cache.redis.zcard("reports") == 0
    assert len(mock_cache.redis.keys("changeset:myrepo:*")) == 0

    # Try to find a report, but none is available
    with pytest.raises(Exception) as e:
        mock_cache.find_closest_report("myrepo", revision)
    assert str(e.value) == "No report found"

    # Some pushes were ingested though
    assert len(mock_cache.redis.keys("changeset:myrepo:*")) > 0

    # Add a report on 994, 2 pushes after our revision
    report_rev = hashlib.md5(b"994").hexdigest()
    mock_cache.bucket.add_mock_blob(
        "myrepo/{}/all:all.json.zstd".format(report_rev), coverage=0.5
    )
    report_994 = Report(
        mock_cache.reports_dir, "myrepo", report_rev, push_id=1, date=994
    )

    # Add a report on 990, 2 pushes before our revision
    base_rev = hashlib.md5(b"990").hexdigest()
    mock_cache.bucket.add_mock_blob(
        "myrepo/{}/all:all.json.zstd".format(base_rev), coverage=0.4
    )
    report_990 = Report(mock_cache.reports_dir, "myrepo", base_rev, push_id=1, date=990)

    # Now we have a report!
    assert mock_cache.list_reports("myrepo") == []
    assert mock_cache.find_closest_report("myrepo", revision) == report_994
    assert mock_cache.list_reports("myrepo") == [report_994]

    # This should also work for revisions before
    revision = "991{}".format(uuid.uuid4().hex[3:])
    assert mock_cache.find_closest_report("myrepo", revision) == report_994

    # ... and the revision on the push itself
    revision = "994{}".format(uuid.uuid4().hex[3:])
    assert mock_cache.find_closest_report("myrepo", revision) == report_994

    # We can also retrieve the base revision
    revision = "990{}".format(uuid.uuid4().hex[3:])
    assert mock_cache.find_closest_report("myrepo", revision) == report_990
    revision = "989{}".format(uuid.uuid4().hex[3:])
    assert mock_cache.find_closest_report("myrepo", revision) == report_990
    assert mock_cache.list_reports("myrepo") == [report_994, report_990]

    # But not for revisions after the push
    revision = "995{}".format(uuid.uuid4().hex[3:])
    with pytest.raises(Exception) as e:
        mock_cache.find_closest_report("myrepo", revision)
    assert str(e.value) == "No report found"
Example #2
def test_expiry(mock_cache):
    """
    Test expiry for platform & suite reports
    """
    mock_cache.bucket.add_mock_blob("myrepo/rev1/all:somesuite.json.zstd", coverage=1.0)
    report_suite = Report(
        mock_cache.reports_dir,
        "myrepo",
        "rev1",
        platform="all",
        suite="somesuite",
        date=1000,
        push_id=1,
    )
    mock_cache.ingest_report(report_suite)
    assert report_suite.ttl == 1296000
    assert mock_cache.redis.ttl(report_suite.key_overall) > 0

    mock_cache.bucket.add_mock_blob("myrepo/rev1/win:all.json.zstd", coverage=1.0)
    report_platform = Report(
        mock_cache.reports_dir, "myrepo", "rev1", platform="win", date=2000, push_id=2
    )
    mock_cache.ingest_report(report_platform)
    assert report_platform.ttl == 1296000
    assert mock_cache.redis.ttl(report_platform.key_overall) > 0
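Both reports above expire after 1296000 seconds, i.e. 15 days. A minimal sketch of how a Report.ttl property could produce that value, assuming only platform- or suite-filtered reports expire; both the constant and the condition are inferred from this test, not taken from the project's code:

REPORT_TTL = 15 * 24 * 3600  # 1296000 seconds, matching the assertions above


class Report:
    # ... constructor as used throughout these examples ...

    @property
    def ttl(self):
        # Assumption: only filtered reports expire; the default all:all
        # report is kept without a TTL.
        if self.platform != "all" or self.suite != "all":
            return REPORT_TTL
        return None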
Example #3
def test_download_report(mock_cache):
    """
    Test base method to download a report & store it on local FS
    """
    mock_cache.bucket.add_mock_blob("myrepo/deadbeef123/all:all.json.zstd")

    # Does not exist
    report = Report(mock_cache.reports_dir, "myrepo", "missing", date=1, push_id=1)
    assert mock_cache.download_report(report) is False

    archive = os.path.join(
        mock_cache.reports_dir, "myrepo", "deadbeef123", "all:all.json.zstd"
    )
    payload = os.path.join(
        mock_cache.reports_dir, "myrepo", "deadbeef123", "all:all.json"
    )
    assert not os.path.exists(archive)
    assert not os.path.exists(payload)

    # Valid blob
    report = Report(mock_cache.reports_dir, "myrepo", "deadbeef123", date=1, push_id=1)
    assert mock_cache.download_report(report) is True
    assert archive == report.archive_path
    assert payload == report.path

    # Only the payload remains after download
    assert not os.path.exists(archive)
    assert os.path.exists(payload)

    assert json.load(open(payload)) == {"children": {}, "coveragePercent": 0.0}

    assert mock_cache.redis.keys("*") == []
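From what this test observes, download_report fetches the .zstd blob, decompresses it into the plain .json payload and then drops the archive, without touching Redis. A hedged sketch under those assumptions; the blob lookup by report.name and the zstandard usage are guesses, not the project's actual code:

import os

import zstandard


def download_report(self, report):
    """Sketch: download a report blob and keep only the decompressed payload."""
    blob = self.bucket.blob(report.name)  # `report.name` is a hypothetical attribute
    if not blob.exists():
        return False

    os.makedirs(os.path.dirname(report.archive_path), exist_ok=True)
    blob.download_to_filename(report.archive_path)

    # Decompress the zstd archive into the JSON payload next to it
    with open(report.archive_path, "rb") as archive, open(report.path, "wb") as payload:
        zstandard.ZstdDecompressor().copy_stream(archive, payload)

    # Only the payload remains locally, as the test asserts
    os.remove(report.archive_path)
    return True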
Example #4
    def ingest_available_reports(self, repository, until=None):
        """
        Ingest all the available reports for a repository
        """
        assert isinstance(repository, str)

        REGEX_BLOB = re.compile(
            r"^{}/(\w+)/([\w\-]+):([\w\-]+).json.zstd$".format(repository))
        now = datetime.utcnow().replace(tzinfo=pytz.UTC)
        for blob in self.bucket.list_blobs(prefix=repository):

            if isinstance(until,
                          timedelta) and (now - blob.time_created) >= until:
                logger.debug(f"Skipping old blob {blob}")
                continue

            # Get changeset from blob name
            match = REGEX_BLOB.match(blob.name)
            if match is None:
                logger.warning("Invalid blob found {}".format(blob.name))
                continue
            changeset = match.group(1)
            platform = match.group(2)
            suite = match.group(3)

            # Build report instance and ingest it
            report = Report(self.reports_dir, repository, changeset, platform,
                            suite)
            self.ingest_report(report)
Example #5
    def ingest_pushes(self, repository, platform, suite, min_push_id=None, nb_pages=3):
        """
        Ingest HGMO changesets and pushes into our Redis Cache
        The pagination goes from oldest to newest, starting from the optional min_push_id
        """
        ingested = False
        for push_id, push in hgmo_pushes(repository, min_push_id, nb_pages):
            for changeset in push["changesets"]:
                report = Report(
                    self.reports_dir,
                    repository,
                    changeset,
                    platform,
                    suite,
                    push_id=push_id,
                    date=push["date"],
                )

                # Always link changeset to push to find closest available report
                self.redis.hmset(
                    KEY_CHANGESET.format(
                        repository=report.repository, changeset=report.changeset
                    ),
                    {"push": report.push_id, "date": report.date},
                )

                if not ingested and self.ingest_report(report):
                    logger.info(
                        "Found report in that push", push_id=push_id, report=str(report)
                    )

                    # Only ingest first report found in a push in order to stay below 30s response time
                    ingested = True
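The tests query Redis for keys matching changeset:myrepo:*, so the KEY_CHANGESET template used above presumably expands along these lines; this is an inference, not the verified constant:

# Assumed template, inferred from the "changeset:myrepo:*" keys the tests inspect
KEY_CHANGESET = "changeset:{repository}:{changeset}"
# e.g. "changeset:myrepo:deadbeef123" -> hash {"push": 1, "date": 995}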
Example #6
def test_get_coverage(mock_cache):
    """
    Test coverage access with re-download
    """
    # No report at first
    report = Report(mock_cache.reports_dir,
                    "myrepo",
                    "myhash",
                    push_id=1,
                    date=1)
    with pytest.raises(AssertionError) as e:
        mock_cache.get_coverage(report, "")
    assert str(e.value) == "Missing report myrepo/myhash/all:all"

    # Report available online
    mock_cache.bucket.add_mock_blob("myrepo/myhash/all:all.json.zstd")

    # Coverage available
    coverage = mock_cache.get_coverage(report, "")
    assert coverage == {
        "children": [],
        "coveragePercent": 0.0,
        "path": "",
        "type": "directory",
        "changeset": "myhash",
    }

    # Remove local file
    path = os.path.join(mock_cache.reports_dir, "myrepo", "myhash",
                        "all:all.json")
    assert os.path.exists(path)
    os.unlink(path)

    # Coverage still available
    coverage = mock_cache.get_coverage(report, "")
    assert coverage == {
        "children": [],
        "coveragePercent": 0.0,
        "path": "",
        "type": "directory",
        "changeset": "myhash",
    }

    # Make invalid json
    assert os.path.exists(path)
    with open(path, "a") as f:
        f.write("break")

    # Coverage still available
    coverage = mock_cache.get_coverage(report, "")
    assert coverage == {
        "children": [],
        "coveragePercent": 0.0,
        "path": "",
        "type": "directory",
        "changeset": "myhash",
    }
    assert os.path.exists(path)
    assert isinstance(json.load(open(path)), dict)
Example #7
def test_latest(mock_cache):
    """
    Test the /v2/latest function
    """

    # Empty at first
    assert coverage_latest() == []

    # Add some reports on mozilla-central
    for rev in range(30):
        mock_cache.bucket.add_mock_blob(
            f"mozilla-central/rev{rev}/all:all.json.zstd", coverage=rev / 100.0
        )
        report = Report(
            mock_cache.reports_dir,
            "mozilla-central",
            f"rev{rev}",
            date=1000 + rev,
            push_id=rev * 5,
        )
        mock_cache.ingest_report(report)

    # And one on another repo
    mock_cache.bucket.add_mock_blob("myrepo/deadbeef/all:all.json.zstd", coverage=1)
    report = Report(mock_cache.reports_dir, "myrepo", "deadbeef", date=1000, push_id=2)
    mock_cache.ingest_report(report)

    # Check endpoint lists last 10 revisions
    assert coverage_latest() == [
        {"push": 145, "revision": "rev29"},
        {"push": 140, "revision": "rev28"},
        {"push": 135, "revision": "rev27"},
        {"push": 130, "revision": "rev26"},
        {"push": 125, "revision": "rev25"},
        {"push": 120, "revision": "rev24"},
        {"push": 115, "revision": "rev23"},
        {"push": 110, "revision": "rev22"},
        {"push": 105, "revision": "rev21"},
        {"push": 100, "revision": "rev20"},
    ]

    # Another repository does not
    assert coverage_latest("myrepo") == [{"push": 2, "revision": "deadbeef"}]
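Given the Cache API shown in the other examples, coverage_latest could plausibly be a thin wrapper around list_reports with nb=10. This is a sketch under that assumption; the module-level cache instance and the mozilla-central default are guesses consistent with the test, not confirmed code:

def coverage_latest(repository="mozilla-central"):
    """Sketch: expose the last 10 ingested reports as push/revision pairs."""
    reports = cache.list_reports(repository, nb=10)  # assumed shared Cache instance
    return [
        # Sorted-set scores come back as floats, hence the int() cast
        {"push": int(report.push_id), "revision": report.changeset}
        for report in reports
    ]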
Example #8
        def _coverage(changeset, date):
            # Load overall coverage for specified path
            changeset = changeset.decode("utf-8")

            report = Report(
                self.reports_dir, repository, changeset, platform, suite, date=date
            )
            coverage = self.redis.hget(report.key_overall, path)
            if coverage is not None:
                coverage = float(coverage)
            return {"changeset": changeset, "date": int(date), "coverage": coverage}
Example #9
    def ingest_available_reports(self,
                                 repository: str,
                                 until: Optional[datetime] = None) -> None:
        """
        Ingest all the available reports for a repository
        """
        assert isinstance(repository, str)

        for changeset, platform, suite in list_reports(self.bucket, repository,
                                                       until):
            # Build report instance and ingest it
            report = Report(self.reports_dir, repository, changeset, platform,
                            suite)
            self.ingest_report(report)
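Example #9 delegates blob filtering to a list_reports(bucket, repository, until) helper. Judging from Example #4, that helper presumably factors out the regex match and the age check; note that Example #9 annotates until as a datetime while Example #4 treats it as a timedelta. A hedged sketch following Example #4's behaviour:

import re
from datetime import datetime, timedelta
from typing import Iterator, Optional, Tuple

import pytz


def list_reports(bucket, repository: str,
                 until: Optional[timedelta] = None) -> Iterator[Tuple[str, str, str]]:
    """Sketch: yield (changeset, platform, suite) for each report blob of a repository."""
    regex = re.compile(
        r"^{}/(\w+)/([\w\-]+):([\w\-]+).json.zstd$".format(repository)
    )
    now = datetime.utcnow().replace(tzinfo=pytz.UTC)
    for blob in bucket.list_blobs(prefix=repository):
        # Optionally skip blobs older than `until`
        if isinstance(until, timedelta) and (now - blob.time_created) >= until:
            continue

        match = regex.match(blob.name)
        if match is None:
            continue
        yield match.group(1), match.group(2), match.group(3)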
Example #10
    def find_closest_report(self,
                            repository,
                            changeset,
                            platform=DEFAULT_FILTER,
                            suite=DEFAULT_FILTER):
        """
        Find the closest report from specified changeset:
        1. Lookup the changeset push in cache
        2. Lookup the changeset push in HGMO
        3. Find the first report after that push
        """

        # Lookup push from cache (fast)
        key = KEY_CHANGESET.format(repository=repository, changeset=changeset)
        push_id = self.redis.hget(key, "push")
        if push_id:
            # Redis lib uses bytes for all output
            push_id = int(push_id.decode("utf-8"))
            date = self.redis.hget(key, "date").decode("utf-8")

            # Check the report variant is available locally
            report = Report(
                self.reports_dir,
                repository,
                changeset,
                platform,
                suite,
                push_id=push_id,
                date=date,
            )
            if not os.path.exists(report.path):
                self.ingest_report(report)
        else:

            # Lookup push from HGMO (slow)
            push_id, _ = hgmo_revision_details(repository, changeset)

            # Ingest pushes as we clearly don't have it in cache
            self.ingest_pushes(repository,
                               platform,
                               suite,
                               min_push_id=push_id - 1,
                               nb_pages=1)

        # Load report from that push
        return self.find_report(repository,
                                platform,
                                suite,
                                push_range=(push_id, MAX_PUSH))
Example #11
def test_ingest_hgmo(mock_cache, mock_hgmo):
    """
    Test ingestion using a mock HGMO
    """

    # Add a report on push 995
    rev = hashlib.md5(b"995").hexdigest()
    mock_cache.bucket.add_mock_blob("myrepo/{}/all:all.json.zstd".format(rev),
                                    coverage=0.5)

    # Ingest last pushes
    assert mock_cache.list_reports("myrepo") == []
    assert len(mock_cache.redis.keys("changeset:myrepo:*")) == 0
    mock_cache.ingest_pushes("myrepo", "all", "all")
    assert len(mock_cache.redis.keys("changeset:myrepo:*")) > 0
    assert mock_cache.list_reports("myrepo") == [
        Report(mock_cache.reports_dir, "myrepo", rev, push_id=1, date=995)
    ]
Example #12
    def list_reports(
            self,
            repository,
            platform=DEFAULT_FILTER,
            suite=DEFAULT_FILTER,
            nb=5,
            push_range=(MAX_PUSH, MIN_PUSH),
    ):
        """
        List the last reports available on the server, ordered by push
        by default from newer to older
        The order is detected from the push range
        """
        assert isinstance(nb, int)
        assert nb > 0
        assert isinstance(push_range, tuple) and len(push_range) == 2

        # Detect ordering from push range
        start, end = push_range
        op = self.redis.zrangebyscore if start < end else self.redis.zrevrangebyscore

        reports = op(
            KEY_REPORTS.format(repository=repository,
                               platform=platform,
                               suite=suite),
            start,
            end,
            start=0,
            num=nb,
            withscores=True,
        )

        return [
            Report(
                self.reports_dir,
                repository,
                changeset.decode("utf-8"),
                platform,
                suite,
                push_id=push,
            ) for changeset, push in reports
        ]
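The key read here must match what ingest_report writes; the ingestion test counts members of reports:myrepo:all:all and history:myrepo, which suggests templates along these lines (an inference, not the verified constants):

# Assumed sorted-set key templates, inferred from the Redis keys the tests inspect;
# members are changesets, scored by push id and push date respectively.
KEY_REPORTS = "reports:{repository}:{platform}:{suite}"
KEY_HISTORY = "history:{repository}"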
Example #13
    def ingest_available_reports(self, repository):
        """
        Ingest all the available reports for a repository
        """
        assert isinstance(repository, str)

        REGEX_BLOB = re.compile(
            r"^{}/(\w+)/([\w\-]+):([\w\-]+).json.zstd$".format(repository)
        )
        for blob in self.bucket.list_blobs(prefix=repository):

            # Get changeset from blob name
            match = REGEX_BLOB.match(blob.name)
            if match is None:
                logger.warning("Invalid blob found {}".format(blob.name))
                continue
            changeset = match.group(1)
            platform = match.group(2)
            suite = match.group(3)

            # Build report instance and ingest it
            report = Report(self.reports_dir, repository, changeset, platform, suite)
            self.ingest_report(report)
Example #14
def test_ingestion(mock_cache):
    """
    Test ingestion of several reports and their retrieval through Redis index
    """
    # Setup blobs
    mock_cache.bucket.add_mock_blob("myrepo/rev1/all:all.json.zstd",
                                    coverage=0.1)
    mock_cache.bucket.add_mock_blob("myrepo/rev2/all:all.json.zstd",
                                    coverage=0.2)
    mock_cache.bucket.add_mock_blob("myrepo/rev10/all:all.json.zstd",
                                    coverage=1.0)

    # No reports at first
    assert mock_cache.redis.zcard(b"reports:myrepo") == 0
    assert mock_cache.redis.zcard(b"history:myrepo") == 0
    assert mock_cache.list_reports("myrepo") == []

    # Ingest those 3 reports
    report_1 = Report(mock_cache.reports_dir,
                      "myrepo",
                      "rev1",
                      date=1000,
                      push_id=1)
    report_2 = Report(mock_cache.reports_dir,
                      "myrepo",
                      "rev2",
                      date=2000,
                      push_id=2)
    report_10 = Report(mock_cache.reports_dir,
                       "myrepo",
                       "rev10",
                       date=9000,
                       push_id=10)
    mock_cache.ingest_report(report_1)
    mock_cache.ingest_report(report_2)
    mock_cache.ingest_report(report_10)

    # They must be in redis and on the file system
    assert mock_cache.redis.zcard(b"reports:myrepo:all:all") == 3
    assert mock_cache.redis.zcard(b"history:myrepo") == 3
    assert os.path.exists(
        os.path.join(mock_cache.reports_dir, "myrepo", "rev1", "all:all.json"))
    assert os.path.exists(
        os.path.join(mock_cache.reports_dir, "myrepo", "rev2", "all:all.json"))
    assert os.path.exists(
        os.path.join(mock_cache.reports_dir, "myrepo", "rev10",
                     "all:all.json"))

    # Reports are exposed, and sorted by push
    assert mock_cache.list_reports("another") == []
    assert mock_cache.list_reports("myrepo") == [report_10, report_2, report_1]
    assert mock_cache.find_report("myrepo") == report_10
    assert mock_cache.get_history("myrepo", start=200, end=20000) == [
        {
            "changeset": "rev10",
            "coverage": 1.0,
            "date": 9000
        },
        {
            "changeset": "rev2",
            "coverage": 0.2,
            "date": 2000
        },
        {
            "changeset": "rev1",
            "coverage": 0.1,
            "date": 1000
        },
    ]

    # Even if we add a smaller one later on, reports are still sorted
    mock_cache.bucket.add_mock_blob("myrepo/rev5/all:all.json.zstd",
                                    coverage=0.5)
    report_5 = Report(mock_cache.reports_dir,
                      "myrepo",
                      "rev5",
                      date=5000,
                      push_id=5)
    mock_cache.ingest_report(report_5)
    assert mock_cache.list_reports("myrepo") == [
        report_10,
        report_5,
        report_2,
        report_1,
    ]
    assert mock_cache.find_report("myrepo") == report_10
    assert mock_cache.find_report("myrepo", push_range=(7, 0)) == report_5
    assert mock_cache.get_history("myrepo", start=200, end=20000) == [
        {
            "changeset": "rev10",
            "coverage": 1.0,
            "date": 9000
        },
        {
            "changeset": "rev5",
            "coverage": 0.5,
            "date": 5000
        },
        {
            "changeset": "rev2",
            "coverage": 0.2,
            "date": 2000
        },
        {
            "changeset": "rev1",
            "coverage": 0.1,
            "date": 1000
        },
    ]
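Pulling together what the tests observe, ingest_report plausibly downloads the report, records its overall coverage under key_overall, indexes the changeset in the reports and history sorted sets, and applies the TTL from Example #2. A reconstruction under those assumptions, not the project's actual implementation:

import json


def ingest_report(self, report):
    """Sketch of a Cache method: download a report and index it in Redis (assumed behaviour)."""
    if not self.download_report(report):
        return False

    # Overall coverage for the root path, read back by the _coverage closure;
    # the real code likely stores every directory path, this sketch keeps only "".
    with open(report.path) as payload:
        data = json.load(payload)
    self.redis.hset(report.key_overall, "", data["coveragePercent"])

    # Index by push id and by push date, matching the keys counted in the tests
    self.redis.zadd(
        "reports:{}:{}:{}".format(report.repository, report.platform, report.suite),
        {report.changeset: report.push_id},
    )
    self.redis.zadd(
        "history:{}".format(report.repository),
        {report.changeset: report.date},
    )

    # Filtered reports expire, as asserted in Example #2
    if report.ttl:
        self.redis.expire(report.key_overall, report.ttl)
    return True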