def test_closest_report(mock_cache, mock_hgmo):
    """
    Test algo to find the closest report for any changeset
    """

    def fake_rev(push_prefix):
        # Fake changeset: known 3-digit push prefix + random hex tail
        return push_prefix + uuid.uuid4().hex[3:]

    # Build revision for push 992
    revision = fake_rev("992")

    # Cache starts completely empty
    assert mock_cache.redis.zcard("reports") == 0
    assert len(mock_cache.redis.keys("changeset:myrepo:*")) == 0

    # Searching now must fail: no report has been ingested yet
    with pytest.raises(Exception) as exc_info:
        mock_cache.find_closest_report("myrepo", revision)
    assert str(exc_info.value) == "No report found"

    # The failed lookup still ingested some pushes along the way
    assert len(mock_cache.redis.keys("changeset:myrepo:*")) > 0

    # Publish a report on push 994, two pushes after our revision
    report_rev = hashlib.md5(b"994").hexdigest()
    mock_cache.bucket.add_mock_blob(
        "myrepo/{}/all:all.json.zstd".format(report_rev), coverage=0.5
    )
    report_994 = Report(
        mock_cache.reports_dir, "myrepo", report_rev, push_id=1, date=994
    )

    # And a report on push 990, two pushes before our revision
    base_rev = hashlib.md5(b"990").hexdigest()
    mock_cache.bucket.add_mock_blob(
        "myrepo/{}/all:all.json.zstd".format(base_rev), coverage=0.4
    )
    report_990 = Report(
        mock_cache.reports_dir, "myrepo", base_rev, push_id=1, date=990
    )

    # The 994 report is now discoverable, and gets listed once found
    assert mock_cache.list_reports("myrepo") == []
    assert mock_cache.find_closest_report("myrepo", revision) == report_994
    assert mock_cache.list_reports("myrepo") == [report_994]

    # Revisions older than the report also resolve to it...
    assert mock_cache.find_closest_report("myrepo", fake_rev("991")) == report_994

    # ... and so does a revision on the report's own push
    assert mock_cache.find_closest_report("myrepo", fake_rev("994")) == report_994

    # The base revision report is reachable too
    assert mock_cache.find_closest_report("myrepo", fake_rev("990")) == report_990
    assert mock_cache.find_closest_report("myrepo", fake_rev("989")) == report_990
    assert mock_cache.list_reports("myrepo") == [report_994, report_990]

    # But revisions newer than any available report still fail
    with pytest.raises(Exception) as exc_info:
        mock_cache.find_closest_report("myrepo", fake_rev("995"))
    assert str(exc_info.value) == "No report found"
def test_expiry(mock_cache):
    """
    Test expiry for platform & suite reports
    """
    # A suite-filtered report carries a 1296000s (15 day) TTL once ingested
    mock_cache.bucket.add_mock_blob("myrepo/rev1/all:somesuite.json.zstd", coverage=1.0)
    suite_report = Report(
        mock_cache.reports_dir,
        "myrepo",
        "rev1",
        platform="all",
        suite="somesuite",
        date=1000,
        push_id=1,
    )
    mock_cache.ingest_report(suite_report)
    assert suite_report.ttl == 1296000
    assert mock_cache.redis.ttl(suite_report.key_overall) > 0

    # Same behaviour for a platform-filtered report
    mock_cache.bucket.add_mock_blob("myrepo/rev1/win:all.json.zstd", coverage=1.0)
    platform_report = Report(
        mock_cache.reports_dir, "myrepo", "rev1", platform="win", date=2000, push_id=2
    )
    mock_cache.ingest_report(platform_report)
    assert platform_report.ttl == 1296000
    assert mock_cache.redis.ttl(platform_report.key_overall) > 0
def test_download_report(mock_cache):
    """
    Test base method to download a report & store it on local FS
    """
    mock_cache.bucket.add_mock_blob("myrepo/deadbeef123/all:all.json.zstd")

    # A report whose blob does not exist cannot be downloaded
    missing = Report(mock_cache.reports_dir, "myrepo", "missing", date=1, push_id=1)
    assert mock_cache.download_report(missing) is False

    archive_path = os.path.join(
        mock_cache.reports_dir, "myrepo", "deadbeef123", "all:all.json.zstd"
    )
    json_path = os.path.join(
        mock_cache.reports_dir, "myrepo", "deadbeef123", "all:all.json"
    )
    assert not os.path.exists(archive_path)
    assert not os.path.exists(json_path)

    # Downloading the valid blob succeeds and exposes both paths
    report = Report(mock_cache.reports_dir, "myrepo", "deadbeef123", date=1, push_id=1)
    assert mock_cache.download_report(report) is True
    assert report.archive_path == archive_path
    assert report.path == json_path

    # Only the JSON payload remains on disk after the download
    assert not os.path.exists(archive_path)
    assert os.path.exists(json_path)
    assert json.load(open(json_path)) == {"children": {}, "coveragePercent": 0.0}

    # A bare download writes nothing to Redis
    assert mock_cache.redis.keys("*") == []
def ingest_available_reports(self, repository, until=None):
    """
    Ingest all the available reports for a repository.

    repository: repository slug used as the bucket blob prefix
    until: optional datetime.timedelta; blobs created earlier than
           `now - until` are skipped (any other value disables the filter,
           matching the original permissive behaviour)
    """
    assert isinstance(repository, str)

    # Blob names look like <repository>/<changeset>/<platform>:<suite>.json.zstd
    # Fix: dots are escaped so ".json.zstd" is matched literally (an unescaped
    # "." matched any character), and the repository slug is escaped too.
    regex_blob = re.compile(
        r"^{}/(\w+)/([\w\-]+):([\w\-]+)\.json\.zstd$".format(re.escape(repository))
    )

    # Fix: datetime.utcnow() is deprecated; build an aware UTC timestamp
    # directly (pytz is already in scope in this module)
    now = datetime.now(pytz.UTC)

    for blob in self.bucket.list_blobs(prefix=repository):
        # Optionally skip blobs outside the requested time window
        if isinstance(until, timedelta) and (now - blob.time_created) >= until:
            logger.debug(f"Skipping old blob {blob}")
            continue

        # Extract changeset, platform & suite from the blob name
        match = regex_blob.match(blob.name)
        if match is None:
            # Fix: logger.warn is a deprecated alias of logger.warning
            logger.warning("Invalid blob found {}".format(blob.name))
            continue
        changeset, platform, suite = match.groups()

        # Build report instance and ingest it
        report = Report(self.reports_dir, repository, changeset, platform, suite)
        self.ingest_report(report)
def ingest_pushes(self, repository, platform, suite, min_push_id=None, nb_pages=3):
    """
    Ingest HGMO changesets and pushes into our Redis Cache.
    The pagination goes from oldest to newest, starting from the optional min_push_id.

    repository: repository slug
    platform / suite: report variant to look for in each push
    min_push_id: optional lower bound for HGMO pagination
    nb_pages: number of HGMO pages to walk
    """
    ingested = False
    for push_id, push in hgmo_pushes(repository, min_push_id, nb_pages):
        for changeset in push["changesets"]:
            report = Report(
                self.reports_dir,
                repository,
                changeset,
                platform,
                suite,
                push_id=push_id,
                date=push["date"],
            )

            # Always link changeset to push to find closest available report.
            # Fix: hset(..., mapping=...) replaces the deprecated hmset
            # (requires redis-py >= 3.5 — same wire command semantics).
            self.redis.hset(
                KEY_CHANGESET.format(
                    repository=report.repository, changeset=report.changeset
                ),
                mapping={"push": report.push_id, "date": report.date},
            )

            if not ingested and self.ingest_report(report):
                logger.info(
                    "Found report in that push", push_id=push_id, report=str(report)
                )

                # Only ingest first report found in order to stay below 30s
                # response time. NOTE: the flag is never reset, so at most one
                # report is ingested across the whole pagination, not one per push.
                ingested = True
def test_get_coverage(mock_cache):
    """
    Test coverage access with re-download
    """
    # Every successful lookup in this test yields the same payload
    expected = {
        "children": [],
        "coveragePercent": 0.0,
        "path": "",
        "type": "directory",
        "changeset": "myhash",
    }

    # No report at first
    report = Report(mock_cache.reports_dir, "myrepo", "myhash", push_id=1, date=1)
    with pytest.raises(AssertionError) as exc_info:
        mock_cache.get_coverage(report, "")
    assert str(exc_info.value) == "Missing report myrepo/myhash/all:all"

    # Report available online
    mock_cache.bucket.add_mock_blob("myrepo/myhash/all:all.json.zstd")

    # Coverage now available
    assert mock_cache.get_coverage(report, "") == expected

    # Remove local file
    payload = os.path.join(mock_cache.reports_dir, "myrepo", "myhash", "all:all.json")
    assert os.path.exists(payload)
    os.unlink(payload)

    # Coverage still available after re-download
    assert mock_cache.get_coverage(report, "") == expected

    # Corrupt the local JSON payload
    assert os.path.exists(payload)
    with open(payload, "a") as f:
        f.write("break")

    # Coverage still available, and the payload is valid JSON again
    assert mock_cache.get_coverage(report, "") == expected
    assert os.path.exists(payload)
    assert isinstance(json.load(open(payload)), dict)
def test_latest(mock_cache):
    """
    Test the /v2/latest function
    """
    # Empty at first
    assert coverage_latest() == []

    # Add some reports on mozilla-central
    for rev in range(30):
        mock_cache.bucket.add_mock_blob(
            f"mozilla-central/rev{rev}/all:all.json.zstd", coverage=rev / 100.0
        )
        mock_cache.ingest_report(
            Report(
                mock_cache.reports_dir,
                "mozilla-central",
                f"rev{rev}",
                date=1000 + rev,
                push_id=rev * 5,
            )
        )

    # And one on another repo
    mock_cache.bucket.add_mock_blob("myrepo/deadbeef/all:all.json.zstd", coverage=1)
    mock_cache.ingest_report(
        Report(mock_cache.reports_dir, "myrepo", "deadbeef", date=1000, push_id=2)
    )

    # The endpoint lists the last 10 revisions, newest first (rev29 .. rev20)
    assert coverage_latest() == [
        {"push": rev * 5, "revision": f"rev{rev}"} for rev in range(29, 19, -1)
    ]

    # The other repository only exposes its single report
    assert coverage_latest("myrepo") == [{"push": 2, "revision": "deadbeef"}]
def _coverage(changeset, date):
    """
    Build one history entry {changeset, date, coverage} for a changeset.

    NOTE(review): this is a closure — `self`, `repository`, `platform`,
    `suite` and `path` come from the enclosing scope, which is not visible
    here; presumably a get_history-style method. Confirm against the caller.
    """
    # Load overall coverage for specified path
    # Redis returns members as bytes; decode the changeset for output
    changeset = changeset.decode("utf-8")
    report = Report(
        self.reports_dir, repository, changeset, platform, suite, date=date
    )
    # Overall coverage is stored per-path in a Redis hash; missing paths
    # yield coverage=None in the result rather than raising
    coverage = self.redis.hget(report.key_overall, path)
    if coverage is not None:
        coverage = float(coverage)
    # `date` may arrive as a Redis score (float/bytes-derived); normalize to int
    return {"changeset": changeset, "date": int(date), "coverage": coverage}
def ingest_available_reports(self, repository: str, until: Optional[datetime] = None) -> None:
    """
    Ingest every report currently available on the bucket for a repository.

    repository: repository slug used to list blobs
    until: optional age limit forwarded to list_reports
    """
    assert isinstance(repository, str)

    # list_reports yields one (changeset, platform, suite) triple per blob;
    # each becomes a Report ingested immediately
    for changeset, platform, suite in list_reports(self.bucket, repository, until):
        self.ingest_report(
            Report(self.reports_dir, repository, changeset, platform, suite)
        )
def find_closest_report(self, repository, changeset, platform=DEFAULT_FILTER, suite=DEFAULT_FILTER): """ Find the closest report from specified changeset: 1. Lookup the changeset push in cache 2. Lookup the changeset push in HGMO 3. Find the first report after that push """ # Lookup push from cache (fast) key = KEY_CHANGESET.format(repository=repository, changeset=changeset) push_id = self.redis.hget(key, "push") if push_id: # Redis lib uses bytes for all output push_id = int(push_id.decode("utf-8")) date = self.redis.hget(key, "date").decode("utf-8") # Check the report variant is available locally report = Report( self.reports_dir, repository, changeset, platform, suite, push_id=push_id, date=date, ) if not os.path.exists(report.path): self.ingest_report(report) else: # Lookup push from HGMO (slow) push_id, _ = hgmo_revision_details(repository, changeset) # Ingest pushes as we clearly don't have it in cache self.ingest_pushes(repository, platform, suite, min_push_id=push_id - 1, nb_pages=1) # Load report from that push return self.find_report(repository, platform, suite, push_range=(push_id, MAX_PUSH))
def test_ingest_hgmo(mock_cache, mock_hgmo):
    """
    Test ingestion using a mock HGMO
    """
    # Publish a report blob on push 995
    rev = hashlib.md5(b"995").hexdigest()
    mock_cache.bucket.add_mock_blob(
        "myrepo/{}/all:all.json.zstd".format(rev), coverage=0.5
    )

    # No reports nor changesets are known before ingestion
    assert mock_cache.list_reports("myrepo") == []
    assert len(mock_cache.redis.keys("changeset:myrepo:*")) == 0

    # Walking the last pushes indexes changesets and finds the report
    mock_cache.ingest_pushes("myrepo", "all", "all")
    assert len(mock_cache.redis.keys("changeset:myrepo:*")) > 0
    assert mock_cache.list_reports("myrepo") == [
        Report(mock_cache.reports_dir, "myrepo", rev, push_id=1, date=995)
    ]
def list_reports(
    self,
    repository,
    platform=DEFAULT_FILTER,
    suite=DEFAULT_FILTER,
    nb=5,
    push_range=(MAX_PUSH, MIN_PUSH),
):
    """
    List the last reports available on the server, ordered by push.

    By default the listing goes from newer to older; the direction is
    inferred from the push_range bounds.
    """
    assert isinstance(nb, int)
    assert nb > 0
    assert isinstance(push_range, tuple) and len(push_range) == 2

    # An ascending range queries zrangebyscore, a descending one the reverse
    start, end = push_range
    if start < end:
        query = self.redis.zrangebyscore
    else:
        query = self.redis.zrevrangebyscore

    key = KEY_REPORTS.format(repository=repository, platform=platform, suite=suite)
    entries = query(key, start, end, start=0, num=nb, withscores=True)

    # Redis returns bytes members with float scores (the push ids)
    return [
        Report(
            self.reports_dir,
            repository,
            changeset.decode("utf-8"),
            platform,
            suite,
            push_id=push,
        )
        for changeset, push in entries
    ]
def ingest_available_reports(self, repository):
    """
    Ingest all the available reports for a repository.

    repository: repository slug used as the bucket blob prefix
    """
    assert isinstance(repository, str)

    # Blob names look like <repository>/<changeset>/<platform>:<suite>.json.zstd
    # Fix: dots are escaped so ".json.zstd" is matched literally (an unescaped
    # "." matched any character), and the repository slug is escaped too.
    regex_blob = re.compile(
        r"^{}/(\w+)/([\w\-]+):([\w\-]+)\.json\.zstd$".format(re.escape(repository))
    )
    for blob in self.bucket.list_blobs(prefix=repository):
        # Extract changeset, platform & suite from the blob name
        match = regex_blob.match(blob.name)
        if match is None:
            # Fix: logger.warn is a deprecated alias of logger.warning
            logger.warning("Invalid blob found {}".format(blob.name))
            continue
        changeset, platform, suite = match.groups()

        # Build report instance and ingest it
        report = Report(self.reports_dir, repository, changeset, platform, suite)
        self.ingest_report(report)
def test_ingestion(mock_cache):
    """
    Test ingestion of several reports and their retrieval through Redis index
    """

    def history_entry(changeset, coverage, date):
        # Shape of a single get_history item
        return {"changeset": changeset, "coverage": coverage, "date": date}

    # Setup blobs
    mock_cache.bucket.add_mock_blob("myrepo/rev1/all:all.json.zstd", coverage=0.1)
    mock_cache.bucket.add_mock_blob("myrepo/rev2/all:all.json.zstd", coverage=0.2)
    mock_cache.bucket.add_mock_blob("myrepo/rev10/all:all.json.zstd", coverage=1.0)

    # No reports at first
    assert mock_cache.redis.zcard(b"reports:myrepo") == 0
    assert mock_cache.redis.zcard(b"history:myrepo") == 0
    assert mock_cache.list_reports("myrepo") == []

    # Ingest those 3 reports
    report_1 = Report(mock_cache.reports_dir, "myrepo", "rev1", date=1000, push_id=1)
    report_2 = Report(mock_cache.reports_dir, "myrepo", "rev2", date=2000, push_id=2)
    report_10 = Report(mock_cache.reports_dir, "myrepo", "rev10", date=9000, push_id=10)
    for report in (report_1, report_2, report_10):
        mock_cache.ingest_report(report)

    # They must be in redis and on the file system
    assert mock_cache.redis.zcard(b"reports:myrepo:all:all") == 3
    assert mock_cache.redis.zcard(b"history:myrepo") == 3
    for rev in ("rev1", "rev2", "rev10"):
        assert os.path.exists(
            os.path.join(mock_cache.reports_dir, "myrepo", rev, "all:all.json")
        )

    # Reports are exposed, and sorted by push
    assert mock_cache.list_reports("another") == []
    assert mock_cache.list_reports("myrepo") == [report_10, report_2, report_1]
    assert mock_cache.find_report("myrepo") == report_10
    assert mock_cache.get_history("myrepo", start=200, end=20000) == [
        history_entry("rev10", 1.0, 9000),
        history_entry("rev2", 0.2, 2000),
        history_entry("rev1", 0.1, 1000),
    ]

    # Even if we add a smaller one later on, reports are still sorted
    mock_cache.bucket.add_mock_blob("myrepo/rev5/all:all.json.zstd", coverage=0.5)
    report_5 = Report(mock_cache.reports_dir, "myrepo", "rev5", date=5000, push_id=5)
    mock_cache.ingest_report(report_5)
    assert mock_cache.list_reports("myrepo") == [
        report_10,
        report_5,
        report_2,
        report_1,
    ]
    assert mock_cache.find_report("myrepo") == report_10
    assert mock_cache.find_report("myrepo", push_range=(7, 0)) == report_5
    assert mock_cache.get_history("myrepo", start=200, end=20000) == [
        history_entry("rev10", 1.0, 9000),
        history_entry("rev5", 0.5, 5000),
        history_entry("rev2", 0.2, 2000),
        history_entry("rev1", 0.1, 1000),
    ]