def test_metadata_from_config(mock_get, experiments):
    config_str = dedent(
        """
        [metrics]
        weekly = ["view_about_logins", "my_cool_metric"]
        daily = ["my_cool_metric"]

        [metrics.my_cool_metric]
        data_source = "main"
        select_expression = "{{agg_histogram_mean('payload.content.my_cool_histogram')}}"
        friendly_name = "Cool metric"
        description = "Cool cool cool"
        bigger_is_better = false

        [metrics.my_cool_metric.statistics.bootstrap_mean]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )

    spec = AnalysisSpec.from_dict(toml.loads(config_str))
    config = spec.resolve(experiments[4])
    metadata = ExperimentMetadata.from_config(config)

    assert StatisticResult.SCHEMA_VERSION == metadata.schema_version
    assert "view_about_logins" in metadata.metrics
    assert metadata.metrics["view_about_logins"].bigger_is_better
    assert metadata.metrics["view_about_logins"].description != ""
    assert "my_cool_metric" in metadata.metrics
    assert metadata.metrics["my_cool_metric"].bigger_is_better is False
    assert metadata.metrics["my_cool_metric"].friendly_name == "Cool metric"
    assert metadata.metrics["my_cool_metric"].description == "Cool cool cool"
    assert metadata.metrics["my_cool_metric"].analysis_bases == ["enrollments"]
    assert metadata.external_config is None
@classmethod
def from_github_repo(cls) -> "ExternalConfigCollection":
    """Pull in external config files."""
    # download files to tmp directory
    with TemporaryDirectory() as tmp_dir:
        repo = Repo.clone_from(cls.JETSTREAM_CONFIG_URL, tmp_dir)

        external_configs = []

        # TemporaryDirectory yields a str, so wrap it in a Path before globbing
        for config_file in Path(tmp_dir).glob("*.toml"):
            last_modified = next(repo.iter_commits("main", paths=config_file)).committed_date

            external_configs.append(
                ExternalConfig(
                    config_file.stem,
                    AnalysisSpec.from_dict(toml.load(config_file)),
                    UTC.localize(dt.datetime.utcfromtimestamp(last_modified)),
                )
            )

        outcomes = []

        for outcome_file in Path(tmp_dir).glob(f"**/{OUTCOMES_DIR}/*/*.toml"):
            commit_hash = next(repo.iter_commits("main", paths=outcome_file)).hexsha

            outcomes.append(
                ExternalOutcome(
                    slug=outcome_file.stem,
                    spec=OutcomeSpec.from_dict(toml.load(outcome_file)),
                    platform=outcome_file.parent.name,
                    commit_hash=commit_hash,
                )
            )

        return cls(external_configs, outcomes)
def test_experiments_to_analyze_end_date_override(self):
    executor = cli.AnalysisExecutor(
        project_id="project",
        dataset_id="dataset",
        bucket="bucket",
        date=dt.datetime(2021, 2, 15, tzinfo=UTC),
        experiment_slugs=cli.All,
    )

    result = executor._experiment_configs_to_analyse(
        cli_experiments, external_config.ExternalConfigCollection
    )

    assert result == []

    conf = dedent(
        """
        [experiment]
        end_date = 2021-03-01
        """
    )

    external_configs = external_config.ExternalConfigCollection(
        [
            external_config.ExternalConfig(
                slug="my_cool_experiment",
                spec=AnalysisSpec.from_dict(toml.loads(conf)),
                last_modified=dt.datetime(2021, 2, 15, tzinfo=UTC),
            )
        ]
    )

    def config_getter():
        return external_configs

    result = executor._experiment_configs_to_analyse(cli_experiments, config_getter)

    assert set(e.experiment.normandy_slug for e in result) == {"my_cool_experiment"}
def test_metadata_from_config_missing_metadata(mock_get, experiments):
    config_str = dedent(
        """
        [metrics]
        weekly = ["view_about_logins", "my_cool_metric"]
        daily = ["my_cool_metric"]

        [metrics.my_cool_metric]
        data_source = "main"
        select_expression = "{{agg_histogram_mean('payload.content.my_cool_histogram')}}"
        analysis_bases = ["exposures"]

        [metrics.my_cool_metric.statistics.bootstrap_mean]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )

    spec = AnalysisSpec.from_dict(toml.loads(config_str))
    config = spec.resolve(experiments[0])
    metadata = ExperimentMetadata.from_config(config)

    assert "my_cool_metric" in metadata.metrics
    assert metadata.metrics["my_cool_metric"].bigger_is_better
    assert metadata.metrics["my_cool_metric"].friendly_name == ""
    assert metadata.metrics["my_cool_metric"].description == ""
    assert metadata.metrics["my_cool_metric"].analysis_bases == ["exposures"]
def test_validating_external_config(self, monkeypatch, experiments):
    Analysis = Mock()
    monkeypatch.setattr("jetstream.external_config.Analysis", Analysis)

    spec = AnalysisSpec.from_dict({})
    extern = ExternalConfig(
        slug="cool_experiment",
        spec=spec,
        last_modified=dt.datetime.now(),
    )
    extern.validate(experiments[0])

    # `Mock.called_once()` is not an assertion (it silently returns a truthy Mock),
    # so assert against the mocked Analysis instance instead.
    Analysis.return_value.validate.assert_called_once()
def entity_from_path(path: Path) -> Union[ExternalConfig, ExternalOutcome]:
    is_outcome = path.parent.parent.name == OUTCOMES_DIR
    slug = path.stem
    config_dict = toml.loads(path.read_text())

    if is_outcome:
        platform = path.parent.name
        spec = OutcomeSpec.from_dict(config_dict)
        return ExternalOutcome(slug=slug, spec=spec, platform=platform, commit_hash=None)

    return ExternalConfig(
        slug=slug,
        spec=AnalysisSpec.from_dict(config_dict),
        last_modified=dt.datetime.fromtimestamp(path.stat().st_mtime, UTC),
    )
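# A minimal usage sketch for entity_from_path (illustrative only, not part of the
# original module or tests). It assumes the surrounding imports (Path, OUTCOMES_DIR,
# ExternalConfig, ExternalOutcome) are in scope and reuses TOML shapes that appear
# in the tests above; the file and metric names are hypothetical.
def _entity_from_path_usage_sketch(tmp_dir: Path) -> None:
    # A top-level TOML file resolves to an ExternalConfig keyed by the file stem.
    config_path = tmp_dir / "my_experiment.toml"
    config_path.write_text(
        '[metrics]\nweekly = ["view_about_logins"]\n\n'
        "[metrics.view_about_logins.statistics.bootstrap_mean]\n"
    )
    entity = entity_from_path(config_path)
    assert isinstance(entity, ExternalConfig)
    assert entity.slug == "my_experiment"

    # A file under .../outcomes/<platform>/ resolves to an ExternalOutcome whose
    # platform is taken from the parent directory name.
    outcome_dir = tmp_dir / OUTCOMES_DIR / "firefox_desktop"
    outcome_dir.mkdir(parents=True, exist_ok=True)
    outcome_path = outcome_dir / "my_outcome.toml"
    outcome_path.write_text(
        'friendly_name = "My outcome"\n'
        'description = "Hypothetical outcome for illustration"\n\n'
        "[metrics.example_metric]\n"
        'select_expression = "COUNT(*)"\n'
        'data_source = "clients_daily"\n'
        "statistics = { bootstrap_mean = {} }\n"
    )
    entity = entity_from_path(outcome_path)
    assert isinstance(entity, ExternalOutcome)
    assert entity.platform == "firefox_desktop"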
def test_skip_works(experiments):
    conf = dedent(
        """
        [experiment]
        skip = true
        """
    )
    spec = AnalysisSpec.from_dict(toml.loads(conf))
    configured = spec.resolve(experiments[0])

    with pytest.raises(ExplicitSkipException):
        Analysis("test", "test", configured).run(
            current_date=dt.datetime(2020, 1, 1, tzinfo=pytz.utc), dry_run=True
        )
def test_metadata_reference_branch(mock_get, experiments):
    config_str = dedent(
        """
        [experiment]
        reference_branch = "a"

        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )

    spec = AnalysisSpec.from_dict(toml.loads(config_str))
    config = spec.resolve(experiments[4])
    metadata = ExperimentMetadata.from_config(config)

    assert metadata.external_config.reference_branch == "a"
    assert (
        metadata.external_config.url
        == ExternalConfigCollection.JETSTREAM_CONFIG_URL + "/blob/main/normandy-test-slug.toml"
    )

    config_str = dedent(
        """
        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )

    spec = AnalysisSpec.from_dict(toml.loads(config_str))
    config = spec.resolve(experiments[2])
    metadata = ExperimentMetadata.from_config(config)

    assert metadata.external_config is None
def test_analysis_doesnt_choke_on_segments(experiments):
    conf = dedent(
        """
        [experiment]
        segments = ["regular_users_v3"]
        """
    )
    spec = AnalysisSpec.from_dict(toml.loads(conf))
    configured = spec.resolve(experiments[0])

    assert isinstance(configured.experiment.segments[0], mozanalysis.segments.Segment)

    Analysis("test", "test", configured).run(
        current_date=dt.datetime(2020, 1, 1, tzinfo=pytz.utc), dry_run=True
    )
def test_busted_config_fails(self, experiments):
    config = dedent(
        """\
        [metrics]
        weekly = ["bogus_metric"]

        [metrics.bogus_metric]
        select_expression = "SUM(fake_column)"
        data_source = "clients_daily"
        statistics = { bootstrap_mean = {} }
        """
    )
    spec = AnalysisSpec.from_dict(toml.loads(config))
    extern = ExternalConfig(
        slug="bad_experiment",
        spec=spec,
        last_modified=datetime.datetime.now(),
    )
    with pytest.raises(DryRunFailedError):
        extern.validate(experiments[0])
@classmethod
def from_github_repo(cls) -> "ExternalConfigCollection":
    """Pull in external config files."""
    g = Github()
    repo = g.get_repo(cls.JETSTREAM_CONFIG_REPO)
    files = repo.get_contents("")

    if isinstance(files, ContentFile):
        files = [files]

    configs = []

    for file in files:
        if file.name.endswith(".toml"):
            slug = os.path.splitext(file.name)[0]
            spec = AnalysisSpec.from_dict(toml.loads(file.decoded_content.decode("utf-8")))
            last_modified = parser.parse(str(file.last_modified))
            configs.append(ExternalConfig(slug, spec, last_modified))

    return cls(configs)
def test_metadata_with_outcomes(experiments, fake_outcome_resolver):
    config_str = dedent(
        """
        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )

    spec = AnalysisSpec.from_dict(toml.loads(config_str))
    config = spec.resolve(experiments[5])
    metadata = ExperimentMetadata.from_config(config)

    assert "view_about_logins" in metadata.metrics
    assert metadata.metrics["view_about_logins"].bigger_is_better
    assert metadata.metrics["view_about_logins"].description != ""

    assert "tastiness" in metadata.outcomes
    assert "performance" in metadata.outcomes
    assert "speed" in metadata.outcomes["performance"].default_metrics
    assert metadata.outcomes["tastiness"].friendly_name == "Tastiness outcomes"
    assert "meals_eaten" in metadata.outcomes["tastiness"].metrics
    assert metadata.outcomes["tastiness"].default_metrics == []
class TestExternalConfigIntegration:
    config_str = dedent(
        """
        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )
    spec = AnalysisSpec.from_dict(toml.loads(config_str))

    def test_old_config(self, client, project_id, temporary_dataset):
        config = ExternalConfig(
            slug="new_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(
                datetime.datetime.utcnow() - datetime.timedelta(days=1)
            ),
        )

        # table created after config loaded
        client.client.create_table(f"{temporary_dataset}.new_table_day1")
        client.add_labels_to_table(
            "new_table_day1",
            {"last_updated": client._current_timestamp_label()},
        )

        config_collection = ExternalConfigCollection([config])
        updated_configs = config_collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 0

    def test_updated_config(self, client, temporary_dataset, project_id):
        config = ExternalConfig(
            slug="old_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(
                datetime.datetime.utcnow() + datetime.timedelta(days=1)
            ),
        )

        client.client.create_table(f"{temporary_dataset}.old_table_day1")
        client.add_labels_to_table(
            "old_table_day1",
            {"last_updated": client._current_timestamp_label()},
        )
        client.client.create_table(f"{temporary_dataset}.old_table_day2")
        client.add_labels_to_table(
            "old_table_day2",
            {"last_updated": client._current_timestamp_label()},
        )

        config_collection = ExternalConfigCollection([config])
        updated_configs = config_collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 1
        assert updated_configs[0].slug == config.slug

    def test_updated_config_while_analysis_active(self, client, temporary_dataset, project_id):
        client.client.create_table(f"{temporary_dataset}.active_table_day0")
        client.add_labels_to_table(
            "active_table_day0",
            {"last_updated": client._current_timestamp_label()},
        )
        client.client.create_table(f"{temporary_dataset}.active_table_day1")
        client.add_labels_to_table(
            "active_table_day1",
            {"last_updated": client._current_timestamp_label()},
        )

        config = ExternalConfig(
            slug="active_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(datetime.datetime.utcnow()),
        )

        client.client.create_table(f"{temporary_dataset}.active_table_day2")
        client.add_labels_to_table(
            "active_table_day2",
            {"last_updated": client._current_timestamp_label()},
        )
        client.client.create_table(f"{temporary_dataset}.active_table_weekly")
        client.add_labels_to_table(
            "active_table_weekly",
            {"last_updated": client._current_timestamp_label()},
        )

        config_collection = ExternalConfigCollection([config])
        updated_configs = config_collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 1
        assert updated_configs[0].slug == config.slug

    def test_new_config_without_a_table_is_marked_changed(
        self, client, temporary_dataset, project_id
    ):
        config = ExternalConfig(
            slug="my_cool_experiment",
            spec=self.spec,
            last_modified=pytz.UTC.localize(datetime.datetime.utcnow()),
        )

        config_collection = ExternalConfigCollection([config])
        updated_configs = config_collection.updated_configs(project_id, temporary_dataset)

        assert [updated.slug for updated in updated_configs] == ["my_cool_experiment"]

    def test_valid_config_validates(self, experiments):
        extern = ExternalConfig(
            slug="cool_experiment",
            spec=self.spec,
            last_modified=datetime.datetime.now(),
        )
        extern.validate(experiments[0])

    def test_busted_config_fails(self, experiments):
        config = dedent(
            """\
            [metrics]
            weekly = ["bogus_metric"]

            [metrics.bogus_metric]
            select_expression = "SUM(fake_column)"
            data_source = "clients_daily"
            statistics = { bootstrap_mean = {} }
            """
        )
        spec = AnalysisSpec.from_dict(toml.loads(config))
        extern = ExternalConfig(
            slug="bad_experiment",
            spec=spec,
            last_modified=datetime.datetime.now(),
        )
        with pytest.raises(DryRunFailedError):
            extern.validate(experiments[0])

    def test_valid_outcome_validates(self):
        config = dedent(
            """\
            friendly_name = "Fred"
            description = "Just your average paleolithic dad."

            [metrics.rocks_mined]
            select_expression = "COALESCE(SUM(pings_aggregated_by_this_row), 0)"
            data_source = "clients_daily"
            statistics = { bootstrap_mean = {} }
            friendly_name = "Rocks mined"
            description = "Number of rocks mined at the quarry"
            """
        )
        spec = OutcomeSpec.from_dict(toml.loads(config))
        extern = ExternalOutcome(
            slug="good_outcome",
            spec=spec,
            platform="firefox_desktop",
            commit_hash="0000000",
        )
        extern.validate()

    def test_busted_outcome_fails(self):
        config = dedent(
            """\
            friendly_name = "Fred"
            description = "Just your average paleolithic dad."

            [metrics.rocks_mined]
            select_expression = "COALESCE(SUM(fake_column_whoop_whoop), 0)"
            data_source = "clients_daily"
            statistics = { bootstrap_mean = {} }
            friendly_name = "Rocks mined"
            description = "Number of rocks mined at the quarry"
            """
        )
        spec = OutcomeSpec.from_dict(toml.loads(config))
        extern = ExternalOutcome(
            slug="bogus_outcome",
            spec=spec,
            platform="firefox_desktop",
            commit_hash="0000000",
        )
        with pytest.raises(DryRunFailedError):
            extern.validate()
def test_export_metadata(mock_storage_client, experiments):
    config_str = dedent(
        """
        [experiment]
        end_date = "2021-07-01"

        [metrics]
        weekly = ["view_about_logins", "my_cool_metric"]

        [metrics.my_cool_metric]
        data_source = "main"
        select_expression = "{{agg_histogram_mean('payload.content.my_cool_histogram')}}"

        [metrics.my_cool_metric.statistics.bootstrap_mean]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )

    spec = AnalysisSpec.from_dict(toml.loads(config_str))
    config = spec.resolve(experiments[0])

    mock_client = MagicMock()
    mock_storage_client.return_value = mock_client
    mock_bucket = MagicMock()
    mock_client.get_bucket.return_value = mock_bucket
    mock_blob = MagicMock()
    mock_bucket.blob.return_value = mock_blob
    mock_blob.upload_from_string.return_value = ""

    export_metadata(config, "test_bucket", "project")

    mock_client.get_bucket.assert_called_once()
    mock_bucket.blob.assert_called_once()

    expected = json.loads(
        r"""
        {
            "metrics": {
                "view_about_logins": {
                    "friendly_name": "about:logins viewers",
                    "description": "Counts the number of clients that viewed about:logins.\n",
                    "bigger_is_better": true,
                    "analysis_bases": ["enrollments"]
                },
                "my_cool_metric": {
                    "friendly_name": "",
                    "description": "",
                    "bigger_is_better": true,
                    "analysis_bases": ["enrollments"]
                }
            },
            "outcomes": {},
            "external_config": {
                "end_date": "2021-07-01",
                "enrollment_period": null,
                "reference_branch": null,
                "skip": false,
                "start_date": null,
                "url": """
        + '"https://github.com/mozilla/jetstream-config/blob/main/normandy-test-slug.toml"'
        + r"""
            },
            "schema_version": """
        + str(StatisticResult.SCHEMA_VERSION)
        + """
        }
        """
    )

    mock_blob.upload_from_string.assert_called_once_with(
        data=json.dumps(expected, sort_keys=True, indent=4),
        content_type="application/json",
    )
class TestExternalConfigIntegration:
    config_str = dedent(
        """
        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )
    spec = AnalysisSpec.from_dict(toml.loads(config_str))

    def test_new_config(self, client, project_id, temporary_dataset):
        config = ExternalConfig(
            slug="new_experiment",
            spec=self.spec,
            last_modified=datetime.datetime.utcnow(),
        )
        config_collection = ExternalConfigCollection([config])
        updated_configs = config_collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 0

    def test_old_config(self, client, project_id, temporary_dataset):
        config = ExternalConfig(
            slug="new_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(
                datetime.datetime.utcnow() - datetime.timedelta(days=1)
            ),
        )

        # table created after config loaded
        client.client.create_table(f"{temporary_dataset}.new_table_day1")
        client.add_labels_to_table(
            "new_table_day1",
            {"last_updated": client._current_timestamp_label()},
        )

        config_collection = ExternalConfigCollection([config])
        updated_configs = config_collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 0

    def test_updated_config(self, client, temporary_dataset, project_id):
        config = ExternalConfig(
            slug="old_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(
                datetime.datetime.utcnow() + datetime.timedelta(days=1)
            ),
        )

        client.client.create_table(f"{temporary_dataset}.old_table_day1")
        client.add_labels_to_table(
            "old_table_day1",
            {"last_updated": client._current_timestamp_label()},
        )
        client.client.create_table(f"{temporary_dataset}.old_table_day2")
        client.add_labels_to_table(
            "old_table_day2",
            {"last_updated": client._current_timestamp_label()},
        )

        config_collection = ExternalConfigCollection([config])
        updated_configs = config_collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 1
        assert updated_configs[0].slug == config.slug

    def test_updated_config_while_analysis_active(self, client, temporary_dataset, project_id):
        client.client.create_table(f"{temporary_dataset}.active_table_day0")
        client.add_labels_to_table(
            "active_table_day0",
            {"last_updated": client._current_timestamp_label()},
        )
        client.client.create_table(f"{temporary_dataset}.active_table_day1")
        client.add_labels_to_table(
            "active_table_day1",
            {"last_updated": client._current_timestamp_label()},
        )

        config = ExternalConfig(
            slug="active_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(datetime.datetime.utcnow()),
        )

        client.client.create_table(f"{temporary_dataset}.active_table_day2")
        client.add_labels_to_table(
            "active_table_day2",
            {"last_updated": client._current_timestamp_label()},
        )
        client.client.create_table(f"{temporary_dataset}.active_table_weekly")
        client.add_labels_to_table(
            "active_table_weekly",
            {"last_updated": client._current_timestamp_label()},
        )

        config_collection = ExternalConfigCollection([config])
        updated_configs = config_collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 1
        assert updated_configs[0].slug == config.slug