Beispiel #1
0
def test_metadata_from_config(mock_get, experiments):
    """Check metric metadata derived from a resolved analysis spec.

    The config defines ``my_cool_metric`` inline (with friendly name,
    description and ``bigger_is_better = false``) and references
    ``view_about_logins`` without defining it. The resulting metadata must
    expose both metrics and carry no external config.
    """
    toml_source = dedent(
        """
        [metrics]
        weekly = ["view_about_logins", "my_cool_metric"]
        daily = ["my_cool_metric"]

        [metrics.my_cool_metric]
        data_source = "main"
        select_expression = "{{agg_histogram_mean('payload.content.my_cool_histogram')}}"
        friendly_name = "Cool metric"
        description = "Cool cool cool"
        bigger_is_better = false

        [metrics.my_cool_metric.statistics.bootstrap_mean]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )

    resolved = AnalysisSpec.from_dict(toml.loads(toml_source)).resolve(experiments[4])
    metadata = ExperimentMetadata.from_config(resolved)

    assert StatisticResult.SCHEMA_VERSION == metadata.schema_version

    # Metric that is only referenced by the config, not defined in it.
    assert "view_about_logins" in metadata.metrics
    logins_meta = metadata.metrics["view_about_logins"]
    assert logins_meta.bigger_is_better
    assert logins_meta.description != ""

    # Metric fully defined inside the config above.
    assert "my_cool_metric" in metadata.metrics
    cool_meta = metadata.metrics["my_cool_metric"]
    assert cool_meta.bigger_is_better is False
    assert cool_meta.friendly_name == "Cool metric"
    assert cool_meta.description == "Cool cool cool"
    assert cool_meta.analysis_bases == ["enrollments"]

    assert metadata.external_config is None
Beispiel #2
0
    def from_github_repo(cls) -> "ExternalConfigCollection":
        """Clone the config repository and load its configs and outcomes.

        The repository at ``cls.JETSTREAM_CONFIG_URL`` is cloned into a
        temporary directory. Every top-level ``*.toml`` file becomes an
        ``ExternalConfig`` and every ``**/OUTCOMES_DIR/*/*.toml`` file becomes
        an ``ExternalOutcome`` whose platform is the name of its parent
        directory.

        Returns:
            A collection built from the loaded configs and outcomes.
        """
        # NOTE(review): ``checkout.glob`` requires a Path-like object, so this
        # presumably relies on a project TemporaryDirectory that yields a Path
        # rather than tempfile's, which yields a str - confirm.
        with TemporaryDirectory() as checkout:
            repo = Repo.clone_from(cls.JETSTREAM_CONFIG_URL, checkout)

            def first_commit_for(path):
                # First commit yielded for ``path`` on ``main`` (presumably the
                # most recent one touching the file - verify against GitPython).
                return next(repo.iter_commits("main", paths=path))

            external_configs = []
            for toml_path in checkout.glob("*.toml"):
                committed_at = first_commit_for(toml_path).committed_date
                config = ExternalConfig(
                    toml_path.stem,
                    AnalysisSpec.from_dict(toml.load(toml_path)),
                    UTC.localize(dt.datetime.utcfromtimestamp(committed_at)),
                )
                external_configs.append(config)

            outcomes = []
            for toml_path in checkout.glob(f"**/{OUTCOMES_DIR}/*/*.toml"):
                sha = first_commit_for(toml_path).hexsha
                outcome = ExternalOutcome(
                    slug=toml_path.stem,
                    spec=OutcomeSpec.from_dict(toml.load(toml_path)),
                    platform=toml_path.parent.name,
                    commit_hash=sha,
                )
                outcomes.append(outcome)

        return cls(external_configs, outcomes)
Beispiel #3
0
    def test_experiments_to_analyze_end_date_override(self):
        """Check that an external ``end_date`` override changes experiment selection.

        With ``ExternalConfigCollection`` itself passed as the config getter,
        nothing is selected for 2021-02-15. Once the getter returns a config
        for ``my_cool_experiment`` with ``end_date = 2021-03-01``, that
        experiment is the only one selected.
        """
        # NOTE(review): ``cli_experiments`` is not a parameter of this test;
        # presumably it is defined at module level - confirm.
        analysis_date = dt.datetime(2021, 2, 15, tzinfo=UTC)
        executor = cli.AnalysisExecutor(
            project_id="project",
            dataset_id="dataset",
            bucket="bucket",
            date=analysis_date,
            experiment_slugs=cli.All,
        )

        without_override = executor._experiment_configs_to_analyse(
            cli_experiments, external_config.ExternalConfigCollection
        )
        assert without_override == []

        conf = dedent("""
            [experiment]
            end_date = 2021-03-01
            """)
        override = external_config.ExternalConfig(
            slug="my_cool_experiment",
            spec=AnalysisSpec.from_dict(toml.loads(conf)),
            last_modified=analysis_date,
        )
        overrides = external_config.ExternalConfigCollection([override])

        with_override = executor._experiment_configs_to_analyse(
            cli_experiments, lambda: overrides
        )
        selected_slugs = {cfg.experiment.normandy_slug for cfg in with_override}
        assert selected_slugs == {"my_cool_experiment"}
Beispiel #4
0
def test_metadata_from_config_missing_metadata(mock_get, experiments):
    """Check the metadata of a metric defined without descriptive fields.

    ``my_cool_metric`` is configured without ``friendly_name``,
    ``description`` or ``bigger_is_better``; its metadata is expected to carry
    empty strings, a truthy ``bigger_is_better`` and the configured
    ``analysis_bases``.
    """
    toml_source = dedent(
        """
        [metrics]
        weekly = ["view_about_logins", "my_cool_metric"]
        daily = ["my_cool_metric"]

        [metrics.my_cool_metric]
        data_source = "main"
        select_expression = "{{agg_histogram_mean('payload.content.my_cool_histogram')}}"
        analysis_bases = ["exposures"]

        [metrics.my_cool_metric.statistics.bootstrap_mean]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )

    resolved = AnalysisSpec.from_dict(toml.loads(toml_source)).resolve(experiments[0])
    metadata = ExperimentMetadata.from_config(resolved)

    assert "my_cool_metric" in metadata.metrics
    cool_meta = metadata.metrics["my_cool_metric"]
    assert cool_meta.bigger_is_better
    assert cool_meta.friendly_name == ""
    assert cool_meta.description == ""
    assert cool_meta.analysis_bases == ["exposures"]
 def test_validating_external_config(self, monkeypatch, experiments):
     """Check that validating an external config runs ``Analysis`` validation once.

     ``jetstream.external_config.Analysis`` is replaced by a ``Mock`` so no
     real analysis runs; the test then verifies that ``validate`` was invoked
     exactly once on the patched object.
     """
     Analysis = Mock()
     monkeypatch.setattr("jetstream.external_config.Analysis", Analysis)
     spec = AnalysisSpec.from_dict({})
     extern = ExternalConfig(
         slug="cool_experiment",
         spec=spec,
         last_modified=dt.datetime.now(),
     )
     extern.validate(experiments[0])
     # ``mock.called_once()`` is not an assertion: on a plain Mock it is an
     # auto-created attribute whose call result is always truthy (and Python
     # 3.12+ rejects the name outright), so the old check could never fail.
     # ``validate`` may be reached through the patched class or through the
     # instance it returns, so count the calls on both.
     validate_calls = (
         Analysis.validate.call_count + Analysis.return_value.validate.call_count
     )
     assert validate_calls == 1
Beispiel #6
0
def entity_from_path(path: Path) -> Union[ExternalConfig, ExternalOutcome]:
    """Load the TOML file at ``path`` as an external config or outcome.

    A file whose grandparent directory is named ``OUTCOMES_DIR`` is treated as
    an outcome: its parent directory name becomes the platform and no commit
    hash is recorded. Any other file becomes an ``ExternalConfig`` whose
    ``last_modified`` is the file's mtime expressed in UTC.

    Args:
        path: Location of the TOML file; the file stem is used as the slug.

    Returns:
        The ``ExternalOutcome`` or ``ExternalConfig`` described by the file.
    """
    parent_dir = path.parent
    parsed = toml.loads(path.read_text())

    if parent_dir.parent.name != OUTCOMES_DIR:
        modified_at = dt.datetime.fromtimestamp(path.stat().st_mtime, UTC)
        return ExternalConfig(
            slug=path.stem,
            spec=AnalysisSpec.from_dict(parsed),
            last_modified=modified_at,
        )

    return ExternalOutcome(
        slug=path.stem,
        spec=OutcomeSpec.from_dict(parsed),
        platform=parent_dir.name,
        commit_hash=None,
    )
Beispiel #7
0
def test_skip_works(experiments):
    """Check that a config with ``skip = true`` raises ``ExplicitSkipException``."""
    conf = dedent("""
        [experiment]
        skip = true
        """)
    configured = AnalysisSpec.from_dict(toml.loads(conf)).resolve(experiments[0])
    run_date = dt.datetime(2020, 1, 1, tzinfo=pytz.utc)

    # Construction stays inside the context manager so the exception is
    # accepted whether it comes from the constructor or from run().
    with pytest.raises(ExplicitSkipException):
        analysis = Analysis("test", "test", configured)
        analysis.run(current_date=run_date, dry_run=True)
Beispiel #8
0
def test_metadata_reference_branch(mock_get, experiments):
    """Check when ``external_config`` metadata is present and what it holds.

    A config with an ``[experiment]`` section setting ``reference_branch``
    must surface that branch and a URL under ``JETSTREAM_CONFIG_URL``; a
    config with only metrics must produce no ``external_config`` at all.
    """

    def metadata_for(toml_text, experiment):
        resolved = AnalysisSpec.from_dict(toml.loads(toml_text)).resolve(experiment)
        return ExperimentMetadata.from_config(resolved)

    override_toml = dedent(
        """
        [experiment]
        reference_branch = "a"

        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )
    overridden = metadata_for(override_toml, experiments[4])

    expected_url = (
        ExternalConfigCollection.JETSTREAM_CONFIG_URL + "/blob/main/normandy-test-slug.toml"
    )
    assert overridden.external_config.reference_branch == "a"
    assert overridden.external_config.url == expected_url

    metrics_only_toml = dedent(
        """
        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )
    plain = metadata_for(metrics_only_toml, experiments[2])

    assert plain.external_config is None
Beispiel #9
0
def test_analysis_doesnt_choke_on_segments(experiments):
    """Check that a config naming a segment resolves and survives a dry run.

    The resolved experiment's first segment must be a
    ``mozanalysis.segments.Segment``, and a dry-run analysis for 2020-01-01
    must complete without raising.
    """
    conf = dedent("""
        [experiment]
        segments = ["regular_users_v3"]
        """)
    configured = AnalysisSpec.from_dict(toml.loads(conf)).resolve(experiments[0])

    first_segment = configured.experiment.segments[0]
    assert isinstance(first_segment, mozanalysis.segments.Segment)

    run_date = dt.datetime(2020, 1, 1, tzinfo=pytz.utc)
    analysis = Analysis("test", "test", configured)
    analysis.run(current_date=run_date, dry_run=True)
    def test_busted_config_fails(self, experiments):
        """Check that validating a metric with a bogus column raises ``DryRunFailedError``."""
        config = dedent("""\
            [metrics]
            weekly = ["bogus_metric"]

            [metrics.bogus_metric]
            select_expression = "SUM(fake_column)"
            data_source = "clients_daily"
            statistics = { bootstrap_mean = {} }
            """)
        broken = ExternalConfig(
            slug="bad_experiment",
            spec=AnalysisSpec.from_dict(toml.loads(config)),
            last_modified=datetime.datetime.now(),
        )
        experiment = experiments[0]

        with pytest.raises(DryRunFailedError):
            broken.validate(experiment)
Beispiel #11
0
    def from_github_repo(cls) -> "ExternalConfigCollection":
        """Fetch the external config files from the GitHub repository.

        Lists the root of ``cls.JETSTREAM_CONFIG_REPO`` and turns every entry
        whose name ends in ``.toml`` into an ``ExternalConfig``: the slug is
        the file name without its extension, the spec is parsed from the
        UTF-8 decoded content, and ``last_modified`` is parsed from the
        entry's ``last_modified`` value.

        Returns:
            A collection holding one config per ``.toml`` file found.
        """
        repo = Github().get_repo(cls.JETSTREAM_CONFIG_REPO)
        entries = repo.get_contents("")

        # Normalise a single ContentFile result into a one-element list.
        if isinstance(entries, ContentFile):
            entries = [entries]

        configs = [
            ExternalConfig(
                os.path.splitext(entry.name)[0],
                AnalysisSpec.from_dict(toml.loads(entry.decoded_content.decode("utf-8"))),
                parser.parse(str(entry.last_modified)),
            )
            for entry in entries
            if entry.name.endswith(".toml")
        ]

        return cls(configs)
Beispiel #12
0
def test_metadata_with_outcomes(experiments, fake_outcome_resolver):
    """Check that metadata exposes both metrics and the experiment's outcomes.

    The config itself only references ``view_about_logins``; the
    ``tastiness`` and ``performance`` outcomes are expected to show up in the
    metadata for ``experiments[5]``.
    """
    toml_source = dedent(
        """
        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )

    resolved = AnalysisSpec.from_dict(toml.loads(toml_source)).resolve(experiments[5])
    metadata = ExperimentMetadata.from_config(resolved)

    assert "view_about_logins" in metadata.metrics
    logins_meta = metadata.metrics["view_about_logins"]
    assert logins_meta.bigger_is_better
    assert logins_meta.description != ""

    outcomes = metadata.outcomes
    assert "tastiness" in outcomes
    assert "performance" in outcomes
    assert "speed" in outcomes["performance"].default_metrics

    tastiness = outcomes["tastiness"]
    assert tastiness.friendly_name == "Tastiness outcomes"
    assert "meals_eaten" in tastiness.metrics
    assert tastiness.default_metrics == []
class TestExternalConfigIntegration:
    """Integration tests for external configs and outcomes.

    The ``*_config`` tests create tables through the ``client`` fixture and
    check which configs ``ExternalConfigCollection.updated_configs`` reports;
    the remaining tests exercise ``validate`` on configs and outcomes.
    """

    config_str = dedent("""
        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """)
    spec = AnalysisSpec.from_dict(toml.loads(config_str))

    @staticmethod
    def _create_labeled_table(client, dataset, table_name):
        """Create ``dataset.table_name`` and give it a ``last_updated`` label."""
        client.client.create_table(f"{dataset}.{table_name}")
        client.add_labels_to_table(
            table_name,
            {"last_updated": client._current_timestamp_label()},
        )

    def test_old_config(self, client, project_id, temporary_dataset):
        """A config dated one day ago is not reported once its table exists."""
        one_day_ago = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        config = ExternalConfig(
            slug="new_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(one_day_ago),
        )

        # The table is created after the config's modification time.
        self._create_labeled_table(client, temporary_dataset, "new_table_day1")

        collection = ExternalConfigCollection([config])
        updated_configs = collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 0

    def test_updated_config(self, client, temporary_dataset, project_id):
        """A config dated one day ahead of its tables is reported as updated."""
        one_day_ahead = datetime.datetime.utcnow() + datetime.timedelta(days=1)
        config = ExternalConfig(
            slug="old_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(one_day_ahead),
        )

        for table_name in ("old_table_day1", "old_table_day2"):
            self._create_labeled_table(client, temporary_dataset, table_name)

        collection = ExternalConfigCollection([config])
        updated_configs = collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 1
        assert updated_configs[0].slug == config.slug

    def test_updated_config_while_analysis_active(self, client,
                                                  temporary_dataset,
                                                  project_id):
        """A config timestamped between table writes is reported as updated."""
        # Tables written before the config's timestamp is taken.
        for table_name in ("active_table_day0", "active_table_day1"):
            self._create_labeled_table(client, temporary_dataset, table_name)

        config = ExternalConfig(
            slug="active_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(datetime.datetime.utcnow()),
        )

        # Tables written after the config's timestamp was taken.
        for table_name in ("active_table_day2", "active_table_weekly"):
            self._create_labeled_table(client, temporary_dataset, table_name)

        collection = ExternalConfigCollection([config])
        updated_configs = collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 1
        assert updated_configs[0].slug == config.slug

    def test_new_config_without_a_table_is_marked_changed(
            self, client, temporary_dataset, project_id):
        """A config with no tables at all is reported as updated."""
        config = ExternalConfig(
            slug="my_cool_experiment",
            spec=self.spec,
            last_modified=pytz.UTC.localize(datetime.datetime.utcnow()),
        )
        collection = ExternalConfigCollection([config])
        updated_configs = collection.updated_configs(project_id, temporary_dataset)

        reported_slugs = [updated.slug for updated in updated_configs]
        assert reported_slugs == ["my_cool_experiment"]

    def test_valid_config_validates(self, experiments):
        """Validating the shared class-level spec does not raise."""
        valid = ExternalConfig(
            slug="cool_experiment",
            spec=self.spec,
            last_modified=datetime.datetime.now(),
        )
        valid.validate(experiments[0])

    def test_busted_config_fails(self, experiments):
        """Validating a metric with a bogus column raises ``DryRunFailedError``."""
        config = dedent("""\
            [metrics]
            weekly = ["bogus_metric"]

            [metrics.bogus_metric]
            select_expression = "SUM(fake_column)"
            data_source = "clients_daily"
            statistics = { bootstrap_mean = {} }
            """)
        broken = ExternalConfig(
            slug="bad_experiment",
            spec=AnalysisSpec.from_dict(toml.loads(config)),
            last_modified=datetime.datetime.now(),
        )
        experiment = experiments[0]

        with pytest.raises(DryRunFailedError):
            broken.validate(experiment)

    def test_valid_outcome_validates(self):
        """Validating a well-formed outcome does not raise."""
        config = dedent("""\
            friendly_name = "Fred"
            description = "Just your average paleolithic dad."

            [metrics.rocks_mined]
            select_expression = "COALESCE(SUM(pings_aggregated_by_this_row), 0)"
            data_source = "clients_daily"
            statistics = { bootstrap_mean = {} }
            friendly_name = "Rocks mined"
            description = "Number of rocks mined at the quarry"
            """)
        outcome = ExternalOutcome(
            slug="good_outcome",
            spec=OutcomeSpec.from_dict(toml.loads(config)),
            platform="firefox_desktop",
            commit_hash="0000000",
        )
        outcome.validate()

    def test_busted_outcome_fails(self):
        """Validating an outcome with a bogus column raises ``DryRunFailedError``."""
        config = dedent("""\
            friendly_name = "Fred"
            description = "Just your average paleolithic dad."

            [metrics.rocks_mined]
            select_expression = "COALESCE(SUM(fake_column_whoop_whoop), 0)"
            data_source = "clients_daily"
            statistics = { bootstrap_mean = {} }
            friendly_name = "Rocks mined"
            description = "Number of rocks mined at the quarry"
            """)
        outcome = ExternalOutcome(
            slug="bogus_outcome",
            spec=OutcomeSpec.from_dict(toml.loads(config)),
            platform="firefox_desktop",
            commit_hash="0000000",
        )

        with pytest.raises(DryRunFailedError):
            outcome.validate()
Beispiel #14
0
def test_export_metadata(mock_storage_client, experiments):
    """Check that ``export_metadata`` uploads the expected JSON exactly once.

    The storage client, bucket and blob are all mocks. The test asserts that
    the bucket and blob are each requested once and that the blob receives
    the expected metadata serialised with sorted keys and a four-space
    indent, using the ``application/json`` content type.
    """
    config_str = dedent(
        """
        [experiment]
        end_date = "2021-07-01"

        [metrics]
        weekly = ["view_about_logins", "my_cool_metric"]

        [metrics.my_cool_metric]
        data_source = "main"
        select_expression = "{{agg_histogram_mean('payload.content.my_cool_histogram')}}"

        [metrics.my_cool_metric.statistics.bootstrap_mean]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """
    )
    config = AnalysisSpec.from_dict(toml.loads(config_str)).resolve(experiments[0])

    # Wire the mock chain from the innermost object outwards:
    # storage client -> bucket -> blob.
    blob = MagicMock()
    blob.upload_from_string.return_value = ""
    bucket = MagicMock()
    bucket.blob.return_value = blob
    storage = MagicMock()
    storage.get_bucket.return_value = bucket
    mock_storage_client.return_value = storage

    export_metadata(config, "test_bucket", "project")

    storage.get_bucket.assert_called_once()
    bucket.blob.assert_called_once()

    config_url = '"https://github.com/mozilla/jetstream-config/blob/main/normandy-test-slug.toml"'
    expected_text = (
        r"""
        {
            "metrics": {
                "view_about_logins": {
                    "friendly_name": "about:logins viewers",
                    "description": "Counts the number of clients that viewed about:logins.\n",
                    "bigger_is_better": true,
                    "analysis_bases": ["enrollments"]
                },
                "my_cool_metric": {
                    "friendly_name": "",
                    "description": "",
                    "bigger_is_better": true,
                    "analysis_bases": ["enrollments"]
                }
            },
            "outcomes": {},
            "external_config": {
                "end_date": "2021-07-01",
                "enrollment_period": null,
                "reference_branch": null,
                "skip": false,
                "start_date": null,
                "url": """
        + config_url
        + r"""},
            "schema_version":"""
        + str(StatisticResult.SCHEMA_VERSION)
        + """
        }
    """
    )
    expected = json.loads(expected_text)

    blob.upload_from_string.assert_called_once_with(
        data=json.dumps(expected, sort_keys=True, indent=4), content_type="application/json"
    )
Beispiel #15
0
class TestExternalConfigIntegration:
    config_str = dedent("""
        [metrics]
        weekly = ["view_about_logins"]

        [metrics.view_about_logins.statistics.bootstrap_mean]
        """)
    spec = AnalysisSpec.from_dict(toml.loads(config_str))

    def test_new_config(self, client, project_id, temporary_dataset):
        """A config for which no tables were created is not reported as updated."""
        fresh = ExternalConfig(
            slug="new_experiment",
            spec=self.spec,
            last_modified=datetime.datetime.utcnow(),
        )
        collection = ExternalConfigCollection([fresh])

        updated_configs = collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 0

    def test_old_config(self, client, project_id, temporary_dataset):
        """A config dated one day ago is not reported once its table exists."""
        one_day_ago = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        config = ExternalConfig(
            slug="new_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(one_day_ago),
        )

        # The table is created (and labelled) after the config's timestamp.
        table_name = "new_table_day1"
        client.client.create_table(f"{temporary_dataset}.{table_name}")
        client.add_labels_to_table(
            table_name,
            {"last_updated": client._current_timestamp_label()},
        )

        collection = ExternalConfigCollection([config])
        updated_configs = collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 0

    def test_updated_config(self, client, temporary_dataset, project_id):
        """A config dated one day ahead of its tables is reported as updated."""
        one_day_ahead = datetime.datetime.utcnow() + datetime.timedelta(days=1)
        config = ExternalConfig(
            slug="old_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(one_day_ahead),
        )

        # Create and label both result tables, in the original order.
        for table_name in ("old_table_day1", "old_table_day2"):
            client.client.create_table(f"{temporary_dataset}.{table_name}")
            client.add_labels_to_table(
                table_name,
                {"last_updated": client._current_timestamp_label()},
            )

        collection = ExternalConfigCollection([config])
        updated_configs = collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 1
        assert updated_configs[0].slug == config.slug

    def test_updated_config_while_analysis_active(self, client,
                                                  temporary_dataset,
                                                  project_id):
        """A config timestamped between table writes is reported as updated.

        Two tables are created before the config's ``last_modified`` is taken
        and two more afterwards; the config must then appear in
        ``updated_configs``.
        """

        def create_labeled(table_name):
            client.client.create_table(f"{temporary_dataset}.{table_name}")
            client.add_labels_to_table(
                table_name,
                {"last_updated": client._current_timestamp_label()},
            )

        # Tables written before the config's timestamp is taken.
        for earlier_table in ("active_table_day0", "active_table_day1"):
            create_labeled(earlier_table)

        config = ExternalConfig(
            slug="active_table",
            spec=self.spec,
            last_modified=pytz.UTC.localize(datetime.datetime.utcnow()),
        )

        # Tables written after the config's timestamp was taken.
        for later_table in ("active_table_day2", "active_table_weekly"):
            create_labeled(later_table)

        collection = ExternalConfigCollection([config])
        updated_configs = collection.updated_configs(project_id, temporary_dataset)

        assert len(updated_configs) == 1
        assert updated_configs[0].slug == config.slug