Beispiel #1
0
def test_experiment_v6_status():
    experiment_live = ExperimentV6(
        slug="test_slug",
        startDate=dt.datetime(2019, 1, 1),
        endDate=dt.datetime.now() + timedelta(days=1),
        proposedEnrollment=14,
        branches=[
            Branch(slug="control", ratio=2),
            Branch(slug="treatment", ratio=1)
        ],
        referenceBranch="control",
    )

    assert experiment_live.to_experiment().status == "Live"

    experiment_complete = ExperimentV6(
        slug="test_slug",
        startDate=dt.datetime(2019, 1, 1),
        endDate=dt.datetime.now() - timedelta(minutes=1),
        proposedEnrollment=14,
        branches=[
            Branch(slug="control", ratio=2),
            Branch(slug="treatment", ratio=1)
        ],
        referenceBranch="control",
    )

    assert experiment_complete.to_experiment().status == "Complete"
Beispiel #2
0
def fenix_experiments():
    return [
        Experiment(
            experimenter_slug="my_fenix_experiment",
            normandy_slug="my_fenix_experiment",
            type="v6",
            status="Live",
            branches=[Branch(slug="foo", ratio=1), Branch(slug="bar", ratio=1)],
            start_date=dt.datetime(2020, 1, 1, tzinfo=pytz.UTC),
            end_date=dt.datetime(2020, 10, 10, tzinfo=pytz.UTC),
            proposed_enrollment=7,
            reference_branch="foo",
            app_name="fenix",
            app_id="org.mozilla.firefox",
            is_high_population=False,
        ),
        Experiment(
            experimenter_slug="my_fenix_nightly_experiment",
            normandy_slug="my_fenix_nightly_experiment",
            type="v6",
            status="Live",
            branches=[Branch(slug="foo", ratio=1), Branch(slug="bar", ratio=1)],
            start_date=dt.datetime(2020, 1, 1, tzinfo=pytz.UTC),
            end_date=dt.datetime(2020, 10, 10, tzinfo=pytz.UTC),
            proposed_enrollment=7,
            reference_branch="foo",
            app_name="fenix",
            app_id="org.mozilla.fenix",
            is_high_population=False,
        ),
    ]
Beispiel #3
0
    def test_no_enrollments(self, client, project_id, static_dataset,
                            temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment-2",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="a", ratio=0.5),
                Branch(slug="b", ratio=0.5)
            ],
            reference_branch="a",
            features=[],
            normandy_slug="test-experiment-2",
        )

        config = AnalysisSpec().resolve(experiment)

        self.analysis_mock_run(config, static_dataset, temporary_dataset,
                               project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_2_week_1`
            ORDER BY enrollment_date DESC
        """)

        assert query_job.result().total_rows == 0

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_week_1"
        ).to_dataframe()

        count_by_branch = stats.query("statistic == 'count'").set_index(
            "branch")
        assert count_by_branch.loc["a", "point"] == 0.0
        assert count_by_branch.loc["b", "point"] == 0.0

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_weekly"
        ) is not None)
Beispiel #4
0
def experiments():
    return [
        Experiment(
            experimenter_slug="test_slug",
            type="pref",
            status="Complete",
            start_date=dt.datetime(2019, 12, 1, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 3, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[Branch(slug="a", ratio=1),
                      Branch(slug="b", ratio=1)],
            normandy_slug="normandy-test-slug",
            features=[],
            reference_branch="b",
        ),
        Experiment(
            experimenter_slug="test_slug",
            type="addon",
            status="Complete",
            start_date=dt.datetime(2019, 12, 1, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 3, 1, tzinfo=pytz.utc),
            proposed_enrollment=0,
            branches=[],
            features=[],
            normandy_slug=None,
            reference_branch=None,
        ),
        Experiment(
            experimenter_slug="test_slug",
            type="pref",
            status="Live",
            start_date=dt.datetime(2019, 12, 1, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 3, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[],
            features=[],
            normandy_slug="normandy-test-slug",
            reference_branch=None,
        ),
    ]
Beispiel #5
0
def test_convert_experiment_v6_to_experiment():
    experiment_v6 = ExperimentV6(
        slug="test_slug",
        startDate=dt.datetime(2019, 1, 1),
        endDate=dt.datetime(2019, 1, 10),
        proposedEnrollment=14,
        branches=[
            Branch(slug="control", ratio=2),
            Branch(slug="treatment", ratio=1)
        ],
        referenceBranch="control",
    )

    experiment = experiment_v6.to_experiment()

    assert experiment.experimenter_slug is None
    assert experiment.normandy_slug == "test_slug"
    assert experiment.status == "Complete"
    assert experiment.type == "v6"
    assert len(experiment.branches) == 2
    assert experiment.reference_branch == "control"
    assert experiment.is_high_population is False
    assert experiment.outcomes == []
Beispiel #6
0
def test_convert_experiment_v4_to_experiment():
    experiment_v4 = ExperimentV4(
        slug="test_slug",
        active=True,
        startDate=dt.datetime(2019, 1, 1, tzinfo=pytz.utc),
        endDate=dt.datetime(2019, 1, 10, tzinfo=pytz.utc),
        proposedEnrollment=14,
        branches=[
            Branch(slug="control", ratio=2),
            Branch(slug="treatment", ratio=1)
        ],
        referenceBranch="control",
        features=["fake_feature"],
    )

    experiment = experiment_v4.to_experiment()

    assert experiment.experimenter_slug is None
    assert experiment.normandy_slug == "test_slug"
    assert experiment.status == "Live"
    assert experiment.type == "v4"
    assert len(experiment.features) == 1
    assert len(experiment.branches) == 2
    assert experiment.reference_branch == "control"
Beispiel #7
0
    def test_metrics(self, client, project_id, static_dataset,
                     temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            features=[],
            normandy_slug="test-experiment",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expr=agg_sum("active_hours_sum"),
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }

        self.analysis_mock_run(config, static_dataset, temporary_dataset,
                               project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_week_1`
            ORDER BY enrollment_date DESC
        """)

        expected_metrics_results = [
            {
                "client_id": "bbbb",
                "branch": "branch2",
                "enrollment_date": datetime.date(2020, 4, 3),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
            {
                "client_id": "aaaa",
                "branch": "branch1",
                "enrollment_date": datetime.date(2020, 4, 2),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
        ]

        for i, row in enumerate(query_job.result()):
            for k, v in expected_metrics_results[i].items():
                assert row[k] == v

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_weekly")
                is not None)
        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ) is not None)

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ).to_dataframe()

        count_by_branch = stats.query("statistic == 'count'").set_index(
            "branch")
        assert count_by_branch.loc["branch1", "point"] == 1.0
        assert count_by_branch.loc["branch2", "point"] == 1.0

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_weekly"
        ) is not None)
    def test_logging(self, monkeypatch, client, project_id, static_dataset,
                     temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            normandy_slug="test-experiment",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [
                Summary(test_active_hours,
                        BootstrapMean(confidence_interval=10))
            ]
        }

        log_config = LogConfiguration(
            log_project_id=project_id,
            log_dataset_id=temporary_dataset,
            log_table_id="logs",
            log_to_bigquery=True,
            task_profiling_log_table_id="task_profiling_logs",
            task_monitoring_log_table_id="task_monitoring_logs",
            capacity=1,
        )
        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id, log_config)

        assert client.client.get_table(
            f"{project_id}.{temporary_dataset}.logs") is not None

        logs = list(
            client.client.list_rows(f"{project_id}.{temporary_dataset}.logs"))

        assert len(logs) >= 1
        error_logs = [log for log in logs if log.get("log_level") == "ERROR"]
        assert (
            "Error while computing statistic bootstrap_mean for metric active_hours"
            in error_logs[0].get("message"))
        assert error_logs[0].get("log_level") == "ERROR"
    def test_with_segments(self, monkeypatch, client, project_id,
                           static_dataset, temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            normandy_slug="test-experiment",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
        )

        test_clients_last_seen = SegmentDataSource(
            "clients_last_seen", f"`{project_id}.test_data.clients_last_seen`")
        regular_user_v3 = Segment(
            "regular_user_v3",
            test_clients_last_seen,
            "COALESCE(LOGICAL_OR(is_regular_user_v3), FALSE)",
        )
        config.experiment.segments = [regular_user_v3]

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }

        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_enrollments_week_1`
            ORDER BY enrollment_date DESC
        """)

        expected_metrics_results = [
            {
                "client_id": "bbbb",
                "branch": "branch2",
                "enrollment_date": datetime.date(2020, 4, 3),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
                "regular_user_v3": True,
            },
            {
                "client_id": "aaaa",
                "branch": "branch1",
                "enrollment_date": datetime.date(2020, 4, 2),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
                "regular_user_v3": False,
            },
        ]

        for i, row in enumerate(query_job.result()):
            for k, v in expected_metrics_results[i].items():
                assert row[k] == v

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_enrollments_weekly"
        ) is not None)
        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ) is not None)

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ).to_dataframe()

        # Only one count per segment and branch, please
        assert (stats.query(
            "metric == 'identity' and statistic == 'count'").groupby(
                ["segment", "analysis_basis", "window_index",
                 "branch"]).size() == 1).all()

        count_by_branch = stats.query(
            "segment == 'all' and statistic == 'count'").set_index("branch")
        assert count_by_branch.loc["branch1", "point"] == 1.0
        assert count_by_branch.loc["branch2", "point"] == 1.0
        assert count_by_branch.loc["branch2",
                                   "analysis_basis"] == "enrollments"
    def test_no_enrollments(self, monkeypatch, client, project_id,
                            static_dataset, temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment-2",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="a", ratio=0.5),
                Branch(slug="b", ratio=0.5)
            ],
            reference_branch="a",
            normandy_slug="test-experiment-2",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }

        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_2_enrollments_week_1`
            ORDER BY enrollment_date DESC
        """)

        assert query_job.result().total_rows == 0

        stats = client.client.list_rows(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_week_1"
        ).to_dataframe()

        count_by_branch = stats.query("statistic == 'count'").set_index(
            "branch")
        assert count_by_branch.loc["a", "point"] == 0.0
        assert count_by_branch.loc["b", "point"] == 0.0
        assert count_by_branch.loc["b", "analysis_basis"] == "enrollments"

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_2_weekly"
        ) is not None)
    def test_metrics_with_exposure(self, monkeypatch, client, project_id,
                                   static_dataset, temporary_dataset):
        experiment = Experiment(
            experimenter_slug="test-experiment",
            type="rollout",
            status="Live",
            start_date=dt.datetime(2020, 3, 30, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 6, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[
                Branch(slug="branch1", ratio=0.5),
                Branch(slug="branch2", ratio=0.5)
            ],
            reference_branch="branch2",
            normandy_slug="test-experiment",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        )

        config = AnalysisSpec().resolve(experiment)

        test_clients_daily = DataSource(
            name="clients_daily",
            from_expr=f"`{project_id}.test_data.clients_daily`",
        )

        test_active_hours = Metric(
            name="active_hours",
            data_source=test_clients_daily,
            select_expression=agg_sum("active_hours_sum"),
            analysis_bases=[AnalysisBasis.EXPOSURES],
        )

        config.metrics = {
            AnalysisPeriod.WEEK: [Summary(test_active_hours, BootstrapMean())]
        }
        config.experiment.exposure_signal = ExposureSignal(
            name="ad_exposure",
            data_source=test_clients_daily,
            select_expression="active_hours_sum > 0",
            friendly_name="Ad exposure",
            description="Clients have clicked on ad",
            window_start="enrollment_start",
            window_end="analysis_window_end",
        )

        self.analysis_mock_run(monkeypatch, config, static_dataset,
                               temporary_dataset, project_id)

        query_job = client.client.query(f"""
            SELECT
              *
            FROM `{project_id}.{temporary_dataset}.test_experiment_exposures_week_1`
            ORDER BY enrollment_date DESC
        """)

        expected_metrics_results = [
            {
                "client_id": "bbbb",
                "branch": "branch2",
                "enrollment_date": datetime.date(2020, 4, 3),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
            {
                "client_id": "aaaa",
                "branch": "branch1",
                "enrollment_date": datetime.date(2020, 4, 2),
                "num_enrollment_events": 1,
                "analysis_window_start": 0,
                "analysis_window_end": 6,
            },
        ]

        r = query_job.result()

        for i, row in enumerate(r):
            for k, v in expected_metrics_results[i].items():
                assert row[k] == v

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.test_experiment_exposures_weekly"
        ) is not None)
        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_week_1"
        ) is not None)

        assert (client.client.get_table(
            f"{project_id}.{temporary_dataset}.statistics_test_experiment_weekly"
        ) is not None)
Beispiel #12
0
def experiments():
    return [
        Experiment(
            experimenter_slug="test_slug",
            type="pref",
            status="Complete",
            start_date=dt.datetime(2019, 12, 1, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 3, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[Branch(slug="a", ratio=1), Branch(slug="b", ratio=1)],
            normandy_slug="normandy-test-slug",
            reference_branch="b",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        ),
        Experiment(
            experimenter_slug="test_slug",
            type="addon",
            status="Complete",
            start_date=dt.datetime(2019, 12, 1, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 3, 1, tzinfo=pytz.utc),
            proposed_enrollment=0,
            branches=[],
            normandy_slug=None,
            reference_branch=None,
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        ),
        Experiment(
            experimenter_slug="test_slug",
            type="pref",
            status="Live",
            start_date=dt.datetime(2019, 12, 1, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 3, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[],
            normandy_slug="normandy-test-slug",
            reference_branch=None,
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        ),
        Experiment(
            experimenter_slug="test_slug",
            type="pref",
            status="Live",
            start_date=dt.datetime(2019, 12, 1, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 3, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[],
            normandy_slug="normandy-test-slug",
            reference_branch=None,
            is_high_population=True,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        ),
        Experiment(
            experimenter_slug="test_slug",
            type="pref",
            status="Complete",
            start_date=dt.datetime(2019, 12, 1, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 3, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[Branch(slug="a", ratio=1), Branch(slug="b", ratio=1)],
            normandy_slug="normandy-test-slug",
            reference_branch="b",
            is_high_population=False,
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        ),
        Experiment(
            experimenter_slug="test_slug",
            type="pref",
            status="Live",
            start_date=dt.datetime(2019, 12, 1, tzinfo=pytz.utc),
            end_date=dt.datetime(2020, 3, 1, tzinfo=pytz.utc),
            proposed_enrollment=7,
            branches=[],
            normandy_slug="normandy-test-slug",
            reference_branch=None,
            is_high_population=True,
            outcomes=["performance", "tastiness"],
            app_name="firefox_desktop",
            app_id="firefox-desktop",
        ),
    ]