Exemple #1
0
def test_ts_time_limits_create_not_enough_data():
    with pytest.raises(ValueError):
        TimeLimits.for_ts(
            first_enrollment_date="2019-01-01",
            last_date_full_data="2019-01-13",
            time_series_period="weekly",
            num_dates_enrollment=8,
        )
Exemple #2
0
def test_time_limits_create5():
    # But not an 8 day window
    with pytest.raises(ValueError):
        TimeLimits.for_single_analysis_window(
            first_enrollment_date="2019-01-01",
            last_date_full_data="2019-01-14",
            analysis_start_days=0,
            analysis_length_dates=8,
            num_dates_enrollment=8,
        )
Exemple #3
0
def test_time_limits_has_right_date_in_error_message():
    msg_re = r"until we have data for 2020-03-30."
    with pytest.raises(ValueError, match=msg_re):
        TimeLimits.for_single_analysis_window(
            first_enrollment_date="2020-03-03",
            last_date_full_data="2020-03-23",
            analysis_start_days=0,
            analysis_length_dates=21,
            num_dates_enrollment=8,
        )
Exemple #4
0
def test_time_limits_validates():
    # Mainly check that the validation is running at all
    # No need to specify the same checks twice(?)
    with pytest.raises(TypeError):
        TimeLimits()

    with pytest.raises(AssertionError):
        TimeLimits(
            first_enrollment_date="2019-01-05",
            last_enrollment_date="2019-01-05",
            analysis_windows=(AnalysisWindow(1, 1),),
            first_date_data_required="2019-01-01",  # Before enrollments
            last_date_data_required="2019-01-01",
        )
Exemple #5
0
def test_exposure_signal_query_custom_windows():
    exp = Experiment("slug", "2019-01-01", 8, app_id="my_cool_app")

    tl = TimeLimits.for_ts(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-03-01",
        time_series_period="weekly",
        num_dates_enrollment=8,
    )

    enrollment_sql = exp.build_enrollments_query(
        time_limits=tl,
        enrollments_query_type="glean-event",
        exposure_signal=ExposureSignal(
            name="exposures",
            data_source=mozanalysis.metrics.fenix.baseline,
            select_expr="metrics.counter.events_total_uri_count > 0",
            friendly_name="URI visited exposure",
            description="Exposed when URI visited",
            window_start=1,
            window_end=3,
        ),
    )

    sql_lint(enrollment_sql)

    assert "exposures" in enrollment_sql
    assert "metrics.counter.events_total_uri_count > 0" in enrollment_sql
    assert "DATE_ADD('2019-01-01', INTERVAL 1 DAY)" in enrollment_sql
    assert "DATE_ADD('2019-01-01', INTERVAL 3 DAY)" in enrollment_sql
Exemple #6
0
def test_metrics_query_based_on_exposure():
    exp = Experiment("slug", "2019-01-01", 8)

    tl = TimeLimits.for_ts(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-03-01",
        time_series_period="weekly",
        num_dates_enrollment=8,
    )

    enrollments_sql = exp.build_enrollments_query(
        time_limits=tl, enrollments_query_type="fenix-fallback")

    sql_lint(enrollments_sql)

    metrics_sql = exp.build_metrics_query(
        metric_list=[
            m for m in mozanalysis.metrics.fenix.__dict__.values()
            if isinstance(m, Metric)
        ],
        time_limits=tl,
        enrollments_table="enrollments",
        analysis_basis=AnalysisBasis.EXPOSURES,
    )

    sql_lint(metrics_sql)

    assert "e.exposure_date" in metrics_sql
Exemple #7
0
def test_process_enrollments(spark):
    exp = Experiment('a-stub', '20190101')
    enrollments = exp.get_enrollments(spark,
                                      _get_enrollment_view(slug="a-stub"))
    assert enrollments.count() == 4

    # With final data collected on '20190114', we have 7 dates of data
    # for 'cccc' enrolled on '20190108' but not for 'dddd' enrolled on
    # '20190109'.
    tl = TimeLimits.for_single_analysis_window(
        first_enrollment_date=exp.start_date,
        last_date_full_data='20190114',
        analysis_start_days=0,
        analysis_length_dates=7,
        num_dates_enrollment=exp.num_dates_enrollment)
    assert tl.last_enrollment_date == '20190108'
    assert len(tl.analysis_windows) == 1
    assert tl.analysis_windows[0].end == 6

    pe = exp._process_enrollments(enrollments, tl)
    assert pe.count() == 3

    pe = exp._process_enrollments(enrollments.alias('main_summary'), tl)
    assert pe.select(F.col('enrollments.enrollment_date'))
    with pytest.raises(AnalysisException):
        assert pe.select(F.col('main_summary.enrollment_date'))
Exemple #8
0
def test_add_analysis_windows_to_enrollments(spark):
    exp = Experiment('a-stub', '20190101', num_dates_enrollment=8)
    enrollments = exp.get_enrollments(spark,
                                      _get_enrollment_view(slug="a-stub"))
    assert enrollments.count() == 3

    tl = TimeLimits.for_ts(
        first_enrollment_date=exp.start_date,
        last_date_full_data='20190114',
        time_series_period='daily',
        num_dates_enrollment=exp.num_dates_enrollment,
    )
    assert len(tl.analysis_windows) == 7

    new_enrollments = exp._add_analysis_windows_to_enrollments(enrollments, tl)

    nep = new_enrollments.toPandas()
    assert len(nep) == enrollments.count() * len(tl.analysis_windows)

    a = nep[nep['client_id'] == 'aaaa']
    assert len(a) == len(tl.analysis_windows)
    assert (a.mozanalysis_analysis_window_start.sort_values() == np.arange(
        len(tl.analysis_windows))).all()
    assert (a.mozanalysis_analysis_window_end.sort_values() == np.arange(
        len(tl.analysis_windows))).all()
Exemple #9
0
def test_segments_megaquery_not_detectably_malformed():
    exp = Experiment("slug", "2019-01-01", 8)

    tl = TimeLimits.for_ts(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-03-01",
        time_series_period="weekly",
        num_dates_enrollment=8,
    )

    enrollments_sql = exp.build_enrollments_query(
        time_limits=tl,
        segment_list=[s for s in msd.__dict__.values() if isinstance(s, msd.Segment)],
        enrollments_query_type="normandy",
    )

    sql_lint(enrollments_sql)

    metrics_sql = exp.build_metrics_query(
        metric_list=[m for m in mad.__dict__.values() if isinstance(m, mad.Metric)],
        time_limits=tl,
        enrollments_table="enrollments",
    )

    sql_lint(metrics_sql)
Exemple #10
0
def test_no_analysis_exception_when_shared_parent_dataframe(spark):
    # Check that we don't fall victim to
    # https://issues.apache.org/jira/browse/SPARK-10925
    df = spark.createDataFrame(
        [  # Just need the schema, really
            ['someone', '20190102', 'fake', 1],
        ],
        [
            "client_id",
            "submission_date_s3",
            "branch",
            "some_value",
        ])

    enrollments = df.groupby('client_id', 'branch').agg(
        F.min('submission_date_s3').alias('enrollment_date'))

    exp = Experiment('a-stub', '20180101')

    time_limits = TimeLimits.for_single_analysis_window(
        exp.start_date,
        last_date_full_data='20190522',
        analysis_start_days=28,
        analysis_length_dates=7)

    enrollments = exp._add_analysis_windows_to_enrollments(
        enrollments, time_limits)

    exp._get_results_for_one_data_source(
        enrollments,
        df,
        [F.max(F.col('some_value'))],
    )
Exemple #11
0
def test_process_data_source_df(spark):
    start_date = '20190101'
    exp_8d = Experiment('experiment-with-8-day-cohort', start_date, 8)
    data_source_df = _get_data_source_df(spark)

    end_date = '20190114'

    # Are the fixtures sufficiently complicated that we're actually testing
    # things?
    assert _simple_return_agg_date(F.min, data_source_df) < start_date
    assert _simple_return_agg_date(F.max, data_source_df) > end_date

    tl_03 = TimeLimits.for_single_analysis_window(
        first_enrollment_date=exp_8d.start_date,
        last_date_full_data=end_date,
        analysis_start_days=0,
        analysis_length_dates=3,
        num_dates_enrollment=exp_8d.num_dates_enrollment)
    assert tl_03.first_date_data_required == start_date
    assert tl_03.last_date_data_required == '20190110'

    proc_ds = exp_8d._process_data_source_df(data_source_df, tl_03)

    assert _simple_return_agg_date(F.min,
                                   proc_ds) == tl_03.first_date_data_required
    assert _simple_return_agg_date(F.max,
                                   proc_ds) == tl_03.last_date_data_required

    tl_23 = TimeLimits.for_single_analysis_window(
        first_enrollment_date=exp_8d.start_date,
        last_date_full_data=end_date,
        analysis_start_days=2,
        analysis_length_dates=3,
        num_dates_enrollment=exp_8d.num_dates_enrollment)
    assert tl_23.first_date_data_required == add_days(start_date, 2)
    assert tl_23.last_date_data_required == '20190112'

    p_ds_2 = exp_8d._process_data_source_df(data_source_df, tl_23)

    assert _simple_return_agg_date(F.min,
                                   p_ds_2) == tl_23.first_date_data_required
    assert _simple_return_agg_date(F.max,
                                   p_ds_2) == tl_23.last_date_data_required

    assert proc_ds.select(F.col('data_source.client_id'))
    with pytest.raises(AnalysisException):
        assert data_source_df.select(F.col('data_source.client_id'))
Exemple #12
0
def test_time_limits_create2():
    # We don't have 14 dates of data for an 8-day cohort:
    with pytest.raises(ValueError):
        TimeLimits.for_single_analysis_window(
            first_enrollment_date="2019-01-01",
            last_date_full_data="2019-01-14",
            analysis_start_days=0,
            analysis_length_dates=14,
            num_dates_enrollment=8,
        )

    # We don't have 15 full dates of data for any users
    with pytest.raises(AssertionError):
        TimeLimits.for_single_analysis_window(
            first_enrollment_date="2019-01-01",
            last_date_full_data="2019-01-14",
            analysis_start_days=0,
            analysis_length_dates=15,
        )
Exemple #13
0
def test_ts_time_limits_create3():
    tl = TimeLimits.for_ts(first_enrollment_date='2019-01-01',
                           last_date_full_data='2019-01-15',
                           time_series_period='weekly',
                           num_dates_enrollment=8)

    assert tl.first_enrollment_date == '2019-01-01'
    assert tl.last_enrollment_date == '2019-01-08'
    assert len(tl.analysis_windows) == 1
    assert tl.analysis_windows[0].start == 0
    assert tl.analysis_windows[0].end == 6
    assert tl.first_date_data_required == '2019-01-01'
    assert tl.last_date_data_required == '2019-01-14'
Exemple #14
0
def test_time_limits_create6():
    # Of course the flexi-experiment has data for a 1 day window
    tl = TimeLimits.for_single_analysis_window(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-01-14",
        analysis_start_days=0,
        analysis_length_dates=1,
    )
    assert tl.first_enrollment_date == "2019-01-01"
    assert tl.last_enrollment_date == "2019-01-14"
    assert len(tl.analysis_windows) == 1
    assert tl.analysis_windows[0].start == 0
    assert tl.analysis_windows[0].end == 0
    assert tl.first_date_data_required == "2019-01-01"
    assert tl.last_date_data_required == "2019-01-14"
Exemple #15
0
def test_time_limits_create7():
    # If the analysis starts later, so does the data source
    tl = TimeLimits.for_single_analysis_window(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-01-14",
        analysis_start_days=7,
        analysis_length_dates=1,
    )
    assert tl.first_enrollment_date == "2019-01-01"
    assert tl.last_enrollment_date == "2019-01-07"
    assert len(tl.analysis_windows) == 1
    assert tl.analysis_windows[0].start == 7
    assert tl.analysis_windows[0].end == 7
    assert tl.first_date_data_required == "2019-01-08"
    assert tl.last_date_data_required == "2019-01-14"
Exemple #16
0
def test_ts_time_limits_create3():
    tl = TimeLimits.for_ts(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-01-15",
        time_series_period="weekly",
        num_dates_enrollment=8,
    )

    assert tl.first_enrollment_date == "2019-01-01"
    assert tl.last_enrollment_date == "2019-01-08"
    assert len(tl.analysis_windows) == 1
    assert tl.analysis_windows[0].start == 0
    assert tl.analysis_windows[0].end == 6
    assert tl.first_date_data_required == "2019-01-01"
    assert tl.last_date_data_required == "2019-01-14"
Exemple #17
0
def test_time_limits_create4():
    # Or a 2 day window
    tl = TimeLimits.for_single_analysis_window(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-01-14",
        analysis_start_days=0,
        analysis_length_dates=2,
        num_dates_enrollment=8,
    )
    assert tl.first_enrollment_date == "2019-01-01"
    assert tl.last_enrollment_date == "2019-01-08"
    assert len(tl.analysis_windows) == 1
    assert tl.analysis_windows[0].start == 0
    assert tl.analysis_windows[0].end == 1
    assert tl.first_date_data_required == "2019-01-01"
    assert tl.last_date_data_required == "2019-01-09"
Exemple #18
0
def test_time_limits_create3():
    # For the 8-day cohort We have enough data for a 7 day window
    tl = TimeLimits.for_single_analysis_window(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-01-14",
        analysis_start_days=0,
        analysis_length_dates=7,
        num_dates_enrollment=8,
    )
    assert tl.first_enrollment_date == "2019-01-01"
    assert tl.last_enrollment_date == "2019-01-08"
    assert len(tl.analysis_windows) == 1
    assert tl.analysis_windows[0].start == 0
    assert tl.analysis_windows[0].end == 6
    assert tl.first_date_data_required == "2019-01-01"
    assert tl.last_date_data_required == "2019-01-14"
Exemple #19
0
    def validate(self) -> None:
        self.check_runnable()

        dates_enrollment = self.config.experiment.proposed_enrollment + 1

        if self.config.experiment.end_date is not None:
            end_date = self.config.experiment.end_date
            analysis_length_dates = (
                (end_date - self.config.experiment.start_date).days -
                dates_enrollment + 1)
        else:
            analysis_length_dates = 21  # arbitrary
            end_date = self.config.experiment.start_date + timedelta(
                days=analysis_length_dates + dates_enrollment - 1)

        if analysis_length_dates < 0:
            logging.error(
                "Proposed enrollment longer than analysis dates length:" +
                f"{self.config.experiment.normandy_slug}")
            raise Exception("Cannot validate experiment")

        limits = TimeLimits.for_single_analysis_window(
            last_date_full_data=end_date.strftime("%Y-%m-%d"),
            analysis_start_days=0,
            analysis_length_dates=analysis_length_dates,
            first_enrollment_date=self.config.experiment.start_date.strftime(
                "%Y-%m-%d"),
            num_dates_enrollment=dates_enrollment,
        )

        exp = mozanalysis.experiment.Experiment(
            experiment_slug=self.config.experiment.normandy_slug,
            start_date=self.config.experiment.start_date.strftime("%Y-%m-%d"),
        )

        metrics = set()
        for v in self.config.metrics.values():
            metrics |= {m.metric for m in v}

        sql = exp.build_query(
            metrics,
            limits,
            "normandy",
            self.config.experiment.enrollment_query,
        )

        dry_run_query(sql)
def test_query_not_detectably_malformed():
    exp = Experiment('slug', '2019-01-01', 8)

    tl = TimeLimits.for_ts(
        first_enrollment_date='2019-01-01',
        last_date_full_data='2019-03-01',
        time_series_period='weekly',
        num_dates_enrollment=8
    )

    sql = exp.build_query(
        metric_list=[],
        time_limits=tl,
        enrollments_query_type='normandy',
    )

    sql_lint(sql)
Exemple #21
0
def test_megaquery_not_detectably_malformed():
    exp = Experiment('slug', '2019-01-01', 8)

    tl = TimeLimits.for_ts(first_enrollment_date='2019-01-01',
                           last_date_full_data='2019-03-01',
                           time_series_period='weekly',
                           num_dates_enrollment=8)

    sql = exp.build_query(
        metric_list=[
            m for m in mad.__dict__.values() if isinstance(m, mad.Metric)
        ],
        time_limits=tl,
        enrollments_query_type='normandy',
    )

    sql_lint(sql)
Exemple #22
0
def test_time_limits_create1():
    # When we have complete data for 2019-01-14...
    # ...We have 14 dates of data for those who enrolled on the 1st
    tl = TimeLimits.for_single_analysis_window(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-01-14",
        analysis_start_days=0,
        analysis_length_dates=14,
    )

    assert tl.first_enrollment_date == "2019-01-01"
    assert tl.last_enrollment_date == "2019-01-01"
    assert len(tl.analysis_windows) == 1
    assert tl.analysis_windows[0].start == 0
    assert tl.analysis_windows[0].end == 13
    assert tl.first_date_data_required == "2019-01-01"
    assert tl.last_date_data_required == "2019-01-14"
Exemple #23
0
def test_firefox_ios_klar_app_id_propagation():
    exp = Experiment("slug", "2019-01-01", 8, app_id="my_cool_app")

    tl = TimeLimits.for_ts(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-03-01",
        time_series_period="weekly",
        num_dates_enrollment=8,
    )

    sds = SegmentDataSource(
        name="cool_data_source",
        from_expr="`moz-fx-data-shared-prod`.{dataset}.cool_table",
        default_dataset="org_mozilla_ios_klar",
    )

    segment = Segment(
        name="cool_segment",
        select_expr="COUNT(*)",
        data_source=sds,
    )

    enrollments_sql = exp.build_enrollments_query(
        time_limits=tl,
        segment_list=[segment],
        enrollments_query_type="glean-event",
    )

    sql_lint(enrollments_sql)

    metrics_sql = exp.build_metrics_query(
        metric_list=[
            m for m in mozanalysis.metrics.klar_ios.__dict__.values()
            if isinstance(m, Metric)
        ],
        time_limits=tl,
        enrollments_table="enrollments",
    )

    sql_lint(metrics_sql)

    assert "org_mozilla_ios_klar" not in enrollments_sql
    assert "my_cool_app" in enrollments_sql

    sql_lint(metrics_sql)
Exemple #24
0
def test_query_not_detectably_malformed():
    exp = Experiment('slug', '2019-01-01', 8)

    tl = TimeLimits.for_ts(first_enrollment_date='2019-01-01',
                           last_date_full_data='2019-03-01',
                           time_series_period='weekly',
                           num_dates_enrollment=8)

    sql = exp.build_query(
        metric_list=[],
        time_limits=tl,
        enrollments_query_type='normandy',
    )

    # This query is actually slightly malformed, due to a trailing comma.
    # We should add a metric here if the linter ever improves.

    sql_lint(sql)
Exemple #25
0
def test_exposure_query():
    exp = Experiment("slug", "2019-01-01", 8, app_id="my_cool_app")

    tl = TimeLimits.for_ts(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-03-01",
        time_series_period="weekly",
        num_dates_enrollment=8,
    )

    enrollment_sql = exp.build_enrollments_query(
        time_limits=tl,
        enrollments_query_type="glean-event",
    )

    sql_lint(enrollment_sql)

    assert "exposures" in enrollment_sql
Exemple #26
0
def dry_run_query(exp_path):
    report = validate_schema(op.join(exp_path, "report.json"))
    metric_list = _make_metric_list(report)

    exp = Experiment(experiment_slug=report["experiment_slug"],
                     start_date=report["start_date"],
                     num_dates_enrollment=report["num_dates_enrollment"])
    # create an archive of the sql generating analysis
    time_limits = TimeLimits.for_single_analysis_window(
        first_enrollment_date=report['start_date'],
        last_date_full_data=report['last_date_full_data'],
        analysis_start_days=report['analysis_start_days'],
        analysis_length_dates=report['analysis_length_days'],
        num_dates_enrollment=report['num_dates_enrollment'])
    query = exp.build_query(metric_list=metric_list,
                            time_limits=time_limits,
                            enrollments_query_type='normandy')

    return query
Exemple #27
0
def test_query_not_detectably_malformed_fenix_fallback():
    exp = Experiment("slug", "2019-01-01", 8)

    tl = TimeLimits.for_ts(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-03-01",
        time_series_period="weekly",
        num_dates_enrollment=8,
    )

    enrollments_sql = exp.build_enrollments_query(
        time_limits=tl, enrollments_query_type="fenix-fallback")

    sql_lint(enrollments_sql)

    metrics_sql = exp.build_metrics_query(
        metric_list=[],
        time_limits=tl,
        enrollments_table="enrollments",
    )

    sql_lint(metrics_sql)
Exemple #28
0
def test_metrics_query_with_exposure_signal_custom_windows():
    exp = Experiment("slug", "2019-01-01", 8)

    tl = TimeLimits.for_ts(
        first_enrollment_date="2019-01-01",
        last_date_full_data="2019-03-01",
        time_series_period="weekly",
        num_dates_enrollment=8,
    )

    enrollments_sql = exp.build_enrollments_query(
        time_limits=tl, enrollments_query_type="fenix-fallback")

    sql_lint(enrollments_sql)

    metrics_sql = exp.build_metrics_query(
        metric_list=[
            m for m in mozanalysis.metrics.fenix.__dict__.values()
            if isinstance(m, Metric)
        ],
        time_limits=tl,
        enrollments_table="enrollments",
        analysis_basis=AnalysisBasis.EXPOSURES,
        exposure_signal=ExposureSignal(
            name="exposures",
            data_source=mozanalysis.metrics.fenix.baseline,
            select_expr="metrics.counter.events_total_uri_count > 0",
            friendly_name="URI visited exposure",
            description="Exposed when URI visited",
            window_start=1,
            window_end=3,
        ),
    )

    sql_lint(metrics_sql)

    assert "DATE_ADD('2019-01-01', INTERVAL 1 DAY)" in metrics_sql
    assert "DATE_ADD('2019-01-01', INTERVAL 3 DAY)" in metrics_sql
Exemple #29
0
    def _get_timelimits_if_ready(
        self, period: AnalysisPeriod, current_date: datetime
    ) -> Optional[TimeLimits]:
        """
        Returns a TimeLimits instance if experiment is due for analysis.
        Otherwise returns None.
        """
        prior_date = current_date - timedelta(days=1)
        prior_date_str = prior_date.strftime("%Y-%m-%d")
        current_date_str = current_date.strftime("%Y-%m-%d")

        dates_enrollment = self.config.experiment.proposed_enrollment + 1

        if self.config.experiment.start_date is None:
            return None

        time_limits_args = {
            "first_enrollment_date": self.config.experiment.start_date.strftime("%Y-%m-%d"),
            "num_dates_enrollment": dates_enrollment,
        }

        if period != AnalysisPeriod.OVERALL:
            try:
                current_time_limits = TimeLimits.for_ts(
                    last_date_full_data=current_date_str,
                    time_series_period=period.mozanalysis_label,
                    **time_limits_args,
                )
            except ValueError:
                # There are no analysis windows yet.
                # TODO: Add a more specific check.
                return None

            try:
                prior_time_limits = TimeLimits.for_ts(
                    last_date_full_data=prior_date_str,
                    time_series_period=period.mozanalysis_label,
                    **time_limits_args,
                )
            except ValueError:
                # We have an analysis window today, and we didn't yesterday,
                # so we must have just closed our first window.
                return current_time_limits

            if len(current_time_limits.analysis_windows) == len(prior_time_limits.analysis_windows):
                # No new data today
                return None

            return current_time_limits

        assert period == AnalysisPeriod.OVERALL
        if (
            self.config.experiment.end_date is None
            or self.config.experiment.end_date.date() != current_date.date()
            or self.config.experiment.status != "Complete"
        ):
            return None

        if self.config.experiment.end_date is None:
            return None

        analysis_length_dates = (
            (self.config.experiment.end_date - self.config.experiment.start_date).days
            - dates_enrollment
            + 1
        )

        if analysis_length_dates < 0:
            raise errors.EnrollmentLongerThanAnalysisException(self.config.experiment.normandy_slug)

        return TimeLimits.for_single_analysis_window(
            last_date_full_data=prior_date_str,
            analysis_start_days=0,
            analysis_length_dates=analysis_length_dates,
            **time_limits_args,
        )
Exemple #30
0
    def _get_timelimits_if_ready(
            self, period: AnalysisPeriod,
            current_date: datetime) -> Optional[TimeLimits]:
        """
        Returns a TimeLimits instance if experiment is due for analysis.
        Otherwise returns None.
        """
        prior_date = current_date - timedelta(days=1)
        prior_date_str = prior_date.strftime("%Y-%m-%d")
        current_date_str = current_date.strftime("%Y-%m-%d")

        if not self.config.experiment.proposed_enrollment:
            self.logger.info("Skipping %s; no enrollment period",
                             self.config.experiment.slug)
            return None

        dates_enrollment = self.config.experiment.proposed_enrollment + 1

        if self.config.experiment.start_date is None:
            return None

        time_limits_args = {
            "first_enrollment_date":
            self.config.experiment.start_date.strftime("%Y-%m-%d"),
            "num_dates_enrollment":
            dates_enrollment,
        }

        if period != AnalysisPeriod.OVERALL:
            try:
                current_time_limits = TimeLimits.for_ts(
                    last_date_full_data=current_date_str,
                    time_series_period=period.adjective,
                    **time_limits_args,
                )
            except ValueError:
                # There are no analysis windows yet.
                # TODO: Add a more specific check.
                return None

            try:
                prior_time_limits = TimeLimits.for_ts(
                    last_date_full_data=prior_date_str,
                    time_series_period=period.adjective,
                    **time_limits_args,
                )
            except ValueError:
                # We have an analysis window today, and we didn't yesterday,
                # so we must have just closed our first window.
                return current_time_limits

            if len(current_time_limits.analysis_windows) == len(
                    prior_time_limits.analysis_windows):
                # No new data today
                return None

            return current_time_limits

        # Period is OVERALL
        if self.config.experiment.end_date != prior_date:
            return None

        return TimeLimits.for_single_analysis_window(
            last_date_full_data=prior_date_str,
            analysis_start_days=0,
            analysis_length_dates=(self.config.experiment.end_date -
                                   self.config.experiment.start_date).days -
            dates_enrollment + 1,
            **time_limits_args,
        )