コード例 #1
0
def _make_docs_project(user):
    """
    Recreates the docs project from the test fixtures.

    Deletes any existing project named DOCS_PROJECT_NAME, creates a new one from docs-project.json, loads truth from
    docs-ground-truth.csv, then adds a single model with a single forecast whose predictions come from
    docs-predictions.json.

    :param user: passed to create_project_from_json() together with the project JSON file
    :return: a 4-tuple: (project, time_zero, forecast_model, forecast)
    """
    found_project = Project.objects.filter(name=DOCS_PROJECT_NAME).first()
    if found_project:
        click.echo("* deleting previous project: {}".format(found_project))
        found_project.delete()

    project = create_project_from_json(
        Path('forecast_app/tests/projects/docs-project.json'), user)  # atomic
    # rename after creation so the lookup-and-delete above finds it on the next run
    project.name = DOCS_PROJECT_NAME
    project.save()

    load_truth_data(
        project, Path('forecast_app/tests/truth_data/docs-ground-truth.csv'))

    forecast_model = ForecastModel.objects.create(project=project,
                                                  name='docs forecast model',
                                                  abbreviation='docs_mod')
    # NOTE(review): first() yields None when the project has no 2011-10-02 timezero - presumably docs-project.json
    # always defines it; confirm against the fixture
    time_zero = project.timezeros.filter(
        timezero_date=datetime.date(2011, 10, 2)).first()
    forecast = Forecast.objects.create(forecast_model=forecast_model,
                                       source='docs-predictions.json',
                                       time_zero=time_zero,
                                       notes="a small prediction file")

    # parse the fixture first so the file is closed before the database work starts, and decode it explicitly as
    # UTF-8 instead of depending on the platform's default text encoding
    with open('forecast_app/tests/predictions/docs-predictions.json',
              encoding='utf-8') as fp:
        json_io_dict_in = json.load(fp)
    load_predictions_from_json_io_dict(forecast, json_io_dict_in,
                                       False)  # atomic
    cache_forecast_metadata(forecast)  # atomic

    return project, time_zero, forecast_model, forecast
コード例 #2
0
    def test_enqueue_update_scores_for_all_models(self):
        """
        Checks how many jobs Score.enqueue_update_scores_for_all_models() enqueues in four situations: no
        ScoreLastUpdates yet, all ScoreLastUpdates freshly set, is_only_changed=False, and after loading truth.
        """

        def run_with_patched_enqueue(is_only_changed):
            # calls the method under test with rq's Queue.enqueue patched out and hands back the mock
            with patch('rq.queue.Queue.enqueue') as patched_enqueue:
                Score.enqueue_update_scores_for_all_models(
                    is_only_changed=is_only_changed)
            return patched_enqueue

        # with ModelScoreChanges but no ScoreLastUpdate, every Score/ForecastModel pair is enqueued
        # (per the original note: 6 scores * 1 model)
        first_mock = run_with_patched_enqueue(True)
        self.assertEqual(len(SCORE_ABBREV_TO_NAME_AND_DESCR),
                         first_mock.call_count)

        # set every score's last update for self.forecast_model now, i.e., after the model's change -> nothing is
        # enqueued
        Score.ensure_all_scores_exist()
        for each_score in Score.objects.all():
            each_score.set_last_update_for_forecast_model(self.forecast_model)
        run_with_patched_enqueue(True).assert_not_called()

        # same state, but is_only_changed=False forces every Score/ForecastModel pair to be enqueued
        forced_mock = run_with_patched_enqueue(False)
        self.assertEqual(len(SCORE_ABBREV_TO_NAME_AND_DESCR),
                         forced_mock.call_count)

        # loading truth marks the model as changed again, so every pair is enqueued once more
        load_truth_data(self.project,
                        Path('forecast_app/tests/truth_data/truths-ok.csv'),
                        is_convert_na_none=True)
        after_truth_mock = run_with_patched_enqueue(True)
        self.assertEqual(len(SCORE_ABBREV_TO_NAME_AND_DESCR),
                         after_truth_mock.call_count)
コード例 #3
0
    def test_model_score_change_truths(self):
        """
        Checks that loading and deleting a project's truth data touches its models' score_change.changed_at.
        """
        truths_csv_path = Path('forecast_app/tests/truth_data/truths-ok.csv')
        project2 = Project.objects.create()
        make_cdc_units_and_targets(project2)

        # case 1: no models yet, so just verify that Project._update_model_score_changes() gets called
        update_target = 'forecast_app.models.Project._update_model_score_changes'
        with patch(update_target) as update_mock:
            load_truth_data(project2, truths_csv_path)
            # called once each: delete_truth_data(), load_truth_data()
            self.assertEqual(2, update_mock.call_count)

        # case 2: one model -> loading truth must move its score_change.changed_at
        forecast_model2 = ForecastModel.objects.create(project=project2,
                                                       name='name',
                                                       abbreviation='abbrev')
        changed_at_before_load = forecast_model2.score_change.changed_at
        load_truth_data(project2, truths_csv_path)
        forecast_model2.score_change.refresh_from_db()
        self.assertNotEqual(changed_at_before_load,
                            forecast_model2.score_change.changed_at)

        # case 3: deleting truth must move score_change.changed_at as well
        changed_at_before_delete = forecast_model2.score_change.changed_at
        project2.delete_truth_data()
        forecast_model2.score_change.refresh_from_db()
        self.assertNotEqual(changed_at_before_delete,
                            forecast_model2.score_change.changed_at)
コード例 #4
0
    def setUpTestData(cls):
        """
        Builds the shared fixture: a CDC-style project with one model, four forecasts (EW01 2017, EW02 2017,
        EW51 2016, EW52 2016), truth from mean-abs-error-truths.csv, and the 'abs_error' score computed for the model.
        """
        cls.project = Project.objects.create()
        make_cdc_units_and_targets(cls.project)
        cls.forecast_model = ForecastModel.objects.create(
            project=cls.project, name='name', abbreviation='abbrev')

        def load_forecast_for_mmwr_week(mmwr_year, mmwr_week, csv_file):
            # creates the TimeZero for the given MMWR year/week, then loads csv_file as that timezero's forecast
            week_time_zero = TimeZero.objects.create(
                project=cls.project,
                timezero_date=(pymmwr.mmwr_week_to_date(mmwr_year,
                                                        mmwr_week)))
            return load_cdc_csv_forecast_file(2016, cls.forecast_model,
                                              Path(csv_file), week_time_zero)

        # loaded in the same order as the attribute numbers, not in calendar order
        cls.forecast1 = load_forecast_for_mmwr_week(
            2017, 1,
            'forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv'
        )  # EW01 2017
        cls.forecast2 = load_forecast_for_mmwr_week(
            2017, 2,
            'forecast_app/tests/model_error/ensemble/EW2-KoTstable-2017-01-23.csv'
        )  # EW02 2017
        cls.forecast3 = load_forecast_for_mmwr_week(
            2016, 51,
            'forecast_app/tests/model_error/ensemble/EW51-KoTstable-2017-01-03.csv'
        )  # EW51 2016
        cls.forecast4 = load_forecast_for_mmwr_week(
            2016, 52,
            'forecast_app/tests/model_error/ensemble/EW52-KoTstable-2017-01-09.csv'
        )  # EW52 2016

        # expected MAE per target for this 'mini' season. from:
        #   model_error_calculations.txt -> model_error_calculations.py -> model_error_calculations.xlsx:
        cls.exp_target_to_mae = {
            '1 wk ahead': 0.215904853,
            '2 wk ahead': 0.458186984,
            '3 wk ahead': 0.950515864,
            '4 wk ahead': 1.482010693
        }
        load_truth_data(
            cls.project,
            Path('forecast_app/tests/truth_data/mean-abs-error-truths.csv'))

        # the MAE calculation reads 'abs_error' score values, so make sure they exist for the model
        Score.ensure_all_scores_exist()
        abs_error_scores = Score.objects.filter(
            abbreviation='abs_error')  # hard-coded official abbrev
        cls.score = abs_error_scores.first()
        cls.score.update_score_for_model(cls.forecast_model)
コード例 #5
0
def fill_cdc_project(project, mo_user, is_public):
    """
    Populates `project` with CDC-style test content: units and targets, two TimeZeros, ground truth, and two
    ForecastModels owned by `mo_user`, the first of which gets one small forecast loaded.

    :param project: the Project to fill
    :param mo_user: set as `owner` of both created ForecastModels
    :param is_public: only used here to put "public" or "private" into the model names
    """
    # NOTE(review): these three attributes are assigned but not saved in this function - presumably the caller or
    # a helper below saves the project; confirm
    project.description = "description"
    project.home_url = "http://example.com/"
    project.core_data = "http://example.com/"

    # Units and Targets come from cdc-project.json (recall it has no timezeros)
    make_cdc_units_and_targets(project)

    # two TimeZeros - one for ground truth, and one for the forecast's data:
    # EW1-KoTsarima-2017-01-17-small.csv -> pymmwr.date_to_mmwr_week(datetime.date(2017, 1, 17))  # EW01 2017
    #   -> {'year': 2017, 'week': 3, 'day': 3}
    forecast_time_zero = TimeZero.objects.create(
        project=project,
        timezero_date=datetime.date(2017, 1, 17),
        data_version_date=None)
    TimeZero.objects.create(project=project,
                            timezero_date=datetime.date(2017, 1, 24),
                            data_version_date=None)

    # ground truth
    truths_csv_path = Path(
        'forecast_app/tests/truth_data/2017-01-17-truths.csv')
    load_truth_data(project, truths_csv_path, is_convert_na_none=True)

    # first model: gets a forecast below
    click.echo("creating ForecastModel")
    first_model = ForecastModel.objects.create(
        project=project,
        name=f'Test ForecastModel1 ({"public" if is_public else "private"})',
        abbreviation='model1_abbrev',
        team_name='ForecastModel1 team',
        description="a ForecastModel for testing",
        home_url='http://example.com',
        owner=mo_user)

    # load one forecast from a small data file, reporting how long it took
    forecast_csv_path = Path(
        'forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    loading_message = "* loading forecast into forecast_model={}, csv_file_path={}".format(
        first_model, forecast_csv_path)
    click.echo(loading_message)
    load_started = timeit.default_timer()
    loaded_forecast = load_cdc_csv_forecast_file(2016, first_model,
                                                 forecast_csv_path,
                                                 forecast_time_zero)
    click.echo("  loaded forecast={}. {}".format(
        loaded_forecast,
        timeit.default_timer() - load_started))

    # second model: no forecasts, and team_name is left at its default ('')
    ForecastModel.objects.create(
        project=project,
        name=f'Test ForecastModel2 ({"public" if is_public else "private"})',
        abbreviation='model2_abbrev',
        description="a second ForecastModel for testing",
        home_url='http://example.com',
        owner=mo_user)
コード例 #6
0
    def test__tz_unit_targ_pks_to_truth_values(self):
        """
        Checks that _tz_unit_targ_pks_to_truth_values() returns the nested dict
        {timezero_pk: {unit_pk: {target_pk: [truth_value]}}} for a project with one TimeZero.
        """
        # setup
        project = Project.objects.create()
        make_cdc_units_and_targets(project)

        # only the TimeZero we compare against exists when truths-2016-2017-reichlab.csv is loaded
        time_zero = TimeZero.objects.create(project=project,
                                            timezero_date=datetime.date(
                                                2017, 1, 1),
                                            is_season_start=True,
                                            season_name='season1')
        truths_csv_path = Path(
            'utils/ensemble-truth-table-script/truths-2016-2017-reichlab.csv')
        load_truth_data(project, truths_csv_path)

        forecast_model = ForecastModel.objects.create(project=project,
                                                      name='test model',
                                                      abbreviation='abbrev')
        forecast_csv_path = Path(
            'forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv'
        )  # EW01 2017
        load_cdc_csv_forecast_file(2016, forecast_model, forecast_csv_path,
                                   time_zero)

        # expected truths as a table: one row per unit, one column per target, in target_names order. note that
        # 'Season onset' values are expected as strings while 'Season peak week' values are expected as dates
        target_names = [
            'Season onset', 'Season peak week', 'Season peak percentage',
            '1 wk ahead', '2 wk ahead', '3 wk ahead', '4 wk ahead'
        ]
        unit_name_to_truths = {
            'HHS Region 1': [
                '2016-12-25', datetime.date(2017, 2, 5), 3.19221, 1.52411,
                1.73987, 2.06524, 2.51375
            ],
            'HHS Region 2': [
                '2016-11-20', datetime.date(2017, 2, 5), 6.93759, 5.07086,
                5.68166, 6.01053, 6.49829
            ],
            'HHS Region 3': [
                '2016-12-18', datetime.date(2017, 2, 12), 5.20003, 2.81366,
                3.09968, 3.45232, 3.73339
            ],
            'HHS Region 4': [
                '2016-11-13', datetime.date(2017, 2, 12), 5.5107, 2.89395,
                3.68564, 3.69188, 4.53169
            ],
            'HHS Region 5': [
                '2016-12-25', datetime.date(2017, 2, 12), 4.31787, 2.11757,
                2.4432, 2.76295, 3.182
            ],
            'HHS Region 6': [
                '2017-01-08', datetime.date(2017, 2, 5), 9.87589, 4.80185,
                5.26955, 6.10427, 8.13221
            ],
            'HHS Region 7': [
                '2016-12-25', datetime.date(2017, 2, 5), 6.35948, 2.75581,
                3.46528, 4.56991, 5.52653
            ],
            'HHS Region 8': [
                '2016-12-18', datetime.date(2017, 2, 12), 2.72703, 1.90851,
                2.2668, 2.07104, 2.27632
            ],
            'HHS Region 9': [
                '2016-12-18', datetime.date(2016, 12, 25), 3.30484, 2.83778,
                2.68071, 2.9577, 3.03987
            ],
            'HHS Region 10': [
                '2016-12-11', datetime.date(2016, 12, 25), 3.67061, 2.15197,
                3.25108, 2.51434, 2.28634
            ],
            'US National': [
                '2016-12-11', datetime.date(2017, 2, 5), 5.06094, 3.07623,
                3.50708, 3.79872, 4.43601
            ],
        }

        # resolve names to primary keys: units first, then targets
        unit_name_to_pk = {
            unit_name:
            Unit.objects.filter(project=project, name=unit_name).first().pk
            for unit_name in unit_name_to_truths
        }
        target_pks = [
            Target.objects.filter(project=project,
                                  name=target_name).first().pk
            for target_name in target_names
        ]

        # test. each truth value is wrapped in a single-item list
        exp_dict = {  # {timezero_pk: {unit_pk: {target_id: truth_value}}}
            time_zero.pk: {
                unit_name_to_pk[unit_name]: {
                    target_pk: [truth_value]
                    for target_pk, truth_value in zip(target_pks, truths)
                }
                for unit_name, truths in unit_name_to_truths.items()
            }
        }
        act_dict = _tz_unit_targ_pks_to_truth_values(forecast_model.project)
        self.assertEqual(exp_dict, act_dict)
コード例 #7
0
    def test_mae(self):
        """
        Checks 'abs_error' score rows and unit_to_mean_abs_error_rows_for_project() output for a small project
        with three TimeZeros in season 's1', spot-checking the 'HHS Region 1' unit.
        """
        project2 = Project.objects.create()
        make_cdc_units_and_targets(project2)

        # three weekly TimeZeros; only the first one starts (and names) the season
        TimeZero.objects.create(project=project2,
                                timezero_date=datetime.date(2016, 10, 23),
                                is_season_start=True,
                                season_name='s1')
        for later_timezero_date in (datetime.date(2016, 10, 30),
                                    datetime.date(2016, 11, 6)):
            TimeZero.objects.create(project=project2,
                                    timezero_date=later_timezero_date)

        forecast_model2 = ForecastModel.objects.create(project=project2,
                                                       name='name',
                                                       abbreviation='abbrev')
        load_cdc_csv_forecasts_from_dir(
            forecast_model2, Path('forecast_app/tests/load_forecasts'), 2016)
        truths_csv_path = Path(
            'utils/ensemble-truth-table-script/truths-2016-2017-reichlab.csv')
        load_truth_data(project2, truths_csv_path)

        Score.ensure_all_scores_exist()
        abs_error_score = Score.objects.filter(
            abbreviation='abs_error').first()  # hard-coded official abbrev
        abs_error_score.update_score_for_model(forecast_model2)

        season_rows = _score_value_rows_for_season(project2, 's1')
        self.assertEqual(5 * 11, len(season_rows))  # 5 targets * 11 units

        # spot-check a unit: rows whose first item is the unit's id; the last item of each row is the value checked.
        # NOTE(review): zip() stops at the shorter list, so fewer actual rows than expected would go unnoticed here
        exp_maes = [
            0.1830079332082548, 0.127335480231265, 0.040631614561185525,
            0.09119562794624952, 0.15125133156909953
        ]
        hhs1_loc = project2.units.filter(name='HHS Region 1').first()
        act_maes = [
            season_row[-1] for season_row in season_rows
            if season_row[0] == hhs1_loc.id
        ]
        for exp_mae, act_mae in zip(exp_maes, act_maes):
            self.assertAlmostEqual(exp_mae, act_mae)

        # test unit_to_mean_abs_error_rows_for_project(), since we have a nice fixture
        loc_to_mae_rows_no_season = unit_to_mean_abs_error_rows_for_project(
            project2, None)
        loc_to_mae_rows_s1 = unit_to_mean_abs_error_rows_for_project(
            project2, 's1')
        self.assertEqual(loc_to_mae_rows_no_season,
                         loc_to_mae_rows_s1)  # season_name shouldn't matter
        all_unit_names = set(project2.units.values_list('name', flat=True))
        self.assertEqual(all_unit_names, set(loc_to_mae_rows_no_season))

        # per-unit value is indexed as [0] -> rows (header, then one row per model) and [1] -> target name to min
        exp_header = [
            'Model', '1 wk ahead', '2 wk ahead', '3 wk ahead', '4 wk ahead',
            'Season peak percentage'
        ]
        exp_model_row = [
            forecast_model2.pk, 0.127335480231265, 0.040631614561185525,
            0.09119562794624952, 0.15125133156909953, 0.1830079332082548
        ]
        act_rows = loc_to_mae_rows_no_season[hhs1_loc.name][0]
        self.assertEqual(exp_header, act_rows[0])  # header
        self.assertEqual(exp_model_row[0], act_rows[1][0])  # model
        for column_idx in range(1, len(exp_model_row)):  # the five target columns
            self.assertAlmostEqual(exp_model_row[column_idx],
                                   act_rows[1][column_idx])

        # with a single model, each target's minimum is that model's own value
        exp_target_name_and_min = [
            ('Season peak percentage', 0.1830079332082548),
            ('1 wk ahead', 0.127335480231265),
            ('2 wk ahead', 0.040631614561185525),
            ('3 wk ahead', 0.09119562794624952),
            ('4 wk ahead', 0.15125133156909953),
        ]
        act_loc_to_min = loc_to_mae_rows_no_season[hhs1_loc.name][1]
        for target_name, exp_min in exp_target_name_and_min:
            target = project2.targets.filter(name=target_name).first()
            self.assertAlmostEqual(exp_min, act_loc_to_min[target.name])
コード例 #8
0
def make_thai_moph_project_app(data_dir, truths_csv_file):
    """
    Recreates the Thai MOPH project: deletes any existing project named THAI_PROJECT_NAME, creates a new one from
    thai-project.json, makes its model and TimeZeros, loads truth from `truths_csv_file`, and loads the forecasts
    found in `data_dir`.

    Note: The input files to this program are the output from a spamd export script located in the dengue-data repo
    ( https://github.com/reichlab/dengue-data/blob/master/misc/cdc-csv-export.R ) and are committed to
    https://epimodeling.springloops.io/project/156725/svn/source/browse/-/trunk%2Farchives%2Fdengue-reports%2Fdata-summaries
    They currently must be processed (currently by hand) via these rough steps:

        1. download template
        2. correct template header from 'bin_end_not_incl' to 'bin_end_notincl'
        3. delete files where first date (data_version_date) was before 0525
        4. for files with duplicate second dates (timezeros), keep the one with the most recent first date
           (data_version_date)

    :param data_dir: directory containing the CDC CSV forecast files to load. str or Path
    :param truths_csv_file: the truth CSV file to load into the project. str or Path
    """
    start_time = timeit.default_timer()
    data_dir = Path(data_dir)
    click.echo(f"* make_thai_moph_project_app(): data_dir={data_dir}, truths_csv_file={truths_csv_file}")

    project = Project.objects.filter(name=THAI_PROJECT_NAME).first()
    if project:
        click.echo("* Deleting existing project: {}".format(project))
        delete_project_iteratively(project)

    # get the project owner and model owner users; the other returned items are not needed here
    po_user, _, mo_user, _, _, _ = get_or_create_super_po_mo_users(is_create_super=False)

    # create the Project from its JSON configuration file.
    # NOTE(review): an earlier comment here mentioned '!is_validate' to bypass Impetus non-uniform bins
    # ([0, 1), [1, 10), [10, 20), ..., [1990, 2000)), but no such argument is passed - confirm it is obsolete
    project = create_project_from_json(Path('forecast_app/tests/projects/thai-project.json'), po_user)
    project.model_owners.add(mo_user)
    project.save()
    click.echo("* Created project: {}".format(project))

    # make the model
    forecast_model = make_model(project, mo_user)
    click.echo("* created model: {}".format(forecast_model))

    # create TimeZeros. NB: we skip existing TimeZeros in case we are loading new forecasts. for is_season_start and
    # season_name we use year transitions: the first 2017 we encounter -> start of that year, etc.
    seen_years = set()  # years that have been processed. used to determine season starts
    for cdc_csv_file, timezero_date, _, data_version_date in cdc_csv_components_from_data_dir(data_dir):
        timezero_year = timezero_date.year
        is_season_start = timezero_year not in seen_years
        seen_years.add(timezero_year)  # no-op if already present

        found_time_zero = project.time_zero_for_timezero_date(timezero_date)
        if found_time_zero:
            click.echo(f"s (TimeZero exists)\t{cdc_csv_file}\t")  # 's' from load_cdc_csv_forecasts_from_dir()
            continue

        TimeZero.objects.create(project=project,
                                timezero_date=str(timezero_date),
                                data_version_date=str(data_version_date) if data_version_date else None,
                                is_season_start=is_season_start,
                                season_name=(str(timezero_year) if is_season_start else None))
    click.echo("- created TimeZeros: {}".format(project.timezeros.all()))

    # load the truth from the file the caller passed in (previously a hard-coded path was used and the
    # truths_csv_file argument was silently ignored)
    click.echo("- loading truth values")
    load_truth_data(project, Path(truths_csv_file), is_convert_na_none=True)

    # load data.
    # NOTE(review): this re-fetches the project's first model rather than reusing the one made above - presumably
    # they are the same object in the database; confirm
    click.echo("* Loading forecasts")
    forecast_model = project.models.first()
    forecasts = load_cdc_csv_forecasts_from_dir(forecast_model, data_dir, None)  # season_start_year
    click.echo("- Loading forecasts: loaded {} forecast(s)".format(len(forecasts)))

    # done
    click.echo(f"* Done. time: {timeit.default_timer() - start_time}")