def _make_docs_project(user):
    """
    Creates a project based on docs-project.json with forecasts from docs-predictions.json.
    """
    found_project = Project.objects.filter(name=DOCS_PROJECT_NAME).first()
    if found_project:
        click.echo("* deleting previous project: {}".format(found_project))
        found_project.delete()

    project = create_project_from_json(Path('forecast_app/tests/projects/docs-project.json'), user)  # atomic
    project.name = DOCS_PROJECT_NAME
    project.save()
    load_truth_data(project, Path('forecast_app/tests/truth_data/docs-ground-truth.csv'))

    forecast_model = ForecastModel.objects.create(project=project, name='docs forecast model',
                                                  abbreviation='docs_mod')
    time_zero = project.timezeros.filter(timezero_date=datetime.date(2011, 10, 2)).first()
    forecast = Forecast.objects.create(forecast_model=forecast_model, source='docs-predictions.json',
                                       time_zero=time_zero, notes="a small prediction file")
    with open('forecast_app/tests/predictions/docs-predictions.json') as fp:
        json_io_dict_in = json.load(fp)
        load_predictions_from_json_io_dict(forecast, json_io_dict_in, False)  # atomic
        cache_forecast_metadata(forecast)  # atomic
    return project, time_zero, forecast_model, forecast
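
# A minimal usage sketch (not in the original file) of _make_docs_project() above, assuming a pre-existing
# user object (here a hypothetical `po_user`); the assertions only touch the objects the helper returns.
def _docs_project_usage_example(po_user):
    project, time_zero, forecast_model, forecast = _make_docs_project(po_user)
    assert project.name == DOCS_PROJECT_NAME
    assert forecast.time_zero == time_zero
    assert forecast.forecast_model == forecast_model
    return project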
def test_enqueue_update_scores_for_all_models(self):
    # tests that Score.enqueue_update_scores_for_all_models() should only enqueue scores for changed models

    # test that with ModelScoreChanges but no ScoreLastUpdate, all Score/ForecastModel pairs are updated
    with patch('rq.queue.Queue.enqueue') as enqueue_mock:
        Score.enqueue_update_scores_for_all_models(is_only_changed=True)
        self.assertEqual(len(SCORE_ABBREV_TO_NAME_AND_DESCR), enqueue_mock.call_count)  # 6 scores * 1 model

    # make all ScoreLastUpdates be after self.forecast_model's update, which means none should update
    Score.ensure_all_scores_exist()
    for score in Score.objects.all():
        score.set_last_update_for_forecast_model(self.forecast_model)

    with patch('rq.queue.Queue.enqueue') as enqueue_mock:
        Score.enqueue_update_scores_for_all_models(is_only_changed=True)
        enqueue_mock.assert_not_called()

    # same, but pass is_only_changed=False -> all Score/ForecastModel pairs should update
    with patch('rq.queue.Queue.enqueue') as enqueue_mock:
        Score.enqueue_update_scores_for_all_models(is_only_changed=False)
        self.assertEqual(len(SCORE_ABBREV_TO_NAME_AND_DESCR), enqueue_mock.call_count)

    # loading truth should result in all Score/ForecastModel pairs being updated
    load_truth_data(self.project, Path('forecast_app/tests/truth_data/truths-ok.csv'), is_convert_na_none=True)
    with patch('rq.queue.Queue.enqueue') as enqueue_mock:
        Score.enqueue_update_scores_for_all_models(is_only_changed=True)
        self.assertEqual(len(SCORE_ABBREV_TO_NAME_AND_DESCR), enqueue_mock.call_count)
def test_model_score_change_truths(self):
    project2 = Project.objects.create()
    make_cdc_units_and_targets(project2)

    # adding project truth should update all of its models' score_change.changed_at. test with no models -> ensure
    # Project._update_model_score_changes() is called
    with patch('forecast_app.models.Project._update_model_score_changes') as update_mock:
        load_truth_data(project2, Path('forecast_app/tests/truth_data/truths-ok.csv'))
        self.assertEqual(2, update_mock.call_count)  # called once each: delete_truth_data(), load_truth_data()

    # adding project truth should update all of its models' score_change.changed_at. test with one model
    forecast_model2 = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')
    before_changed_at = forecast_model2.score_change.changed_at
    load_truth_data(project2, Path('forecast_app/tests/truth_data/truths-ok.csv'))
    forecast_model2.score_change.refresh_from_db()
    self.assertNotEqual(before_changed_at, forecast_model2.score_change.changed_at)

    # deleting project truth should update all of its models' score_change.changed_at
    before_changed_at = forecast_model2.score_change.changed_at
    project2.delete_truth_data()
    forecast_model2.score_change.refresh_from_db()
    self.assertNotEqual(before_changed_at, forecast_model2.score_change.changed_at)
def setUpTestData(cls):
    cls.project = Project.objects.create()
    make_cdc_units_and_targets(cls.project)
    cls.forecast_model = ForecastModel.objects.create(project=cls.project, name='name', abbreviation='abbrev')

    time_zero = TimeZero.objects.create(project=cls.project, timezero_date=(pymmwr.mmwr_week_to_date(2017, 1)))
    csv_file_path = Path('forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv')  # EW01 2017
    cls.forecast1 = load_cdc_csv_forecast_file(2016, cls.forecast_model, csv_file_path, time_zero)

    time_zero = TimeZero.objects.create(project=cls.project, timezero_date=(pymmwr.mmwr_week_to_date(2017, 2)))
    csv_file_path = Path('forecast_app/tests/model_error/ensemble/EW2-KoTstable-2017-01-23.csv')  # EW02 2017
    cls.forecast2 = load_cdc_csv_forecast_file(2016, cls.forecast_model, csv_file_path, time_zero)

    time_zero = TimeZero.objects.create(project=cls.project, timezero_date=(pymmwr.mmwr_week_to_date(2016, 51)))
    csv_file_path = Path('forecast_app/tests/model_error/ensemble/EW51-KoTstable-2017-01-03.csv')  # EW51 2016
    cls.forecast3 = load_cdc_csv_forecast_file(2016, cls.forecast_model, csv_file_path, time_zero)

    time_zero = TimeZero.objects.create(project=cls.project, timezero_date=(pymmwr.mmwr_week_to_date(2016, 52)))
    csv_file_path = Path('forecast_app/tests/model_error/ensemble/EW52-KoTstable-2017-01-09.csv')  # EW52 2016
    cls.forecast4 = load_cdc_csv_forecast_file(2016, cls.forecast_model, csv_file_path, time_zero)

    # 'mini' season for testing. from:
    # model_error_calculations.txt -> model_error_calculations.py -> model_error_calculations.xlsx:
    cls.exp_target_to_mae = {'1 wk ahead': 0.215904853,
                             '2 wk ahead': 0.458186984,
                             '3 wk ahead': 0.950515864,
                             '4 wk ahead': 1.482010693}
    load_truth_data(cls.project, Path('forecast_app/tests/truth_data/mean-abs-error-truths.csv'))

    # score needed for MAE calculation
    Score.ensure_all_scores_exist()
    cls.score = Score.objects.filter(abbreviation='abs_error').first()  # hard-coded official abbrev
    cls.score.update_score_for_model(cls.forecast_model)
def fill_cdc_project(project, mo_user, is_public):
    project.description = "description"
    project.home_url = "http://example.com/"
    project.core_data = "http://example.com/"

    # make the Units and Targets via cdc-project.json (recall it has no timezeros)
    make_cdc_units_and_targets(project)

    # make two TimeZeros - one for ground truth, and one for the forecast's data:
    #   EW1-KoTsarima-2017-01-17-small.csv -> pymmwr.date_to_mmwr_week(datetime.date(2017, 1, 17))  # EW01 2017
    #     -> {'year': 2017, 'week': 3, 'day': 3}
    time_zero1 = TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 17),
                                         data_version_date=None)
    TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 24), data_version_date=None)

    # load ground truth
    load_truth_data(project, Path('forecast_app/tests/truth_data/2017-01-17-truths.csv'), is_convert_na_none=True)

    # create the two models
    click.echo("creating ForecastModel")
    forecast_model1 = ForecastModel.objects.create(
        project=project,
        name=f'Test ForecastModel1 ({"public" if is_public else "private"})',
        abbreviation='model1_abbrev',
        team_name='ForecastModel1 team',
        description="a ForecastModel for testing",
        home_url='http://example.com',
        owner=mo_user)

    # load the forecasts using a small data file
    csv_file_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    click.echo("* loading forecast into forecast_model={}, csv_file_path={}".format(forecast_model1, csv_file_path))
    start_time = timeit.default_timer()
    forecast1 = load_cdc_csv_forecast_file(2016, forecast_model1, csv_file_path, time_zero1)
    click.echo(" loaded forecast={}. {}".format(forecast1, timeit.default_timer() - start_time))

    ForecastModel.objects.create(
        project=project,
        name=f'Test ForecastModel2 ({"public" if is_public else "private"})',
        abbreviation='model2_abbrev',
        # team_name='ForecastModel2 team',  # leave default ('')
        description="a second ForecastModel for testing",
        home_url='http://example.com',
        owner=mo_user)
def test__tz_unit_targ_pks_to_truth_values(self):
    # setup
    project = Project.objects.create()
    make_cdc_units_and_targets(project)

    # load truth only for the TimeZero in truths-2016-2017-reichlab.csv we're testing against
    time_zero = TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 1),
                                        is_season_start=True, season_name='season1')
    load_truth_data(project, Path('utils/ensemble-truth-table-script/truths-2016-2017-reichlab.csv'))
    forecast_model = ForecastModel.objects.create(project=project, name='test model', abbreviation='abbrev')
    csv_file_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    load_cdc_csv_forecast_file(2016, forecast_model, csv_file_path, time_zero)

    # test
    tz_pk = time_zero.pk
    loc1_pk = Unit.objects.filter(project=project, name='HHS Region 1').first().pk
    loc2_pk = Unit.objects.filter(project=project, name='HHS Region 2').first().pk
    loc3_pk = Unit.objects.filter(project=project, name='HHS Region 3').first().pk
    loc4_pk = Unit.objects.filter(project=project, name='HHS Region 4').first().pk
    loc5_pk = Unit.objects.filter(project=project, name='HHS Region 5').first().pk
    loc6_pk = Unit.objects.filter(project=project, name='HHS Region 6').first().pk
    loc7_pk = Unit.objects.filter(project=project, name='HHS Region 7').first().pk
    loc8_pk = Unit.objects.filter(project=project, name='HHS Region 8').first().pk
    loc9_pk = Unit.objects.filter(project=project, name='HHS Region 9').first().pk
    loc10_pk = Unit.objects.filter(project=project, name='HHS Region 10').first().pk
    loc11_pk = Unit.objects.filter(project=project, name='US National').first().pk
    target1_pk = Target.objects.filter(project=project, name='Season onset').first().pk
    target2_pk = Target.objects.filter(project=project, name='Season peak week').first().pk
    target3_pk = Target.objects.filter(project=project, name='Season peak percentage').first().pk
    target4_pk = Target.objects.filter(project=project, name='1 wk ahead').first().pk
    target5_pk = Target.objects.filter(project=project, name='2 wk ahead').first().pk
    target6_pk = Target.objects.filter(project=project, name='3 wk ahead').first().pk
    target7_pk = Target.objects.filter(project=project, name='4 wk ahead').first().pk
    exp_dict = {  # {timezero_pk: {unit_pk: {target_id: truth_value}}}
        tz_pk: {
            loc1_pk: {target1_pk: ['2016-12-25'], target2_pk: [datetime.date(2017, 2, 5)], target3_pk: [3.19221],
                      target4_pk: [1.52411], target5_pk: [1.73987], target6_pk: [2.06524], target7_pk: [2.51375]},
            loc2_pk: {target1_pk: ['2016-11-20'], target2_pk: [datetime.date(2017, 2, 5)], target3_pk: [6.93759],
                      target4_pk: [5.07086], target5_pk: [5.68166], target6_pk: [6.01053], target7_pk: [6.49829]},
            loc3_pk: {target1_pk: ['2016-12-18'], target2_pk: [datetime.date(2017, 2, 12)], target3_pk: [5.20003],
                      target4_pk: [2.81366], target5_pk: [3.09968], target6_pk: [3.45232], target7_pk: [3.73339]},
            loc4_pk: {target1_pk: ['2016-11-13'], target2_pk: [datetime.date(2017, 2, 12)], target3_pk: [5.5107],
                      target4_pk: [2.89395], target5_pk: [3.68564], target6_pk: [3.69188], target7_pk: [4.53169]},
            loc5_pk: {target1_pk: ['2016-12-25'], target2_pk: [datetime.date(2017, 2, 12)], target3_pk: [4.31787],
                      target4_pk: [2.11757], target5_pk: [2.4432], target6_pk: [2.76295], target7_pk: [3.182]},
            loc6_pk: {target1_pk: ['2017-01-08'], target2_pk: [datetime.date(2017, 2, 5)], target3_pk: [9.87589],
                      target4_pk: [4.80185], target5_pk: [5.26955], target6_pk: [6.10427], target7_pk: [8.13221]},
            loc7_pk: {target1_pk: ['2016-12-25'], target2_pk: [datetime.date(2017, 2, 5)], target3_pk: [6.35948],
                      target4_pk: [2.75581], target5_pk: [3.46528], target6_pk: [4.56991], target7_pk: [5.52653]},
            loc8_pk: {target1_pk: ['2016-12-18'], target2_pk: [datetime.date(2017, 2, 12)], target3_pk: [2.72703],
                      target4_pk: [1.90851], target5_pk: [2.2668], target6_pk: [2.07104], target7_pk: [2.27632]},
            loc9_pk: {target1_pk: ['2016-12-18'], target2_pk: [datetime.date(2016, 12, 25)], target3_pk: [3.30484],
                      target4_pk: [2.83778], target5_pk: [2.68071], target6_pk: [2.9577], target7_pk: [3.03987]},
            loc10_pk: {target1_pk: ['2016-12-11'], target2_pk: [datetime.date(2016, 12, 25)], target3_pk: [3.67061],
                       target4_pk: [2.15197], target5_pk: [3.25108], target6_pk: [2.51434], target7_pk: [2.28634]},
            loc11_pk: {target1_pk: ['2016-12-11'], target2_pk: [datetime.date(2017, 2, 5)], target3_pk: [5.06094],
                       target4_pk: [3.07623], target5_pk: [3.50708], target6_pk: [3.79872], target7_pk: [4.43601]},
        }
    }
    act_dict = _tz_unit_targ_pks_to_truth_values(forecast_model.project)
    self.assertEqual(exp_dict, act_dict)
def test_mae(self):
    project2 = Project.objects.create()
    make_cdc_units_and_targets(project2)
    TimeZero.objects.create(project=project2, timezero_date=datetime.date(2016, 10, 23),
                            is_season_start=True, season_name='s1')
    TimeZero.objects.create(project=project2, timezero_date=datetime.date(2016, 10, 30))
    TimeZero.objects.create(project=project2, timezero_date=datetime.date(2016, 11, 6))
    forecast_model2 = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')
    load_cdc_csv_forecasts_from_dir(forecast_model2, Path('forecast_app/tests/load_forecasts'), 2016)
    load_truth_data(project2, Path('utils/ensemble-truth-table-script/truths-2016-2017-reichlab.csv'))

    Score.ensure_all_scores_exist()
    score = Score.objects.filter(abbreviation='abs_error').first()  # hard-coded official abbrev
    score.update_score_for_model(forecast_model2)

    score_value_rows_for_season = _score_value_rows_for_season(project2, 's1')
    self.assertEqual(5 * 11, len(score_value_rows_for_season))  # 5 targets * 11 units

    # spot-check a unit
    exp_maes = [0.1830079332082548, 0.127335480231265, 0.040631614561185525, 0.09119562794624952,
                0.15125133156909953]
    hhs1_loc = project2.units.filter(name='HHS Region 1').first()
    hhs1_loc_rows = filter(lambda row: row[0] == hhs1_loc.id, score_value_rows_for_season)
    act_maes = [row[-1] for row in hhs1_loc_rows]
    for exp_mae, act_mae in zip(exp_maes, act_maes):
        self.assertAlmostEqual(exp_mae, act_mae)

    # test unit_to_mean_abs_error_rows_for_project(), since we have a nice fixture
    loc_to_mae_rows_no_season = unit_to_mean_abs_error_rows_for_project(project2, None)
    self.assertEqual(loc_to_mae_rows_no_season,
                     unit_to_mean_abs_error_rows_for_project(project2, 's1'))  # season_name shouldn't matter
    self.assertEqual(set(project2.units.values_list('name', flat=True)), set(loc_to_mae_rows_no_season))

    exp_rows = [['Model', '1 wk ahead', '2 wk ahead', '3 wk ahead', '4 wk ahead', 'Season peak percentage'],
                [forecast_model2.pk, 0.127335480231265, 0.040631614561185525, 0.09119562794624952,
                 0.15125133156909953, 0.1830079332082548]]
    act_rows = loc_to_mae_rows_no_season[hhs1_loc.name][0]
    self.assertEqual(exp_rows[0], act_rows[0])  # header
    self.assertEqual(exp_rows[1][0], act_rows[1][0])  # model
    self.assertAlmostEqual(exp_rows[1][1], act_rows[1][1])  # 1 wk ahead
    self.assertAlmostEqual(exp_rows[1][2], act_rows[1][2])
    self.assertAlmostEqual(exp_rows[1][3], act_rows[1][3])
    self.assertAlmostEqual(exp_rows[1][4], act_rows[1][4])
    self.assertAlmostEqual(exp_rows[1][5], act_rows[1][5])

    target_spp = project2.targets.filter(name='Season peak percentage').first()
    target_1wk = project2.targets.filter(name='1 wk ahead').first()
    target_2wk = project2.targets.filter(name='2 wk ahead').first()
    target_3wk = project2.targets.filter(name='3 wk ahead').first()
    target_4wk = project2.targets.filter(name='4 wk ahead').first()
    exp_loc_to_min = {target_spp: 0.1830079332082548,
                      target_1wk: 0.127335480231265,
                      target_2wk: 0.040631614561185525,
                      target_3wk: 0.09119562794624952,
                      target_4wk: 0.15125133156909953}
    act_loc_to_min = loc_to_mae_rows_no_season[hhs1_loc.name][1]
    self.assertAlmostEqual(exp_loc_to_min[target_spp], act_loc_to_min[target_spp.name])
    self.assertAlmostEqual(exp_loc_to_min[target_1wk], act_loc_to_min[target_1wk.name])
    self.assertAlmostEqual(exp_loc_to_min[target_2wk], act_loc_to_min[target_2wk.name])
    self.assertAlmostEqual(exp_loc_to_min[target_3wk], act_loc_to_min[target_3wk.name])
    self.assertAlmostEqual(exp_loc_to_min[target_4wk], act_loc_to_min[target_4wk.name])
def make_thai_moph_project_app(data_dir, truths_csv_file):
    """
    Deletes and creates a database with one project, one group, and two classes of users. Hard-coded for the
    2017-2018 season. Then loads models from the Impetus project.

    Note: The input files to this program are the output from a spamd export script located in the dengue-data repo
    ( https://github.com/reichlab/dengue-data/blob/master/misc/cdc-csv-export.R ) and are committed to
    https://epimodeling.springloops.io/project/156725/svn/source/browse/-/trunk%2Farchives%2Fdengue-reports%2Fdata-summaries

    They currently must be processed (currently by hand) via these rough steps:
    1. download template
    2. correct template header from 'bin_end_not_incl' to 'bin_end_notincl'
    3. delete files where first date (data_version_date) was before 0525
    4. for files with duplicate second dates (timezeros), keep the one with the most recent first date
       (data_version_date)
    """
    start_time = timeit.default_timer()
    data_dir = Path(data_dir)
    click.echo(f"* make_thai_moph_project_app(): data_dir={data_dir}, truths_csv_file={truths_csv_file}")

    project = Project.objects.filter(name=THAI_PROJECT_NAME).first()
    if project:
        click.echo("* Deleting existing project: {}".format(project))
        delete_project_iteratively(project)

    # create the Project (and Users if necessary), including loading the template and creating Targets
    po_user, _, mo_user, _, _, _ = get_or_create_super_po_mo_users(is_create_super=False)
    # !is_validate to bypass Impetus non-uniform bins: [0, 1), [1, 10), [10, 20), ..., [1990, 2000):
    project = create_project_from_json(Path('forecast_app/tests/projects/thai-project.json'), po_user)
    project.model_owners.add(mo_user)
    project.save()
    click.echo("* Created project: {}".format(project))

    # make the model
    forecast_model = make_model(project, mo_user)
    click.echo("* created model: {}".format(forecast_model))

    # create TimeZeros. NB: we skip existing TimeZeros in case we are loading new forecasts. for is_season_start and
    # season_name we use year transitions: the first 2017 we encounter -> start of that year, etc.
    seen_years = []  # indicates a year has been processed. used to determine season starts
    for cdc_csv_file, timezero_date, _, data_version_date in cdc_csv_components_from_data_dir(data_dir):
        timezero_year = timezero_date.year
        is_season_start = timezero_year not in seen_years
        if is_season_start:
            seen_years.append(timezero_year)

        found_time_zero = project.time_zero_for_timezero_date(timezero_date)
        if found_time_zero:
            click.echo(f"s (TimeZero exists)\t{cdc_csv_file}\t")  # 's' from load_cdc_csv_forecasts_from_dir()
            continue

        TimeZero.objects.create(project=project,
                                timezero_date=str(timezero_date),
                                data_version_date=str(data_version_date) if data_version_date else None,
                                is_season_start=(True if is_season_start else False),
                                season_name=(str(timezero_year) if is_season_start else None))
    click.echo("- created TimeZeros: {}".format(project.timezeros.all()))

    # load the truth
    click.echo("- loading truth values")
    load_truth_data(project, Path('utils/dengue-truth-table-script/truths.csv'), is_convert_na_none=True)

    # load data
    click.echo("* Loading forecasts")
    forecast_model = project.models.first()
    forecasts = load_cdc_csv_forecasts_from_dir(forecast_model, data_dir, None)  # season_start_year
    click.echo("- Loading forecasts: loaded {} forecast(s)".format(len(forecasts)))

    # done
    click.echo(f"* Done. time: {timeit.default_timer() - start_time}")
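
# Hypothetical sketch (not part of the original): docstring step 4 above done in code instead of by hand -
# for files sharing a timezero_date, keep only the file whose data_version_date is most recent. It assumes
# cdc_csv_components_from_data_dir() yields (csv_file, timezero_date, _, data_version_date) as used above.
def _latest_file_per_timezero(data_dir):
    latest = {}  # timezero_date -> (data_version_date, csv_file)
    for csv_file, timezero_date, _, data_version_date in cdc_csv_components_from_data_dir(Path(data_dir)):
        current = latest.get(timezero_date)
        if current is None or (data_version_date and (current[0] is None or data_version_date > current[0])):
            latest[timezero_date] = (data_version_date, csv_file)
    return [csv_file for _, csv_file in latest.values()]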