def _make_docs_project(user):
    """
    Builds and returns the documentation example project from docs-project.json, loading its ground
    truth, one ForecastModel, and one Forecast filled from docs-predictions.json. Returns the tuple
    (project, time_zero, forecast_model, forecast).
    """
    # replace any project left over from a previous run so names stay unique
    existing_project = Project.objects.filter(name=DOCS_PROJECT_NAME).first()
    if existing_project:
        click.echo("* deleting previous project: {}".format(existing_project))
        existing_project.delete()

    project = create_project_from_json(Path('forecast_app/tests/projects/docs-project.json'), user)  # atomic
    project.name = DOCS_PROJECT_NAME
    project.save()
    load_truth_data(project, Path('forecast_app/tests/truth_data/docs-ground-truth.csv'))

    forecast_model = ForecastModel.objects.create(project=project, name='docs forecast model',
                                                  abbreviation='docs_mod')
    time_zero = project.timezeros.filter(timezero_date=datetime.date(2011, 10, 2)).first()
    forecast = Forecast.objects.create(forecast_model=forecast_model, source='docs-predictions.json',
                                       time_zero=time_zero, notes="a small prediction file")
    with open('forecast_app/tests/predictions/docs-predictions.json') as json_fp:
        prediction_json_io_dict = json.load(json_fp)
    load_predictions_from_json_io_dict(forecast, prediction_json_io_dict, False)  # atomic
    cache_forecast_metadata(forecast)  # atomic
    return project, time_zero, forecast_model, forecast
def test_cache_forecast_metadata_predictions(self):
    """Caching creates exactly one ForecastMetaPrediction row with per-class prediction counts."""
    meta_prediction_qs = ForecastMetaPrediction.objects.filter(forecast=self.forecast)
    self.assertEqual(0, meta_prediction_qs.count())

    cache_forecast_metadata(self.forecast)
    self.assertEqual(1, meta_prediction_qs.count())
    meta_row = meta_prediction_qs.first()  # only one row
    for exp_count, act_count in [(11, meta_row.point_count),
                                 (2, meta_row.named_count),
                                 (16, meta_row.bin_count),
                                 (23, meta_row.sample_count),
                                 (10, meta_row.quantile_count)]:
        self.assertEqual(exp_count, act_count)

    # second run first deletes existing rows, resulting in the same number as before
    cache_forecast_metadata(self.forecast)
    self.assertEqual(1, ForecastMetaPrediction.objects.filter(forecast=self.forecast).count())
def update(project_pk, no_enqueue):
    """
    A subcommand that updates all one or all projects' forecast metadata.

    :param project_pk: if a valid Project pk then only that project's metadata is updated. o/w updates all
    :param no_enqueue: controls whether the update will be immediate in the calling thread (blocks), or enqueued for RQ
    """
    from forecast_repo.settings.base import CACHE_FORECAST_METADATA_QUEUE_NAME  # avoid circular imports

    queue = django_rq.get_queue(CACHE_FORECAST_METADATA_QUEUE_NAME)
    if project_pk:
        projects = [get_object_or_404(Project, pk=project_pk)]
    else:
        projects = Project.objects.all()

    print("updating metadata")
    for project in projects:
        print(f"* {project}")
        for forecast_model in project.models.all():
            print(f"- {forecast_model}")
            for forecast in forecast_model.forecasts.all():
                if no_enqueue:
                    # blocking path: cache in this thread right now
                    print(f" = caching metadata (no enqueue): {forecast}")
                    cache_forecast_metadata(forecast)
                else:
                    # async path: hand the forecast pk to the RQ worker
                    print(f" = enqueuing caching metadata: {forecast}")
                    queue.enqueue(_cache_forecast_metadata_worker, forecast.pk)
    print("update done")
def load_cdc_csv_forecast_file(season_start_year, forecast_model, cdc_csv_file_path, time_zero):
    """
    Creates a new Forecast in forecast_model for time_zero from the passed cdc csv file. NB: does not
    check whether a Forecast already exists for time_zero and file_name. Is atomic so that an invalid
    forecast's data is not saved.

    :param season_start_year
    :param forecast_model: the ForecastModel to create the new Forecast in
    :param cdc_csv_file_path: string or Path to a CDC CSV forecast file. the CDC CSV file format is documented at
        https://predict.cdc.gov/api/v1/attachments/flusight/flu_challenge_2016-17_update.docx
    :param time_zero: the TimeZero this forecast applies to
    :return returns a new Forecast for it
    :raises RuntimeError: if the data could not be loaded
    """
    project_timezeros = forecast_model.project.timezeros.all()
    if time_zero not in project_timezeros:
        raise RuntimeError(f"time_zero was not in project. time_zero={time_zero}, "
                           f"project timezeros={project_timezeros}")

    csv_path = Path(cdc_csv_file_path)
    new_forecast = Forecast.objects.create(forecast_model=forecast_model, time_zero=time_zero,
                                           source=csv_path.name)
    with open(csv_path) as cdc_csv_fp:
        json_io_dict = json_io_dict_from_cdc_csv_file(season_start_year, cdc_csv_fp)
        load_predictions_from_json_io_dict(new_forecast, json_io_dict, is_validate_cats=False)  # atomic
        cache_forecast_metadata(new_forecast)  # atomic
    return new_forecast
def test_cache_forecast_metadata_second_forecast(self):
    """Caching metadata for one forecast must not create rows for a sibling forecast."""
    # make sure only the passed forecast is cached
    forecast2 = Forecast.objects.create(forecast_model=self.forecast_model, source='docs-predictions.json',
                                        time_zero=self.time_zero, notes="a small prediction file")
    with open('forecast_app/tests/predictions/docs-predictions.json') as json_fp:
        load_predictions_from_json_io_dict(forecast2, json.load(json_fp), False)

    # neither forecast has any cached metadata yet
    for the_forecast in [self.forecast, forecast2]:
        for meta_class in [ForecastMetaPrediction, ForecastMetaUnit, ForecastMetaTarget]:
            self.assertEqual(0, meta_class.objects.filter(forecast=the_forecast).count())

    cache_forecast_metadata(self.forecast)

    # only self.forecast gained rows; forecast2 remains untouched
    for meta_class, exp_count in [(ForecastMetaPrediction, 1), (ForecastMetaUnit, 3),
                                  (ForecastMetaTarget, 5)]:
        self.assertEqual(exp_count, meta_class.objects.filter(forecast=self.forecast).count())
    for meta_class in [ForecastMetaPrediction, ForecastMetaUnit, ForecastMetaTarget]:
        self.assertEqual(0, meta_class.objects.filter(forecast=forecast2).count())
def test_cache_forecast_metadata_targets(self):
    """Caching creates one ForecastMetaTarget row per target in the forecast, idempotently."""
    meta_target_qs = ForecastMetaTarget.objects.filter(forecast=self.forecast)
    self.assertEqual(0, meta_target_qs.count())

    cache_forecast_metadata(self.forecast)
    self.assertEqual(5, meta_target_qs.count())
    self.assertEqual(set(self.project.targets.all()),
                     {meta_target.target for meta_target in meta_target_qs})

    # second run first deletes existing rows, resulting in the same number as before
    cache_forecast_metadata(self.forecast)
    self.assertEqual(5, meta_target_qs.count())
def test_cache_forecast_metadata_clears_first(self):
    """Caching twice yields the same row counts as once, and clear_forecast_metadata() removes them all."""
    meta_class_to_exp_count = {ForecastMetaPrediction: 1, ForecastMetaUnit: 3, ForecastMetaTarget: 5}
    for meta_class in meta_class_to_exp_count:
        self.assertEqual(0, meta_class.objects.filter(forecast=self.forecast).count())

    # first run creates rows, second run first deletes existing rows, resulting in the same number as before
    for _ in range(2):
        cache_forecast_metadata(self.forecast)
        for meta_class, exp_count in meta_class_to_exp_count.items():
            self.assertEqual(exp_count, meta_class.objects.filter(forecast=self.forecast).count())

    clear_forecast_metadata(self.forecast)
    for meta_class in meta_class_to_exp_count:
        self.assertEqual(0, meta_class.objects.filter(forecast=self.forecast).count())
def test_forecast_metadata_counts_for_f_ids(self):
    """forecast_metadata_counts_for_f_ids() returns cached per-forecast counts keyed by forecast id."""
    forecast2 = Forecast.objects.create(forecast_model=self.forecast_model,
                                        source='docs-predictions-non-dup.json',
                                        time_zero=self.time_zero, notes="a small prediction file")
    with open('forecast_app/tests/predictions/docs-predictions-non-dup.json') as json_fp:
        load_predictions_from_json_io_dict(forecast2, json.load(json_fp), is_validate_cats=False)
    cache_forecast_metadata(self.forecast)
    cache_forecast_metadata(forecast2)

    forecast_id_to_counts = forecast_metadata_counts_for_f_ids(self.forecast_model.forecasts.all())
    # f_id: [(point_count, named_count, bin_count, sample_count, quantile_count), num_names, num_targets]
    # { 4: [(11, 2, 6, 7, 3), 3, 5 ],
    #   5: [(11, 2, 6, 7, 3), 3, 5 ]}
    self.assertEqual(sorted([self.forecast.id, forecast2.id]), sorted(forecast_id_to_counts.keys()))
    exp_counts = [(11, 2, 6, 7, 3), 3, 5]  # both forecasts have identical prediction counts
    self.assertEqual(exp_counts, forecast_id_to_counts[self.forecast.id])
    self.assertEqual(exp_counts, forecast_id_to_counts[forecast2.id])
def test_metadata_for_forecast(self):
    """forecast_metadata() returns the cached (prediction row, unit queryset, target queryset) triple."""
    cache_forecast_metadata(self.forecast)
    meta_prediction, meta_unit_qs, meta_target_qs = forecast_metadata(self.forecast)

    self.assertIsInstance(meta_prediction, ForecastMetaPrediction)
    for exp_count, act_count in [(11, meta_prediction.point_count),
                                 (2, meta_prediction.named_count),
                                 (6, meta_prediction.bin_count),
                                 (7, meta_prediction.sample_count),
                                 (3, meta_prediction.quantile_count)]:
        self.assertEqual(exp_count, act_count)

    self.assertIsInstance(meta_unit_qs, QuerySet)
    self.assertEqual(3, len(meta_unit_qs))
    self.assertEqual({ForecastMetaUnit}, set(map(type, meta_unit_qs)))

    self.assertIsInstance(meta_target_qs, QuerySet)
    self.assertEqual(5, len(meta_target_qs))
    self.assertEqual({ForecastMetaTarget}, set(map(type, meta_target_qs)))
def test_target_rows_for_project(self):
    """
    Exercises target_rows_for_project() over four cases: one model/one timezero, a second forecast for a
    newer timezero, a second model with a single-target forecast, and finally no forecasts at all.
    """
    _, _, po_user, _, _, _, _, _ = get_or_create_super_po_mo_users(is_create_super=True)
    # recall that _make_docs_project() calls cache_forecast_metadata():
    project, time_zero, forecast_model, forecast = _make_docs_project(po_user)  # 2011, 10, 2

    # case: one model with one timezero that has five groups of one target each.
    # recall: `group_targets(project.targets.all())` (only one target/group in this case):
    # {'pct next week': [(1, 'pct next week', 'continuous', True, 1, 'percent')],
    #  'cases next week': [(2, 'cases next week', 'discrete', True, 2, 'cases')],
    #  'season severity': [(3, 'season severity', 'nominal', False, None, None)],
    #  'above baseline': [(4, 'above baseline', 'binary', False, None, None)],
    #  'Season peak week': [(5, 'Season peak week', 'date', False, None, 'week')]}
    exp_rows = [(forecast_model, str(time_zero.timezero_date), forecast.id, 'Season peak week', 1),
                (forecast_model, str(time_zero.timezero_date), forecast.id, 'above baseline', 1),
                (forecast_model, str(time_zero.timezero_date), forecast.id, 'cases next week', 1),
                (forecast_model, str(time_zero.timezero_date), forecast.id, 'pct next week', 1),
                (forecast_model, str(time_zero.timezero_date), forecast.id, 'season severity', 1)]
    act_rows = [(row[0], str(row[1]), row[2], row[3], row[4]) for row in target_rows_for_project(project)]
    # sort both sides by model id since row ordering is not part of the contract being tested
    self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id), sorted(act_rows, key=lambda _: _[0].id))

    # case: add a second forecast for a newer timezero
    time_zero2 = TimeZero.objects.create(project=project, timezero_date=datetime.date(2011, 10, 3))
    forecast2 = Forecast.objects.create(forecast_model=forecast_model, source='docs-predictions.json',
                                        time_zero=time_zero2, notes="a small prediction file")
    with open('forecast_app/tests/predictions/docs-predictions.json') as fp:
        json_io_dict_in = json.load(fp)
        load_predictions_from_json_io_dict(forecast2, json_io_dict_in, False)
    cache_forecast_metadata(forecast2)  # required by _forecast_ids_to_present_unit_or_target_id_sets()
    # the newer forecast's rows replace the older one's in the expected output
    exp_rows = [(forecast_model, str(time_zero2.timezero_date), forecast2.id, 'Season peak week', 1),
                (forecast_model, str(time_zero2.timezero_date), forecast2.id, 'above baseline', 1),
                (forecast_model, str(time_zero2.timezero_date), forecast2.id, 'cases next week', 1),
                (forecast_model, str(time_zero2.timezero_date), forecast2.id, 'pct next week', 1),
                (forecast_model, str(time_zero2.timezero_date), forecast2.id, 'season severity', 1)]
    act_rows = [(row[0], str(row[1]), row[2], row[3], row[4]) for row in target_rows_for_project(project)]
    self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id), sorted(act_rows, key=lambda _: _[0].id))

    # case: add a second model with only forecasts for one target
    forecast_model2 = ForecastModel.objects.create(project=project, name=forecast_model.name + '2',
                                                   abbreviation=forecast_model.abbreviation + '2')
    time_zero3 = TimeZero.objects.create(project=project, timezero_date=datetime.date(2011, 10, 4))
    forecast3 = Forecast.objects.create(forecast_model=forecast_model2, source='docs-predictions.json',
                                        time_zero=time_zero3, notes="a small prediction file")
    # a minimal prediction dict: one point prediction for a single unit/target
    json_io_dict = {"meta": {},
                    "predictions": [{"unit": "location1",
                                     "target": "pct next week",
                                     "class": "point",
                                     "prediction": {"value": 2.1}}]}
    load_predictions_from_json_io_dict(forecast3, json_io_dict, False)
    cache_forecast_metadata(forecast3)  # required by _forecast_ids_to_present_unit_or_target_id_sets()
    exp_rows = exp_rows + [(forecast_model2, str(time_zero3.timezero_date), forecast3.id,
                            'pct next week', 1)]
    act_rows = [(row[0], str(row[1]), row[2], row[3], row[4]) for row in target_rows_for_project(project)]
    self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id), sorted(act_rows, key=lambda _: _[0].id))

    # case: no forecasts — each model collapses to a single empty row
    forecast.delete()
    forecast2.delete()
    forecast3.delete()
    exp_rows = [(forecast_model, '', '', '', 0), (forecast_model2, '', '', '', 0)]
    act_rows = [(row[0], str(row[1]), row[2], row[3], row[4]) for row in target_rows_for_project(project)]
    self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id), sorted(act_rows, key=lambda _: _[0].id))
def test_unit_rows_for_project(self):
    """
    Exercises unit_rows_for_project() over four cases: one model/one timezero, a second forecast for a
    newer timezero, a second model with a single-unit forecast, and finally no forecasts at all (which
    previously exposed a SQL syntax bug — see last case).
    """
    _, _, po_user, _, _, _, _, _ = get_or_create_super_po_mo_users(is_create_super=True)
    # recall that _make_docs_project() calls cache_forecast_metadata():
    project, time_zero, forecast_model, forecast = _make_docs_project(po_user)  # 2011, 10, 2

    # case: one model with one timezero. recall rows:
    # (model, newest_forecast_tz_date, newest_forecast_id,
    #  num_present_unit_names, present_unit_names, missing_unit_names):
    exp_rows = [(forecast_model, str(time_zero.timezero_date), forecast.id, 3, '(all)', '')]
    act_rows = [(row[0], str(row[1]), row[2], row[3], row[4], row[5])
                for row in unit_rows_for_project(project)]
    self.assertEqual(exp_rows, act_rows)

    # case: add a second forecast for a newer timezero
    time_zero2 = TimeZero.objects.create(project=project, timezero_date=datetime.date(2011, 10, 3))
    forecast2 = Forecast.objects.create(forecast_model=forecast_model, source='docs-predictions.json',
                                        time_zero=time_zero2, notes="a small prediction file")
    with open('forecast_app/tests/predictions/docs-predictions.json') as fp:
        json_io_dict_in = json.load(fp)
        load_predictions_from_json_io_dict(forecast2, json_io_dict_in, False)
    cache_forecast_metadata(forecast2)  # required by _forecast_ids_to_present_unit_or_target_id_sets()
    # only the newest forecast per model is reported
    exp_rows = [(forecast_model, str(time_zero2.timezero_date), forecast2.id, 3, '(all)', '')]
    act_rows = [(row[0], str(row[1]), row[2], row[3], row[4], row[5])
                for row in unit_rows_for_project(project)]
    self.assertEqual(exp_rows, act_rows)

    # case: add a second model with only forecasts for one unit
    forecast_model2 = ForecastModel.objects.create(project=project, name=forecast_model.name + '2',
                                                   abbreviation=forecast_model.abbreviation + '2')
    time_zero3 = TimeZero.objects.create(project=project, timezero_date=datetime.date(2011, 10, 4))
    forecast3 = Forecast.objects.create(forecast_model=forecast_model2, source='docs-predictions.json',
                                        time_zero=time_zero3, notes="a small prediction file")
    # a minimal prediction dict: one point prediction for a single unit/target
    json_io_dict = {"meta": {},
                    "predictions": [{"unit": "location1",
                                     "target": "pct next week",
                                     "class": "point",
                                     "prediction": {"value": 2.1}}]}
    load_predictions_from_json_io_dict(forecast3, json_io_dict, False)
    cache_forecast_metadata(forecast3)  # required by _forecast_ids_to_present_unit_or_target_id_sets()
    # second model's forecast covers only 'location1'; the other two units are reported missing
    exp_rows = [(forecast_model, str(time_zero2.timezero_date), forecast2.id, 3, '(all)', ''),
                (forecast_model2, str(time_zero3.timezero_date), forecast3.id, 1, 'location1',
                 'location2, location3')]
    act_rows = [(row[0], str(row[1]), row[2], row[3], row[4], row[5])
                for row in unit_rows_for_project(project)]
    self.assertEqual(exp_rows, act_rows)

    # case: exposes bug: syntax error when no forecasts in project:
    #   psycopg2.errors.SyntaxError: syntax error at or near ")"
    #   LINE 6: WHERE f.id IN ()
    forecast.delete()
    forecast2.delete()
    forecast3.delete()
    # (model, newest_forecast_tz_date, newest_forecast_id, num_present_unit_names, present_unit_names,
    #  missing_unit_names):
    exp_rows = [(forecast_model, 'None', None, 0, '', '(all)'),
                (forecast_model2, 'None', None, 0, '', '(all)')]
    act_rows = [(row[0], str(row[1]), row[2], row[3], row[4], row[5])
                for row in unit_rows_for_project(project)]
    self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id), sorted(act_rows, key=lambda _: _[0].id))
def test_is_forecast_metadata_available(self):
    """is_forecast_metadata_available() flips from False to True once metadata has been cached."""
    self.assertFalse(is_forecast_metadata_available(self.forecast))
    cache_forecast_metadata(self.forecast)
    self.assertTrue(is_forecast_metadata_available(self.forecast))
def test_calc_interval_20_docs_project_additional_version(self):
    """
    Checks _calculate_interval_score_values() on the docs project: two truths give two ScoreValues,
    a second timezero doubles them, and adding a new forecast *version* for the first timezero leaves
    the counts unchanged (only the latest version should be scored).
    """
    Score.ensure_all_scores_exist()
    interval_20_score = Score.objects.filter(abbreviation='interval_20').first()
    self.assertIsNotNone(interval_20_score)

    _, _, po_user, _, _, _, _, _ = get_or_create_super_po_mo_users(is_create_super=True)
    project, time_zero, forecast_model, forecast = _make_docs_project(po_user)
    unit_loc2 = project.units.filter(name='location2').first()
    targ_pct_next_wk = project.targets.filter(name='pct next week').first()  # continuous
    unit_loc3 = project.units.filter(name='location3').first()
    targ_cases_next_wk = project.targets.filter(name='cases next week').first()  # discrete

    # add two truths that result in two ScoreValues
    project.delete_truth_data()
    TruthData.objects.create(time_zero=time_zero, unit=unit_loc2, target=targ_pct_next_wk,
                             value_f=2.2)  # 2/7)
    TruthData.objects.create(time_zero=time_zero, unit=unit_loc3, target=targ_cases_next_wk,
                             value_i=50)  # 6/7
    ScoreValue.objects \
        .filter(score=interval_20_score, forecast__forecast_model=forecast_model) \
        .delete()  # usually done by update_score_for_model()
    _calculate_interval_score_values(interval_20_score, forecast_model, 0.5)
    self.assertEqual(2, interval_20_score.values.count())
    self.assertEqual([2.8, 50], sorted(interval_20_score.values.all().values_list('value', flat=True)))

    # add a second forecast for a newer timezero
    time_zero2 = TimeZero.objects.create(project=project, timezero_date=datetime.date(2011, 10, 3))
    forecast2 = Forecast.objects.create(forecast_model=forecast_model, source='docs-predictions.json',
                                        time_zero=time_zero2, notes="a small prediction file")
    with open('forecast_app/tests/predictions/docs-predictions.json') as fp:
        json_io_dict_in = json.load(fp)
        load_predictions_from_json_io_dict(forecast2, json_io_dict_in, False)
    TruthData.objects.create(time_zero=time_zero2, unit=unit_loc2, target=targ_pct_next_wk,
                             value_f=2.2)  # 2/7)
    TruthData.objects.create(time_zero=time_zero2, unit=unit_loc3, target=targ_cases_next_wk,
                             value_i=50)  # 6/7
    ScoreValue.objects \
        .filter(score=interval_20_score, forecast__forecast_model=forecast_model) \
        .delete()  # usually done by update_score_for_model()
    _calculate_interval_score_values(interval_20_score, forecast_model, 0.5)
    self.assertEqual(4, interval_20_score.values.count())

    # finally, add a new version to timezero. back-date the existing forecasts' issue_dates so the
    # new Forecast (created "now") becomes the latest version for time_zero
    forecast.issue_date = forecast.time_zero.timezero_date
    forecast.save()
    forecast2.issue_date = forecast2.time_zero.timezero_date
    forecast2.save()
    forecast2 = Forecast.objects.create(forecast_model=forecast_model, source='f2', time_zero=time_zero)
    with open('forecast_app/tests/predictions/docs-predictions.json') as fp:
        json_io_dict_in = json.load(fp)
        load_predictions_from_json_io_dict(forecast2, json_io_dict_in, False)  # atomic
    cache_forecast_metadata(forecast2)  # atomic

    # s/b no change from previous
    ScoreValue.objects \
        .filter(score=interval_20_score, forecast__forecast_model=forecast_model) \
        .delete()  # usually done by update_score_for_model()
    # RuntimeError: >2 lower_upper_interval_values: [2.2, 2.2, 5.0, 5.0]. timezero_id=4, unit_id=5, target_id=6
    _calculate_interval_score_values(interval_20_score, forecast_model, 0.5)
    self.assertEqual(4, interval_20_score.values.count())