def test_unsuccessful_jobs_fail_evaluation(self):
    submission = SubmissionFactory()
    evaluation = EvaluationFactory(submission=submission)
    AlgorithmEvaluationFactory(
        status=AlgorithmEvaluation.SUCCESS, submission=submission
    )
    AlgorithmEvaluationFactory(
        status=AlgorithmEvaluation.FAILURE, submission=submission
    )

    set_evaluation_inputs(evaluation_pk=evaluation.pk)

    evaluation.refresh_from_db()
    assert evaluation.status == evaluation.FAILURE
    assert (
        evaluation.output == "The algorithm failed to execute on 1 images."
    )
def test_set_evaluation_inputs(self):
    submission = SubmissionFactory()
    evaluation = EvaluationFactory(submission=submission)
    algorithms = AlgorithmEvaluationFactory.create_batch(
        2, status=AlgorithmEvaluation.SUCCESS, submission=submission
    )
    civs = ComponentInterfaceValueFactory.create_batch(2)

    for alg, civ in zip(algorithms, civs):
        alg.outputs.set([civ])

    set_evaluation_inputs(evaluation_pk=evaluation.pk)

    evaluation.refresh_from_db()
    assert evaluation.status == evaluation.PENDING
    assert evaluation.output == ""
    assert list(evaluation.inputs.all()) == civs
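
# The two tests above exercise set_evaluation_inputs. As a rough guide to the
# behaviour being asserted, the task could look something like the minimal
# sketch below. This is a hypothetical illustration only (the "_sketch" name
# is ours): it assumes the Evaluation and AlgorithmEvaluation models used in
# these tests, and the real task will also schedule the evaluation and may
# differ in detail.
def _set_evaluation_inputs_sketch(*, evaluation_pk):
    evaluation = Evaluation.objects.get(pk=evaluation_pk)
    algorithm_evaluations = AlgorithmEvaluation.objects.filter(
        submission=evaluation.submission
    )
    unsuccessful = algorithm_evaluations.exclude(
        status=AlgorithmEvaluation.SUCCESS
    )

    if unsuccessful.exists():
        # Any failed algorithm job fails the whole evaluation
        evaluation.status = Evaluation.FAILURE
        evaluation.output = (
            f"The algorithm failed to execute on "
            f"{unsuccessful.count()} images."
        )
        evaluation.save()
    else:
        # Use the outputs of the successful algorithm jobs as the
        # inputs of the evaluation
        evaluation.inputs.set(
            [
                civ
                for ae in algorithm_evaluations
                for civ in ae.outputs.all()
            ]
        )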
def test_setting_display_all_metrics(client, challenge_set):
    metrics = {"public": 3245.235, "secret": 4328.432, "extra": 2144.312}

    e = EvaluationFactory(
        submission__challenge=challenge_set.challenge,
        status=Evaluation.SUCCESS,
    )
    e.create_result(result=metrics)

    challenge_set.challenge.evaluation_config.score_jsonpath = "public"
    challenge_set.challenge.evaluation_config.extra_results_columns = [
        {"title": "extra", "path": "extra", "order": "asc"}
    ]
    challenge_set.challenge.evaluation_config.display_all_metrics = True
    challenge_set.challenge.evaluation_config.save()

    response = get_view_for_user(
        client=client,
        viewname="evaluation:detail",
        challenge=challenge_set.challenge,
        reverse_kwargs={"pk": e.pk},
    )

    assert response.status_code == 200
    assert str(metrics["public"]) in response.rendered_content
    assert str(metrics["extra"]) in response.rendered_content
    assert str(metrics["secret"]) in response.rendered_content

    challenge_set.challenge.evaluation_config.display_all_metrics = False
    challenge_set.challenge.evaluation_config.save()

    response = get_view_for_user(
        client=client,
        viewname="evaluation:detail",
        challenge=challenge_set.challenge,
        reverse_kwargs={"pk": e.pk},
    )

    assert response.status_code == 200
    assert str(metrics["public"]) in response.rendered_content
    assert str(metrics["extra"]) in response.rendered_content
    assert str(metrics["secret"]) not in response.rendered_content
def test_public_private_default():
    c = ChallengeFactory()

    r1 = EvaluationFactory(submission__challenge=c)
    assert r1.published is True

    c.evaluation_config.auto_publish_new_results = False
    c.evaluation_config.save()

    r2 = EvaluationFactory(submission__challenge=c)
    assert r2.published is False

    # The public/private status should only update on the first save
    r1.save()
    assert r1.published is True
def test_evaluation_detail(client, eval_challenge_set):
    submission = SubmissionFactory(
        challenge=eval_challenge_set.challenge_set.challenge,
        creator=eval_challenge_set.challenge_set.participant,
    )
    e = EvaluationFactory(submission=submission)

    validate_open_view(
        viewname="evaluation:detail",
        challenge_set=eval_challenge_set.challenge_set,
        reverse_kwargs={"pk": e.pk},
        client=client,
    )
def test_duration():
    j = AlgorithmJobFactory()
    _ = EvaluationFactory()

    jbs = Job.objects.with_duration()
    assert jbs[0].duration is None
    assert Job.objects.average_duration() is None

    now = timezone.now()
    j.started_at = now - timedelta(minutes=5)
    j.completed_at = now
    j.save()

    jbs = Job.objects.with_duration()
    assert jbs[0].duration == timedelta(minutes=5)
    assert Job.objects.average_duration() == timedelta(minutes=5)

    _ = AlgorithmJobFactory()
    assert Job.objects.average_duration() == timedelta(minutes=5)
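
# test_duration above relies on with_duration()/average_duration() on the Job
# manager. A minimal sketch of what such queryset methods might look like is
# shown below; this is an assumption for illustration (including the
# "Sketch" class name), not the project's actual implementation.
from django.db.models import Avg, DurationField, ExpressionWrapper, F, QuerySet


class JobQuerySetSketch(QuerySet):
    def with_duration(self):
        # Annotate each job with completed_at - started_at
        # (None if either timestamp is unset)
        return self.annotate(
            duration=ExpressionWrapper(
                F("completed_at") - F("started_at"),
                output_field=DurationField(),
            )
        )

    def average_duration(self):
        # Average of the annotated durations across the queryset
        return self.with_duration().aggregate(Avg("duration"))[
            "duration__avg"
        ]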
def test_null_results():
    challenge = ChallengeFactory()

    results = [{"a": 0.6}, {"a": None}]

    queryset = [
        EvaluationFactory(
            submission__challenge=challenge, status=Evaluation.SUCCESS
        )
        for _ in range(len(results))
    ]

    for e, r in zip(queryset, results):
        e.create_result(result=r)

    challenge.evaluation_config.score_jsonpath = "a"
    challenge.evaluation_config.result_display_choice = Config.ALL
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [1, 0]
    assert_ranks(queryset, expected_ranks)
def test_results_display():
    challenge = ChallengeFactory()

    user1 = UserFactory()
    user2 = UserFactory()
    metrics = "metrics"
    creator = "creator"
    results = [
        {metrics: {"b": 0.3}, creator: user1},  # Invalid result
        {metrics: {"a": 0.6}, creator: user1},
        {metrics: {"a": 0.4}, creator: user1},
        {metrics: {"a": 0.2}, creator: user1},
        {metrics: {"a": 0.1}, creator: user2},
        {metrics: {"a": 0.5}, creator: user2},
        {metrics: {"a": 0.3}, creator: user2},
    ]

    queryset = [
        EvaluationFactory(
            submission__challenge=challenge,
            submission__creator=r[creator],
            status=Evaluation.SUCCESS,
        )
        for r in results
    ]

    for e, r in zip(queryset, results):
        e.create_result(result=r[metrics])

    challenge.evaluation_config.score_jsonpath = "a"
    challenge.evaluation_config.result_display_choice = Config.ALL
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 1, 3, 5, 6, 2, 4]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.MOST_RECENT
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 0, 0, 2, 0, 0, 1]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.BEST
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 1, 0, 0, 0, 2, 0]
    assert_ranks(queryset, expected_ranks)

    # Now test the reverse sort order
    challenge.evaluation_config.score_default_sort = (
        challenge.evaluation_config.ASCENDING
    )
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 0, 0, 2, 1, 0, 0]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.MOST_RECENT
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 0, 0, 1, 0, 0, 2]
    assert_ranks(queryset, expected_ranks)
def test_calculate_ranks():
    challenge = ChallengeFactory()

    results = [
        # Warning: Do not change these values without updating the
        # expected ranks below.
        {"a": 0.0, "b": 0.0},
        {"a": 0.5, "b": 0.2},
        {"a": 1.0, "b": 0.3},
        {"a": 0.7, "b": 0.4},
        {"a": 0.5, "b": 0.5},
        # The following two are invalid if relative ranking is used
        {"a": 1.0},
        {"b": 0.3},
        # Add a valid, but unpublished result
        {"a": 0.1, "b": 0.1},
    ]

    queryset = [
        EvaluationFactory(
            submission__challenge=challenge, status=Evaluation.SUCCESS
        )
        for _ in range(len(results))
    ]

    for e, r in zip(queryset, results):
        e.create_result(result=r)

    # Unpublish the last result
    queryset[-1].published = False
    queryset[-1].save()

    expected = {
        Config.DESCENDING: {
            Config.ABSOLUTE: {
                Config.DESCENDING: {
                    "ranks": [6, 4, 1, 3, 4, 1, 0, 0],
                    "rank_scores": [6, 4, 1, 3, 4, 1, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [6, 4, 1, 3, 4, 1, 0, 0],
                    "rank_scores": [6, 4, 1, 3, 4, 1, 0, 0],
                },
            },
            Config.MEDIAN: {
                Config.DESCENDING: {
                    "ranks": [5, 4, 1, 1, 1, 0, 0, 0],
                    "rank_scores": [5, 3.5, 2, 2, 2, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [3, 2, 1, 3, 5, 0, 0, 0],
                    "rank_scores": [3, 2.5, 2, 3, 4, 0, 0, 0],
                },
            },
            Config.MEAN: {
                Config.DESCENDING: {
                    "ranks": [5, 4, 1, 1, 1, 0, 0, 0],
                    "rank_scores": [5, 3.5, 2, 2, 2, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [3, 2, 1, 3, 5, 0, 0, 0],
                    "rank_scores": [3, 2.5, 2, 3, 4, 0, 0, 0],
                },
            },
        },
        Config.ASCENDING: {
            Config.ABSOLUTE: {
                Config.DESCENDING: {
                    "ranks": [1, 2, 5, 4, 2, 5, 0, 0],
                    "rank_scores": [1, 2, 5, 4, 2, 5, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 5, 4, 2, 5, 0, 0],
                    "rank_scores": [1, 2, 5, 4, 2, 5, 0, 0],
                },
            },
            Config.MEDIAN: {
                Config.DESCENDING: {
                    "ranks": [2, 2, 5, 2, 1, 0, 0, 0],
                    "rank_scores": [3, 3, 4, 3, 1.5, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 4, 4, 3, 0, 0, 0],
                    "rank_scores": [1, 2, 4, 4, 3.5, 0, 0, 0],
                },
            },
            Config.MEAN: {
                Config.DESCENDING: {
                    "ranks": [2, 2, 5, 2, 1, 0, 0, 0],
                    "rank_scores": [3, 3, 4, 3, 1.5, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 4, 4, 3, 0, 0, 0],
                    "rank_scores": [1, 2, 4, 4, 3.5, 0, 0, 0],
                },
            },
        },
    }

    for score_method in (Config.ABSOLUTE, Config.MEDIAN, Config.MEAN):
        for a_order in (Config.DESCENDING, Config.ASCENDING):
            for b_order in (Config.DESCENDING, Config.ASCENDING):
                challenge.evaluation_config.score_jsonpath = "a"
                challenge.evaluation_config.scoring_method_choice = (
                    score_method
                )
                challenge.evaluation_config.score_default_sort = a_order
                challenge.evaluation_config.extra_results_columns = [
                    {"path": "b", "title": "b", "order": b_order}
                ]
                challenge.evaluation_config.save()

                calculate_ranks(challenge_pk=challenge.pk)

                assert_ranks(
                    queryset,
                    expected[a_order][score_method][b_order]["ranks"],
                    expected[a_order][score_method][b_order]["rank_scores"],
                )
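
# assert_ranks is a helper defined elsewhere in this test module. A minimal
# sketch of the behaviour the ranking tests above depend on might look like
# the following; this is a hypothetical reconstruction and the real helper
# may differ.
def _assert_ranks_sketch(queryset, expected_ranks, expected_rank_scores=None):
    # Ranks are written to the database by calculate_ranks, so reload first
    for e in queryset:
        e.refresh_from_db()

    assert [e.rank for e in queryset] == expected_ranks

    if expected_rank_scores is not None:
        assert [e.rank_score for e in queryset] == expected_rank_scores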
def submission_and_evaluation(*, challenge, creator):
    """Creates a submission and an evaluation for that submission."""
    s = SubmissionFactory(challenge=challenge, creator=creator)
    e = EvaluationFactory(submission=s)
    return s, e
def test_mark_long_running_jobs_failed():
    # Started jobs should be unaffected
    j1 = EvaluationFactory()
    j1.update_status(status=EvaluationJob.STARTED)

    # Long running jobs should be marked as failed
    j2 = EvaluationFactory()
    j2.update_status(status=EvaluationJob.STARTED)
    j2.started_at = timezone.now() - timedelta(days=1)
    j2.save()

    # A job that has not been started should not be marked as failed, even
    # if it is outside the celery task limit
    j3 = EvaluationFactory()
    j3.created -= timedelta(days=1)
    j3.save()

    # Algorithm jobs should not be affected
    a = AlgorithmJobFactory()
    a.update_status(status=AlgorithmJob.STARTED)

    assert EvaluationJob.objects.all().count() == 3
    assert (
        AlgorithmJob.objects.filter(status=AlgorithmJob.STARTED).count() == 1
    )
    assert (
        EvaluationJob.objects.filter(status=EvaluationJob.FAILURE).count()
        == 0
    )
    assert j1.status == EvaluationJob.STARTED
    assert j2.status == EvaluationJob.STARTED
    assert j3.status == EvaluationJob.PENDING
    assert a.status == AlgorithmJob.STARTED

    mark_long_running_jobs_failed(
        app_label="evaluation", model_name="evaluation"
    )

    j1.refresh_from_db()
    j2.refresh_from_db()
    j3.refresh_from_db()
    a.refresh_from_db()

    assert j1.status == EvaluationJob.STARTED
    assert j2.status == EvaluationJob.FAILURE
    assert j3.status == EvaluationJob.PENDING
    assert a.status == AlgorithmJob.STARTED
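
# For reference, mark_long_running_jobs_failed could be implemented roughly as
# below. This is a hedged sketch only: the model lookup via apps.get_model and
# the task time limit taken from a CELERY_TASK_TIME_LIMIT setting are
# assumptions, and the real task may differ.
from datetime import timedelta

from django.apps import apps
from django.conf import settings
from django.utils import timezone


def _mark_long_running_jobs_failed_sketch(*, app_label, model_name):
    model = apps.get_model(app_label=app_label, model_name=model_name)
    deadline = timezone.now() - timedelta(
        seconds=settings.CELERY_TASK_TIME_LIMIT  # assumed setting name
    )

    # Only jobs of this model that were actually started before the deadline
    # are failed; pending jobs and jobs of other models are left untouched.
    for job in model.objects.filter(
        status=model.STARTED, started_at__lt=deadline
    ):
        job.update_status(status=model.FAILURE)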