def test_null_results(settings):
    # Override the celery settings
    settings.task_eager_propagates = (True,)
    settings.task_always_eager = (True,)

    challenge = ChallengeFactory()

    with mute_signals(post_save):
        user1 = UserFactory()
        queryset = (
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.6},
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": None},
                job__submission__creator=user1,
            ),
        )

    challenge.evaluation_config.score_jsonpath = "a"
    challenge.evaluation_config.result_display_choice = Config.ALL
    challenge.evaluation_config.save()

    expected_ranks = [1, 0]
    assert_ranks(queryset, expected_ranks)

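# The tests in this module call an `assert_ranks` helper that is defined
# elsewhere and not shown in this section. The sketch below (note the
# clearly hypothetical name `_assert_ranks_sketch` and the assumed `rank`
# and `rank_score` fields) only illustrates the kind of check that helper
# presumably performs; it is not the project's implementation.
def _assert_ranks_sketch(queryset, expected_ranks, expected_rank_scores=None):
    for result, expected_rank in zip(queryset, expected_ranks):
        # Ranks are recalculated when results or the config are saved (the
        # celery task runs eagerly in these tests), so reload from the db.
        result.refresh_from_db()
        assert result.rank == expected_rank

    if expected_rank_scores is not None:
        for result, expected_score in zip(queryset, expected_rank_scores):
            assert result.rank_score == expected_score
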
def test_public_private_default():
    c = ChallengeFactory()

    r1 = ResultFactory(job__submission__challenge=c)
    assert r1.published == True

    c.evaluation_config.auto_publish_new_results = False
    c.evaluation_config.save()

    r2 = ResultFactory(job__submission__challenge=c)
    assert r2.published == False

    # The public/private status should only update on first save
    r1.save()
    assert r1.published == True

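# A minimal sketch of the behaviour asserted above, assuming the published
# flag is derived from `auto_publish_new_results` only when the result is
# first created. The helper name `_default_published_sketch` and the
# `adding` flag are illustrative; this is not the project's actual save()
# logic.
def _default_published_sketch(result, *, adding):
    if adding:
        # First save: take the default from the challenge's config.
        return (
            result.job.submission.challenge
            .evaluation_config.auto_publish_new_results
        )
    # Later saves keep whatever value is already stored.
    return result.published
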
def test_result_detail(client, EvalChallengeSet):
    submission = SubmissionFactory(
        challenge=EvalChallengeSet.ChallengeSet.challenge,
        creator=EvalChallengeSet.ChallengeSet.participant,
    )
    job = JobFactory(submission=submission)
    result = ResultFactory(job=job)

    validate_open_view(
        viewname="evaluation:result-detail",
        challenge_set=EvalChallengeSet.ChallengeSet,
        reverse_kwargs={"pk": result.pk},
        client=client,
    )

def test_setting_display_all_metrics(client, ChallengeSet):
    metrics = {"public": 3245.235, "secret": 4328.432, "extra": 2144.312}

    r = ResultFactory(
        metrics=metrics, job__submission__challenge=ChallengeSet.challenge
    )

    ChallengeSet.challenge.evaluation_config.score_jsonpath = "public"
    ChallengeSet.challenge.evaluation_config.extra_results_columns = [
        {"title": "extra", "path": "extra", "order": "asc"}
    ]
    ChallengeSet.challenge.evaluation_config.display_all_metrics = True
    ChallengeSet.challenge.evaluation_config.save()

    response = get_view_for_user(
        client=client,
        viewname="evaluation:result-detail",
        challenge=ChallengeSet.challenge,
        reverse_kwargs={"pk": r.pk},
    )

    assert response.status_code == 200
    assert str(metrics["public"]) in response.rendered_content
    assert str(metrics["extra"]) in response.rendered_content
    assert str(metrics["secret"]) in response.rendered_content

    ChallengeSet.challenge.evaluation_config.display_all_metrics = False
    ChallengeSet.challenge.evaluation_config.save()

    response = get_view_for_user(
        client=client,
        viewname="evaluation:result-detail",
        challenge=ChallengeSet.challenge,
        reverse_kwargs={"pk": r.pk},
    )

    assert response.status_code == 200
    assert str(metrics["public"]) in response.rendered_content
    assert str(metrics["extra"]) in response.rendered_content
    assert str(metrics["secret"]) not in response.rendered_content

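# A sketch of the metric filtering asserted above (an assumption for
# illustration, not the project's template code): when `display_all_metrics`
# is off, only the score metric and any extra results columns should appear
# on the result detail page.
def _visible_metric_keys_sketch(config, metrics):
    if config.display_all_metrics:
        return set(metrics)
    return {config.score_jsonpath} | {
        col["path"] for col in config.extra_results_columns
    }
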
def test_calculate_ranks(settings):
    # Override the celery settings
    settings.task_eager_propagates = (True,)
    settings.task_always_eager = (True,)

    challenge = ChallengeFactory()

    with mute_signals(post_save):
        queryset = (
            # Warning: Do not change these values without updating the
            # expected ranks below.
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.0, "b": 0.0},
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.5, "b": 0.2},
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 1.0, "b": 0.3},
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.7, "b": 0.4},
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.5, "b": 0.5},
            ),
            # The following two are invalid if relative ranking is used
            ResultFactory(
                job__submission__challenge=challenge, metrics={"a": 1.0}
            ),
            ResultFactory(
                job__submission__challenge=challenge, metrics={"b": 0.3}
            ),
            # Add a valid, but unpublished result
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.1, "b": 0.1},
            ),
        )

    # Unpublish the result
    queryset[-1].published = False
    queryset[-1].save()

    expected = {
        Config.DESCENDING: {
            Config.ABSOLUTE: {
                Config.DESCENDING: {
                    "ranks": [6, 4, 1, 3, 4, 1, 0, 0],
                    "rank_scores": [6, 4, 1, 3, 4, 1, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [6, 4, 1, 3, 4, 1, 0, 0],
                    "rank_scores": [6, 4, 1, 3, 4, 1, 0, 0],
                },
            },
            Config.MEDIAN: {
                Config.DESCENDING: {
                    "ranks": [5, 4, 1, 1, 1, 0, 0, 0],
                    "rank_scores": [5, 3.5, 2, 2, 2, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [3, 2, 1, 3, 5, 0, 0, 0],
                    "rank_scores": [3, 2.5, 2, 3, 4, 0, 0, 0],
                },
            },
            Config.MEAN: {
                Config.DESCENDING: {
                    "ranks": [5, 4, 1, 1, 1, 0, 0, 0],
                    "rank_scores": [5, 3.5, 2, 2, 2, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [3, 2, 1, 3, 5, 0, 0, 0],
                    "rank_scores": [3, 2.5, 2, 3, 4, 0, 0, 0],
                },
            },
        },
        Config.ASCENDING: {
            Config.ABSOLUTE: {
                Config.DESCENDING: {
                    "ranks": [1, 2, 5, 4, 2, 5, 0, 0],
                    "rank_scores": [1, 2, 5, 4, 2, 5, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 5, 4, 2, 5, 0, 0],
                    "rank_scores": [1, 2, 5, 4, 2, 5, 0, 0],
                },
            },
            Config.MEDIAN: {
                Config.DESCENDING: {
                    "ranks": [2, 2, 5, 2, 1, 0, 0, 0],
                    "rank_scores": [3, 3, 4, 3, 1.5, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 4, 4, 3, 0, 0, 0],
                    "rank_scores": [1, 2, 4, 4, 3.5, 0, 0, 0],
                },
            },
            Config.MEAN: {
                Config.DESCENDING: {
                    "ranks": [2, 2, 5, 2, 1, 0, 0, 0],
                    "rank_scores": [3, 3, 4, 3, 1.5, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 4, 4, 3, 0, 0, 0],
                    "rank_scores": [1, 2, 4, 4, 3.5, 0, 0, 0],
                },
            },
        },
    }

    for score_method in (Config.ABSOLUTE, Config.MEDIAN, Config.MEAN):
        for a_order in (Config.DESCENDING, Config.ASCENDING):
            for b_order in (Config.DESCENDING, Config.ASCENDING):
                challenge.evaluation_config.score_jsonpath = "a"
                challenge.evaluation_config.scoring_method_choice = score_method
                challenge.evaluation_config.score_default_sort = a_order
                challenge.evaluation_config.extra_results_columns = [
                    {"path": "b", "title": "b", "order": b_order}
                ]
                challenge.evaluation_config.save()

                assert_ranks(
                    queryset,
                    expected[a_order][score_method][b_order]["ranks"],
                    expected[a_order][score_method][b_order]["rank_scores"],
                )

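# A worked example of one entry of `expected` above: a=DESCENDING,
# scoring_method=MEAN, b=DESCENDING for the five valid, published results.
# This is plain illustrative Python (the `_competition_ranks_sketch` helper
# is an assumption for this sketch, not the project's task code): each
# metric is ranked with ties sharing the best rank, the per-metric ranks
# are averaged into a rank score, and lower rank scores rank higher.
def _competition_ranks_sketch(values, *, descending):
    # "1224"-style competition ranking: tied values share the best rank and
    # the next distinct value skips the appropriate number of places.
    ordered = sorted(set(values), reverse=descending)
    rank_of = {
        v: 1 + sum(values.count(w) for w in ordered[: ordered.index(v)])
        for v in ordered
    }
    return [rank_of[v] for v in values]


def _mean_rank_worked_example_sketch():
    a = [0.0, 0.5, 1.0, 0.7, 0.5]
    b = [0.0, 0.2, 0.3, 0.4, 0.5]
    a_ranks = _competition_ranks_sketch(a, descending=True)  # [5, 3, 1, 2, 3]
    b_ranks = _competition_ranks_sketch(b, descending=True)  # [5, 4, 3, 2, 1]
    rank_scores = [(x + y) / 2 for x, y in zip(a_ranks, b_ranks)]
    assert rank_scores == [5, 3.5, 2, 2, 2]
    # Lower rank scores are better, so rank them in ascending order.
    assert _competition_ranks_sketch(rank_scores, descending=False) == [
        5, 4, 1, 1, 1,
    ]
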
def test_results_display(settings):
    # Override the celery settings
    settings.task_eager_propagates = (True,)
    settings.task_always_eager = (True,)

    challenge = ChallengeFactory()

    with mute_signals(post_save):
        user1 = UserFactory()
        user2 = UserFactory()
        queryset = (
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"b": 0.3},  # Invalid result
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.6},
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.4},
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.2},
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.1},
                job__submission__creator=user2,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.5},
                job__submission__creator=user2,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.3},
                job__submission__creator=user2,
            ),
        )

    challenge.evaluation_config.score_jsonpath = "a"
    challenge.evaluation_config.result_display_choice = Config.ALL
    challenge.evaluation_config.save()

    expected_ranks = [0, 1, 3, 5, 6, 2, 4]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.MOST_RECENT
    challenge.evaluation_config.save()

    expected_ranks = [0, 0, 0, 2, 0, 0, 1]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.BEST
    challenge.evaluation_config.save()

    expected_ranks = [0, 1, 0, 0, 0, 2, 0]
    assert_ranks(queryset, expected_ranks)

    # now test reverse order
    challenge.evaluation_config.score_default_sort = (
        challenge.evaluation_config.ASCENDING
    )
    challenge.evaluation_config.save()

    expected_ranks = [0, 0, 0, 2, 1, 0, 0]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.MOST_RECENT
    challenge.evaluation_config.save()

    expected_ranks = [0, 0, 0, 1, 0, 0, 2]
    assert_ranks(queryset, expected_ranks)

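# A sketch of the per-user filtering implied by the MOST_RECENT and BEST
# display choices above (the helper name, the string choices, and the
# `creator` / `score` attributes are assumptions for illustration, not the
# project's queryset code). Results filtered out here keep rank 0.
def _results_to_rank_sketch(results, *, choice, higher_is_better):
    # `results` is assumed to be a creation-ordered list of valid results.
    if choice == "ALL":
        return list(results)

    per_user = {}
    for result in results:
        per_user.setdefault(result.creator, []).append(result)

    if choice == "MOST_RECENT":
        # Only each user's most recently created result is ranked.
        return [user_results[-1] for user_results in per_user.values()]

    # BEST: only each user's best-scoring result is ranked.
    pick = max if higher_is_better else min
    return [
        pick(user_results, key=lambda r: r.score)
        for user_results in per_user.values()
    ]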