def test_null_results(settings):
    # Override the celery settings
    settings.task_eager_propagates = True
    settings.task_always_eager = True

    challenge = ChallengeFactory()

    with mute_signals(post_save):
        user1 = UserFactory()
        queryset = (
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.6},
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": None},
                job__submission__creator=user1,
            ),
        )

    challenge.evaluation_config.score_jsonpath = "a"
    challenge.evaluation_config.result_display_choice = Config.ALL
    challenge.evaluation_config.save()

    expected_ranks = [1, 0]
    assert_ranks(queryset, expected_ranks)
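# Every example in this listing calls an assert_ranks helper that is not shown
# here. Below is a minimal sketch of what it could look like for the
# assert_ranks(queryset, expected_ranks[, expected_rank_scores]) call style
# used in most examples, assuming each Result stores its computed position in
# `rank` and `rank_score` fields (an assumption; the real helper may differ,
# and one older example below passes the challenge as the first argument).
def assert_ranks(queryset, expected_ranks, expected_rank_scores=None):
    for i, result in enumerate(queryset):
        result.refresh_from_db()
        assert result.rank == expected_ranks[i]
        if expected_rank_scores is not None:
            assert result.rank_score == expected_rank_scores[i]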
def test_public_private_default():
    c = ChallengeFactory()

    r1 = ResultFactory(job__submission__challenge=c)

    assert r1.published

    c.evaluation_config.auto_publish_new_results = False
    c.evaluation_config.save()

    r2 = ResultFactory(job__submission__challenge=c)

    assert not r2.published

    # The public/private status should only update on first save
    r1.save()
    assert r1.published
def test_public_private_default():
    c = ChallengeFactory()

    r1 = ResultFactory(challenge=c)

    assert r1.public

    c.evaluation_config.new_results_are_public = False
    c.evaluation_config.save()

    r2 = ResultFactory(challenge=c)

    assert not r2.public

    # The public/private status should only update on first save
    r1.save()
    assert r1.public
def test_result_detail(client, EvalChallengeSet):
    submission = SubmissionFactory(
        challenge=EvalChallengeSet.ChallengeSet.challenge,
        creator=EvalChallengeSet.ChallengeSet.participant,
    )
    job = JobFactory(submission=submission)
    result = ResultFactory(job=job)
    validate_open_view(
        viewname="evaluation:result-detail",
        challenge_set=EvalChallengeSet.ChallengeSet,
        reverse_kwargs={"pk": result.pk},
        client=client,
    )
def test_setting_display_all_metrics(client, ChallengeSet):
    metrics = {"public": 3245.235, "secret": 4328.432, "extra": 2144.312}
    r = ResultFactory(metrics=metrics,
                      job__submission__challenge=ChallengeSet.challenge)

    ChallengeSet.challenge.evaluation_config.score_jsonpath = "public"
    ChallengeSet.challenge.evaluation_config.extra_results_columns = [
        {"title": "extra", "path": "extra", "order": "asc"}
    ]
    ChallengeSet.challenge.evaluation_config.display_all_metrics = True
    ChallengeSet.challenge.evaluation_config.save()

    response = get_view_for_user(
        client=client,
        viewname="evaluation:result-detail",
        challenge=ChallengeSet.challenge,
        reverse_kwargs={"pk": r.pk},
    )

    assert response.status_code == 200
    assert str(metrics["public"]) in response.rendered_content
    assert str(metrics["extra"]) in response.rendered_content
    assert str(metrics["secret"]) in response.rendered_content

    ChallengeSet.challenge.evaluation_config.display_all_metrics = False
    ChallengeSet.challenge.evaluation_config.save()

    response = get_view_for_user(
        client=client,
        viewname="evaluation:result-detail",
        challenge=ChallengeSet.challenge,
        reverse_kwargs={"pk": r.pk},
    )

    assert response.status_code == 200
    assert str(metrics["public"]) in response.rendered_content
    assert str(metrics["extra"]) in response.rendered_content
    assert str(metrics["secret"]) not in response.rendered_content
def test_calculate_ranks(mocker):
    challenge = ChallengeFactory()
    challenge.evaluation_config.score_jsonpath = 'a'
    challenge.evaluation_config.save()
    with mute_signals(post_save):
        queryset = (
            ResultFactory(challenge=challenge, metrics={'a': 0.1}),
            ResultFactory(challenge=challenge, metrics={'a': 0.5}),
            ResultFactory(challenge=challenge, metrics={'a': 1.0}),
            ResultFactory(challenge=challenge, metrics={'a': 0.7}),
            ResultFactory(challenge=challenge, metrics={'a': 0.5}),
            ResultFactory(challenge=challenge, metrics={'a': 1.0}),
        )
    # An alternative implementation could give [4, 3, 1, 2, 3, 1], since there
    # are only 4 unique values; the current implementation is harsh on poor
    # results (see the ranking sketch after this test).
    expected_ranks = [6, 4, 1, 3, 4, 1]
    challenge = assert_ranks(challenge, expected_ranks, queryset)
    # now test reverse order
    challenge.evaluation_config.score_default_sort = challenge.evaluation_config.ASCENDING
    challenge.evaluation_config.save()
    expected_ranks = [1, 2, 5, 4, 2, 5]
    assert_ranks(challenge, expected_ranks, queryset)
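# A quick illustration of the two ranking schemes mentioned in the comment
# above. This is a side note, not part of the test suite, and scipy is used
# only for illustration: "min" is standard competition ranking, which is what
# the test expects, while "dense" would give the alternative [4, 3, 1, 2, 3, 1].
from scipy.stats import rankdata

scores = [0.1, 0.5, 1.0, 0.7, 0.5, 1.0]
competition = rankdata([-s for s in scores], method="min").astype(int).tolist()
dense = rankdata([-s for s in scores], method="dense").astype(int).tolist()
print(competition)  # [6, 4, 1, 3, 4, 1]
print(dense)        # [4, 3, 1, 2, 3, 1]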
def test_calculate_ranks(settings):
    # Override the celery settings
    settings.task_eager_propagates = True
    settings.task_always_eager = True

    challenge = ChallengeFactory()

    with mute_signals(post_save):
        queryset = (
            # Warning: Do not change these values without updating the
            # expected_ranks below.
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.0, "b": 0.0},
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.5, "b": 0.2},
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 1.0, "b": 0.3},
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.7, "b": 0.4},
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.5, "b": 0.5},
            ),
            # The following two are invalid if relative ranking is used
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 1.0},
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"b": 0.3},
            ),
            # Add a valid, but unpublished, result
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.1, "b": 0.1},
            ),
        )

        # Unpublish the result
        queryset[-1].published = False
        queryset[-1].save()

    expected = {
        Config.DESCENDING: {
            Config.ABSOLUTE: {
                Config.DESCENDING: {
                    "ranks": [6, 4, 1, 3, 4, 1, 0, 0],
                    "rank_scores": [6, 4, 1, 3, 4, 1, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [6, 4, 1, 3, 4, 1, 0, 0],
                    "rank_scores": [6, 4, 1, 3, 4, 1, 0, 0],
                },
            },
            Config.MEDIAN: {
                Config.DESCENDING: {
                    "ranks": [5, 4, 1, 1, 1, 0, 0, 0],
                    "rank_scores": [5, 3.5, 2, 2, 2, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [3, 2, 1, 3, 5, 0, 0, 0],
                    "rank_scores": [3, 2.5, 2, 3, 4, 0, 0, 0],
                },
            },
            Config.MEAN: {
                Config.DESCENDING: {
                    "ranks": [5, 4, 1, 1, 1, 0, 0, 0],
                    "rank_scores": [5, 3.5, 2, 2, 2, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [3, 2, 1, 3, 5, 0, 0, 0],
                    "rank_scores": [3, 2.5, 2, 3, 4, 0, 0, 0],
                },
            },
        },
        Config.ASCENDING: {
            Config.ABSOLUTE: {
                Config.DESCENDING: {
                    "ranks": [1, 2, 5, 4, 2, 5, 0, 0],
                    "rank_scores": [1, 2, 5, 4, 2, 5, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 5, 4, 2, 5, 0, 0],
                    "rank_scores": [1, 2, 5, 4, 2, 5, 0, 0],
                },
            },
            Config.MEDIAN: {
                Config.DESCENDING: {
                    "ranks": [2, 2, 5, 2, 1, 0, 0, 0],
                    "rank_scores": [3, 3, 4, 3, 1.5, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 4, 4, 3, 0, 0, 0],
                    "rank_scores": [1, 2, 4, 4, 3.5, 0, 0, 0],
                },
            },
            Config.MEAN: {
                Config.DESCENDING: {
                    "ranks": [2, 2, 5, 2, 1, 0, 0, 0],
                    "rank_scores": [3, 3, 4, 3, 1.5, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 4, 4, 3, 0, 0, 0],
                    "rank_scores": [1, 2, 4, 4, 3.5, 0, 0, 0],
                },
            },
        },
    }

    for score_method in (Config.ABSOLUTE, Config.MEDIAN, Config.MEAN):
        for a_order in (Config.DESCENDING, Config.ASCENDING):
            for b_order in (Config.DESCENDING, Config.ASCENDING):
                challenge.evaluation_config.score_jsonpath = "a"
                challenge.evaluation_config.scoring_method_choice = (
                    score_method)
                challenge.evaluation_config.score_default_sort = a_order
                challenge.evaluation_config.extra_results_columns = [
                    {"path": "b", "title": "b", "order": b_order}
                ]
                challenge.evaluation_config.save()

                assert_ranks(
                    queryset,
                    expected[a_order][score_method][b_order]["ranks"],
                    expected[a_order][score_method][b_order]["rank_scores"],
                )
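# How the Config.MEDIAN expectations above come about, worked through for the
# DESCENDING / MEDIAN / DESCENDING case. This is only a sketch of the scoring
# idea that reproduces the numbers in `expected`, not the production code:
# each metric is ranked separately with competition ranking, the rank_score is
# the median of a result's per-metric ranks, and the final rank orders results
# by that rank_score (lower is better); invalid or unpublished results get 0.
from statistics import median


def competition_ranks(values, reverse):
    ordered = sorted(values, reverse=reverse)
    return [ordered.index(v) + 1 for v in values]


a = [0.0, 0.5, 1.0, 0.7, 0.5]  # metric "a" of the five valid, published results
b = [0.0, 0.2, 0.3, 0.4, 0.5]  # metric "b" of the same results

rank_a = competition_ranks(a, reverse=True)  # [5, 3, 1, 2, 3]
rank_b = competition_ranks(b, reverse=True)  # [5, 4, 3, 2, 1]
rank_scores = [median(pair) for pair in zip(rank_a, rank_b)]  # [5, 3.5, 2, 2, 2]
final_ranks = competition_ranks(rank_scores, reverse=False)  # [5, 4, 1, 1, 1]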
def test_results_display(settings):
    # Override the celery settings
    settings.task_eager_propagates = True
    settings.task_always_eager = True

    challenge = ChallengeFactory()

    with mute_signals(post_save):
        user1 = UserFactory()
        user2 = UserFactory()
        queryset = (
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"b": 0.3},  # Invalid result
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.6},
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.4},
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.2},
                job__submission__creator=user1,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.1},
                job__submission__creator=user2,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.5},
                job__submission__creator=user2,
            ),
            ResultFactory(
                job__submission__challenge=challenge,
                metrics={"a": 0.3},
                job__submission__creator=user2,
            ),
        )

    challenge.evaluation_config.score_jsonpath = "a"
    challenge.evaluation_config.result_display_choice = Config.ALL
    challenge.evaluation_config.save()

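    # With Config.ALL every valid result is ranked by score (descending);
    # the metrics-only-"b" result is invalid and gets rank 0.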
    expected_ranks = [0, 1, 3, 5, 6, 2, 4]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.MOST_RECENT
    challenge.evaluation_config.save()

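    # With Config.MOST_RECENT only each user's latest result is ranked:
    # user1's {"a": 0.2} and user2's {"a": 0.3}; everything else gets rank 0.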
    expected_ranks = [0, 0, 0, 2, 0, 0, 1]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.BEST
    challenge.evaluation_config.save()

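    # With Config.BEST only each user's best score is ranked:
    # user1's 0.6 and user2's 0.5; everything else gets rank 0.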
    expected_ranks = [0, 1, 0, 0, 0, 2, 0]
    assert_ranks(queryset, expected_ranks)

    # now test reverse order
    challenge.evaluation_config.score_default_sort = (
        challenge.evaluation_config.ASCENDING)
    challenge.evaluation_config.save()

    expected_ranks = [0, 0, 0, 2, 1, 0, 0]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.MOST_RECENT
    challenge.evaluation_config.save()

    expected_ranks = [0, 0, 0, 1, 0, 0, 2]
    assert_ranks(queryset, expected_ranks)