def test_max_similarity():
    """Tests that the maximum similarity is properly returned."""
    submission = SubmissionFactory.build(
        similarity=0,
        mt_similarity=0,
    )
    assert submission.max_similarity == 0

    submission = SubmissionFactory.build(
        similarity=0.5,
        mt_similarity=0.6,
    )
    assert submission.max_similarity == 0.6

    submission = SubmissionFactory.build(
        similarity=0.5,
        mt_similarity=None,
    )
    assert submission.max_similarity == 0.5

    submission = SubmissionFactory.build(
        similarity=None,
        mt_similarity=None,
    )
    assert submission.max_similarity == 0

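# The assertions above pin down the behaviour of `Submission.max_similarity`:
# the larger of the two similarity values is returned, with missing values
# treated as 0. A minimal sketch of a property satisfying these assertions,
# for illustration only (the real model may implement it differently):
#
#     @property
#     def max_similarity(self):
#         return max(self.similarity or 0, self.mt_similarity or 0)
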
def test_needs_scorelog():
    """Tests if the submission needs to be logged or not."""
    # Changing the STATE from UNTRANSLATED won't record any logs
    submission = SubmissionFactory.build(
        field=SubmissionFields.STATE,
        type=SubmissionTypes.NORMAL,
        old_value=UNTRANSLATED,
        new_value=TRANSLATED,
    )
    assert not submission.needs_scorelog()

    # Changing other fields (or even STATE, in a different direction) should
    # need to record a score log
    submission = SubmissionFactory.build(
        field=SubmissionFields.STATE,
        type=SubmissionTypes.NORMAL,
        old_value=TRANSLATED,
        new_value=UNTRANSLATED,
    )
    assert submission.needs_scorelog()

    submission = SubmissionFactory.build(
        field=SubmissionFields.TARGET,
        type=SubmissionTypes.SUGG_ADD,
        old_value=u'',
        new_value=u'',
    )
    assert submission.needs_scorelog()

def test_create_evaluation_is_idempotent(self):
    s = SubmissionFactory(
        challenge=self.method.challenge,
        algorithm_image=self.algorithm_image,
    )
    s.create_evaluation()
    assert AlgorithmEvaluation.objects.count() == 2

def test_submission_conversion(capsys, submission_file, settings):
    # Override the celery settings
    settings.task_eager_propagates = (True,)
    settings.task_always_eager = (True,)

    challenge = ChallengeFactory()
    test_set = challenge.imageset_set.get(phase=ImageSet.TESTING)

    with mute_signals(post_save):
        submission = SubmissionFactory(
            file__from_path=submission_file, challenge=challenge
        )

    call_command("convertsubmissions", challenge.short_name)

    _, err = capsys.readouterr()
    assert err == ""

    annotation_set = AnnotationSet.objects.all()[0]
    assert annotation_set.submission == submission
    assert annotation_set.base == test_set

    images = annotation_set.images.all()
    assert len(images) == 1
    assert images[0].name == "image10x10x10.mhd"

    with mute_signals(post_save):
        submission = SubmissionFactory(
            file__from_path=Path(__file__).parent.parent
            / "evaluation_tests"
            / "resources"
            / "submission.csv",
            challenge=challenge,
        )

    call_command("convertsubmissions", challenge.short_name)

    _, err = capsys.readouterr()
    assert err == ""

    annotation_set = AnnotationSet.objects.all()[1]
    assert annotation_set.submission == submission
    assert annotation_set.base == test_set

    labels = annotation_set.labels
    assert len(labels) == 10
    assert labels[0]["class"] == 0

def test_submission_time_limit(client, TwoChallengeSets):
    SubmissionFactory(
        challenge=TwoChallengeSets.ChallengeSet1.challenge,
        creator=TwoChallengeSets.ChallengeSet1.participant,
    )

    def get_submission_view():
        return get_view_for_user(
            viewname="evaluation:submission-create",
            challenge=TwoChallengeSets.ChallengeSet1.challenge,
            client=client,
            user=TwoChallengeSets.ChallengeSet1.participant,
        )

    assert "make 9 more" in get_submission_view().rendered_content

    # A submission made within the last 24 hours counts towards the limit
    s = SubmissionFactory(
        challenge=TwoChallengeSets.ChallengeSet1.challenge,
        creator=TwoChallengeSets.ChallengeSet1.participant,
    )
    s.created = timezone.now() - timedelta(hours=23)
    s.save()
    assert "make 8 more" in get_submission_view().rendered_content

    # A submission older than 24 hours no longer counts towards the limit
    s = SubmissionFactory(
        challenge=TwoChallengeSets.ChallengeSet1.challenge,
        creator=TwoChallengeSets.ChallengeSet1.participant,
    )
    s.created = timezone.now() - timedelta(hours=25)
    s.save()
    assert "make 8 more" in get_submission_view().rendered_content

def test_invoice_get_rates_inconsistent_scorelog_rates(member, store0):
    USER_RATE_ONE = 0.5
    USER_RATE_TWO = 0.2

    # Set some rate
    member.rate = USER_RATE_ONE
    member.review_rate = USER_RATE_ONE
    member.save()

    month = timezone.make_aware(timezone.datetime(2014, 4, 1))
    submission_kwargs = {
        "store": store0,
        "unit": store0.units[0],
        "field": SubmissionFields.TARGET,
        "type": SubmissionTypes.NORMAL,
        "old_value": "foo",
        "new_value": "bar",
        "submitter": member,
        "translation_project": store0.translation_project,
        "creation_time": month,
    }
    scorelog_kwargs = {
        "wordcount": 1,
        "similarity": 0,
        "action_code": TranslationActionCodes.NEW,
        "creation_time": month,
        "user": member,
        "submission": SubmissionFactory(**submission_kwargs),
    }
    ScoreLogFactory(**scorelog_kwargs)

    # Alter rates, producing an inconsistent state when recording the ScoreLog
    member.rate = USER_RATE_TWO
    member.review_rate = USER_RATE_TWO
    member.save()

    submission_kwargs["unit"] = store0.units[1]
    scorelog_kwargs["submission"] = SubmissionFactory(**submission_kwargs)
    ScoreLogFactory(**scorelog_kwargs)

    invoice = Invoice(member, FAKE_CONFIG, month=month)
    with pytest.raises(ValueError) as e:
        invoice.get_rates()
    assert "Multiple rate values recorded for user %s" % (
        member.username,
    ) in str(e.value)

def test_invoice_get_rates_inconsistent_scorelog_rates(member, store0):
    USER_RATE_ONE = 0.5
    USER_RATE_TWO = 0.2

    # Set some rate
    member.rate = USER_RATE_ONE
    member.review_rate = USER_RATE_ONE
    member.save()

    month = timezone.datetime(2014, 04, 01)
    submission_kwargs = {
        'store': store0,
        'unit': store0.units[0],
        'field': SubmissionFields.TARGET,
        'type': SubmissionTypes.NORMAL,
        'old_value': 'foo',
        'new_value': 'bar',
        'submitter': member,
        'translation_project': store0.translation_project,
        'creation_time': month,
    }
    scorelog_kwargs = {
        'wordcount': 1,
        'similarity': 0,
        'action_code': TranslationActionCodes.NEW,
        'creation_time': month,
        'user': member,
        'submission': SubmissionFactory(**submission_kwargs),
    }
    ScoreLogFactory(**scorelog_kwargs)

    # Alter rates, producing an inconsistent state when recording the ScoreLog
    member.rate = USER_RATE_TWO
    member.review_rate = USER_RATE_TWO
    member.save()

    submission_kwargs['unit'] = store0.units[1]
    scorelog_kwargs['submission'] = SubmissionFactory(**submission_kwargs)
    ScoreLogFactory(**scorelog_kwargs)

    invoice = Invoice(member, FAKE_CONFIG, month=month)
    with pytest.raises(ValueError) as e:
        invoice.get_rates()
    assert (
        'Multiple rate values recorded for user %s' % (member.username, )
        in e.value.message
    )

def test_submission_evaluation(
    client, evaluation_image, submission_file, settings
):
    # Override the celery settings
    settings.task_eager_propagates = (True,)
    settings.task_always_eager = (True,)
    settings.broker_url = ("memory://",)
    settings.backend = "memory"

    # Upload a submission and create a job
    dockerclient = docker.DockerClient(
        base_url=settings.CONTAINER_EXEC_DOCKER_BASE_URL
    )

    eval_container, sha256 = evaluation_image
    method = MethodFactory(
        image__from_path=eval_container, image_sha256=sha256, ready=True
    )

    # We should not be able to download methods
    response = client.get(method.image.url)
    assert response.status_code == 404

    num_containers_before = len(dockerclient.containers.list())
    num_volumes_before = len(dockerclient.volumes.list())

    # This will create a job, and we'll wait for it to be executed
    submission = SubmissionFactory(
        file__from_path=submission_file, challenge=method.challenge
    )

    # The evaluation method should clean up after itself
    assert len(dockerclient.volumes.list()) == num_volumes_before
    assert len(dockerclient.containers.list()) == num_containers_before

    # The evaluation method should return the correct answer
    assert len(submission.job_set.all()) == 1
    assert submission.job_set.all()[0].result.metrics["acc"] == 0.5

    # Try with a csv file
    submission = SubmissionFactory(
        file__from_path=Path(__file__).parent / "resources" / "submission.csv",
        challenge=method.challenge,
    )

    assert len(submission.job_set.all()) == 1
    assert submission.job_set.all()[0].result.metrics["acc"] == 0.5

def test_submission_download(client, two_challenge_sets):
    """Only the challenge admin should be able to download submissions."""
    submission = SubmissionFactory(
        challenge=two_challenge_sets.challenge_set_1.challenge,
        creator=two_challenge_sets.challenge_set_1.participant,
    )

    tests = [
        # (expected status code, user)
        (403, None),
        (403, two_challenge_sets.challenge_set_1.non_participant),
        (403, two_challenge_sets.challenge_set_1.participant),
        (403, two_challenge_sets.challenge_set_1.participant1),
        (302, two_challenge_sets.challenge_set_1.creator),
        (302, two_challenge_sets.challenge_set_1.admin),
        (403, two_challenge_sets.challenge_set_2.non_participant),
        (403, two_challenge_sets.challenge_set_2.participant),
        (403, two_challenge_sets.challenge_set_2.participant1),
        (403, two_challenge_sets.challenge_set_2.creator),
        (403, two_challenge_sets.challenge_set_2.admin),
        (302, two_challenge_sets.admin12),
        (403, two_challenge_sets.participant12),
        (302, two_challenge_sets.admin1participant2),
    ]

    for test in tests:
        response = get_view_for_user(
            url=submission.file.url, client=client, user=test[1]
        )
        assert response.status_code == test[0]

def test_is_similarity_taken_from_mt(similarity, mt_similarity):
    submission = SubmissionFactory.build(
        similarity=similarity, mt_similarity=mt_similarity
    )
    score_log = ScoreLogFactory.build(submission=submission)

    if submission.similarity < submission.mt_similarity:
        assert score_log.is_similarity_taken_from_mt()
    else:
        assert not score_log.is_similarity_taken_from_mt()

def test_submission_evaluation(
    client, evaluation_image, submission_file, settings
):
    # Override the celery settings
    settings.task_eager_propagates = (True,)
    settings.task_always_eager = (True,)

    # Upload a submission and create an evaluation
    dockerclient = docker.DockerClient(
        base_url=settings.COMPONENTS_DOCKER_BASE_URL
    )

    eval_container, sha256 = evaluation_image
    method = MethodFactory(
        image__from_path=eval_container, image_sha256=sha256, ready=True
    )

    # We should not be able to download methods
    with pytest.raises(NotImplementedError):
        _ = method.image.url

    num_containers_before = len(dockerclient.containers.list())
    num_volumes_before = len(dockerclient.volumes.list())

    # This will create an evaluation, and we'll wait for it to be executed
    submission = SubmissionFactory(
        predictions_file__from_path=submission_file, challenge=method.challenge
    )

    # The evaluation method should clean up after itself
    assert len(dockerclient.volumes.list()) == num_volumes_before
    assert len(dockerclient.containers.list()) == num_containers_before

    # The evaluation method should return the correct answer
    assert len(submission.evaluation_set.all()) == 1
    assert (
        submission.evaluation_set.first()
        .outputs.get(interface__slug="metrics-json-file")
        .value["acc"]
        == 0.5
    )

    # Try with a csv file
    submission = SubmissionFactory(
        predictions_file__from_path=Path(__file__).parent
        / "resources"
        / "submission.csv",
        challenge=method.challenge,
    )

    assert len(submission.evaluation_set.all()) == 1
    assert (
        submission.evaluation_set.first()
        .outputs.get(interface__slug="metrics-json-file")
        .value["acc"]
        == 0.5
    )

def test_create_job_for_submission(self):
    submission: Submission = SubmissionFactory()
    self.assertIsNone(submission.algorithm_job)

    create_algorithm_job_for_submission(submission)
    self.assertIsNotNone(submission.algorithm_job)

    self.assertIsNone(submission.evaluation_job)
    create_evaluation_job_for_submission(submission)
    self.assertIsNotNone(submission.evaluation_job)

def test_submission_evaluation(client, evaluation_image, submission_file):
    # Upload a submission and create a job
    dockerclient = docker.DockerClient(base_url=settings.DOCKER_BASE_URL)

    user = UserFactory()
    submission = SubmissionFactory(
        file__from_path=submission_file, creator=user
    )

    eval_container, sha256 = evaluation_image
    method = MethodFactory(
        image__from_path=eval_container, image_sha256=sha256, ready=True
    )

    # We should not be able to download methods
    response = client.get(method.image.url)
    assert response.status_code == 403

    job = JobFactory(submission=submission, method=method)

    num_containers_before = len(dockerclient.containers.list())
    num_volumes_before = len(dockerclient.volumes.list())

    res = evaluate_submission(job=job)

    # The evaluation method should return the correct answer
    assert res["acc"] == 0.5

    # The evaluation method should clean up after itself
    assert len(dockerclient.volumes.list()) == num_volumes_before
    assert len(dockerclient.containers.list()) == num_containers_before

    # Try with a csv file
    submission = SubmissionFactory(
        file__from_path=Path(__file__).parent / 'resources' / 'submission.csv',
        creator=user,
    )
    job = JobFactory(submission=submission, method=method)

    res = evaluate_submission(job=job)
    assert res["acc"] == 0.5

def test_submission_detail(client, TwoChallengeSets):
    submission = SubmissionFactory(
        challenge=TwoChallengeSets.ChallengeSet1.challenge,
        creator=TwoChallengeSets.ChallengeSet1.participant,
    )
    validate_admin_only_view(
        viewname="evaluation:submission-detail",
        two_challenge_set=TwoChallengeSets,
        reverse_kwargs={"pk": submission.pk},
        client=client,
    )

def test_evaluation_detail(client, eval_challenge_set):
    submission = SubmissionFactory(
        challenge=eval_challenge_set.challenge_set.challenge,
        creator=eval_challenge_set.challenge_set.participant,
    )
    e = EvaluationFactory(submission=submission)
    validate_open_view(
        viewname="evaluation:detail",
        challenge_set=eval_challenge_set.challenge_set,
        reverse_kwargs={"pk": e.pk},
        client=client,
    )

def test_algorithm_submission_creates_one_job_per_test_set_image(self):
    SubmissionFactory(
        challenge=self.method.challenge,
        algorithm_image=self.algorithm_image,
    )

    assert AlgorithmEvaluation.objects.count() == 2
    assert [
        inpt.image
        for ae in AlgorithmEvaluation.objects.all()
        for inpt in ae.inputs.all()
    ] == self.images[:2]

def test_result_detail(client, EvalChallengeSet):
    submission = SubmissionFactory(
        challenge=EvalChallengeSet.ChallengeSet.challenge,
        creator=EvalChallengeSet.ChallengeSet.participant,
    )
    job = JobFactory(submission=submission)
    result = ResultFactory(job=job)
    validate_open_view(
        viewname="evaluation:result-detail",
        challenge_set=EvalChallengeSet.ChallengeSet,
        reverse_kwargs={"pk": result.pk},
        client=client,
    )

def test_unsuccessful_jobs_fail_evaluation(self):
    submission = SubmissionFactory()
    evaluation = EvaluationFactory(submission=submission)
    AlgorithmEvaluationFactory(
        status=AlgorithmEvaluation.SUCCESS, submission=submission
    )
    AlgorithmEvaluationFactory(
        status=AlgorithmEvaluation.FAILURE, submission=submission
    )

    set_evaluation_inputs(evaluation_pk=evaluation.pk)

    evaluation.refresh_from_db()
    assert evaluation.status == evaluation.FAILURE
    assert evaluation.output == "The algorithm failed to execute on 1 images."

def test_invoice_get_rates_inconsistent_paidtask_rates(
    member, task_type, task_type_name, user_rate_attr_name, store0
):
    USER_RATE = 0.5
    PAID_TASK_RATE = 0.2

    # Set some user rate
    setattr(member, user_rate_attr_name, USER_RATE)
    member.save()

    month = timezone.make_aware(timezone.datetime(2014, 4, 1))
    submission_kwargs = {
        "store": store0,
        "unit": store0.units[0],
        "field": SubmissionFields.TARGET,
        "type": SubmissionTypes.NORMAL,
        "old_value": "foo",
        "new_value": "bar",
        "submitter": member,
        "translation_project": store0.translation_project,
        "creation_time": month,
    }
    scorelog_kwargs = {
        "wordcount": 1,
        "similarity": 0,
        "action_code": TranslationActionCodes.NEW,
        "creation_time": month,
        "user": member,
        "submission": SubmissionFactory(**submission_kwargs),
    }
    paid_task_kwargs = {
        "rate": PAID_TASK_RATE,  # Note how this doesn't match user's rate
        "datetime": month,
        "user": member,
        "task_type": task_type,
    }
    ScoreLogFactory(**scorelog_kwargs)
    PaidTaskFactory(**paid_task_kwargs)

    invoice = Invoice(member, FAKE_CONFIG, month=month)
    with pytest.raises(ValueError) as e:
        invoice.get_rates()
    assert "Multiple %s rate values for user %s" % (
        task_type_name,
        member.username,
    ) in str(e.value)

def test_invoice_get_rates_inconsistent_paidtask_rates(
    member, task_type, task_type_name, user_rate_attr_name, store0
):
    USER_RATE = 0.5
    PAID_TASK_RATE = 0.2

    # Set some user rate
    setattr(member, user_rate_attr_name, USER_RATE)
    member.save()

    month = timezone.datetime(2014, 04, 01)
    submission_kwargs = {
        'store': store0,
        'unit': store0.units[0],
        'field': SubmissionFields.TARGET,
        'type': SubmissionTypes.NORMAL,
        'old_value': 'foo',
        'new_value': 'bar',
        'submitter': member,
        'translation_project': store0.translation_project,
        'creation_time': month,
    }
    scorelog_kwargs = {
        'wordcount': 1,
        'similarity': 0,
        'action_code': TranslationActionCodes.NEW,
        'creation_time': month,
        'user': member,
        'submission': SubmissionFactory(**submission_kwargs),
    }
    paid_task_kwargs = {
        'rate': PAID_TASK_RATE,  # Note how this doesn't match user's rate
        'datetime': month,
        'user': member,
        'task_type': task_type,
    }
    ScoreLogFactory(**scorelog_kwargs)
    PaidTaskFactory(**paid_task_kwargs)

    invoice = Invoice(member, FAKE_CONFIG, month=month)
    with pytest.raises(ValueError) as e:
        invoice.get_rates()
    assert (
        'Multiple %s rate values for user %s' % (task_type_name, member.username)
        in e.value.message
    )

def test_set_evaluation_inputs(self):
    submission = SubmissionFactory()
    evaluation = EvaluationFactory(submission=submission)
    algorithms = AlgorithmEvaluationFactory.create_batch(
        2, status=AlgorithmEvaluation.SUCCESS, submission=submission
    )
    civs = ComponentInterfaceValueFactory.create_batch(2)

    for alg, civ in zip(algorithms, civs):
        alg.outputs.set([civ])

    set_evaluation_inputs(evaluation_pk=evaluation.pk)

    evaluation.refresh_from_db()
    assert evaluation.status == evaluation.PENDING
    assert evaluation.output == ""
    assert list(evaluation.inputs.all()) == civs

def test_job_detail(client, TwoChallengeSets):
    method = MethodFactory(
        challenge=TwoChallengeSets.ChallengeSet1.challenge,
        creator=TwoChallengeSets.ChallengeSet1.admin,
        ready=True,
    )
    submission = SubmissionFactory(
        challenge=TwoChallengeSets.ChallengeSet1.challenge,
        creator=TwoChallengeSets.ChallengeSet1.participant,
    )
    job = JobFactory(method=method, submission=submission)
    validate_admin_only_view(
        viewname="evaluation:job-detail",
        two_challenge_set=TwoChallengeSets,
        reverse_kwargs={"pk": job.pk},
        client=client,
    )

def test_record_submission(member, submission_type, store0):
    unit = store0.units.first()
    submission_params = {
        "store": store0,
        "unit": unit,
        "field": SubmissionFields.TARGET,
        "type": submission_type,
        "old_value": unit.target,
        "new_value": "New target",
        "similarity": 0,
        "mt_similarity": 0,
        "submitter": member,
        "translation_project": store0.translation_project,
        "creation_time": timezone.now(),
    }
    sub = SubmissionFactory(**submission_params)
    assert ScoreLog.objects.filter(submission=sub).count() == 1

def test_invoice_get_rates_scorelog_rates(
    member, task_type, task_type_name, user_rate_attr_name, store0
):
    """Tests that `Invoice.get_rates()` returns the rates set for users in
    their `ScoreLog` entries.
    """
    USER_RATE_ONE = 0.5
    USER_RATE_TWO = 0.2

    # Set some user rate
    setattr(member, user_rate_attr_name, USER_RATE_ONE)
    member.save()

    month = timezone.datetime(2014, 04, 01)
    submission_kwargs = {
        'store': store0,
        'unit': store0.units[0],
        'field': SubmissionFields.TARGET,
        'type': SubmissionTypes.NORMAL,
        'old_value': 'foo',
        'new_value': 'bar',
        'submitter': member,
        'translation_project': store0.translation_project,
        'creation_time': month,
    }
    scorelog_kwargs = {
        'wordcount': 1,
        'similarity': 0,
        'action_code': TranslationActionCodes.NEW,
        'creation_time': month,
        'user': member,
        'submission': SubmissionFactory(**submission_kwargs),
    }
    ScoreLogFactory(**scorelog_kwargs)

    invoice = Invoice(member, FAKE_CONFIG, month=month)

    # Set user rate to something else to ensure we get the recorded rates
    setattr(member, user_rate_attr_name, USER_RATE_TWO)
    member.save()

    rate, review_rate, hourly_rate = invoice.get_rates()
    assert locals()[user_rate_attr_name] == USER_RATE_ONE

def test_invoice_get_rates_scorelog_rates(
    member, task_type, task_type_name, user_rate_attr_name, store0
):
    """Tests that `Invoice.get_rates()` returns the rates set for users in
    their `ScoreLog` entries.
    """
    USER_RATE_ONE = 0.5
    USER_RATE_TWO = 0.2

    # Set some user rate
    setattr(member, user_rate_attr_name, USER_RATE_ONE)
    member.save()

    month = timezone.make_aware(timezone.datetime(2014, 4, 1))
    submission_kwargs = {
        "store": store0,
        "unit": store0.units[0],
        "field": SubmissionFields.TARGET,
        "type": SubmissionTypes.NORMAL,
        "old_value": "foo",
        "new_value": "bar",
        "submitter": member,
        "translation_project": store0.translation_project,
        "creation_time": month,
    }
    scorelog_kwargs = {
        "wordcount": 1,
        "similarity": 0,
        "action_code": TranslationActionCodes.NEW,
        "creation_time": month,
        "user": member,
        "submission": SubmissionFactory(**submission_kwargs),
    }
    ScoreLogFactory(**scorelog_kwargs)

    invoice = Invoice(member, FAKE_CONFIG, month=month)

    # Set user rate to something else to ensure we get the recorded rates
    setattr(member, user_rate_attr_name, USER_RATE_TWO)
    member.save()

    rate, review_rate, hourly_rate = invoice.get_rates()
    assert locals()[user_rate_attr_name] == USER_RATE_ONE

def submission_and_job(*, challenge, creator):
    """Creates a submission and a job for that submission."""
    s = SubmissionFactory(challenge=challenge, creator=creator)
    j = JobFactory(submission=s)
    return s, j

def submission_and_evaluation(*, challenge, creator):
    """Creates a submission and an evaluation for that submission."""
    s = SubmissionFactory(challenge=challenge, creator=creator)
    e = EvaluationFactory(submission=s)
    return s, e
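

# Illustrative usage of the helper above. This is a sketch only; the fixture
# attribute names mirror those used by the download test earlier in this
# section and the exact call sites may differ:
#
#     s, e = submission_and_evaluation(
#         challenge=two_challenge_sets.challenge_set_1.challenge,
#         creator=two_challenge_sets.challenge_set_1.participant,
#     )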