Esempio n. 1
0
    def test_create_annotation_task_with_task_group(self):
        # Tasks created through POST /annotation_tasks with an
        # annotation_task_topic_group_id must end up linked to that group,
        # both on the task row and through the group's annotation_tasks
        # relationship.
        self.before_each()

        # seed one topic group that already lists the first two fixture tasks
        self.task_groups = [
            AnnotationTaskTopicGroup({
                'name': 'task_group_1',
                'description': 'first task group',
                'annotation_task_ids': [self.tasks[0].id, self.tasks[1].id],
                'arbitrary_tags': ["tag1", "tag2"],
                'topic_id': 1,
                'gold_annotator_user_ids': [1, 2]
            })
        ]
        db_session_users.add_all(self.task_groups)
        db_session_users.commit()
        for task_group in self.task_groups:
            db_session_users.refresh(task_group)
        group_id = self.task_groups[0].id

        # the two request bodies differ only by task name; both point at the
        # existing annotation_task_topic_group
        payloads = [
            json.dumps({
                'name': task_name,
                'topics': {
                    'banking': {'topic_id': 1, 'topic_table': 'topics'},
                    'lending': {'topic_id': 2, 'topic_table': 'topics'}
                },
                'user_ids': [self.user.id],
                'term_sampling_group_ids': [1],
                'annotation_task_topic_group_id': group_id
            })
            for task_name in ('wat_1', 'wat_2')
        ]
        created_tasks = [
            self.client.post("/annotation_tasks",
                             headers={'Authorization': self.admin_user_token},
                             data=payload).json["annotation_task"]
            for payload in payloads
        ]

        for task_group in self.task_groups:
            db_session_users.refresh(task_group)

        # each stored task must reference the group
        stored_tasks = [
            db_session_users.query(AnnotationTask).filter_by(id=created['id']).first()
            for created in created_tasks
        ]
        for stored in stored_tasks:
            self.assertEqual(stored.annotation_task_topic_group_id, group_id)

        # and the group's ORM relationship must yield exactly these two tasks
        reloaded_group = db_session_users.query(AnnotationTaskTopicGroup)\
                                         .filter_by(id=group_id)\
                                         .first()
        self.assertEqual(set(stored_tasks), set(reloaded_group.annotation_tasks))
Esempio n. 2
0
    def test_get_annotation_tasks_by_annotation_type(self):
        # GET /annotation_tasks?type=topic_annotation as the contributor user
        # is expected to return the two topic-annotation tasks assigned to that
        # user, and not the task whose type is "contributor".
        self.before_each()
        contributor_typed, topic_typed = self.tasks[0], self.tasks[1]

        # first fixture task: flagged as contributor task, but of a different type
        contributor_typed.type = "contributor"
        contributor_typed.is_contributor_task = True
        db_session_users.add(contributor_typed)

        # second fixture task: topic annotation, assigned to the contributor
        topic_typed.type = AnnotationTask.TOPIC_ANNOTATION_TYPE
        topic_typed.is_contributor_task = True
        topic_typed.user_ids = [self.contributor_user.id]
        db_session_users.add(topic_typed)

        # extra topic-annotation task without the contributor flag
        unflagged_task = AnnotationTask({
            'name': 'watman',
            'topics': {
                'banking': {'topic_id': 1, 'topic_table': 'topics'},
                'lending': {'topic_id': 2, 'topic_table': 'topics'},
            },
            'type': AnnotationTask.TOPIC_ANNOTATION_TYPE,
            'is_contributor_task': False,
            'user_ids': [self.contributor_user.id]
        })
        db_session_users.add(unflagged_task)
        db_session_users.commit()
        db_session_users.refresh(unflagged_task)

        auth_headers = {'Authorization': self.contributor_user_token}
        response = self.client.get("/annotation_tasks?type=topic_annotation", headers=auth_headers)
        self.assert200(response)
        self.assertIn("annotation_tasks", response.json)
        returned = response.json["annotation_tasks"]
        self.assertIsInstance(returned, list)
        self.assertEqual(len(returned), 2)
        # order matters: fixture task first, then the newly added one
        for entry, expected_task in zip(returned, (topic_typed, unflagged_task)):
            self.assertEqual(entry['id'], expected_task.id)
    def test_skip_flagged_document(self):
        # Posting a 'skipped' status for an existing flagged-document record
        # must return the record's fields with the new status and persist the
        # status change on the stored row.
        doc_id = 3

        # first write a doc for review
        ufd = UserFlaggedDocument({
            'issue_severity': 'review',
            'issue_type': 'technical',
            'field': 'title',
            'notes': 'typo in title',
            'user_id': self.user.id,
            'doc_id': doc_id
        })
        db_session_users.add(ufd)
        db_session_users.commit()
        db_session_users.refresh(ufd)

        # then post a status update to skipped
        flagged_data = json.dumps(
            {'flagged': {
                'id': ufd.id,
                'status': 'skipped'
            }})
        response = self.client.post(
            "/documents/{}".format(doc_id),
            headers={'Authorization': self.qa_user_token},
            data=flagged_data)
        self.assert200(response)
        self.assertIn('document', response.json)
        # assertEqual rather than the deprecated assertEquals alias
        self.assertEqual(response.json['document']['id'], doc_id)
        self.assertEqual(response.json['document']['issue_severity'],
                         'review')
        self.assertEqual(response.json['document']['issue_type'], 'technical')
        self.assertEqual(response.json['document']['status'], 'skipped')

        # the stored record must carry the new status as well
        db_session_users.refresh(ufd)
        self.assertEqual(ufd.status, 'skipped')
    def test_update_user_folder_with_new_documents(self):
        # Adding documents that are not yet in a folder via POST /documents
        # must succeed and report the ids that were added.

        # create a folder
        user_folder = UserFolderFactory(user_id=self.user.id)
        db_session_users.add(user_folder)
        db_session_users.commit()
        db_session_users.refresh(user_folder)

        # look the folder up again through the session, then add three docs to it
        folder = db_session_users.query(UserFolder).filter_by(
            user_id=self.user.id).first()

        request_body = json.dumps({
            'document_ids': [1, 2, 3],
            'folder_id': folder.id,
        })

        response = self.client.post("/documents",
                                    headers={'Authorization': self.token},
                                    data=request_body)

        self.assert200(response)
        self.assertIn('documents', response.json)
        # assertEqual rather than the deprecated assertEquals alias
        self.assertEqual(response.json['documents'], {
            'ids': [1, 2, 3],
            'documents_added_to_folder': True
        })
    def test_remove_positive_tag_from_document(self):
        # Re-posting an existing positive tag with is_positive=False must
        # update the existing UserDocumentTag row, and the tag must no longer
        # be listed when the document is fetched afterwards.
        doc_id = 3
        self.before_each_tags()

        # seed a positive tag on the document
        user_doc_tag = UserDocumentTag({
            'user_tag_id': self.user_tags[0].id,
            'is_positive': True,
            'display_style': 'modal',
            'doc_id': doc_id,
            'user_id': self.user.id
        })
        db_session_users.add(user_doc_tag)
        db_session_users.commit()

        # flip the same tag to negative through the API
        request_data = {
            'tag': {
                'id': self.user_tags[0].id,
                'is_positive': False,
                'display_style': 'modal'
            }
        }
        response = self.client.post("/documents/{}".format(doc_id),
                                    headers={'Authorization': self.token},
                                    data=json.dumps(request_data))
        self.assert200(response)
        self.assertIn('document', response.json)
        for key in [
                'doc_id', 'user_id', 'user_tag_id', 'is_positive',
                'display_style'
        ]:
            self.assertIn(key, response.json['document'])

        # assertEqual rather than the deprecated assertEquals alias
        self.assertEqual(response.json['document']['id'], doc_id)
        self.assertEqual(response.json['document']['user_tag_id'],
                         self.user_tags[0].id)
        self.assertEqual(response.json['document']['user_id'], self.user.id)
        self.assertFalse(response.json['document']['is_positive'])
        self.assertEqual(response.json['document']['display_style'], 'modal')

        # make sure it is updated (not new) in the db too
        db_session_users.refresh(user_doc_tag)
        self.assertFalse(user_doc_tag.is_positive)

        # now check that it does not show up in the get_document route
        response = self.client.get(
            "/documents/{}".format(doc_id),
            headers={'Authorization': self.token},
        )

        self.assert200(response)

        # each entry in 'tags' is indexed positionally; element 0 is compared
        # against the tag id
        tag_ids = [t[0] for t in response.json['document']['tags']]
        self.assertNotIn(self.user_tags[0].id, tag_ids)
Esempio n. 6
0
    def before_each(self):
        """Clear the folder tables and create one folder owned by self.user.

        The new folder is stored on self.user_folder after being committed and
        refreshed.
        """
        # n.b. wiped because other tests leave rows behind; the order is kept
        # as shared folders, then folder documents, then folders
        for model in (UserSharedFolder, UserFolderDocument, UserFolder):
            db_session_users.query(model).delete()

        folder_params = {
            "name": 'test_folder',
            'user_id': self.user.id,
        }
        self.user_folder = UserFolder(folder_params)
        db_session_users.add(self.user_folder)
        db_session_users.commit()
        db_session_users.refresh(self.user_folder)
Esempio n. 7
0
    def before_each(self, skip_user_entries=False):
        """Reset the contributor point tables and seed point types.

        One ContributorPointType is created per entry in
        CONTRIBUTOR_TYPE_TEMPLATES and kept on self.contributor_point_types.
        Unless skip_user_entries is true, UserContributorPoint rows are also
        created for self.user: one per type, a second one for every type whose
        frequency is not 'onboarding', and for those same types one more row
        whose created_at is moved back 52 weeks.
        """
        db_session_users.query(UserContributorPoint).delete()
        db_session_users.query(ContributorPointType).delete()
        self.contributor_point_types = [
            ContributorPointType(template)
            for template in CONTRIBUTOR_TYPE_TEMPLATES
        ]

        if not skip_user_entries:
            for point_type in self.contributor_point_types:
                # every type gets one event; weekly/anytime frequencies get a
                # second recent one
                recent_count = 1 if point_type.frequency == 'onboarding' else 2
                for _ in range(recent_count):
                    point_type.user_contributor_points.append(
                        UserContributorPoint({
                            "user_id": self.user.id,
                            'num_points': point_type.points_per_action
                        }))

        db_session_users.add_all(self.contributor_point_types)
        db_session_users.commit()

        if skip_user_entries:
            return

        # add an entry from a year ago for anytime/weekly types; per the
        # original note it is meant to be ignored for weekly but returned for
        # anytime
        for point_type in self.contributor_point_types:
            if point_type.frequency == 'onboarding':
                continue
            one_year_ago = dt.datetime.now() - dt.timedelta(weeks=52)
            backdated = UserContributorPoint({
                "user_id": self.user.id,
                'num_points': point_type.points_per_action,
                'contributor_point_type_id': point_type.id
            })
            db_session_users.add(backdated)
            db_session_users.commit()
            # n.b. the date has to be overwritten after the first commit to
            # fudge it
            backdated.created_at = one_year_ago
            db_session_users.add(backdated)
            db_session_users.commit()
            db_session_users.refresh(backdated)
Esempio n. 8
0
    def test_get_annotation_task_group_tags_for_task(self):
        # The task_group_labels route must return the arbitrary tags of the
        # group(s) containing a task, plus a warning entry when more than one
        # group contains it.
        self.before_each()

        # self.tasks[0] is listed in both groups, self.tasks[1] only in the second
        self.task_groups = [
            AnnotationTaskTopicGroup({
                'name': 'task_group_1',
                'description': 'first task group',
                'annotation_task_ids': [self.tasks[0].id],
                'arbitrary_tags': ["tag1", "tag2"],
                'topic_id': 1
            }),
            AnnotationTaskTopicGroup({
                'name': 'task_group_2',
                'description': 'second task group',
                'annotation_task_ids': [self.tasks[0].id, self.tasks[1].id],
                'arbitrary_tags': ["tag2", "tag3"],
                'topic_id': 2
            }),
        ]
        db_session_users.add_all(self.task_groups)
        db_session_users.commit()
        for task_group in self.task_groups:
            db_session_users.refresh(task_group)

        expectations = [
            # task contained in one task group
            (self.tasks[1], {"tag2", "tag3"}),
            # task contained in two task groups
            (self.tasks[0],
             {"tag1", "tag2", "tag3", "WARNING: MORE THAN ONE ANNOTATION TASK GROUP CONTAINS THIS TASK"}),
        ]
        for task, expected_tags in expectations:
            url = '/annotation_tasks/{}/task_group_labels'.format(task.id)
            response = self.client.get(url, headers={'Authorization': self.admin_user_token})
            self.assert200(response)
            self.assertIn("annotation_task_group_tags", response.json)
            self.assertEqual(set(response.json["annotation_task_group_tags"]), expected_tags)
Esempio n. 9
0
 def before_each(self):
     """Clear the team tables and seed two teams that self.user belongs to.

     The teams are kept on self.teams and the membership rows on
     self.teamMembers, all committed and refreshed.
     """
     # n.b. wiped because other tests leave rows behind; members go first
     for model in (TeamMember, Team):
         db_session_users.query(model).delete()

     self.teams = [
         UserTeamFactory(name=team_name)
         for team_name in ('test_team', 'foo_team_2')
     ]
     db_session_users.add_all(self.teams)
     db_session_users.commit()
     for team in self.teams:
         db_session_users.refresh(team)

     # one membership per team, all for the same user
     self.teamMembers = [
         UserTeamMemberFactory(user_id=self.user.id, team_id=team.id)
         for team in self.teams
     ]
     db_session_users.add_all(self.teamMembers)
     db_session_users.commit()
     for membership in self.teamMembers:
         db_session_users.refresh(membership)
    def test_update_user_folder_with_existing_documents(self):
        # Posting documents that the folder already contains must be rejected
        # with a 409 and an 'errors' entry in the response.
        seeded_folder = UserFolderFactory(user_id=self.user.id)
        db_session_users.add(seeded_folder)
        db_session_users.commit()
        db_session_users.refresh(seeded_folder)

        # put documents 1-3 into the user's folder up front
        folder = db_session_users.query(UserFolder).filter_by(
            user_id=self.user.id).first()
        existing_documents = [
            UserFolderDocument({
                "user_folder_id": folder.id,
                'doc_id': doc_id,
            })
            for doc_id in (1, 2, 3)
        ]
        db_session_users.add_all(existing_documents)
        db_session_users.commit()

        # now ask to add the very same documents again
        payload = json.dumps({
            'document_ids': [1, 2, 3],
            'folder_id': folder.id,
        })
        response = self.client.post(
            "/documents",
            headers={'Authorization': self.token},
            data=payload)

        self.assertStatus(response, 409)
        self.assertIn('errors', response.json)
Esempio n. 11
0
    def test_update_annotation_task_include_gold_annotations(self):
        # Changing include_gold_annotations on a task is expected to create a
        # replacement task and retire the old one.
        self.before_each()
        original_task = self.tasks[0]

        # the fixture task starts with include_gold_annotations set
        self.assertTrue(original_task.include_gold_annotations)

        # switch the flag off through the API
        payload = json.dumps({'include_gold_annotations': False})
        response = self.client.post(
            '/annotation_tasks/{}'.format(original_task.id),
            headers={'Authorization': self.admin_user_token},
            data=payload)

        # response carries the updated task
        self.assert200(response)
        self.assertIn('annotation_task', response.json)
        self.assertIsInstance(response.json['annotation_task'], dict)
        updated = response.json['annotation_task']
        self.assertFalse(updated['include_gold_annotations'])
        # a new annotation_task row was created for the update
        self.assertNotEqual(updated['id'], original_task.id)

        db_session_users.refresh(original_task)
        # the old task is retired ...
        self.assertEqual(original_task.status, 'inactive')
        # ... and points at its replacement
        self.assertEqual(original_task.active_task_id, updated['id'])
 def before_each(self):
     """Clear the marketing campaign tables and seed two campaigns.

     Both campaigns are created by self.user and get a generated token; only
     the first one ('foo') has self.user attached as a campaign user. The
     campaigns are kept on self.marketing_campaigns.
     """
     db_session_users.query(MarketingCampaignUsers).delete()
     db_session_users.query(MarketingCampaign).delete()

     # (constructor params, users to attach) for each campaign, in order
     campaign_specs = [
         ({
             'name': 'foo',
             'start_date': '01/01/2017',
             'end_date': '01/06/2017',
             'created_by_user_id': self.user.id,
             'notes': 'yada yada yada'
         }, [self.user]),
         ({
             'name': 'bar',
             'start_date': '01/01/2015',
             'end_date': '01/06/2015',
             'created_by_user_id': self.user.id
         }, []),
     ]

     seeded = []
     for params, members in campaign_specs:
         campaign = MarketingCampaign(params)
         campaign.gen_token()
         for member in members:
             campaign.users.append(member)
         seeded.append(campaign)

     self.marketing_campaigns = seeded
     db_session_users.add_all(self.marketing_campaigns)
     db_session_users.commit()
     for campaign in self.marketing_campaigns:
         db_session_users.refresh(campaign)
    def before_each(self):
        """Reset annotation tables and seed one task with jobs and annotations.

        For doc ids 1 through 5 this creates, attached to the single task:
        a completed job for self.user (marked skipped on even doc ids) and one
        for self.admin_user, followed by a "Lending" annotation from each of
        the two users with opposite is_positive values (self.user is positive
        on even doc ids). Results are kept on self.tasks, self.jobs and
        self.annotations.
        """
        for model in (TopicAnnotation, AnnotationJob, AnnotationTask):
            db_session_users.query(model).delete()

        task = AnnotationTask({
            'name': 'foo',
            'topics': {
                'banking': {'topic_id': 1, 'topic_table': 'topics'},
                'lending': {'topic_id': 2, 'topic_table': 'topics'},
            }
        })
        self.tasks = [task]
        self.jobs = []
        self.annotations = []

        for doc_id in xrange(1, 6):
            is_even = doc_id % 2 == 0
            priority = 1.0 if is_even else 0.5

            # jobs first: regular user (skipped on even docs), then admin
            job_specs = (
                (self.user, True if is_even else None),
                (self.admin_user, None),
            )
            for owner, skipped in job_specs:
                job = AnnotationJob({
                    "doc_id": doc_id,
                    "priority": priority,
                    "status": AnnotationJob.COMPLETE_STATUS,
                    "user_id": owner.id,
                    "was_skipped": skipped
                })
                job.annotation_task = task
                self.jobs.append(job)

            # then annotations: the two users always disagree
            annotation_specs = (
                (self.user, is_even),
                (self.admin_user, not is_even),
            )
            for owner, positive in annotation_specs:
                annotation = TopicAnnotation({
                    "doc_id": doc_id,
                    "is_positive": positive,
                    "user_id": owner.id,
                    "topic_name": "Lending"
                })
                annotation.annotation_task = task
                self.annotations.append(annotation)

        for batch in (self.tasks, self.jobs, self.annotations):
            db_session_users.add_all(batch)
        db_session_users.commit()

        # only tasks and jobs are refreshed
        for batch in (self.tasks, self.jobs):
            for record in batch:
                db_session_users.refresh(record)
Esempio n. 14
0
    def test_update_annotation_task_is_onboarding_task(self):
        # A user_ids update on a training (onboarding) task must be applied in
        # place, while the same kind of update on a non-training task that has
        # jobs must produce a new task.
        self.before_each()
        db_session_users.query(AnnotationJob).delete()

        # make annotation task group to contain the onboarding task
        task_group = AnnotationTaskTopicGroup({
            'name': 'task_group_1',
            'description': 'onboarding task group',
            'arbitrary_tags': ["tag1", "tag2"],
            'topic_id': 1,
            'gold_annotator_user_ids': [1, 2]
        })
        self.task_groups = [task_group]
        db_session_users.add_all(self.task_groups)
        db_session_users.commit()
        for group in self.task_groups:
            db_session_users.refresh(group)

        # create onboarding annotation task
        onboarding_task = AnnotationTask({
            'name': 'onboarding task',
            'topics': {
                'lending': {'topic_id': 2, 'topic_table': 'topics'},
            },
            'type': AnnotationTask.TOPIC_ANNOTATION_TYPE,
            'is_training_task': True,
            'annotation_task_topic_group_id': self.task_groups[0].id,
            # flat list of user ids (was accidentally wrapped in a second list)
            'user_ids': [self.user.id, self.new_user.id, self.qa_user.id]
        })
        db_session_users.add(onboarding_task)
        db_session_users.commit()
        db_session_users.refresh(onboarding_task)

        # create job for this onboarding task (so that if this is not an onboarding task, new task will be created)
        onboarding_job = AnnotationJob({
            "doc_id": 1,
            "priority": 1.0,
            "status": AnnotationJob.QUEUED_STATUS,
            "is_gold_evaluation": True
        })
        onboarding_job.annotation_task = onboarding_task
        db_session_users.add(onboarding_job)
        db_session_users.commit()
        db_session_users.refresh(onboarding_job)

        # make update to users
        request_body = json.dumps({
            'user_ids': [self.user.id]
        })
        response = self.client.post('/annotation_tasks/' + str(onboarding_task.id),
                                    headers={'Authorization': self.admin_user_token}, data=request_body)

        # check result - no new task should be created, because this is an onboarding task
        self.assert200(response)
        self.assertIn('annotation_task', response.json)
        self.assertIsInstance(response.json['annotation_task'], dict)
        task_result = response.json['annotation_task']

        self.assertEqual(task_result['user_ids'], [self.user.id])
        self.assertEqual(task_result['id'], onboarding_task.id)  # check that no new annotation_task was created

        # update task to be not-onboarding
        onboarding_task.is_training_task = False
        db_session_users.add(onboarding_task)
        db_session_users.commit()
        db_session_users.refresh(onboarding_task)

        # make another user_ids update
        request_body = json.dumps({
            'user_ids': [self.user.id, self.new_user.id]
        })
        response = self.client.post('/annotation_tasks/' + str(onboarding_task.id),
                                    headers={'Authorization': self.admin_user_token}, data=request_body)

        # confirm that new task was created this time
        self.assert200(response)
        self.assertIn('annotation_task', response.json)
        self.assertIsInstance(response.json['annotation_task'], dict)
        task_result = response.json['annotation_task']

        self.assertEqual(task_result['user_ids'], [self.user.id, self.new_user.id])
        self.assertNotEqual(task_result['id'], onboarding_task.id)  # check that new annotation_task was created
Esempio n. 15
0
    def before_each(self, with_annotations=False):
        """Reset annotation tables and seed two tasks with queued jobs.

        Creates tasks 'bar' and 'foo' (kept on self.tasks), a third
        annotate-any-document task that is persisted but not tracked, and four
        queued jobs per tracked task (kept on self.jobs). When with_annotations
        is true, each pair of jobs also gets one TopicAnnotation per task from
        self.user.
        """
        for model in (AnnotationTaskTermSamplingGroup, TopicAnnotation,
                      AnnotationJob, AnnotationTask, AnnotationTaskTopicGroup):
            db_session_users.query(model).delete()

        first_task = AnnotationTask({
            'name': 'bar',
            'topics': {
                'banking': {'topic_id': 1, 'topic_table': 'topics'},
                'lending': {'topic_id': 2, 'topic_table': 'topics'},
            },
            'type': 'topic_annotation'
        })
        second_task = AnnotationTask({
            'name': 'foo',
            'topics': {
                'fintech': {'topic_id': 3, 'topic_table': 'topics'},
                'payments': {'topic_id': 4, 'topic_table': 'topics'},
            },
            "user_ids": [self.user.id, self.new_user.id, self.qa_user.id],
            "config": {
                "dummy-value": 1
            }
        })
        # n.b. this third task is stored but deliberately left out of self.tasks
        untracked_task = AnnotationTask({
            'name': 'annotate any doc',
            'topics': {
                'banking': {'topic_id': 1, 'topic_table': 'topics'},
                'lending': {'topic_id': 2, 'topic_table': 'topics'},
            },
            'type': AnnotationTask.ANNOTATE_ANY_DOCUMENT_TYPE
        })
        self.tasks = [first_task, second_task]
        self.jobs = []

        for _ in xrange(1, 5):
            # one queued job per tracked task in every round
            round_jobs = []
            for task in (first_task, second_task):
                job = AnnotationJob({
                    "doc_id": 1,
                    "priority": 1.0,
                    "status": AnnotationJob.QUEUED_STATUS
                })
                job.annotation_task = task
                round_jobs.append(job)
            self.jobs.extend(round_jobs)

            if with_annotations:
                job_for_first, job_for_second = round_jobs
                first_annotation = TopicAnnotation(
                    {"doc_id": 1, "topic_name": "test", "is_positive": True, "user_id": self.user.id})
                first_annotation.annotation_job = job_for_first
                first_annotation.annotation_task = first_task
                second_annotation = TopicAnnotation(
                    {"doc_id": 3, "topic_name": "test2", "is_positive": True, "user_id": self.user.id})
                second_annotation.annotation_job = job_for_second
                second_annotation.annotation_task = second_task
                db_session_users.add_all([first_annotation, second_annotation])

        db_session_users.add_all(self.tasks)
        db_session_users.add(untracked_task)
        db_session_users.add_all(self.jobs)
        db_session_users.commit()
        for task in self.tasks:
            db_session_users.refresh(task)
Esempio n. 16
0
    def test_update_annotation_task_is_training_task(self):
        # TODO: this behavior creates new task even in case when illegal is_training_task update occurs;
        # need to either make this impossible in API side or enforce that this does not happen on frontend
        self.before_each()

        def request_training_flag(task):
            # POST is_training_task=True for the given task as the admin user
            payload = json.dumps({'is_training_task': True})
            return self.client.post('/annotation_tasks/{}'.format(task.id),
                                    headers={'Authorization': self.admin_user_token},
                                    data=payload)

        def check_replacement(task, response, expect_training):
            # the response must describe a new task with the expected flag,
            # and the old task must be retired and point at the new one
            self.assert200(response)
            self.assertIn('annotation_task', response.json)
            self.assertIsInstance(response.json['annotation_task'], dict)
            task_result = response.json['annotation_task']
            if expect_training:
                self.assertTrue(task_result['is_training_task'])
            else:
                self.assertFalse(task_result['is_training_task'])
            self.assertNotEqual(task_result['id'], task.id)  # new annotation_task was created
            db_session_users.refresh(task)
            self.assertEqual(task.status, 'inactive')  # old task is no longer active
            self.assertEqual(task.active_task_id, task_result['id'])  # old task points to new task

        ## update without an annotation_task_topic_group_id ##

        ungrouped_task = self.tasks[0]
        # fixture task starts out as a non-training task
        self.assertFalse(ungrouped_task.is_training_task)

        # without a topic group the flag cannot become True
        response = request_training_flag(ungrouped_task)
        check_replacement(ungrouped_task, response, expect_training=False)

        ## update with an annotation_task_topic_group_id ##

        # make an AnnotationTaskTopicGroup that contains the second annotation task in self.tasks
        grouped_task = self.tasks[1]
        self.task_groups = [
            AnnotationTaskTopicGroup({
                'name': 'task_group_1',
                'description': 'first task group',
                'annotation_task_ids': [grouped_task.id],
                'arbitrary_tags': ["tag1", "tag2"],
                'topic_id': 1,
                'gold_annotator_user_ids': [1, 2]
            })
        ]
        db_session_users.add_all(self.task_groups)
        db_session_users.commit()
        for task_group in self.task_groups:
            db_session_users.refresh(task_group)

        # make second task point to this annotation_task_topic_group
        grouped_task.annotation_task_topic_group_id = self.task_groups[0].id
        db_session_users.commit()

        # with a topic group set the flag can become True
        response = request_training_flag(grouped_task)
        check_replacement(grouped_task, response, expect_training=True)
Esempio n. 17
0
    def test_update_annotation_task_is_training_task_include_gold_annotations_but_no_jobs(self):
        """Update is_training_task / include_gold_annotations on a task after deleting all jobs.

        Scenario, in order:
          1. Reset fixtures, delete every AnnotationJob row and create a topic group
             that lists the first task.
          2. Request is_training_task=True while the task has no
             annotation_task_topic_group_id: the response must still report False.
          3. Point the task at the group and repeat the request: the response must
             now report True.
          4. Request include_gold_annotations=False: the response must report False.
        Every response must carry the id of the original task, i.e. the update was
        applied in place and no replacement task was created.
        """
        def submit_update(changes):
            """POST `changes` to the first task's endpoint; return its 'annotation_task' dict."""
            payload = json.dumps(changes)
            reply = self.client.post('/annotation_tasks/' + str(self.tasks[0].id),
                                     headers={'Authorization': self.admin_user_token},
                                     data=payload)
            self.assert200(reply)
            self.assertIn('annotation_task', reply.json)
            self.assertIsInstance(reply.json['annotation_task'], dict)
            return reply.json['annotation_task']

        self.before_each()
        # Bulk-delete all jobs; this is flushed by the commit that stores the group below.
        db_session_users.query(AnnotationJob).delete()

        # Topic group whose annotation_task_ids contains only the first task.
        topic_group = AnnotationTaskTopicGroup({
            'name': 'task_group_1',
            'description': 'first task group',
            'annotation_task_ids': [self.tasks[0].id],
            'arbitrary_tags': ["tag1", "tag2"],
            'topic_id': 1,
            'gold_annotator_user_ids': [1, 2]
        })
        self.task_groups = [topic_group]
        db_session_users.add_all(self.task_groups)
        db_session_users.commit()
        for persisted_group in self.task_groups:
            db_session_users.refresh(persisted_group)

        # --- is_training_task ---

        # The fixture task starts out as a non-training task.
        self.assertFalse(self.tasks[0].is_training_task)

        # Without an annotation_task_topic_group_id on the task, the flag must stay False.
        unchanged_task = submit_update({'is_training_task': True})
        self.assertIsNone(unchanged_task['annotation_task_topic_group_id'])
        self.assertFalse(unchanged_task['is_training_task'])
        self.assertEqual(unchanged_task['id'], self.tasks[0].id)  # same task, no new one created

        # Attach the task to the group created above; the same request must now take effect.
        self.tasks[0].annotation_task_topic_group_id = self.task_groups[0].id
        db_session_users.commit()

        training_task = submit_update({'is_training_task': True})
        self.assertEqual(training_task['annotation_task_topic_group_id'], self.task_groups[0].id)
        self.assertTrue(training_task['is_training_task'])
        self.assertEqual(training_task['id'], self.tasks[0].id)  # same task, no new one created

        # --- include_gold_annotations ---

        # The fixture task starts out with gold annotations included.
        self.assertTrue(self.tasks[0].include_gold_annotations)

        without_gold = submit_update({'include_gold_annotations': False})
        self.assertFalse(without_gold['include_gold_annotations'])
        self.assertEqual(without_gold['id'], self.tasks[0].id)  # same task, no new one created
Esempio n. 18
0
    def before_each(self):
        """Wipe the annotation tables and rebuild the fixtures used by the tests.

        After this runs the instance exposes:
          * self.valid_doc_ids           - 5 document ids
          * self.tasks                   - 3 AnnotationTask rows
          * self.jobs                    - 10 queued AnnotationJob rows
          * self.task_groups             - 1 AnnotationTaskTopicGroup row
          * self.users                   - 3 User rows (reused if already present)
          * self.topic_annotations       - 10 TopicAnnotation rows, one per job
          * self.aggregated_annotations  - 3 AggregatedAnnotations rows
        Each batch is committed and refreshed so primary keys are available.
        """
        # Tables must be emptied in this order to avoid key constraint errors
        # (see the matching ordering in test_annotation_tasks.py and test_annotation_jobs.py).
        for model in (AggregatedAnnotations,
                      AnnotationTaskTermSamplingGroup,
                      TopicAnnotation,
                      AnnotationJob,
                      AnnotationTask,
                      AnnotationTaskTopicGroup):
            db_session_users.query(model).delete()

        # Valid doc_ids in the ES index as of Nov 2017 (update if any become invalid).
        # More ids: [18, 21, 22, 160, 161]
        self.valid_doc_ids = [3, 8, 12, 13, 14]

        # ---- AnnotationTask objects (3 total, identical topics, different names) ----
        self.tasks = [
            AnnotationTask({
                'name': task_name,
                'topics': {
                    'BSA/AML': {'topic_id': 2, 'topic_table': 'topics'},
                    'Lending': {'topic_id': 1, 'topic_table': 'topics'},
                }
            })
            for task_name in ('bar', 'foo', 'fubar')
        ]
        db_session_users.add_all(self.tasks)
        db_session_users.commit()
        # refresh so the current primary key ids of the tasks are loaded
        for task in self.tasks:
            db_session_users.refresh(task)

        # ---- AnnotationJob objects (10 total) ----
        # Doc ids and parent tasks are both assigned round robin; only the first
        # three jobs carry an arbitrary tag.
        self.jobs = []
        for idx in xrange(10):
            job = AnnotationJob({
                "doc_id": self.valid_doc_ids[idx % len(self.valid_doc_ids)],
                "priority": 1.0,
                "arbitrary_tags": ["tag_{}".format(idx)] if idx <= 2 else [],
                "status": AnnotationJob.QUEUED_STATUS
            })
            job.annotation_task = self.tasks[idx % len(self.tasks)]
            self.jobs.append(job)
        db_session_users.add_all(self.jobs)
        db_session_users.commit()
        # refresh so the current primary key ids of the jobs are loaded
        for job in self.jobs:
            db_session_users.refresh(job)

        # ---- AnnotationTaskTopicGroup object (1 total, holding the first two tasks) ----
        self.task_groups = [
            AnnotationTaskTopicGroup({
                'name': 'task_group_1',
                'description': 'first task group',
                'annotation_task_ids': [self.tasks[0].id, self.tasks[1].id],
                'arbitrary_tags': ["tag1", "tag2"],
                'topic_id': 1
            })
        ]
        db_session_users.add_all(self.task_groups)
        db_session_users.commit()
        for group in self.task_groups:
            db_session_users.refresh(group)

        # ---- User objects (3 total) ----
        # Used for testing retrieval of user names as judges; users are only created
        # when the first one is not already in the database.
        # N.B. in future use a factory for creating users for testing
        # NOTE(review): the e-mail literals look redacted in this copy of the file;
        # they are reproduced verbatim - confirm the real addresses before relying on them.
        first_known_user = db_session_users.query(User).filter_by(email='*****@*****.**').first()
        if first_known_user is None:
            self.users = [
                User({
                    'first_name': 'Alice',
                    'last_name': 'Smith',
                    'email': '*****@*****.**',
                    'password': '******'
                }),
                User({
                    'first_name': 'Bob',
                    'last_name': 'Doe',
                    'email': '*****@*****.**',
                    'password': '******'
                }),
                User({
                    'first_name': 'Cindy',
                    'last_name': 'Zeta',
                    'email': '*****@*****.**',
                    'password': '******'
                }),
            ]
            db_session_users.add_all(self.users)
            db_session_users.commit()
            for user in self.users:
                db_session_users.refresh(user)
        else:
            self.users = [
                db_session_users.query(User).filter_by(email='*****@*****.**').first(),
                db_session_users.query(User).filter_by(email='*****@*****.**').first(),
                db_session_users.query(User).filter_by(email='*****@*****.**').first(),
            ]

        # ---- TopicAnnotation objects (10 total, exactly one per AnnotationJob) ----
        # Even-indexed annotations are positive; annotators rotate over the three users.
        self.topic_annotations = []
        for idx, job in enumerate(self.jobs):
            annotation = TopicAnnotation({
                "doc_id": job.doc_id,
                "is_positive": idx % 2 == 0,
                "user_id": self.users[idx % 3].id,
                "topic_name": "Lending",
                "annotation_task_id": self.tasks[idx % len(self.tasks)].id
            })
            annotation.annotation_job = job
            self.topic_annotations.append(annotation)
        db_session_users.add_all(self.topic_annotations)
        db_session_users.commit()
        for annotation in self.topic_annotations:
            db_session_users.refresh(annotation)

        # ---- AggregatedAnnotations objects (3 total) ----
        # NB: AggregatedAnnotations has foreign keys for annotation_task_group and topic_annotation
        # NB: this table is populated by a daemon job in the actual database
        # All three share topic_id 1 ("Lending") and the single task group.
        self.aggregated_annotations = [
            # valid_doc_ids[0] is the doc of self.jobs[0] and self.jobs[5]
            AggregatedAnnotations({
                'annotation_task_group_id': self.task_groups[0].id,
                'doc_id': self.valid_doc_ids[0],
                'topic_id': 1,
                'is_gold_standard': True,
                'gold_topic_annotation_id': self.topic_annotations[0].id,
                'is_in_agreement': True,
                'is_active_for_gold_annotation': True,
                'gold_difficulty': "easy",
                'arbitrary_tags': ["tag_1", "tag_2"],
                'notes': "text_1"
            }),
            # valid_doc_ids[1] is the doc of self.jobs[1] and self.jobs[6]
            AggregatedAnnotations({
                'annotation_task_group_id': self.task_groups[0].id,
                'doc_id': self.valid_doc_ids[1],
                'topic_id': 1,
                'is_gold_standard': True,
                'gold_topic_annotation_id': self.topic_annotations[1].id,
                'is_in_agreement': False,
                'is_active_for_gold_annotation': True,
                'gold_difficulty': "medium",
                'arbitrary_tags': ["tag_1", "tag_3"],
                'notes': "text_1"
            }),
            # valid_doc_ids[2] is the doc of self.jobs[2] and self.jobs[7];
            # this one deliberately carries no 'is_in_agreement' key
            AggregatedAnnotations({
                'annotation_task_group_id': self.task_groups[0].id,
                'doc_id': self.valid_doc_ids[2],
                'topic_id': 1,
                'is_gold_standard': True,
                'gold_topic_annotation_id': self.topic_annotations[2].id,
                'is_active_for_gold_annotation': True,
                'gold_difficulty': "hard",
                'arbitrary_tags': ["tag_1", "tag_2"],
                'notes': "text_1"
            }),
        ]
        db_session_users.add_all(self.aggregated_annotations)
        db_session_users.commit()
        # refresh so the current primary key ids of the aggregated annotations are loaded
        for aggregated in self.aggregated_annotations:
            db_session_users.refresh(aggregated)
Esempio n. 19
0
    def test_update_research_mode_expanded_view(self):
        """Exercise POST /aggregated_annotations/research/<id> against the first fixtures.

        Covers, in order:
          1. a topic-annotation update (is_positive, user_id, admin_notes) and the
             resulting 'previous_annotators' entry in the annotation's details;
          2. a job update of arbitrary_tags, mirrored onto the aggregated annotation;
          3. a job update of user_difficulty, mirrored as gold_difficulty;
          4. a tag update after the job's task was removed from the task group while
             its active task remains in the group.
        """
        def fetch_topic_annotation():
            """Re-query the first fixture TopicAnnotation."""
            return db_session_users.query(TopicAnnotation) \
                                   .filter_by(id=self.topic_annotations[0].id) \
                                   .first()

        def fetch_job():
            """Re-query the first fixture AnnotationJob."""
            return db_session_users.query(AnnotationJob) \
                                   .filter_by(id=self.jobs[0].id) \
                                   .first()

        def post_research_update(params):
            """POST `params` for the first aggregated annotation and require a successful reply."""
            body = json.dumps(params)
            reply = self.client.post(
                '/aggregated_annotations/research/' + str(self.aggregated_annotations[0].id),
                headers={'Authorization': self.admin_user_token},
                data=body)
            self.assert200(reply)
            self.assertTrue(reply.json['success'])

        self.before_each()

        # ---- 1. topic annotation update ----

        # state of the topic annotation straight out of the fixtures
        annotation_before = fetch_topic_annotation()
        self.assertTrue(annotation_before.is_positive)
        self.assertIsNone(annotation_before.admin_notes)
        self.assertEqual(annotation_before.user_id, self.users[0].id)
        self.assertEqual(annotation_before.details, {})

        post_research_update({
            'topic_annotation_id': self.topic_annotations[0].id,
            'topic_annotation_updates': {
                "is_positive": False,
                "user_id": self.users[1].id,
                "admin_notes": 'this tation was SUPER hard bruh'
            }
        })

        # the annotation and its job must both reflect the new values
        annotation_after = fetch_topic_annotation()
        job_after = fetch_job()

        self.assertFalse(annotation_after.is_positive)
        self.assertEqual(annotation_after.admin_notes, 'this tation was SUPER hard bruh')
        self.assertEqual(annotation_after.user_id, self.users[1].id)
        self.assertEqual(job_after.user_id, self.users[1].id)

        # the replaced annotator's values must be recorded in details
        self.assertNotEqual(annotation_after.details, {})
        replaced_annotator = annotation_after.details['previous_annotators'][0]
        self.assertEqual(replaced_annotator['is_positive'], True)
        self.assertEqual(replaced_annotator['user_id'], self.users[0].id)

        # ---- 2. job update: arbitrary_tags ----

        # baseline for the job-level updates: the gold standard job and the
        # aggregated annotation containing it still hold their fixture values
        job_baseline = fetch_job()
        self.assertEqual(job_baseline.arbitrary_tags, ['tag_0'])
        self.assertIsNone(job_baseline.user_difficulty)
        self.assertEqual(job_baseline.annotation_task_id, self.tasks[0].id)
        self.assertEqual(self.aggregated_annotations[0].arbitrary_tags, ["tag_1", "tag_2"])
        self.assertEqual(self.aggregated_annotations[0].gold_difficulty, 'easy')

        post_research_update({
            'topic_annotation_id': self.topic_annotations[0].id,
            'annotation_job_updates': {
                'arbitrary_tags': ['tag2', 'NOT ALLOWED TAG']
            }
        })

        job_after = fetch_job()
        # only 'tag2' is kept since the other tag is not in the AnnotationTaskTopicGroup
        self.assertEqual(job_after.arbitrary_tags, ['tag2'])
        self.assertIsNone(job_after.user_difficulty)
        # the aggregated annotation must have had its arbitrary_tags updated as well
        db_session_users.refresh(self.aggregated_annotations[0])
        self.assertEqual(self.aggregated_annotations[0].arbitrary_tags, ['tag2'])

        # ---- 3. job update: user_difficulty ----

        post_research_update({
            'topic_annotation_id': self.topic_annotations[0].id,
            'annotation_job_updates': {
                'user_difficulty': 'medium'
            }
        })

        job_after = fetch_job()
        self.assertEqual(job_after.arbitrary_tags, ['tag2'])  # untouched by this request
        self.assertEqual(job_after.user_difficulty, 'medium')  # changed by this request
        # the aggregated annotation must have had its gold_difficulty updated as well
        db_session_users.refresh(self.aggregated_annotations[0])
        self.assertEqual(self.aggregated_annotations[0].gold_difficulty, 'medium')

        # ---- 4. tag update when only the job's *active* task is in the group ----

        # make the job's task point at the second task (same group) as its active task
        retired_task = self.tasks[0]
        retired_task.active_task_id = self.tasks[1].id
        db_session_users.add(retired_task)
        db_session_users.commit()
        db_session_users.refresh(retired_task)
        # shrink the group so it no longer lists the job's own task, only its active task
        group = self.task_groups[0]
        group.annotation_task_ids = [self.tasks[1].id]
        db_session_users.add(group)
        db_session_users.commit()
        db_session_users.refresh(group)

        post_research_update({
            'topic_annotation_id': self.topic_annotations[0].id,
            'annotation_job_updates': {
                'arbitrary_tags': ['tag1', 'NOT ALLOWED TAG']
            }
        })

        job_after = fetch_job()
        # only 'tag1' is kept since the other tag is not in the AnnotationTaskTopicGroup
        self.assertEqual(job_after.arbitrary_tags, ['tag1'])
    def before_each(self):
        """Reset jobs, tasks and flagged documents, then seed one task with per-user jobs.

        For each doc_id 1..5 this appends, in order:
          * a COMPLETE job for self.user (plus a flagged document when doc_id is even),
          * a COMPLETE job and a flagged document (with a status) for self.contributor_user,
          * a COMPLETE job for self.qa_user,
          * a QUEUED job for self.internal_user.
        Results are stored in self.tasks, self.jobs and self.docs, committed in one
        transaction and refreshed afterwards.
        """
        for model in (AnnotationJob, AnnotationTask, UserFlaggedDocument):
            db_session_users.query(model).delete()

        task = AnnotationTask({
            'name': 'foo',
            'type': 'contributor',
        })
        self.tasks = [task]
        self.jobs = []
        self.docs = []

        def add_job(doc_id, owner_id, status):
            """Append a job for `doc_id` owned by `owner_id`; even doc ids get priority 1.0, odd 0.5."""
            job = AnnotationJob({
                "doc_id": doc_id,
                "priority": 1.0 if doc_id % 2 == 0 else 0.5,
                "status": status,
                "user_id": owner_id
            })
            job.annotation_task = task
            self.jobs.append(job)

        def add_flagged_doc(doc_id, owner_id, **extra_fields):
            """Append a review-severity contributor flag; `extra_fields` adds optional keys."""
            fields = {
                'user_id': owner_id,
                'doc_id': doc_id,
                'issue_severity': UserFlaggedDocument.REVIEW_SEVERITY,
                'issue_type': UserFlaggedDocument.CONTRIBUTOR_ISSUE_TYPE,
                'multiple_field': {"field1": "test notes"},
            }
            fields.update(extra_fields)
            self.docs.append(UserFlaggedDocument(fields))

        for doc_id in xrange(1, 6):
            doc_id_is_even = doc_id % 2 == 0

            # regular user: flagged document only for even doc ids, no explicit status
            add_job(doc_id, self.user.id, AnnotationJob.COMPLETE_STATUS)
            if doc_id_is_even:
                add_flagged_doc(doc_id, self.user.id)

            # contributor: always flagged; status is FIXED for even doc ids, PROCESSED for odd
            add_job(doc_id, self.contributor_user.id, AnnotationJob.COMPLETE_STATUS)
            add_flagged_doc(
                doc_id,
                self.contributor_user.id,
                status=(UserFlaggedDocument.FIXED_STATUS if doc_id_is_even
                        else UserFlaggedDocument.PROCESSED_STATUS))

            # QA user: completed job, no flagged document
            add_job(doc_id, self.qa_user.id, AnnotationJob.COMPLETE_STATUS)

            # internal user: the only job left in the queued state
            add_job(doc_id, self.internal_user.id, AnnotationJob.QUEUED_STATUS)

        db_session_users.add_all(self.tasks)
        db_session_users.add_all(self.jobs)
        db_session_users.add_all(self.docs)
        db_session_users.commit()
        # refresh tasks, then jobs, then flagged documents after the commit
        for record in self.tasks + self.jobs + self.docs:
            db_session_users.refresh(record)