Esempio n. 1
0
class TestResultRepository(Test):
    """Tests for ResultRepository: get/get_by/filter_by queries and update."""

    def setUp(self):
        super(TestResultRepository, self).setUp()
        self.result_repo = ResultRepository(db)

    def create_result(self, n_answers=1, filter_by=False):
        """Create a completed task (with one task run) and return its result.

        :param n_answers: redundancy required for the task.
        :param filter_by: when True return a list via ``filter_by``,
            otherwise a single result via ``get_by``.
        """
        task = TaskFactory.create(n_answers=n_answers)
        TaskRunFactory.create(task=task)
        if filter_by:
            return self.result_repo.filter_by(project_id=1)
        else:
            return self.result_repo.get_by(project_id=1)

    def test_get_return_none_if_no_result(self):
        """Test get method returns None if there is no result with the
        specified id"""

        result = self.result_repo.get(2)

        assert result is None, result

    def test_get_returns_result(self):
        """Test filter_by method returns a result if it exists"""

        n_answers = 1

        task = TaskFactory.create(n_answers=n_answers)
        task_run = TaskRunFactory.create(task=task)

        result = self.result_repo.filter_by(project_id=1)

        err_msg = "There should be a result"
        assert len(result) == 1, err_msg
        result = result[0]
        assert result.project_id == 1, err_msg
        assert result.task_id == task.id, err_msg
        assert len(result.task_run_ids) == n_answers, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in result.task_run_ids:
            assert tr_id == task_run.id, err_msg

    def test_get_by_returns_result(self):
        """Test get_by method returns a result if exists"""

        n_answers = 1

        task = TaskFactory.create(n_answers=n_answers)
        task_run = TaskRunFactory.create(task=task)

        result = self.result_repo.get_by(project_id=1)

        err_msg = "There should be a result"
        assert result.project_id == 1, err_msg
        assert result.task_id == task.id, err_msg
        assert len(result.task_run_ids) == n_answers, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in result.task_run_ids:
            assert tr_id == task_run.id, err_msg

    def test_get_returns_result_after_increasig_redundancy(self):
        """Test filter_by returns the expected results after increasing
        the task redundancy"""

        n_answers = 1

        task = TaskFactory.create(n_answers=n_answers)
        task_run = TaskRunFactory.create(task=task)

        result = self.result_repo.filter_by(project_id=1)

        err_msg = "There should be a result"
        assert len(result) == 1, err_msg
        result = result[0]
        assert result.project_id == 1, err_msg
        assert result.task_id == task.id, err_msg
        assert len(result.task_run_ids) == n_answers, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in result.task_run_ids:
            assert tr_id == task_run.id, err_msg

        # Increase redundancy. NOTE: update the instance that was actually
        # modified (previously the unmodified `task` handle was passed to
        # update, so the new n_answers was not guaranteed to be persisted).
        tmp = task_repo.get_task(task.id)
        tmp.n_answers = 2
        task_repo.update(tmp)

        err_msg = "There should be only one result"
        results = self.result_repo.filter_by(project_id=1)
        assert len(results) == 1, err_msg
        task_run_2 = TaskRunFactory.create(task=task)

        err_msg = "There should be 1 results"
        results = self.result_repo.filter_by(project_id=1)
        assert len(results) == 1, err_msg

        err_msg = "There should be 2 results"
        results = self.result_repo.filter_by(project_id=1, last_version=False)
        assert len(results) == 2, err_msg

        assert results[1].project_id == 1, err_msg
        assert results[1].task_id == task.id, err_msg
        err_msg = "First result should have only one task run ID"
        assert len(results[0].task_run_ids) == 1, err_msg
        err_msg = "Second result should have only two task run IDs"
        assert len(results[1].task_run_ids) == 2, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in results[1].task_run_ids:
            assert tr_id in [task_run.id, task_run_2.id], err_msg

    def test_get_returns_no_result(self):
        """Test get method does not return a result if task not completed"""

        n_answers = 3

        task = TaskFactory.create(n_answers=n_answers)
        TaskRunFactory.create(task=task)

        result = self.result_repo.filter_by(project_id=1)

        err_msg = "There should not be a result"
        assert len(result) == 0, err_msg

    def test_fulltext_search_result(self):
        """Test fulltext search in JSON info works."""
        result = self.create_result()
        text = 'something word you me bar'
        data = {'foo': text}
        result.info = data
        self.result_repo.update(result)

        info = 'foo::word'
        res = self.result_repo.filter_by(info=info, fulltextsearch='1')
        assert len(res) == 1, len(res)
        assert res[0].info['foo'] == text, res[0]

        # Without fulltextsearch only exact matches are returned, so a
        # single word from the text must not match.
        res = self.result_repo.filter_by(info=info)
        assert len(res) == 0, len(res)

    def test_fulltext_search_result_01(self):
        """Test fulltext search in JSON info works."""
        result = self.create_result()
        text = 'something word you me bar'
        data = {'foo': text, 'bar': 'foo'}
        result.info = data
        self.result_repo.update(result)

        info = 'foo::word&bar|bar::foo'
        res = self.result_repo.filter_by(info=info, fulltextsearch='1')
        assert len(res) == 1, len(res)
        assert res[0].info['foo'] == text, res[0]

    def test_info_json_search_result(self):
        """Test search in JSON info works."""
        result = self.create_result()
        text = 'bar'
        data = {'foo': text}
        result.info = data
        self.result_repo.update(result)

        info = 'foo::bar'
        res = self.result_repo.filter_by(info=info)
        assert len(res) == 1, len(res)
        assert res[0].info['foo'] == text, res[0]

    def test_update(self):
        """Test update persists the changes made to the result"""

        result = self.create_result()
        result.info = dict(new='value')

        self.result_repo.update(result)
        updated_result = self.result_repo.get(result.id)

        assert updated_result.info['new'] == 'value', updated_result

    def test_update_fails_if_integrity_error(self):
        """Test update raises a DBIntegrityError if the instance to be updated
        lacks a required value"""

        result = self.create_result()
        result.project_id = None

        assert_raises(DBIntegrityError, self.result_repo.update, result)

    def test_update_only_updates_results(self):
        """Test update raises a WrongObjectError when an object which is not
        a Result instance is updated"""

        bad_object = dict()

        assert_raises(WrongObjectError, self.result_repo.update, bad_object)
Esempio n. 2
0
class TestBaseAnalyst(Test):
    """Tests for BaseAnalyst: analysis triggers, dataframe helpers,
    transcription normalisation, redundancy updates, overlap ratios,
    template lookup and comment-annotation emails."""

    def setUp(self):
        super(TestBaseAnalyst, self).setUp()
        # Clear the abstract method set so the abstract base class can be
        # instantiated directly for testing.
        BaseAnalyst.__abstractmethods__ = frozenset()
        self.ctx = ContextFixtures()
        self.base_analyst = BaseAnalyst()
        self.project_repo = ProjectRepository(db)
        self.result_repo = ResultRepository(db)
        self.task_repo = TaskRepository(db)
        # Show full diffs on assertion failures.
        assert_dict_equal.__self__.maxDiff = None
        assert_equal.__self__.maxDiff = None

    @with_context
    @patch("pybossa_lc.analysis.base.BaseAnalyst.analyse")
    def test_analyse_all(self, mock_analyse):
        """Test that all results are analysed."""
        project = ProjectFactory()
        tasks = TaskFactory.create_batch(2, project=project, n_answers=1)
        for task in tasks:
            TaskRunFactory.create(task=task)
        result = self.result_repo.get_by(task_id=tasks[0].id)
        result.info = dict(annotations=[{}])
        self.result_repo.update(result)
        self.base_analyst.analyse_all(project.id)
        expected = [call(t.id, analyse_full=True) for t in tasks]
        assert_equal(mock_analyse.call_args_list, expected)

    @with_context
    @patch("pybossa_lc.analysis.base.BaseAnalyst.analyse")
    def test_analyse_empty(self, mock_analyse):
        """Test that empty results are analysed."""
        project = ProjectFactory()
        tasks = TaskFactory.create_batch(2, project=project, n_answers=1)
        for task in tasks:
            TaskRunFactory.create(task=task)
        result = self.result_repo.get_by(task_id=tasks[0].id)
        result.info = dict(annotations=[{}])
        self.result_repo.update(result)
        all_results = self.result_repo.filter_by(project_id=project.id)
        self.base_analyst.analyse_empty(project.id)
        expected = [call(r.task_id) for r in all_results if not r.info]
        assert_equal(mock_analyse.call_args_list, expected)

    @with_context
    def test_key_dropped(self):
        """Test the correct keys are dropped."""
        data = [{'foo': None, 'bar': None}]
        df = pandas.DataFrame(data, range(len(data)))
        excluded = ['foo']
        df = self.base_analyst.drop_keys(df, excluded)
        assert_not_in('foo', df.keys())
        assert_in('bar', df.keys())

    @with_context
    def test_empty_rows_dropped(self):
        """Test empty rows are dropped."""
        data = [{'foo': 'bar'}, {'foo': None}]
        df = pandas.DataFrame(data, range(len(data)))
        df = self.base_analyst.drop_empty_rows(df)
        assert_equal(df['foo'].tolist(), ['bar'])

    @with_context
    def test_partial_rows_not_dropped(self):
        """Test partial rows are not dropped."""
        data = [{'foo': 'bar', 'baz': None}]
        df = pandas.DataFrame(data, range(len(data)))
        df = self.base_analyst.drop_empty_rows(df)
        expected = {'foo': {0: 'bar'}, 'baz': {0: None}}
        assert_dict_equal(df.to_dict(), expected)

    @with_context
    def test_match_fails_when_percentage_not_met(self):
        """Test False is returned when min answers not met."""
        data = [{'foo': 'bar', 'baz': None}]
        df = pandas.DataFrame(data, range(len(data)))
        min_answers = 2
        has_matches = self.base_analyst.has_n_matches(min_answers, df)
        assert_equal(has_matches, False)

    @with_context
    def test_match_fails_when_nan_cols(self):
        """Test False is returned when NaN columns only."""
        data = [{'foo': None}]
        df = pandas.DataFrame(data, range(len(data)))
        df = df.replace('', numpy.nan)
        min_answers = 2
        has_matches = self.base_analyst.has_n_matches(min_answers, df)
        assert_equal(has_matches, False)

    @with_context
    def test_match_succeeds_when_percentage_met(self):
        """Test True returned when match percentage met."""
        data = [{'foo': 'bar'}, {'foo': 'bar'}]
        df = pandas.DataFrame(data, range(len(data)))
        min_answers = 2
        has_matches = self.base_analyst.has_n_matches(min_answers, df)
        assert_equal(has_matches, True)

    @with_context
    def test_get_dataframe_with_dict(self):
        """Test the task run dataframe with a dict as the info."""
        info = {'foo': 'bar'}
        n_task_runs = 2
        task = TaskFactory()
        taskruns = TaskRunFactory.create_batch(n_task_runs,
                                               task=task,
                                               info=info)
        df = self.base_analyst.get_task_run_df(task, taskruns)
        assert_equal(df['foo'].tolist(), [info['foo']] * n_task_runs)
        assert_equal(df['info'].tolist(), [info] * n_task_runs)

    @with_context
    def test_get_dataframe_with_list(self):
        """Test the task run dataframe with a list as the info."""
        info = [{'foo': 'bar'}, {'baz': 'qux'}]
        n_task_runs = 2
        task = TaskFactory()
        taskruns = TaskRunFactory.create_batch(n_task_runs,
                                               task=task,
                                               info=info)
        df = self.base_analyst.get_task_run_df(task, taskruns)
        assert_equal(df['info'].tolist(), [info] * n_task_runs)

    @with_context
    def test_protected_keys_prefixed_when_exploded(self):
        """Test that protected info keys are prefixed."""
        info = {'foo': 'bar', 'info': 'baz'}
        task = TaskFactory()
        taskrun = TaskRunFactory.create(task=task, info=info)
        df = self.base_analyst.get_task_run_df(task, [taskrun])
        assert_equal(df['_info'].tolist(), [info['info']])

    @with_context
    def test_user_ids_in_task_run_dataframe(self):
        """Test that user IDs are included in the task run dataframe."""
        task = TaskFactory()
        taskruns = TaskRunFactory.create_batch(2, task=task)
        df = self.base_analyst.get_task_run_df(task, taskruns)
        assert_equal(df['user_id'].tolist(), [tr.user_id for tr in taskruns])

    def test_titlecase_normalisation(self):
        """Test titlecase normalisation."""
        rules = dict(case='title')
        norm = self.base_analyst.normalise_transcription('Some words', rules)
        assert_equal(norm, 'Some Words')

    def test_lowercase_normalisation(self):
        """Test lowercase normalisation."""
        rules = dict(case='lower')
        norm = self.base_analyst.normalise_transcription('Some words', rules)
        assert_equal(norm, 'some words')

    def test_uppercase_normalisation(self):
        """Test uppercase normalisation."""
        rules = dict(case='upper')
        norm = self.base_analyst.normalise_transcription('Some words', rules)
        assert_equal(norm, 'SOME WORDS')

    def test_whitespace_normalisation(self):
        """Test whitespace normalisation."""
        rules = dict(whitespace='normalise')
        norm = self.base_analyst.normalise_transcription(' Two  Words', rules)
        assert_equal(norm, 'Two Words')

    def test_whitespace_replace_underscore(self):
        """Test replacing whitespace with underscore normalisation."""
        rules = dict(whitespace='underscore')
        norm = self.base_analyst.normalise_transcription(' Two  Words', rules)
        assert_equal(norm, 'Two_Words')

    def test_whitespace_replace_full_stop(self):
        """Test replacing whitespace with full stop normalisation."""
        rules = dict(whitespace='full_stop')
        norm = self.base_analyst.normalise_transcription(' Two  Words', rules)
        assert_equal(norm, 'Two.Words')

    def test_trim_punctuation_normalisation(self):
        """Test trim punctuation normalisation."""
        rules = dict(trim_punctuation=True)
        norm = self.base_analyst.normalise_transcription(':Oh, a word.', rules)
        assert_equal(norm, 'Oh, a word')

    def test_date_not_normalised_if_rule_inactive(self):
        """Test date conversion not applied of rule not activate."""
        norm = self.base_analyst.normalise_transcription('foo', {})
        assert_equal(norm, 'foo')

    def test_date_conversion_with_slash(self):
        """Test date conversion with slash seperators."""
        rules = dict(date_format=True, dayfirst=True)
        norm = self.base_analyst.normalise_transcription('19/11/1984', rules)
        assert_equal(norm, '1984-11-19')

    def test_date_conversion_with_hyphen(self):
        """Test date conversion with hyphen seperator."""
        rules = dict(date_format=True, dayfirst=True)
        norm = self.base_analyst.normalise_transcription('19-11-1984', rules)
        assert_equal(norm, '1984-11-19')

    def test_date_conversion_with_no_seperator(self):
        """Test date conversion with no seperator."""
        rules = dict(date_format=True, dayfirst=True)
        norm = self.base_analyst.normalise_transcription('19111984', rules)
        assert_equal(norm, '')

    def test_date_conversion_with_no_year_and_year_last(self):
        """Test date conversion with no year and year last."""
        rules = dict(date_format=True, dayfirst=True)
        norm = self.base_analyst.normalise_transcription('19/11', rules)
        assert_equal(norm, '-11-19')

    def test_date_conversion_with_no_year_and_year_first(self):
        """Test date conversion with no year and year first."""
        rules = dict(date_format=True, yearfirst=True)
        norm = self.base_analyst.normalise_transcription('11/19', rules)
        assert_equal(norm, '-11-19')

    def test_date_conversion_with_invalid_string(self):
        """Test date conversion with invalid string."""
        rules = dict(date_format=True, dayfirst=True)
        norm = self.base_analyst.normalise_transcription('No date', rules)
        assert_equal(norm, '')

    def test_date_conversion_with_zero(self):
        """Test date conversion with zero."""
        rules = dict(date_format=True, dayfirst=True)
        norm = self.base_analyst.normalise_transcription('0', rules)
        assert_equal(norm, '')

    def test_date_conversion_with_non_zero_integer(self):
        """Test date conversion with non-zero integer."""
        rules = dict(date_format=True, dayfirst=True)
        norm = self.base_analyst.normalise_transcription('1', rules)
        assert_equal(norm, '')

    def test_date_conversion_with_trailing_punctuation(self):
        """Test date conversion with trailing punctuation."""
        rules = dict(date_format=True, dayfirst=True)
        norm = self.base_analyst.normalise_transcription('19/11/', rules)
        assert_equal(norm, '-11-19')

    def test_date_conversion_with_trailing_whitespace(self):
        """Test date conversion with trailing whitespace."""
        rules = dict(date_format=True, dayfirst=True)
        norm = self.base_analyst.normalise_transcription('19/11/1984 ', rules)
        assert_equal(norm, '1984-11-19')

    @with_context
    def test_n_answers_increased_when_task_complete(self):
        """Test n answers required for a task is updated."""
        n_original_answers = 1
        task = TaskFactory.create(n_answers=n_original_answers)
        TaskRunFactory.create(task=task)
        self.base_analyst.update_n_answers_required(task, False)
        assert_equal(task.n_answers, n_original_answers + 1)
        assert_equal(task.state, 'ongoing')

    @with_context
    def test_n_answers_not_increased_when_still_task_runs(self):
        """Test n answers not updated when task runs still required."""
        n_original_answers = 2
        task = TaskFactory.create(n_answers=n_original_answers)
        TaskRunFactory.create(task=task)
        self.base_analyst.update_n_answers_required(task, False)
        assert_equal(task.n_answers, n_original_answers)
        assert_equal(task.state, 'ongoing')

    @with_context
    def test_n_answers_not_increased_when_max_answers_reached(self):
        """Test n answers not updated when max answers reached."""
        n_answers = 3
        task = TaskFactory.create(n_answers=n_answers)
        TaskRunFactory.create_batch(n_answers, task=task)
        self.base_analyst.update_n_answers_required(task,
                                                    False,
                                                    max_answers=n_answers)
        assert_equal(task.n_answers, n_answers)
        assert_equal(task.state, 'completed')

    @with_context
    def test_n_answers_reduced_when_task_complete(self):
        """Test n answers reduced to number of task runs when task complete."""
        n_answers = 3
        task = TaskFactory.create(n_answers=n_answers)
        TaskRunFactory.create_batch(n_answers - 1, task=task)
        self.base_analyst.update_n_answers_required(task,
                                                    True,
                                                    max_answers=n_answers)
        assert_equal(task.n_answers, n_answers - 1)
        assert_equal(task.state, 'completed')

    def test_overlap_ratio_is_1_with_equal_rects(self):
        """Test for an overlap ratio of 1."""
        rect = {'x': 100, 'y': 100, 'w': 100, 'h': 100}
        overlap = self.base_analyst.get_overlap_ratio(rect, rect)
        assert_equal(overlap, 1)

    def test_overlap_ratio_is_0_with_adjacent_rects(self):
        """Test for an overlap ratio of 0."""
        r1 = {'x': 100, 'y': 100, 'w': 100, 'h': 100}
        r2 = {'x': 100, 'y': 201, 'w': 100, 'h': 100}
        overlap = self.base_analyst.get_overlap_ratio(r1, r2)
        assert_equal(overlap, 0)

    def test_overlap_ratio_with_partially_overlapping_rects(self):
        """Test for an overlap ratio of 0.33."""
        r1 = {'x': 100, 'y': 100, 'w': 100, 'h': 100}
        r2 = {'x': 150, 'y': 100, 'w': 100, 'h': 100}
        overlap = self.base_analyst.get_overlap_ratio(r1, r2)
        assert_equal('{:.2f}'.format(overlap), '0.33')

    def test_overlap_ratio_where_union_is_zero(self):
        """Test for an overlap ratio where the union is zero."""
        r1 = {'x': 0, 'y': 0, 'w': 100, 'h': 100}
        r2 = {'x': 101, 'y': 0, 'w': 100, 'h': 100}
        overlap = self.base_analyst.get_overlap_ratio(r1, r2)
        assert_equal(overlap, 0)

    def test_rect_from_selection(self):
        """Test that we get the correct rect."""
        coords = dict(x=400, y=200, w=100, h=150)
        coords_str = '{0},{1},{2},{3}'.format(coords['x'], coords['y'],
                                              coords['w'], coords['h'])
        fake_anno = {
            'target': {
                'selector': {
                    'value': '?xywh={}'.format(coords_str)
                }
            }
        }
        rect = self.base_analyst.get_rect_from_selection_anno(fake_anno)
        assert_dict_equal(rect, coords)

    def test_rect_from_selection_with_floats(self):
        """Test that we get the correct rect with rounded coordinates."""
        coords = dict(x=400.001, y=200.499, w=100.501, h=150.999)
        coords_str = '{0},{1},{2},{3}'.format(coords['x'], coords['y'],
                                              coords['w'], coords['h'])
        fake_anno = {
            'target': {
                'selector': {
                    'value': '?xywh={}'.format(coords_str)
                }
            }
        }
        rect = self.base_analyst.get_rect_from_selection_anno(fake_anno)
        assert_dict_equal(rect, {'x': 400, 'y': 200, 'w': 101, 'h': 151})

    @with_context
    def test_get_project_template(self):
        """Test that the correct template is returned."""
        category = CategoryFactory()
        tmpl_fixtures = TemplateFixtures(category)
        tmpl1 = tmpl_fixtures.create()
        tmpl2 = tmpl_fixtures.create()
        fake_templates = [tmpl1, tmpl2]
        category.info = dict(templates=fake_templates)
        self.project_repo.update_category(category)
        project_info = dict(template_id=tmpl1['id'])
        project = ProjectFactory(category=category, info=project_info)
        ret_tmpl = self.base_analyst.get_project_template(project)
        assert_equal(ret_tmpl, tmpl1)

    @with_context
    @raises(ValueError)
    def test_get_invalid_project_template(self):
        """Test that getting an invalid template throws an error."""
        fake_templates = [{'id': 'foo'}]
        user_info = dict(templates=fake_templates)
        project_info = dict(template_id='bar')
        UserFactory.create(info=user_info)
        project = ProjectFactory(info=project_info)
        self.base_analyst.get_project_template(project)

    @with_context
    @raises(ValueError)
    def test_get_non_existant_project_template(self):
        """Test that getting a non-existant template throws an error."""
        project = ProjectFactory()
        self.base_analyst.get_project_template(project)

    def test_dataframe_keys_replaced(self):
        """Test that dataframe keys are replaced and columns merged."""
        data = [{'foo': '你好', 'baz': 'qux'}, {'foo': 1, 'quux': 'qux'}]
        old_df = pandas.DataFrame(data, range(len(data)))
        new_df = self.base_analyst.replace_df_keys(old_df, quux='baz')
        assert_dict_equal(new_df.to_dict(), {
            'foo': {
                0: '你好',
                1: 1
            },
            'baz': {
                0: 'qux',
                1: 'qux'
            }
        })

    @with_context
    @patch('pybossa_lc.analysis.base.send_mail')
    @patch('pybossa_lc.analysis.base.render_template')
    @patch('pybossa_lc.analysis.base.Queue.enqueue')
    def test_comment_annotations_emailed(self, mock_enqueue, mock_render,
                                         mock_send_mail):
        """Test that comment annotation emails are sent."""
        mock_render.return_value = True
        comment = 'foo'
        creator = 'bar'
        target = 'example.com'
        fake_anno = {
            'creator': {
                'id': 'example.com/user1',
                'type': 'Person',
                'name': creator,
                'nickname': 'nick'
            },
            'body': {
                'type': 'TextualBody',
                'purpose': 'commenting',
                'value': comment,
                'format': 'text/plain'
            }
        }
        task = self.ctx.create_task(1, target)
        json_anno = json.dumps(fake_anno, indent=2, sort_keys=True)
        self.base_analyst.email_comment_anno(task, fake_anno)

        # Both the markdown and HTML templates should be rendered with the
        # same annotation details.
        expected_render_args = [
            call('/account/email/new_comment_anno.md',
                 annotation=json_anno,
                 creator=creator,
                 comment=comment,
                 raw_image=None,
                 link=None),
            call('/account/email/new_comment_anno.html',
                 annotation=json_anno,
                 creator=creator,
                 comment=comment,
                 raw_image=None,
                 link=None)
        ]
        assert_equal(mock_render.call_args_list, expected_render_args)

        expected_msg = {
            'body': True,
            'html': True,
            'subject': 'New Comment Annotation',
            'recipients': flask_app.config.get('ADMINS')
        }
        mock_enqueue.assert_called_once_with(mock_send_mail, expected_msg)

    @with_context
    @patch('pybossa_lc.model.base.wa_client')
    def test_modified_results_not_updated(self, mock_client):
        """Test results are not updated if an Annotation has been modified."""
        task = self.ctx.create_task(1)
        TaskRunFactory(task=task)
        result = self.result_repo.get_by(task_id=task.id)
        # Configure the mock BEFORE running the analysis (previously it was
        # configured afterwards, so the modified-annotation path was never
        # actually exercised).
        mock_client.search_annotations.return_value = [{
            'modified': 'fake-time'
        }]
        self.base_analyst.analyse(result.id)
        assert_equal(mock_client.create_annotation.called, False)

    @with_context
    @patch('pybossa_lc.model.base.wa_client')
    def test_parent_results_not_updated(self, mock_client):
        """Test results are not updated if an Annotation has children."""
        task = self.ctx.create_task(1)
        TaskRunFactory(task=task)
        result = self.result_repo.get_by(task_id=task.id)
        result.info = dict(has_children=True)
        self.result_repo.update(result)
        self.base_analyst.analyse(result.id)
        assert_equal(mock_client.create_annotation.called, False)

    @with_context
    @patch('pybossa_lc.model.base.wa_client')
    def test_result_with_child_not_updated(self, mock_client):
        """Test that a result is not updated when it has a child."""
        task = self.ctx.create_task(1)
        TaskRunFactory(task=task)
        result = self.result_repo.get_by(task_id=task.id)
        info = dict(annotations='foo', has_children=True)
        result.info = info
        self.result_repo.update(result)
        self.base_analyst.analyse(result.id)
        assert_equal(result.info, info)

    @with_context
    def test_analysis_exception_if_no_annotation_collection(self):
        """Test that AnnotationCollection must be setup."""
        task = self.ctx.create_task(1, 'example.com', anno_collection=None)
        TaskRunFactory.create(task=task)
        result = self.result_repo.filter_by(project_id=task.project_id)[0]
        assert_raises(AnalysisException, self.base_analyst.analyse, result.id)
class TestBulkTaskIIIFEnhancedImport(Test):
    # Tests for BulkTaskIIIFEnhancedImporter: viewer-link generation and the
    # creation of child tasks from a parent project's annotations.
    #
    # NOTE(review): several test methods below accept a `requests` argument
    # with no visible @patch decorator for it — presumably requests is
    # patched at class or module level outside this excerpt; confirm.
    def setUp(self):
        super(TestBulkTaskIIIFEnhancedImport, self).setUp()
        self.result_repo = ResultRepository(db)
        # Base URIs for fake manifests; the canvas/image templates are
        # filled in with canvas and image indices.
        self.manifest_uri = 'http://example.org/iiif/book1/manifest'
        self.canvas_id_base = 'http://example.org/iiif/book1/canvas/p{0}'
        self.img_id_base = 'http://example.org/images/book1-page{0}-img{1}'

    def create_manifest(self, canvases=1, images=1):
        """Return a minimal IIIF Presentation 2.0 manifest.

        The manifest contains one sequence with `canvases` canvases, each
        carrying `images` sc:painting annotations whose image service IDs
        are derived from self.img_id_base.
        """
        manifest = {
            '@context': 'http://iiif.io/api/presentation/2/context.json',
            '@id': self.manifest_uri,
            '@type': 'sc:Manifest',
            'label': 'Foo',
            'sequences': [{
                '@type': 'sc:Sequence',
                'canvases': []
            }]
        }
        for i in range(canvases):
            canvas = {
                '@id': self.canvas_id_base.format(i),
                '@type': 'sc:Canvas',
                'label': 'Bar',
                'height': 100,
                'width': 100,
                'images': []
            }
            for j in range(images):
                image = {
                    '@type': 'oa:Annotation',
                    'motivation': 'sc:painting',
                    'resource': {
                        '@id': 'http://example.org/image{}.jpg'.format(j),
                        '@type': 'dctypes:Image',
                        'service': {
                            '@id': self.img_id_base.format(i, j)
                        }
                    },
                    'on': 'http://example.org/{}'.format(i)
                }
                canvas['images'].append(image)
            manifest['sequences'][0]['canvases'].append(canvas)
        return manifest

    @with_context
    def test_bl_tasks_created_with_bl_link(self, requests):
        """Test tasks for a generic manifest link to the Universal Viewer.

        NOTE(review): the method name mentions a BL link, but the body uses
        a non-BL manifest and asserts the universalviewer.io link — it looks
        like this name and the next test's name are swapped; confirm.
        """
        manifest = self.create_manifest()
        headers = {'Content-Type': 'application/json'}
        response = FakeResponse(text=json.dumps(manifest),
                                status_code=200,
                                headers=headers,
                                encoding='utf-8')
        requests.get.return_value = response

        importer = BulkTaskIIIFEnhancedImporter(manifest_uri=self.manifest_uri)
        tasks = importer.tasks()
        assert_equal(len(tasks), 1)

        # One task per canvas; its link opens the manifest in the Universal
        # Viewer at that task's canvas index (cv=0).
        link_query = '?manifest={}#?cv=0'.format(self.manifest_uri)
        link = 'http://universalviewer.io/uv.html' + link_query
        assert_equal(tasks[0]['info']['link'], link)

    @with_context
    def test_non_bl_tasks_created_with_non_bl_link(self, requests):
        """Test that BL tasks are created with a BL viewer link.

        NOTE(review): the method name and the original docstring said
        "non-BL", but the body uses an api.bl.uk manifest and asserts the
        access.bl.uk viewer link — apparently swapped with the previous
        test's name; confirm.
        """
        manifest = self.create_manifest()
        bl_manifest_id = 'https://api.bl.uk/metadata/iiif/id/manifest.json'
        manifest['@id'] = bl_manifest_id
        headers = {'Content-Type': 'application/json'}
        response = FakeResponse(text=json.dumps(manifest),
                                status_code=200,
                                headers=headers,
                                encoding='utf-8')
        requests.get.return_value = response

        importer = BulkTaskIIIFEnhancedImporter(manifest_uri=bl_manifest_id)
        tasks = importer.tasks()
        assert_equal(len(tasks), 1)

        # BL manifests get links into the BL item viewer instead.
        link = 'http://access.bl.uk/item/viewer/id#?cv=0'
        assert_equal(tasks[0]['info']['link'], link)

    @with_context
    def test_exeption_if_no_collection_iri_for_parent(self, requests):
        """Test exception if no collection iri when child tasks generated.

        NOTE(review): "exeption" in the method name is a typo for
        "exception"; left unchanged to avoid renaming a test.
        """
        manifest = self.create_manifest()
        headers = {'Content-Type': 'application/json'}
        response = FakeResponse(text=json.dumps(manifest),
                                status_code=200,
                                headers=headers,
                                encoding='utf-8')
        requests.get.return_value = response
        # A completed parent task exists, but the parent project's category
        # has no annotation collection IRI configured.
        parent = ProjectFactory()
        task = TaskFactory(project=parent, n_answers=1)
        TaskRunFactory.create(task=task)
        importer = BulkTaskIIIFEnhancedImporter(manifest_uri=self.manifest_uri,
                                                parent_id=parent.id)
        assert_raises(BulkImportException, importer.tasks)

    @with_context
    @patch('pybossa_lc.model.base.wa_client')
    def test_child_tasks_generated(self, mock_wa_client, requests):
        """Test that child tasks are generated.

        For each parent task, only the tagging and describing annotations
        are expected to produce child tasks; the commenting annotation is
        excluded from the expected data below.
        """
        n_canvases = 3
        n_images = 1
        manifest = self.create_manifest(canvases=n_canvases, images=n_images)
        headers = {'Content-Type': 'application/json'}
        response = FakeResponse(text=json.dumps(manifest),
                                status_code=200,
                                headers=headers,
                                encoding='utf-8')
        requests.get.return_value = response
        anno_fixtures = AnnotationFixtures()
        anno_collection_iri = 'example.org/annotations'
        category = CategoryFactory(
            info={'annotations': {
                'results': anno_collection_iri
            }})
        parent = ProjectFactory(category=category)
        tasks = TaskFactory.create_batch(n_canvases,
                                         project=parent,
                                         n_answers=1)

        # Create some annotations for each parent task
        expected = []
        return_values = []
        for i, task in enumerate(tasks):
            canvas_id = self.canvas_id_base.format(i)
            for j in range(n_images):
                TaskRunFactory.create(task=task)
                img_id = self.img_id_base.format(i, j)

                annotations = [
                    anno_fixtures.create(motivation='tagging',
                                         source=canvas_id),
                    anno_fixtures.create(motivation='describing',
                                         source=canvas_id),
                    anno_fixtures.create(motivation='commenting',
                                         source=canvas_id)
                ]

                # Point the completed result at the annotation collection so
                # the importer can look the task's annotations up.
                result = self.result_repo.get_by(task_id=task.id)
                result.info = dict(annotations=anno_collection_iri)
                self.result_repo.update(result)
                return_values.append(annotations)

                # Store expected task data to check later
                link_query = '?manifest={}#?cv={}'.format(self.manifest_uri, i)
                link = 'http://universalviewer.io/uv.html' + link_query
                # annotations[:2] == tagging and describing only.
                for anno in annotations[:2]:
                    expected.append({
                        'manifest':
                        self.manifest_uri,
                        'target':
                        anno['target'],
                        'link':
                        link,
                        'tileSource':
                        '{}/info.json'.format(img_id),
                        'url':
                        '{}/full/max/0/default.jpg'.format(img_id),
                        'url_m':
                        '{}/full/240,/0/default.jpg'.format(img_id),
                        'url_b':
                        '{}/full/1024,/0/default.jpg'.format(img_id),
                        'parent_task_id':
                        task.id
                    })

        # One search_annotations() reply per parent-task result, in order.
        mock_wa_client.search_annotations.side_effect = return_values
        importer = BulkTaskIIIFEnhancedImporter(manifest_uri=self.manifest_uri,
                                                parent_id=parent.id)
        tasks = importer.tasks()
        task_info = [task['info'] for task in tasks]
        # Presumably the importer orders child tasks by target; the expected
        # list mirrors that ordering — confirm against the importer.
        expected = sorted(expected, key=lambda x: x['target'])
        assert_equal(task_info, expected)

    @with_context
    @patch('pybossa_lc.model.base.wa_client')
    def test_has_child_added_to_parent_results(self, mock_wa_client, requests):
        """Test that the has_children key is added to parent results."""
        manifest = self.create_manifest()
        headers = {'Content-Type': 'application/json'}
        response = FakeResponse(text=json.dumps(manifest),
                                status_code=200,
                                headers=headers,
                                encoding='utf-8')
        requests.get.return_value = response
        anno_collection_iri = 'example.org/annotations'

        # Create a task for each canvas
        n_tasks = 3
        category = CategoryFactory(
            info={'annotations': {
                'results': anno_collection_iri
            }})
        parent = ProjectFactory(category=category)
        tasks = TaskFactory.create_batch(n_tasks, project=parent, n_answers=1)
        for task in tasks:
            TaskRunFactory.create(task=task)
            result = self.result_repo.get_by(task_id=task.id)
            result.info = dict(annotations=anno_collection_iri)
            self.result_repo.update(result)

        importer = BulkTaskIIIFEnhancedImporter(manifest_uri=self.manifest_uri,
                                                parent_id=parent.id)
        # No annotations are found, yet every parent result must still be
        # flagged as having children.
        mock_wa_client.search_annotations.return_value = []
        tasks = importer.tasks()

        # Each parent result keeps its annotations IRI and gains
        # has_children=True.
        results = self.result_repo.filter_by(project_id=parent.id)
        result_info = [result.info for result in results]
        expected = [{
            'annotations': anno_collection_iri,
            'has_children': True
        }] * n_tasks
        assert_equal(result_info, expected)
# Esempio n. 4 (Italian: "Example no. 4") — scraped-snippet separator,
# commented out so the file remains valid Python.
class TestResultRepository(Test):
    """Tests for ResultRepository get/filter/update/delete behaviour."""

    def setUp(self):
        super(TestResultRepository, self).setUp()
        # Repository under test, bound to the global db session.
        self.result_repo = ResultRepository(db)

    @with_context
    def create_result(self, n_answers=1, filter_by=False):
        """Create a completed task (with one task run) and return its result.

        Returns a list when filter_by is True, otherwise the single result
        (or None) for project 1.
        """
        task = TaskFactory.create(n_answers=n_answers)
        TaskRunFactory.create(task=task)
        if filter_by:
            return self.result_repo.filter_by(project_id=1)
        else:
            return self.result_repo.get_by(project_id=1)

    @with_context
    def test_get_return_none_if_no_result(self):
        """Test get method returns None if there is no result with the
        specified id"""
        result = self.result_repo.get(2)

        assert result is None, result

    @with_context
    def test_get_returns_result(self):
        """Test get method returns a result if exists"""
        n_answers = 1

        task = TaskFactory.create(n_answers=n_answers)
        task_run = TaskRunFactory.create(task=task)

        result = self.result_repo.filter_by(project_id=1)

        err_msg = "There should be a result"
        assert len(result) == 1, err_msg
        result = result[0]
        assert result.project_id == 1, err_msg
        assert result.task_id == task.id, err_msg
        assert len(result.task_run_ids) == n_answers, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in result.task_run_ids:
            assert tr_id == task_run.id, err_msg

    @with_context
    def test_get_by_returns_result(self):
        """Test get_by method returns a result if exists"""
        n_answers = 1

        task = TaskFactory.create(n_answers=n_answers)
        task_run = TaskRunFactory.create(task=task)

        result = self.result_repo.get_by(project_id=1)

        err_msg = "There should be a result"
        assert result.project_id == 1, err_msg
        assert result.task_id == task.id, err_msg
        assert len(result.task_run_ids) == n_answers, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in result.task_run_ids:
            assert tr_id == task_run.id, err_msg

    @with_context
    def test_get_returns_result_after_increasig_redundancy(self):
        """Test get method returns a result if after increasing redundancy

        NOTE(review): "increasig" in the method name is a typo for
        "increasing"; left unchanged to avoid renaming a test. The test also
        mixes self.result_repo with the module-level result_repo/task_repo
        fixtures — presumably both use the same db session; confirm.
        """
        n_answers = 1

        task = TaskFactory.create(n_answers=n_answers)
        task_run = TaskRunFactory.create(task=task)

        result = self.result_repo.filter_by(project_id=1)

        err_msg = "There should be a result"
        assert len(result) == 1, err_msg
        result = result[0]
        assert result.project_id == 1, err_msg
        assert result.task_id == task.id, err_msg
        assert len(result.task_run_ids) == n_answers, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in result.task_run_ids:
            assert tr_id == task_run.id, err_msg

        # Increase redundancy
        tmp = task_repo.get_task(task.id)
        tmp.n_answers = 2
        # Persist the object we actually mutated. The original code passed
        # `task` here, which only worked if the SQLAlchemy identity map made
        # `tmp` and `task` the same instance.
        task_repo.update(tmp)

        # Raising redundancy must not duplicate the current result.
        err_msg = "There should be only one result"
        results = result_repo.filter_by(project_id=1)
        assert len(results) == 1, err_msg
        task_run_2 = TaskRunFactory.create(task=task)

        # After the second contribution there is still one current result...
        err_msg = "There should be 1 results"
        results = result_repo.filter_by(project_id=1)
        assert len(results) == 1, err_msg

        # ...but two versions exist when old versions are included.
        err_msg = "There should be 2 results"
        results = result_repo.filter_by(project_id=1, last_version=False)
        assert len(results) == 2, err_msg

        assert results[1].project_id == 1, err_msg
        assert results[1].task_id == task.id, err_msg
        err_msg = "First result should have only one task run ID"
        assert len(results[0].task_run_ids) == 1, err_msg
        err_msg = "Second result should have only two task run IDs"
        assert len(results[1].task_run_ids) == 2, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in results[1].task_run_ids:
            assert tr_id in [task_run.id, task_run_2.id], err_msg

    @with_context
    def test_get_returns_no_result(self):
        """Test get method does not return a result if task not completed"""
        # Only one of the three required answers is provided, so no result
        # should exist yet.
        n_answers = 3

        task = TaskFactory.create(n_answers=n_answers)
        TaskRunFactory.create(task=task)

        result = self.result_repo.filter_by(project_id=1)

        err_msg = "There should not be a result"
        assert len(result) == 0, err_msg

    @with_context
    def test_fulltext_search_result(self):
        """Test fulltext search in JSON info works."""
        result = self.create_result()
        text = 'something word you me bar'
        data = {'foo': text}
        result.info = data
        self.result_repo.update(result)

        # key::term searches the given JSON key for the term.
        info = 'foo::word'
        res = self.result_repo.filter_by(info=info, fulltextsearch='1')
        assert len(res) == 1, len(res)
        # Fulltext matches come back as row tuples; the result object is the
        # first element.
        assert res[0][0].info['foo'] == text, res[0]

        # Without fulltextsearch the same query matches nothing.
        res = self.result_repo.filter_by(info=info)
        assert len(res) == 0, len(res)

    @with_context
    def test_fulltext_search_result_01(self):
        """Test fulltext search in JSON info works."""
        result = self.create_result()
        text = 'something word you me bar'
        data = {'foo': text, 'bar': 'foo'}
        result.info = data
        self.result_repo.update(result)

        # Presumably '&' ANDs terms within a key and '|' separates key
        # clauses — confirm against the repository's filter syntax.
        info = 'foo::word&bar|bar::foo'
        res = self.result_repo.filter_by(info=info, fulltextsearch='1')
        assert len(res) == 1, len(res)
        assert res[0][0].info['foo'] == text, res[0]

    @with_context
    def test_info_json_search_result(self):
        """Test search in JSON info works."""
        result = self.create_result()
        text = 'bar'
        data = {'foo': text}
        result.info = data
        self.result_repo.update(result)

        # Non-fulltext key::value search requires an exact match and returns
        # plain result objects (not row tuples).
        info = 'foo::bar'
        res = self.result_repo.filter_by(info=info)
        assert len(res) == 1, len(res)
        assert res[0].info['foo'] == text, res[0]

    @with_context
    def test_update(self):
        """Test update persists the changes made to the result"""
        result = self.create_result()
        result.info = dict(new='value')

        self.result_repo.update(result)
        updated_result = self.result_repo.get(result.id)

        assert updated_result.info['new'] == 'value', updated_result

    @with_context
    def test_update_fails_if_integrity_error(self):
        """Test update raises a DBIntegrityError if the instance to be updated
        lacks a required value"""
        result = self.create_result()
        result.project_id = None

        assert_raises(DBIntegrityError, self.result_repo.update, result)

    @with_context
    def test_update_only_updates_results(self):
        """Test update raises a WrongObjectError when an object which is not
        a Result instance is updated"""
        bad_object = dict()

        assert_raises(WrongObjectError, self.result_repo.update, bad_object)

    @with_context
    def test_delete_results_from_project(self):
        """Test delete_results_from_project works."""
        project = ProjectFactory.create()
        task = TaskFactory.create(project=project, n_answers=1)
        TaskRunFactory.create(task=task, project=project)
        result = result_repo.get_by(project_id=task.project.id)
        assert result
        result_repo.delete_results_from_project(project)
        result = result_repo.get_by(project_id=task.project.id)
        assert result is None
class TestResultRepository(Test):
    # NOTE(review): this redefines TestResultRepository and shadows the
    # earlier class of the same name in this file at import time. Unlike the
    # earlier copy, these tests carry no @with_context decorator — confirm
    # the duplication is intentional.

    def setUp(self):
        """Create a fresh ResultRepository bound to the global db session."""
        super(TestResultRepository, self).setUp()
        self.result_repo = ResultRepository(db)

    def create_result(self, n_answers=1, filter_by=False):
        """Create a completed task plus task run and return its result(s).

        Returns a list when filter_by is True, otherwise a single result
        (or None) for project 1.
        """
        new_task = TaskFactory.create(n_answers=n_answers)
        TaskRunFactory.create(task=new_task)
        if not filter_by:
            return self.result_repo.get_by(project_id=1)
        return self.result_repo.filter_by(project_id=1)


    def test_get_return_none_if_no_result(self):
        """Test get method returns None if there is no result with the
        specified id"""
        # No results exist yet, so looking up an arbitrary id yields None.
        fetched = self.result_repo.get(2)
        assert fetched is None, fetched


    def test_get_returns_result(self):
        """Test get method returns a result if exists"""
        expected_answers = 1
        task = TaskFactory.create(n_answers=expected_answers)
        task_run = TaskRunFactory.create(task=task)

        results = self.result_repo.filter_by(project_id=1)

        err_msg = "There should be a result"
        assert len(results) == 1, err_msg
        first = results[0]
        assert first.project_id == 1, err_msg
        assert first.task_id == task.id, err_msg
        assert len(first.task_run_ids) == expected_answers, err_msg
        err_msg = "The task_run id is missing in the results array"
        assert all(tr_id == task_run.id
                   for tr_id in first.task_run_ids), err_msg

    def test_get_by_returns_result(self):
        """Test get_by method returns a result if exists"""
        expected_answers = 1
        task = TaskFactory.create(n_answers=expected_answers)
        task_run = TaskRunFactory.create(task=task)

        result = self.result_repo.get_by(project_id=1)

        err_msg = "There should be a result"
        assert result.project_id == 1, err_msg
        assert result.task_id == task.id, err_msg
        assert len(result.task_run_ids) == expected_answers, err_msg
        err_msg = "The task_run id is missing in the results array"
        assert all(tr_id == task_run.id
                   for tr_id in result.task_run_ids), err_msg


    def test_get_returns_result_after_increasig_redundancy(self):
        """Test get method returns a result if after increasing redundancy

        NOTE(review): "increasig" in the method name is a typo for
        "increasing"; left unchanged to avoid renaming a test.
        """
        n_answers = 1

        task = TaskFactory.create(n_answers=n_answers)
        task_run = TaskRunFactory.create(task=task)

        result = self.result_repo.filter_by(project_id=1)

        err_msg = "There should be a result"
        assert len(result) == 1, err_msg
        result = result[0]
        assert result.project_id == 1, err_msg
        assert result.task_id == task.id, err_msg
        assert len(result.task_run_ids) == n_answers, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in result.task_run_ids:
            assert tr_id == task_run.id, err_msg

        # Increase redundancy
        tmp = task_repo.get_task(task.id)
        tmp.n_answers = 2
        # Persist the object we actually mutated. The original code passed
        # `task` here, which only worked if the SQLAlchemy identity map made
        # `tmp` and `task` the same instance.
        task_repo.update(tmp)

        # Raising redundancy must not duplicate the current result.
        err_msg = "There should be only one result"
        results = result_repo.filter_by(project_id=1)
        assert len(results) == 1, err_msg
        task_run_2 = TaskRunFactory.create(task=task)

        # Still a single current result after the second contribution...
        err_msg = "There should be 1 results"
        results = result_repo.filter_by(project_id=1)
        assert len(results) == 1, err_msg

        # ...but two versions when old versions are included.
        err_msg = "There should be 2 results"
        results = result_repo.filter_by(project_id=1, last_version=False)
        assert len(results) == 2, err_msg

        assert results[1].project_id == 1, err_msg
        assert results[1].task_id == task.id, err_msg
        err_msg = "First result should have only one task run ID"
        assert len(results[0].task_run_ids) == 1, err_msg
        err_msg = "Second result should have only two task run IDs"
        assert len(results[1].task_run_ids) == 2, err_msg
        err_msg = "The task_run id is missing in the results array"
        for tr_id in results[1].task_run_ids:
            assert tr_id in [task_run.id, task_run_2.id], err_msg


    def test_get_returns_no_result(self):
        """Test get method does not return a result if task not completed"""
        # Only one of the three required answers is provided, so the task is
        # incomplete and no result should exist.
        incomplete_task = TaskFactory.create(n_answers=3)
        TaskRunFactory.create(task=incomplete_task)

        results = self.result_repo.filter_by(project_id=1)

        err_msg = "There should not be a result"
        assert len(results) == 0, err_msg

    def test_update(self):
        """Test update persists the changes made to the result"""
        result = self.create_result()
        result.info = {'new': 'value'}
        self.result_repo.update(result)

        # Re-fetch from the repository to confirm the change was persisted.
        fetched = self.result_repo.get(result.id)
        assert fetched.info['new'] == 'value', fetched


    def test_update_fails_if_integrity_error(self):
        """Test update raises a DBIntegrityError if the instance to be updated
        lacks a required value"""
        # Clearing the required project_id should make the update fail.
        broken = self.create_result()
        broken.project_id = None
        assert_raises(DBIntegrityError, self.result_repo.update, broken)


    def test_update_only_updates_results(self):
        """Test update raises a WrongObjectError when an object which is not
        a Result instance is updated"""
        not_a_result = {}
        assert_raises(WrongObjectError,
                      self.result_repo.update,
                      not_a_result)