def test_check_for_duplicates(self):
        """Similar titles at a 0.7 threshold yield duplicate-title flags."""
        candidate_title = ('a lepton qed of colliders or interactions with'
                           ' strong field electron laser')
        set_title = SetTitle(creator=self.creator, title=candidate_title)

        # Two independent snapshots; only the "after" one gets the new title.
        submission_before = copy.deepcopy(self.submission)
        submission_after = copy.deepcopy(self.submission)
        submission_after.metadata = SubmissionMetadata(title=candidate_title)

        trigger = Trigger(
            event=set_title,
            actor=self.creator,
            before=submission_before,
            after=submission_after,
            params={'TITLE_SIMILARITY_THRESHOLD': 0.7},
        )

        # The process reports its results through the emit callback.
        emitted = []
        self.process.check_for_duplicates(titles.TITLES, trigger,
                                          emitted.append)

        self.assertGreater(len(emitted), 0, "Generates some events")
        expected_type = MetadataFlag.Type.POSSIBLE_DUPLICATE_TITLE
        for flag_event in emitted:
            self.assertIsInstance(flag_event, AddMetadataFlag,
                                  "Generates AddMetadataFlag events")
            self.assertEqual(flag_event.flag_type, expected_type,
                             "Flag has type POSSIBLE_DUPLICATE_TITLE")
    def test_check_with_existing_flags(self):
        """An existing possible-duplicate-title flag is removed first."""
        flag_id = 'asdf1234'
        title = ('a lepton qed of colliders or interactions with'
                 ' strong field electron laser')
        duplicate_info = {'id': 5, 'title': title, 'owner': self.creator}
        self.submission.flags[flag_id] = MetadataFlag(
            event_id=flag_id,
            creator=self.creator,
            created=datetime.now(UTC),
            flag_type=MetadataFlag.Type.POSSIBLE_DUPLICATE_TITLE,
            flag_data=duplicate_info,
            field='title',
            comment='possible duplicate title',
        )
        set_title = SetTitle(creator=self.creator, title=title)

        # Both snapshots are taken after the flag is attached, so each
        # copy carries it.
        submission_before = copy.deepcopy(self.submission)
        submission_after = copy.deepcopy(self.submission)
        submission_after.metadata = SubmissionMetadata(title=title)

        trigger = Trigger(
            event=set_title,
            actor=self.creator,
            before=submission_before,
            after=submission_after,
            params={'TITLE_SIMILARITY_THRESHOLD': 1.0},
        )

        emitted = []
        self.process.check_for_duplicates(titles.TITLES, trigger,
                                          emitted.append)

        self.assertGreater(len(emitted), 0, "Generates some events")
        # The first emitted event must remove the pre-existing flag.
        first = emitted[0]
        self.assertIsInstance(first, RemoveFlag)
        self.assertEqual(first.flag_id, flag_id)
    def test_clear_previous_tags(self):
        """A CHARACTER_SET flag already on the submission is removed first."""
        stale_flag_id = 'asdf1234'
        self.submission.flags[stale_flag_id] = MetadataFlag(
            event_id=stale_flag_id,
            creator=self.creator,
            created=datetime.now(UTC),
            flag_type=MetadataFlag.Type.CHARACTER_SET,
            flag_data={'ascii': 0},
            field='title',
            comment='something fishy',
        )
        # Snapshot the prior state before the new title is applied.
        prior_state = copy.deepcopy(self.submission)

        new_title = 'A boring title with occâsional non-ASCII characters'
        self.submission.metadata = SubmissionMetadata(title=new_title)
        set_title = SetTitle(creator=self.creator, title=new_title)
        trigger = Trigger(
            event=set_title,
            actor=self.creator,
            before=prior_state,
            after=self.submission,
            params={'METADATA_ASCII_THRESHOLD': 0.1},
        )

        emitted = []
        self.process.check_title(None, trigger, emitted.append)

        self.assertGreater(len(emitted), 0, "Generates some events")
        # The first emitted event must remove the pre-existing flag.
        first = emitted[0]
        self.assertIsInstance(first, RemoveFlag)
        self.assertEqual(first.flag_id, stale_flag_id)
    def test_plenty_of_ascii(self):
        """Title has plenty of ASCII characters, so no flags are generated."""
        # Snapshot the prior state before the new title is applied.
        prior_state = copy.deepcopy(self.submission)

        mostly_ascii = 'A boring title with occâsional non-ASCII characters'
        self.submission.metadata = SubmissionMetadata(title=mostly_ascii)
        set_title = SetTitle(creator=self.creator, title=mostly_ascii)
        trigger = Trigger(
            event=set_title,
            actor=self.creator,
            before=prior_state,
            after=self.submission,
            params={'METADATA_ASCII_THRESHOLD': 0.1},
        )

        emitted = []
        self.process.check_title(None, trigger, emitted.append)

        self.assertEqual(len(emitted), 0, 'No flags generated')
    def test_low_ascii(self):
        """Title with very few ASCII characters gets a CHARACTER_SET flag.

        Only 3 of the 9 characters in the title are ASCII; the trigger is
        configured with a ``METADATA_ASCII_THRESHOLD`` of 0.5.
        """
        # Snapshot the prior state before the new title is applied.
        before = copy.deepcopy(self.submission)
        title = 'ⓕöö tïtłę'
        self.submission.metadata = SubmissionMetadata(title=title)
        event = SetTitle(creator=self.creator, title=title)
        trigger = Trigger(event=event,
                          actor=self.creator,
                          before=before,
                          after=self.submission,
                          params={'METADATA_ASCII_THRESHOLD': 0.5})

        events = []
        self.process.check_title(None, trigger, events.append)

        # Fail with a clear assertion (not an IndexError) if nothing is
        # emitted, matching the guard used by the sibling tests.
        self.assertGreater(len(events), 0, "Generates some events")
        self.assertIsInstance(events[0], AddMetadataFlag, 'Adds metadata flag')
        self.assertEqual(events[0].flag_type, MetadataFlag.Type.CHARACTER_SET)
        self.assertEqual(events[0].field, 'title')
        # The ratio is a float; compare with a tolerance rather than exactly.
        self.assertAlmostEqual(events[0].flag_data['ascii'], 3 / 9)
    def test_check_for_duplicates_with_strict_threshold(self):
        """A similarity threshold of 1.0 produces no events at all."""
        candidate_title = ('a lepton qed of colliders or interactions with'
                           ' strong field electron laser')
        set_title = SetTitle(creator=self.creator, title=candidate_title)

        # Two independent snapshots; only the "after" one gets the new title.
        submission_before = copy.deepcopy(self.submission)
        submission_after = copy.deepcopy(self.submission)
        submission_after.metadata = SubmissionMetadata(title=candidate_title)

        trigger = Trigger(
            event=set_title,
            actor=self.creator,
            before=submission_before,
            after=submission_after,
            params={'TITLE_SIMILARITY_THRESHOLD': 1.0},
        )

        emitted = []
        self.process.check_for_duplicates(titles.TITLES, trigger,
                                          emitted.append)

        self.assertEqual(len(emitted), 0)
    def test_check_similar_titles(self, mock_get_titles):
        """Candidate lookup returns every title the title source provides.

        NOTE(review): ``mock_get_titles`` is presumably injected by a patch
        decorator that is not visible in this excerpt -- confirm.
        """
        mock_get_titles.return_value = titles.TITLES

        title = 'a lepton qed of colliders or interactions with strong field' \
                ' electron laser'
        event = SetTitle(creator=self.creator, title=title)
        before = copy.deepcopy(self.submission)
        after = copy.deepcopy(self.submission)
        after.metadata = SubmissionMetadata(title=title)
        events = []

        trigger = Trigger(event=event,
                          actor=self.creator,
                          before=before,
                          after=after,
                          params={'TITLE_SIMILARITY_WINDOW': 60})

        some_titles = self.process.get_candidates(None, trigger, events.append)

        self.assertEqual(len(some_titles), len(titles.TITLES))
        # The lookup must be called exactly once, with a datetime as its
        # first positional argument.
        self.assertEqual(mock_get_titles.call_count, 1)
        self.assertIsInstance(mock_get_titles.call_args[0][0], datetime)