def testExact(self):
     """
     A target equal to a whole word of the title must cut the title
     immediately after that word.
     """
     title = 'Funny sea lion polyomavirus 1 CSL6994'
     simplified = simplifyTitle(title, 'lion')
     self.assertEqual('Funny sea lion', simplified)
 def testSuffix(self):
     """
     A target that is the suffix of a word must cut the title after that
     word, keeping the whole word that carries the suffix.
     """
     title = 'Funny sea lion polyomavirus 1 CSL6994'
     simplified = simplifyTitle(title, 'virus')
     self.assertEqual('Funny sea lion polyomavirus', simplified)
 def testContained(self):
     """
     A target found inside a word must cut the title at the end of the
     target, keeping the part of that word that precedes the target.
     """
     title = 'Funny sea lion polyomavirus 1 CSL6994'
     simplified = simplifyTitle(title, 'yoma')
     self.assertEqual('Funny sea lion polyoma', simplified)
Exemple #4
0
 def testContained(self):
     """
     If the target occurs in the middle of a word, the simplified title
     ends with the target and retains the word's leading characters.
     """
     original = 'Funny sea lion polyomavirus 1 CSL6994'
     expected = 'Funny sea lion polyoma'
     result = simplifyTitle(original, 'yoma')
     self.assertEqual(expected, result)
Exemple #5
0
 def testExact(self):
     """
     If the target matches a complete word of the title, the simplified
     title ends with (and includes) that word.
     """
     original = 'Funny sea lion polyomavirus 1 CSL6994'
     expected = 'Funny sea lion'
     result = simplifyTitle(original, 'lion')
     self.assertEqual(expected, result)
Exemple #6
0
 def testSuffix(self):
     """
     If the target is the ending of a word, the simplified title ends with
     that entire word.
     """
     original = 'Funny sea lion polyomavirus 1 CSL6994'
     expected = 'Funny sea lion polyomavirus'
     result = simplifyTitle(original, 'virus')
     self.assertEqual(expected, result)
Exemple #7
0
    def accept(self, title):
        """
        Return a value (see below) to indicate if a title is acceptable (and,
        if so, in what way).

        The checks run in this order: whitelist, blacklist, positive regex,
        negative regex and, when C{self._truncated} is not C{None},
        de-duplication of truncated titles.

        @param title: A C{str} sequence title.
        @return: An C{int} to indicate an acceptable title or not. This will be

            C{self.REJECT} if the title is unacceptable.
            C{self.WHITELIST_ACCEPT} if the title is whitelisted.
            C{self.DEFAULT_ACCEPT} if the title is acceptable by default.

            These three values are needed so our caller can distinguish between
            the two reasons for acceptance.
        """
        # The whitelist wins over every other test, including the blacklist.
        if self._whitelist and title in self._whitelist:
            return self.WHITELIST_ACCEPT

        if self._blacklist and title in self._blacklist:
            return self.REJECT

        # If we have a positive regex but we don't match it, reject.
        if self._positiveRegex and self._positiveRegex.search(title) is None:
            return self.REJECT

        # If we have a negative regex and we do match it, reject.
        if (self._negativeRegex and
                self._negativeRegex.search(title) is not None):
            return self.REJECT

        if self._truncated is not None:
            # Titles start with something like gi|525472786|emb|HG313807.1|
            # that we need to skip. A title containing no space has nothing
            # after the id, so fall back to the whole title rather than
            # raising an IndexError.
            _, separator, remainder = title.partition(' ')
            titleSansId = remainder if separator else title
            truncated = simplifyTitle(titleSansId, self._truncateAfter)
            if truncated in self._truncated:
                # We've already seen this (truncated) title. Reject unless
                # this is the original title that we truncated to make this
                # entry. That title must continue to be accepted.
                if self._truncated[truncated] == title:
                    return self.DEFAULT_ACCEPT
                else:
                    return self.REJECT
            else:
                # First time we see this truncated form: remember which
                # full title produced it.
                self._truncated[truncated] = title

        return self.DEFAULT_ACCEPT
Exemple #8
0
    def accept(self, title):
        """
        Classify a title as rejected, whitelisted, or accepted by default.

        @param title: A C{str} sequence title.
        @return: An C{int} classification of the title, one of

            C{self.REJECT} if the title is unacceptable.
            C{self.WHITELIST_ACCEPT} if the title is whitelisted.
            C{self.DEFAULT_ACCEPT} if the title is acceptable by default.

            The two distinct acceptance values let the caller tell why a
            title was accepted.
        """
        # Whitelisted titles are accepted before any other test is applied.
        whitelist = self._whitelist
        if whitelist and title in whitelist:
            return self.WHITELIST_ACCEPT

        blacklist = self._blacklist
        if blacklist and title in blacklist:
            return self.REJECT

        # A positive regex, when present, must match the title.
        mustMatch = self._positiveRegex
        if mustMatch and mustMatch.search(title) is None:
            return self.REJECT

        # A negative regex, when present, must not match the title.
        mustNotMatch = self._negativeRegex
        if mustNotMatch and mustNotMatch.search(title) is not None:
            return self.REJECT

        seen = self._truncated
        if seen is None:
            # Truncated-title tracking is off; nothing more to check.
            return self.DEFAULT_ACCEPT

        shortTitle = simplifyTitle(title, self._truncateAfter)
        if shortTitle not in seen:
            # New truncated form: record the full title that produced it.
            seen[shortTitle] = title
            return self.DEFAULT_ACCEPT

        # The truncated form is already known. Only the title that first
        # produced it keeps being accepted; all others are rejected.
        if seen[shortTitle] == title:
            return self.DEFAULT_ACCEPT
        return self.REJECT
Exemple #9
0
    def accept(self, title):
        """
        Work out whether a title should be kept, and why.

        @param title: A C{str} sequence title.
        @return: An C{int} telling the caller the outcome, one of

            C{self.REJECT} if the title is unacceptable.
            C{self.WHITELIST_ACCEPT} if the title is whitelisted.
            C{self.DEFAULT_ACCEPT} if the title is acceptable by default.

            Acceptance has two separate values so the caller can see which
            of the two reasons applied.
        """
        if self._whitelist and title in self._whitelist:
            return self.WHITELIST_ACCEPT

        # Any one of these is enough to reject: the title is blacklisted,
        # it fails to match the positive regex, or it matches the negative
        # regex. They are evaluated in that order, stopping at the first hit.
        rejected = (
            (self._blacklist and title in self._blacklist) or
            (self._positiveRegex and
             self._positiveRegex.search(title) is None) or
            (self._negativeRegex and
             self._negativeRegex.search(title) is not None))
        if rejected:
            return self.REJECT

        if self._truncated is not None:
            key = simplifyTitle(title, self._truncateAfter)
            if key not in self._truncated:
                # Not seen before: remember the title behind this key.
                self._truncated[key] = title
            elif self._truncated[key] != title:
                # A different title already produced this truncated form.
                # Only that earlier title continues to be accepted.
                return self.REJECT

        return self.DEFAULT_ACCEPT
 def testEmptyTitle(self):
     """
     An empty title simplified against a non-empty target must come back
     as an empty title.
     """
     simplified = simplifyTitle('', 'xxx')
     self.assertEqual('', simplified)
 def testEmtpyTitleWithEmptyTarget(self):
     """
     An empty title simplified against an empty target must come back as
     an empty title.
     """
     simplified = simplifyTitle('', '')
     self.assertEqual('', simplified)
Exemple #12
0
 def testEmtpyTitleWithEmptyTarget(self):
     """
     When both the title and the target are empty, the simplified title
     is empty too.
     """
     expected = ''
     result = simplifyTitle('', '')
     self.assertEqual(expected, result)
Exemple #13
0
 def testEmptyTitle(self):
     """
     When the title is empty and the target is not, the simplified title
     is still empty.
     """
     expected = ''
     result = simplifyTitle('', 'xxx')
     self.assertEqual(expected, result)