def testExact(self):
    """
    A target that is identical to a whole word of the title must cut the
    title immediately after that word, keeping the word itself.
    """
    title = 'Funny sea lion polyomavirus 1 CSL6994'
    simplified = simplifyTitle(title, 'lion')
    self.assertEqual('Funny sea lion', simplified)
def testSuffix(self):
    """
    A target that is the suffix of a word in the title must cut the title
    after that word, keeping the whole word that carries the suffix.
    """
    title = 'Funny sea lion polyomavirus 1 CSL6994'
    simplified = simplifyTitle(title, 'virus')
    self.assertEqual('Funny sea lion polyomavirus', simplified)
def testContained(self):
    """
    A target found strictly inside a word of the title must cut the title
    at the end of the target, so only the part of that word up to and
    including the target is kept.
    """
    title = 'Funny sea lion polyomavirus 1 CSL6994'
    simplified = simplifyTitle(title, 'yoma')
    self.assertEqual('Funny sea lion polyoma', simplified)
def accept(self, title):
    """
    Return a value (see below) to indicate if a title is acceptable (and,
    if so, in what way).

    The checks run in this order: whitelist, blacklist, positive regex,
    negative regex and, finally, de-duplication of truncated titles. The
    first check that decides the outcome wins.

    @param title: A C{str} sequence title.
    @return: An C{int} to indicate an acceptable title or not. This will be

        C{self.REJECT} if the title is unacceptable.
        C{self.WHITELIST_ACCEPT} if the title is whitelisted.
        C{self.DEFAULT_ACCEPT} if the title is acceptable by default.

        These three values are needed so our caller can distinguish between
        the two reasons for acceptance.
    """
    if self._whitelist and title in self._whitelist:
        return self.WHITELIST_ACCEPT

    if self._blacklist and title in self._blacklist:
        return self.REJECT

    # If we have a positive regex but we don't match it, reject.
    if self._positiveRegex and self._positiveRegex.search(title) is None:
        return self.REJECT

    # If we have a negative regex and we do match it, reject.
    if (self._negativeRegex and
            self._negativeRegex.search(title) is not None):
        return self.REJECT

    if self._truncated is not None:
        # Titles start with something like gi|525472786|emb|HG313807.1|
        # that we need to skip. A title that contains no space has no
        # separate id prefix to drop, so it is used unchanged instead of
        # raising IndexError.
        _, separator, remainder = title.partition(' ')
        titleSansId = remainder if separator else title
        truncated = simplifyTitle(titleSansId, self._truncateAfter)
        if truncated in self._truncated:
            # We've already seen this (truncated) title. Reject unless
            # this is the original title that we truncated to make this
            # entry. That title must continue to be accepted.
            if self._truncated[truncated] == title:
                return self.DEFAULT_ACCEPT
            else:
                return self.REJECT
        else:
            # First time we see this truncated form: remember which full
            # title produced it.
            self._truncated[truncated] = title

    return self.DEFAULT_ACCEPT
def accept(self, title):
    """
    Decide whether a title is acceptable and, if it is, for which reason.

    @param title: A C{str} sequence title.
    @return: An C{int} giving the verdict. One of

        C{self.WHITELIST_ACCEPT} when the title is on the whitelist.
        C{self.REJECT} when the title is blacklisted, fails the positive
            regex, matches the negative regex, or truncates to something
            already recorded for a different title.
        C{self.DEFAULT_ACCEPT} when nothing above rejected the title.

        The two accepting values are distinct so that callers can tell
        a whitelisted title from one that was accepted by default.
    """
    whitelist = self._whitelist
    if whitelist and title in whitelist:
        return self.WHITELIST_ACCEPT

    blacklist = self._blacklist
    if blacklist and title in blacklist:
        return self.REJECT

    # A positive regex, when present, must match the title.
    mustMatch = self._positiveRegex
    if mustMatch and mustMatch.search(title) is None:
        return self.REJECT

    # A negative regex, when present, must not match the title.
    mustNotMatch = self._negativeRegex
    if mustNotMatch and mustNotMatch.search(title) is not None:
        return self.REJECT

    seen = self._truncated
    if seen is not None:
        shortened = simplifyTitle(title, self._truncateAfter)
        if shortened not in seen:
            # First occurrence of this truncated form: record the full
            # title that produced it.
            seen[shortened] = title
        elif seen[shortened] != title:
            # Some other title already truncated to this form. Only the
            # title that created the entry keeps being accepted.
            return self.REJECT

    return self.DEFAULT_ACCEPT
def testEmptyTitle(self):
    """
    An empty title must stay empty when it is simplified with a
    non-empty target.
    """
    simplified = simplifyTitle('', 'xxx')
    self.assertEqual('', simplified)
def testEmtpyTitleWithEmptyTarget(self):
    """
    An empty title must stay empty when it is simplified with an empty
    target.
    """
    simplified = simplifyTitle('', '')
    self.assertEqual('', simplified)