def test_min_match_not_satisfied(self):
    '''get_matches returns an empty list when the text shares some
    characters with the pattern, but fewer than the requested minimum.
    '''
    # Both texts overlap 'PNF' somewhere, yet a minimum of 2 yields nothing.
    for text in ('SPLNFRT', 'PTTP'):
        self.assertEqual(spm.get_matches('PNF', text, 2), [])
def test_min_match_satisfied(self):
    '''Checks the list returned by get_matches when minMatch > 1.'''
    # Each case is (pattern, text, expected result) with a minimum of 2.
    cases = (
        ('PNF', 'PNF', [(0, 3)]),
        ('AB', 'ABTABAA', [(0, 2), (3, 2)]),
        ('AB', 'ATTABTTAJ', [(3, 2)]),
        ('ABC', 'ATTABTTAJABC', [(3, 2), (9, 3)]),
    )
    for pattern, text, expected in cases:
        self.assertEqual(spm.get_matches(pattern, text, 2), expected)
def score(self, query, matchString):
    '''Scores how well query textually matches matchString, rewarding
    matches that sit at the beginning of matchString.

    Overrides ScoringMethod.score()

    Arguments:
    query -- the query string, e.g. 'lond'
    matchString -- UTF-8 string on which the query matched; e.g. 'london'.

    Returns:
    0.0 when simpleprefixmatch.get_matches reports no match at all,
    1.0 when query and matchString are equal character for character,
    and otherwise the sigmoid of a raw score built from the matched
    length plus the configured bonuses.

    Preconditions:
    -- All alphabetical characters in query and matchString are in the
       same case (all upper or all lower).
    '''
    # Each entry is indexed as (start position, matched length).
    runs = simpleprefixmatch.get_matches(query, matchString, 1)

    # Tuning parameters are read up front, before any early return.
    shift = self.baseShift
    wholeQueryBonus = self.subStringBonus
    sameLengthBonus = self.exactMatchBonus
    leadingBonus = self.startMatchBonus

    # Nothing matched: lowest possible score.
    if len(runs) == 0:
        return 0.0

    # Identical strings: highest possible score.
    if query == matchString:
        return 1.0

    # Single-character runs are far too common to be meaningful, so only
    # runs longer than one character count towards the matched length.
    matchedChars = float(sum(run[1] for run in runs if run[1] > 1))

    # True when at least one run covers the query in full. The query may
    # occur several times, e.g. query = 'lon' and matchString = 'lonlon'.
    queryLen = len(query)
    containsWholeQuery = any(run[1] >= queryLen for run in runs)

    # Fraction of matchString that is matched, shifted so that a raw
    # fraction of 0 does not come out as 0.5 after the sigmoid.
    # For reference, sigmoid(-2) is approx 0.12
    rawScore = matchedChars / len(matchString) - shift

    if containsWholeQuery:
        rawScore += wholeQueryBonus

    # Bonus applied when both strings have the same length.
    if len(matchString) == queryLen:
        rawScore += sameLengthBonus

    # A first run starting at index 0 earns a bonus that grows with the
    # length of that run.
    firstRun = runs[0]
    if firstRun[0] == 0:
        rawScore += leadingBonus * firstRun[1]

    return sigmoid(rawScore)
def test_empty_text(self):
    '''An empty text produces no matches.'''
    found = spm.get_matches('adj', '')
    expected = []
    self.assertEqual(found, expected)
def test_empty_pattern(self):
    '''An empty pattern produces no matches.'''
    found = spm.get_matches('', 'adj')
    expected = []
    self.assertEqual(found, expected)
def test_mixed(self):
    '''Several partial pattern matches of differing length in one text.'''
    expected = [(1, 2), (3, 3), (8, 1)]
    found = spm.get_matches('jam', 'ajajamkaj', 1)
    self.assertEqual(found, expected)
def test_sequential(self):
    '''Two partial matches that sit directly after one another.'''
    expected = [(0, 3), (3, 3)]
    found = spm.get_matches('lond', 'lonlon')
    self.assertEqual(found, expected)
def test_match_prefix(self):
    '''The whole pattern occurs at the very start of the text.'''
    expected = [(0, 4)]
    found = spm.get_matches('lond', 'londonderry')
    self.assertEqual(found, expected)
def test_match_suffix(self):
    '''The whole pattern occurs at the very end of the text.'''
    expected = [(6, 5)]
    found = spm.get_matches('shire', 'oxfordshire')
    self.assertEqual(found, expected)
def test_no_match(self):
    '''get_matches('london', 'sydney') is expected to return an empty list.'''
    found = spm.get_matches('london', 'sydney')
    expected = []
    self.assertEqual(found, expected)
def test_pattern_longer_none(self):
    '''Pattern is longer than the text and nothing matches.'''
    found = spm.get_matches('baroomfoom', 'zim')
    expected = []
    self.assertEqual(found, expected)
def test_pattern_longer_full(self):
    '''Pattern is longer than the text and the entire text is matched.'''
    expected = [(0, 3)]
    found = spm.get_matches('sydn', 'syd')
    self.assertEqual(found, expected)
def test_pattern_longer_partial(self):
    '''Pattern is longer than the text and only the tail of the text
    is matched.
    '''
    expected = [(2, 3)]
    found = spm.get_matches('baroomfoom', 'cabar')
    self.assertEqual(found, expected)
def test_pattern_longer(self):
    '''Pattern is longer than the text; the full text is matched once.'''
    expected = [(0, 3)]
    found = spm.get_matches('bambambam', 'bam')
    self.assertEqual(found, expected)