Esempio n. 1
0
 def test_min_match_not_satisfied(self):
     '''Pattern and text share some characters, but fewer than the
     minimum match length requested (2), so no match is expected.
     '''
     for pattern, text in (('PNF', 'SPLNFRT'), ('PNF', 'PTTP')):
         self.assertEqual(spm.get_matches(pattern, text, 2), [])
Esempio n. 2
0
 def test_min_match_satisfied(self):
     '''Checks the matches reported when the minimum match length is
     greater than 1.
     '''
     # (pattern, text, expected matches) for a minimum match length of 2.
     cases = (
         ('PNF', 'PNF', [(0, 3)]),
         ('AB', 'ABTABAA', [(0, 2), (3, 2)]),
         ('AB', 'ATTABTTAJ', [(3, 2)]),
         ('ABC', 'ATTABTTAJABC', [(3, 2), (9, 3)]),
     )
     for pattern, text, expected in cases:
         self.assertEqual(spm.get_matches(pattern, text, 2), expected)
    def score(self, query, matchString):
        '''Rates a text-based match between two strings, rewarding matches
        that begin at the start of matchString.

        Overrides ScoringMethod.score()

        Arguments:
            query -- the query string, e.g. 'lond'
            matchString -- UTF-8 string on which the query matched; e.g. 'london'.

        Returns:
            0.0 when simpleprefixmatch.get_matches reports no match,
            1.0 when query and matchString are equal, and otherwise the
            sigmoid of the accumulated raw score.

        Preconditions:
            -- All alphabetical characters in query and matchString are in the
               same case (all upper or all lower).
        '''

        # Each hit is indexed below as (start position, matched length).
        hits = simpleprefixmatch.get_matches(query, matchString, 1)

        # Tuning parameters, read once up front.
        shift = self.baseShift
        bonusSubstring = self.subStringBonus
        bonusExact = self.exactMatchBonus
        bonusStart = self.startMatchBonus

        # Nothing matched at all: lowest possible score.
        if len(hits) == 0:
            return 0.0

        # Identical strings: highest possible score.
        if query == matchString:
            return 1.0

        # Single-character hits are far too common to be meaningful, so only
        # runs of two or more characters contribute to the matched length.
        longRuns = [hit[1] for hit in hits if hit[1] > 1]
        matchedChars = sum(longRuns, 0.0)

        # True when at least one counted run covers the whole query.
        containsQuery = any(run >= len(query) for run in longRuns)

        # Fraction of matchString covered by counted runs. A raw score of 0
        # would come out of the sigmoid as 0.5, so shift the curve to the
        # right. For reference, sigmoid(-2) is approx 0.12
        raw = matchedChars / len(matchString) - shift

        # Bonus when the query occurs in full inside matchString (possibly
        # more than once, e.g. query = 'lon' and matchString = 'lonlon').
        if containsQuery:
            raw += bonusSubstring
            # Further bonus when both strings have the same length.
            # NOTE(review): equal strings already returned 1.0 above, so this
            # branch looks unreachable -- confirm against get_matches.
            if len(matchString) == len(query):
                raw += bonusExact

        # Bonus for a first hit at position 0, scaled by that hit's length.
        firstStart, firstLength = hits[0][0], hits[0][1]
        if firstStart == 0:
            raw += bonusStart * firstLength

        return sigmoid(raw)
Esempio n. 4
0
 def test_empty_text(self):
     '''Searching an empty text yields no matches.'''
     self.assertEqual(spm.get_matches('adj', ''), [])
Esempio n. 5
0
 def test_empty_pattern(self):
     '''Searching with an empty pattern yields no matches.'''
     self.assertEqual(spm.get_matches('', 'adj'), [])
Esempio n. 6
0
 def test_mixed(self):
     '''Several partial pattern matches spread through the text.'''
     expected = [(1, 2), (3, 3), (8, 1)]
     self.assertEqual(spm.get_matches('jam', 'ajajamkaj', 1), expected)
Esempio n. 7
0
 def test_sequential(self):
     '''Two partial matches that sit directly after one another.'''
     expected = [(0, 3), (3, 3)]
     self.assertEqual(spm.get_matches('lond', 'lonlon'), expected)
Esempio n. 8
0
 def test_match_prefix(self):
     '''The pattern is a prefix of the text.'''
     found = spm.get_matches('lond', 'londonderry')
     self.assertEqual(found, [(0, 4)])
Esempio n. 9
0
 def test_match_suffix(self):
     '''The pattern is a suffix of the text.'''
     found = spm.get_matches('shire', 'oxfordshire')
     self.assertEqual(found, [(6, 5)])
Esempio n. 10
0
 def test_no_match(self):
     '''No match is expected for 'london' against 'sydney'.'''
     self.assertEqual(spm.get_matches('london', 'sydney'), [])
Esempio n. 11
0
 def test_pattern_longer_none(self):
     '''Pattern is longer than the text and nothing matches.'''
     self.assertEqual(spm.get_matches('baroomfoom', 'zim'), [])
Esempio n. 12
0
 def test_pattern_longer_full(self):
     '''Pattern is longer than the text; the whole text matches the start
     of the pattern.
     '''
     found = spm.get_matches('sydn', 'syd')
     self.assertEqual(found, [(0, 3)])
Esempio n. 13
0
 def test_pattern_longer_partial(self):
     '''Pattern is longer than the text; only the end of the text matches
     the start of the pattern.
     '''
     found = spm.get_matches('baroomfoom', 'cabar')
     self.assertEqual(found, [(2, 3)])
Esempio n. 14
0
 def test_pattern_longer(self):
     '''Pattern is longer than the text and the text matches in full.'''
     expected = [(0, 3)]
     self.assertEqual(spm.get_matches('bambambam', 'bam'), expected)