def test(self):
    """Run score_index twice over one sentence and check the scorecard.

    The first pass uses a scorer that always returns 3.3; the second uses
    a scorer that returns 1.0 for arguments longer than one character and
    0.0 otherwise. The expected lists have one entry per character of the
    sentence, and the second expectation equals the first with 1.0 added
    everywhere except the untouched 0.0 slots and the final slot.
    """
    from highlighter import (
        build_scorecard,
        build_stem_index,
        score_index,
        tokenize,
    )
    from highlighter import english_suffix_stemmer as stemmer

    def constant_scorer(_):
        return 3.3

    def length_scorer(value):
        if len(value) > 1:
            return 1.0
        return 0.0

    sentence = "This ham sammy is the bomb!"
    splitter = re.compile(r'(\W)')
    words = tokenize(sentence, splitter)
    stems_by_word = {word: stemmer(word) for word in words}
    index = build_stem_index(words, stems_by_word)

    # Every slot starts at 0.0; score_index updates the card in place.
    card = build_scorecard(sentence, 0.0)

    after_first_pass = [
        3.3, 3.3, 3.3, 0.0, 0.0, 3.3, 3.3, 3.3, 0.0,
        3.3, 3.3, 3.3, 3.3, 3.3, 0.0, 0.0, 0.0, 0.0,
        3.3, 3.3, 3.3, 0.0, 3.3, 3.3, 3.3, 3.3, 3.3,
    ]
    score_index(index, card, constant_scorer)
    assert card == after_first_pass

    after_second_pass = [
        4.3, 4.3, 4.3, 0.0, 0.0, 4.3, 4.3, 4.3, 0.0,
        4.3, 4.3, 4.3, 4.3, 4.3, 0.0, 0.0, 0.0, 0.0,
        4.3, 4.3, 4.3, 0.0, 4.3, 4.3, 4.3, 4.3, 3.3,
    ]
    score_index(index, card, length_scorer)
    assert card == after_second_pass
def test(self):
    """Check build_stem_index on degenerate inputs and on a real sentence.

    The sentence is tokenized on single spaces. The expectations cover
    inputs that must produce an empty index, a lookup that only knows one
    token, and the full stem lookup built with the suffix stemmer.
    """
    from highlighter import build_stem_index, tokenize
    from highlighter import english_suffix_stemmer as stemmer

    sentence = "This ham sammy is the bomb!"
    space = re.compile(' ')
    words = tokenize(sentence, space)
    stems_by_word = {word: stemmer(word) for word in words}

    # Missing tokens and/or a missing lookup are expected to yield {}.
    empty_cases = (
        (None, None),
        ('', None),
        (words, None),
    )
    for token_arg, lookup_arg in empty_cases:
        assert build_stem_index(token_arg, lookup_arg) == {}

    # A lookup containing a single token indexes only that token's span.
    partial_index = build_stem_index(words, {'This': 'dis'})
    assert partial_index == {'dis': [(0, 4)]}

    expected_full_index = {
        'the': [(14, 17)],
        'sammy': [(7, 12)],
        'bomb!': [(17, 22)],
        'ham': [(4, 7)],
        'thi': [(0, 4)],
    }
    assert build_stem_index(words, stems_by_word) == expected_full_index
def test(self):
    """Check english_suffix_stemmer against a table of inputs and outputs.

    The table covers falsy and blank inputs (all expected to give ''),
    lower-casing, suffix removal, and non-ASCII text.
    """
    from highlighter import english_suffix_stemmer as stemmer

    cases = [
        # Falsy or whitespace-only inputs.
        (None, ''),
        ((), ''),
        ([], ''),
        ({}, ''),
        ('', ''),
        (' ', ''),
        # Case folding.
        ('hello world', 'hello world'),
        ('HELLO world', 'hello world'),
        # Suffix stripping.
        ('ly', ''),
        ('created', 'creat'),
        ('bamboozled', 'bamboozl'),
        # Non-ASCII text.
        (u'☃', u'☃'),
        (u'Yelp是涼爽', u'yelp是涼爽'),
    ]
    for raw, expected in cases:
        assert stemmer(raw) == expected, 'stemmer(%r)' % (raw,)