def test_tps():
    """Smoke-test ``tps`` on the two NYTimes fixture pages.

    The fixtures are looked up under ``<this file's dir>/_TEST_DATA_ROOT/test``.
    For each one, the MD5 hex digest of ``repr(text)`` is printed, followed by
    the value returned by ``tps(text)``. Nothing is asserted: the test only
    fails if reading, decoding, or ``tps`` itself raises.
    """
    path = os.path.join(os.path.dirname(__file__), _TEST_DATA_ROOT, 'test')
    fixtures = (
        'nytimes-index-clean-visible.html',
        'nytimes-index-clean-visible-dup.html',
    )
    for fname in fixtures:
        # Read raw bytes (the content is decoded explicitly below) and let the
        # ``with`` block close the handle instead of leaking it.
        with open(os.path.join(path, fname), 'rb') as fh:
            text = fh.read().decode('utf8')
        # Encoding the repr keeps the digest input a byte string, which is what
        # hashlib expects on every interpreter version.
        digest = hashlib.md5(repr(text).encode('utf8')).hexdigest()
        print('%s %s' % (digest, tps(text)))
Example #2
0
def test_tps(test_data_dir):
    """Smoke-test ``tps`` on the two NYTimes fixture pages.

    For each fixture under ``<test_data_dir>/test``, the MD5 hex digest of
    ``repr(text)`` is printed, followed by the value returned by
    ``tps(text)``. Nothing is asserted: the test only fails if reading,
    decoding, or ``tps`` itself raises.

    :param test_data_dir: directory that contains the ``test`` fixture folder
        (presumably supplied by a test fixture; confirm against the test
        configuration).
    """
    path = os.path.join(test_data_dir, 'test')
    fixtures = (
        'nytimes-index-clean-visible.html',
        'nytimes-index-clean-visible-dup.html',
    )
    for fname in fixtures:
        # Read raw bytes (the content is decoded explicitly below) and let the
        # ``with`` block close the handle instead of leaking it.
        with open(os.path.join(path, fname), 'rb') as fh:
            text = fh.read().decode('utf8')
        # Encoding the repr keeps the digest input a byte string, which is what
        # hashlib expects on every interpreter version.
        digest = hashlib.md5(repr(text).encode('utf8')).hexdigest()
        print('%s %s' % (digest, tps(text)))