Beispiel #1
 def __init__(self, text):
     """Store ``force_unicode(text)`` and reset the link/validation state."""
     # Working copy: some functions modify this.
     self.text = force_unicode(text)
     # Reference copy: never changes; use it as a fallback or for comparison.
     self.original_text = self.text
     self.has_been_linked = False
     # The three fields below get changed by the validation method.
     self.tweet_length = self.tweet_is_valid = self.validation_error = None
Beispiel #2
 def __init__(self, text):
     """Keep a working and a reference copy of the text; clear result fields."""
     # This copy will get modified by some functions.
     self.text = force_unicode(text)
     # This copy never changes; use it as a fallback or for comparison.
     self.original_text = self.text
     self.has_been_linked = False
     # Placeholders that get changed by the validation method.
     self.tweet_length = self.tweet_is_valid = self.validation_error = None
Beispiel #3
 def __init__(self, text, **kwargs):
     """Store ``force_unicode(text)`` and the optional ``parent`` keyword.

     ``parent`` falls back to ``False`` when the keyword is not supplied.
     """
     self.text = force_unicode(text)
     self.parent = kwargs['parent'] if 'parent' in kwargs else False
Beispiel #4
        assert result == test.get('expected'), error(u'\nTest %d Failed: %s%s' % (attempted, test.get('description'), u'\n%s' % test.get('hits') if test.get('hits') else ''))
        passed += 1
        sys.stdout.write(success(u'Test %d Passed: %s' % (attempted, test.get('description'))))
        sys.stdout.write(error(u'Test %d Failed: %s' % (attempted, test.get('description'))))
        sys.stdout.write(error(u' - with: %s' % test.get('text')))
        sys.stdout.write(error(u' - expected: %s' % test.get('expected')))
        sys.stdout.write(error(u' - got: %s' % result))
        failed += 1


# extractor section
extractor_file = open(os.path.join('twitter-text-conformance', 'extract.yml'), 'r')
extractor_tests = yaml.load(force_unicode(

sys.stdout.write('Testing Extractor\n')

for section in extractor_tests.get('tests'):
    sys.stdout.write('\nTesting Extractor: %s\n' % section)
    for test in extractor_tests.get('tests').get(section):
        if (args.ignore_narrow_errors or narrow_build) and section in ['hashtags'] and test.get('description') in ['Hashtag with ideographic iteration mark']:
            sys.stdout.write('Skipping: %s\n' % test.get('description'))
        extractor = twitter_text.extractor.Extractor(test.get('text'))
        if section == 'mentions':
Beispiel #5
            error(u'Test %d Failed: %s' %
                  (attempted, test.get('description'))))
        sys.stdout.write(error(u' - with: %s' % test.get('text')))
        sys.stdout.write(error(u' - expected: %s' % test.get('expected')))
        sys.stdout.write(error(u' - got: %s' % result))
        failed += 1


# extractor section
extractor_file = open(os.path.join('twitter-text-conformance', 'extract.yml'),
extractor_tests = yaml.load(force_unicode(

sys.stdout.write('Testing Extractor\n')

for section in extractor_tests.get('tests'):
    sys.stdout.write('\nTesting Extractor: %s\n' % section)
    for test in extractor_tests.get('tests').get(section):
        if (args.ignore_narrow_errors or narrow_build) and section in [
        ] and test.get('description') in [
                'Hashtag with ideographic iteration mark'
            sys.stdout.write('Skipping: %s\n' % test.get('description'))
Beispiel #6
 def __init__(self, text):
     """Keep ``force_unicode(text)`` on the instance as ``self.text``."""
     converted = force_unicode(text)
     self.text = converted
Beispiel #7
 def __init__(self, text, **kwargs):
     """Store the text, the optional ``parent`` keyword and an Extractor.

     ``parent`` falls back to ``False`` when the keyword is not supplied;
     the Extractor is built from the stored ``self.text``.
     """
     self.text = force_unicode(text)
     self.parent = kwargs['parent'] if 'parent' in kwargs else False
     self.extractor = Extractor(self.text)
Beispiel #8
 def __init__(self, text):
     """Remember ``force_unicode(text)`` as ``self.text``."""
     converted = force_unicode(text)
     self.text = converted
Beispiel #9
def validation_tests(tests, passed, failed):
    """Run the Validation checks and report each result on stdout.

    Uses the module-level ``text`` / ``tt`` fixtures. Each check is run
    against both the validator attached to ``tt`` and a stand-alone
    ``twitter_text.Validation`` instance, followed by checks on deliberately
    bad input (empty, too long, containing an invalid character).

    Args:
        tests: running count of executed checks.
        passed: running count of passed checks.
        failed: running count of failed checks.

    Returns:
        The updated ``(tests, passed, failed)`` tuple.
    """
    def _check(label, ok, expected, returned):
        """Print a colour-coded verdict (with details on failure); return ok."""
        if ok:
            print(u'\033[92m  %s passed\033[0m' % label)
        else:
            print(u'\033[91m  %s failed:\033[0m' % label)
            print(u'    Expected: %s' % expected)
            print(u'    Returned: %s' % returned)
        return ok

    print(u'Running Validation tests')
    validation = twitter_text.Validation(text)
    validators = ((u'Attached', tt.validation), (u'Stand alone', validation))
    outcomes = []

    expected_length = len(text)
    for label, validator in validators:
        length = validator.tweet_length()
        outcomes.append(_check(u'%s tweet_length' % label,
                               length == expected_length,
                               expected_length, length))

    for label, validator in validators:
        verdict = validator.tweet_invalid()
        outcomes.append(_check(u'%s tweet_invalid' % label,
                               verdict == (False, None),
                               u'False, None', u'%s, %s' % verdict))

    print('')
    print(u'Running Validation tests on bad text')
    bad_tweets = {
        'empty': u'',
        'too_long': text + text,
        'invalid_characters': text,
    }

    for label, key, reason in ((u'Empty Text', 'empty', 'Empty text'),
                               (u'Too Long', 'too_long', 'Too long')):
        # Report the verdict of the bad tweet itself, not of the good-text
        # validator, so a failure shows what was actually returned.
        verdict = twitter_text.TwitterText(bad_tweets[key]).validation.tweet_invalid()
        outcomes.append(_check(u'%s tweet_invalid' % label,
                               verdict == (True, reason),
                               u'True, %s' % reason, u'%s, %s' % verdict))

    # Collect every invalid character that the validator fails to reject.
    missed_bad_characters = []
    for bad_character in validation.INVALID_CHARACTERS:
        this_text = force_unicode(bad_tweets['invalid_characters'] + bad_character)
        this_tt = twitter_text.TwitterText(this_text)
        if this_tt.validation.tweet_invalid() != (True, 'Invalid characters'):
            missed_bad_characters.append(bad_character)
    outcomes.append(_check(u'Invalid Characters tweet_invalid',
                           not missed_bad_characters,
                           u'[]', force_unicode(missed_bad_characters)))

    successes = sum(1 for ok in outcomes if ok)
    return (tests + len(outcomes),
            passed + successes,
            failed + len(outcomes) - successes)
Beispiel #10
def extractor_tests(tests, passed, failed):
    """Run the Extractor checks and report each result on stdout.

    Every extractor method listed below is called on both the extractor
    attached to the module-level ``tt`` and a stand-alone
    ``twitter_text.Extractor`` built from the module-level ``text``, and its
    return value is compared with the expected fixture.

    Args:
        tests: running count of executed checks.
        passed: running count of passed checks.
        failed: running count of failed checks.

    Returns:
        The updated ``(tests, passed, failed)`` tuple.
    """
    print(u'Running Extractor tests')

    correct_mentioned_screen_names = [u'foo', u'monkeybat', u'bar']
    correct_mentioned_screen_names_with_indices = [{'indices': (0, 4), 'screen_name': u'foo'}, {'indices': (32, 42), 'screen_name': u'monkeybat'}, {'indices': (47, 51), 'screen_name': u'bar'}]
    correct_reply_screen_name = 'foo'
    # NOTE(review): the URL fixtures expect one (empty) URL spanning offsets
    # 52-90 and the hashtag offsets start after it; this presumably relies on
    # a URL being present in the module-level `text` - confirm the fixture.
    correct_urls = [u'']
    correct_urls_with_indices = [{'url': u'', 'indices': (52, 90)}]
    correct_hashtags = [u'comedy', u'url']
    correct_hashtags_with_indices = [{'indices': (91, 98), 'hashtag': u'comedy'}, {'indices': (99, 103), 'hashtag': u'url'}]

    extractor = twitter_text.Extractor(text)

    # (extractor method name, expected return value), in reporting order.
    expectations = (
        ('extract_mentioned_screen_names', correct_mentioned_screen_names),
        ('extract_mentioned_screen_names_with_indices', correct_mentioned_screen_names_with_indices),
        ('extract_reply_screen_name', correct_reply_screen_name),
        ('extract_urls', correct_urls),
        ('extract_urls_with_indices', correct_urls_with_indices),
        ('extract_hashtags', correct_hashtags),
        ('extract_hashtags_with_indices', correct_hashtags_with_indices),
    )
    sources = ((u'Attached', tt.extractor), (u'Stand alone', extractor))

    for method_name, expected in expectations:
        for label, source in sources:
            returned = getattr(source, method_name)()
            if returned == expected:
                print(u'\033[92m  %s %s passed\033[0m' % (label, method_name))
                passed += 1
            else:
                print(u'\033[91m  %s %s failed:\033[0m' % (label, method_name))
                print(u'    Expected: %s' % force_unicode(expected))
                print(u'    Returned: %s' % force_unicode(returned))
                failed += 1
            tests += 1

    return tests, passed, failed
Beispiel #11
# encoding=utf-8

import twitter_text
from twitter_text.unicode import force_unicode

# Shared fixture: the sample tweet passed through force_unicode, and a
# TwitterText object built from it. The test functions in this module read
# both names as globals.
text = force_unicode('@foo said the funniest thing to @monkeybat and @bar #comedy #url')
tt = twitter_text.TwitterText(text)

def autolink_tests(tests, passed, failed):
    """Run the Autolink check and report the result on stdout.

    Compares ``tt.autolink.auto_link()`` (module-level ``tt`` fixture) with
    the expected markup and updates the counters accordingly.

    Args:
        tests: running count of executed checks.
        passed: running count of passed checks.
        failed: running count of failed checks.

    Returns:
        The updated ``(tests, passed, failed)`` tuple, matching the other
        ``*_tests`` functions.
    """
    print(u'Running Autolink tests')

    # NOTE(review): only `correct_auto_link_with_hit_highlight` is used by the
    # check below; the remaining expected strings (and `autolink`) are kept
    # as-is because this function looks like it originally had more checks.
    correct_auto_link = u'<a class="tweet-url username" href="" rel="nofollow">@foo</a> said the funniest thing to <a class="tweet-url username" href="" rel="nofollow">@monkeybat</a> and <a class="tweet-url username" href="" rel="nofollow">@bar</a> <a href="" rel="nofollow">…</a> <a href="" title="#comedy" class="tweet-url hashtag" rel="nofollow">#comedy</a> <a href="" title="#url" class="tweet-url hashtag" rel="nofollow">#url</a>'
    correct_auto_link_with_hit_highlight = u'<a class="tweet-url username" href="" rel="nofollow">@foo</a> said the <em class="search-hit">funniest</em> thing to <a class="tweet-url username" href="" rel="nofollow">@monkeybat</a> and <a class="tweet-url username" href="" rel="nofollow">@bar</a> <a href="" rel="nofollow">…</a> <a href="" title="#comedy" class="tweet-url hashtag" rel="nofollow">#comedy</a> <a href="" title="#url" class="tweet-url hashtag" rel="nofollow">#url</a>'
    correct_auto_link_usernames_or_lists = u'<a class="tweet-url username" href="" rel="nofollow">@foo</a> said the funniest thing to <a class="tweet-url username" href="" rel="nofollow">@monkeybat</a> and <a class="tweet-url username" href="" rel="nofollow">@bar</a> #comedy #url'
    correct_auto_link_hashtags = u'@foo said the funniest thing to @monkeybat and @bar <a href="" title="#comedy" class="tweet-url hashtag" rel="nofollow">#comedy</a> <a href="" title="#url" class="tweet-url hashtag" rel="nofollow">#url</a>'
    correct_auto_link_urls_custom = u'@foo said the funniest thing to @monkeybat and @bar <a href="" rel="nofollow">…</a> #comedy #url'
    correct_auto_link_urls_custom_with_kwargs = u'@foo said the funniest thing to @monkeybat and @bar <a href="" class="boosh" rel="external" title="a link">…</a> #comedy #url'

    autolink = twitter_text.Autolink(text)

    # test the overall auto_link method
    # NOTE(review): the plain auto_link() output is compared against the
    # hit-highlight expectation rather than `correct_auto_link` - confirm
    # this is intended.
    test_autolink = tt.autolink.auto_link()
    if test_autolink == correct_auto_link_with_hit_highlight:
        print(u'\033[92m  Attached auto_link passed\033[0m')
        passed += 1
    else:
        print(u'\033[91m  Attached auto_link failed:\033[0m')
        print(u'    Expected: %s' % correct_auto_link_with_hit_highlight)
        print(u'    Returned: %s' % test_autolink)
        failed += 1
    tests += 1

    return tests, passed, failed
Beispiel #12
def validation_tests(tests, passed, failed):
    """Run the Validation checks and report each result on stdout.

    Uses the module-level ``text`` / ``tt`` fixtures. Each check is run
    against both the validator attached to ``tt`` and a stand-alone
    ``twitter_text.Validation`` instance, followed by checks on deliberately
    bad input (empty, too long, containing an invalid character).

    Args:
        tests: running count of executed checks.
        passed: running count of passed checks.
        failed: running count of failed checks.

    Returns:
        The updated ``(tests, passed, failed)`` tuple.
    """
    def _check(label, ok, expected, returned):
        """Print a colour-coded verdict (with details on failure); return ok."""
        if ok:
            print(u'\033[92m  %s passed\033[0m' % label)
        else:
            print(u'\033[91m  %s failed:\033[0m' % label)
            print(u'    Expected: %s' % expected)
            print(u'    Returned: %s' % returned)
        return ok

    print(u'Running Validation tests')
    validation = twitter_text.Validation(text)
    validators = ((u'Attached', tt.validation), (u'Stand alone', validation))
    outcomes = []

    expected_length = len(text)
    for label, validator in validators:
        length = validator.tweet_length()
        outcomes.append(_check(u'%s tweet_length' % label,
                               length == expected_length,
                               expected_length, length))

    for label, validator in validators:
        verdict = validator.tweet_invalid()
        outcomes.append(_check(u'%s tweet_invalid' % label,
                               verdict == (False, None),
                               u'False, None', u'%s, %s' % verdict))

    print('')
    print(u'Running Validation tests on bad text')
    bad_tweets = {
        'empty': u'',
        'too_long': text + text,
        'invalid_characters': text,
    }

    for label, key, reason in ((u'Empty Text', 'empty', 'Empty text'),
                               (u'Too Long', 'too_long', 'Too long')):
        # Report the verdict of the bad tweet itself, not of the good-text
        # validator, so a failure shows what was actually returned.
        verdict = twitter_text.TwitterText(bad_tweets[key]).validation.tweet_invalid()
        outcomes.append(_check(u'%s tweet_invalid' % label,
                               verdict == (True, reason),
                               u'True, %s' % reason, u'%s, %s' % verdict))

    # Collect every invalid character that the validator fails to reject.
    missed_bad_characters = []
    for bad_character in validation.INVALID_CHARACTERS:
        this_text = force_unicode(bad_tweets['invalid_characters'] + bad_character)
        this_tt = twitter_text.TwitterText(this_text)
        if this_tt.validation.tweet_invalid() != (True, 'Invalid characters'):
            missed_bad_characters.append(bad_character)
    outcomes.append(_check(u'Invalid Characters tweet_invalid',
                           not missed_bad_characters,
                           u'[]', force_unicode(missed_bad_characters)))

    successes = sum(1 for ok in outcomes if ok)
    return (tests + len(outcomes),
            passed + successes,
            failed + len(outcomes) - successes)
Beispiel #13
def extractor_tests(tests, passed, failed):
    """Run the Extractor checks and report each result on stdout.

    Every extractor method listed below is called on both the extractor
    attached to the module-level ``tt`` and a stand-alone
    ``twitter_text.Extractor`` built from the module-level ``text``, and its
    return value is compared with the expected fixture.

    Args:
        tests: running count of executed checks.
        passed: running count of passed checks.
        failed: running count of failed checks.

    Returns:
        The updated ``(tests, passed, failed)`` tuple.
    """
    print(u'Running Extractor tests')

    correct_mentioned_screen_names = [u'foo', u'monkeybat', u'bar']
    # NOTE(review): the mention fixtures use the key 'indicies' while the URL
    # and hashtag fixtures use 'indices' - confirm which spelling the
    # extractor actually returns for mentions.
    correct_mentioned_screen_names_with_indices = [{'indicies': (0, 4), 'screen_name': u'foo'}, {'indicies': (32, 42), 'screen_name': u'monkeybat'}, {'indicies': (47, 51), 'screen_name': u'bar'}]
    correct_reply_screen_name = 'foo'
    # NOTE(review): the URL fixtures expect one (empty) URL spanning offsets
    # 52-90 and the hashtag offsets start after it; this presumably relies on
    # a URL being present in the module-level `text` - confirm the fixture.
    correct_urls = [u'']
    correct_urls_with_indices = [{'url': u'', 'indices': (52, 90)}]
    correct_hashtags = [u'comedy', u'url']
    correct_hashtags_with_indices = [{'indices': (91, 98), 'hashtag': u'comedy'}, {'indices': (99, 103), 'hashtag': u'url'}]

    extractor = twitter_text.Extractor(text)

    # (extractor method name, expected return value), in reporting order.
    expectations = (
        ('extract_mentioned_screen_names', correct_mentioned_screen_names),
        ('extract_mentioned_screen_names_with_indices', correct_mentioned_screen_names_with_indices),
        ('extract_reply_screen_name', correct_reply_screen_name),
        ('extract_urls', correct_urls),
        ('extract_urls_with_indices', correct_urls_with_indices),
        ('extract_hashtags', correct_hashtags),
        ('extract_hashtags_with_indices', correct_hashtags_with_indices),
    )
    sources = ((u'Attached', tt.extractor), (u'Stand alone', extractor))

    for method_name, expected in expectations:
        for label, source in sources:
            returned = getattr(source, method_name)()
            if returned == expected:
                print(u'\033[92m  %s %s passed\033[0m' % (label, method_name))
                passed += 1
            else:
                print(u'\033[91m  %s %s failed:\033[0m' % (label, method_name))
                print(u'    Expected: %s' % force_unicode(expected))
                print(u'    Returned: %s' % force_unicode(returned))
                failed += 1
            tests += 1

    return tests, passed, failed
Beispiel #14
# encoding=utf-8

import twitter_text
from twitter_text.unicode import force_unicode

# Shared fixture: the sample tweet passed through force_unicode, and a
# TwitterText object built from it. The test functions in this module read
# both names as globals.
text = force_unicode('@foo said the funniest thing to @monkeybat and @bar #comedy #url')
tt = twitter_text.TwitterText(text)

def autolink_tests(tests, passed, failed):
    print u'Running Autolink tests'

    correct_auto_link = u'<a class="tweet-url username" href="" rel="nofollow">@foo</a> said the funniest thing to <a class="tweet-url username" href="" rel="nofollow">@monkeybat</a> and <a class="tweet-url username" href="" rel="nofollow">@bar</a> <a href="" rel="nofollow">…</a> <a href="" title="#comedy" class="tweet-url hashtag" rel="nofollow">#comedy</a> <a href="" title="#url" class="tweet-url hashtag" rel="nofollow">#url</a>'
    correct_auto_link_with_hit_highlight = u'<a class="tweet-url username" href="" rel="nofollow">@foo</a> said the <em class="search-hit">funniest</em> thing to <a class="tweet-url username" href="" rel="nofollow">@monkeybat</a> and <a class="tweet-url username" href="" rel="nofollow">@bar</a> <a href="" rel="nofollow">…</a> <a href="" title="#comedy" class="tweet-url hashtag" rel="nofollow">#comedy</a> <a href="" title="#url" class="tweet-url hashtag" rel="nofollow">#url</a>'
    correct_auto_link_usernames_or_lists = u'<a class="tweet-url username" href="" rel="nofollow">@foo</a> said the funniest thing to <a class="tweet-url username" href="" rel="nofollow">@monkeybat</a> and <a class="tweet-url username" href="" rel="nofollow">@bar</a> #comedy #url'
    correct_auto_link_hashtags = u'@foo said the funniest thing to @monkeybat and @bar <a href="" title="#comedy" class="tweet-url hashtag" rel="nofollow">#comedy</a> <a href="" title="#url" class="tweet-url hashtag" rel="nofollow">#url</a>'
    correct_auto_link_urls_custom = u'@foo said the funniest thing to @monkeybat and @bar <a href="" rel="nofollow">…</a> #comedy #url'
    correct_auto_link_urls_custom_with_kwargs = u'@foo said the funniest thing to @monkeybat and @bar <a href="" class="boosh" rel="external nofollow" title="a link">…</a> #comedy #url'

    autolink = twitter_text.Autolink(text)

    # test the overall auto_link method
    test_autolink = tt.autolink.auto_link()
    if test_autolink == correct_auto_link_with_hit_highlight:
        print u'\033[92m  Attached auto_link passed\033[0m'
        passed += 1
        print u'\033[91m  Attached auto_link failed:\033[0m'
        print u'    Expected: %s' % correct_auto_link_with_hit_highlight
        print u'    Returned: %s' % test_autolink
        failed +=1
    tests +=1