Esempio n. 1
0
def connect(feed="http://stream.twitter.com/1/statuses/filter.json", track='apple', username=None, password=None, max=5, encoding='utf-8'):
    """Read lines from a streaming feed and print an analysis of each tweet.

    POSTs ``track`` to ``feed`` using HTTP Basic authentication, then reads
    the response line by line.  Each line is decoded as JSON; when the
    decoded object carries a ``text`` field, that text is printed followed
    by its tokens, its parse and its tags.

    Parameters:
        feed: URL the request is sent to.
        track: value submitted as the ``track`` form field.
        username, password: credentials placed in the Basic auth header.
        max: stop once this many lines have been read and processed.
            Lines that fail to decode are counted too, but the limit is
            only checked after a line that decoded successfully.
        encoding: encoding used when printing the tweet text; characters
            that cannot be encoded are replaced.

    Returns:
        None.  All results are written to standard output.
    """
    data = urllib.urlencode(dict(track=track))
    request = urllib2.Request(feed, data)
    # b64encode never inserts line breaks.  encodestring() wraps its output
    # every 76 characters, which corrupts the header for long credentials.
    credentials = base64.b64encode('%s:%s' % (username, password))
    request.add_header("Authorization", "Basic %s" % credentials)
    s = urllib2.urlopen(request)
    tt = TweetTokenizer()
    tp = TweetParser()
    tagger = TweetTagger()
    count = 0
    # Single-argument print(...) is valid both as the Python 2 statement
    # and as the Python 3 function, and prints the same thing in each.
    try:
        while True:
            line = s.readline()
            if not line:
                # readline() returns an empty string at end of stream;
                # without this check the loop would never terminate.
                break
            count += 1
            print('%s of %s' % (count, max))
            try:
                tweet = simplejson.loads(line)
            except ValueError:
                # JSON decode errors are ValueErrors; anything else
                # (e.g. KeyboardInterrupt) is allowed to propagate.
                print('JSON load failed on line===>%s ' % line)
                continue
            # Not every decoded line is an object with a 'text' field;
            # skip the analysis for those instead of crashing on None.
            text = tweet.get('text') if isinstance(tweet, dict) else None
            if text is not None:
                print(text.encode(encoding, 'replace'))
                tokens = tt.tokenize(text)
                parsed = tp.parse(text, debug=False)
                tags = tagger.tag(text, debug=False)
                print(tokens)
                print(parsed)
                print(tags)
            if count >= max:
                break
    finally:
        # Always release the HTTP connection, even on error or interrupt.
        s.close()
Esempio n. 2
0
    def tag(cls, tweet, texttagger=BasicTagger, debug=False):
        """Return tags for a tweet (or other text), including its hashtags.

        The tweet is run through ``TweetParser.parse`` (``debug`` is
        forwarded to it), the parsed ``text`` entry is handed to
        ``texttagger.tag``, and every parsed hashtag, minus its first
        character, is appended to that result unless already present.
        """
        parsed = TweetParser.parse(tweet, debug=debug)
        tags = texttagger.tag(parsed.get('text'))

        # Drop the leading '#' from each hashtag reported by the parser.
        bare_hashtags = [hashtag[1:] for hashtag in parsed.get('hashtags', [])]

        # Extend the tagger's result in place, skipping anything it already
        # holds (this also collapses repeated hashtags).
        for candidate in bare_hashtags:
            if candidate not in tags:
                tags.append(candidate)
        return tags