def extract_lengths(tweets_file, n):
    """
    Compute the text length of each tweet in a tweet dataset

    Parameters:
    tweets_file - Filename of the dataset
    n - Upper bound on the number of tweets to load from the dataset

    When the dataset holds fewer than n tweets, the returned list is
    correspondingly shorter.

    Returns: List of integers, one length per loaded tweet
    """
    return [len(t["text"]) for t in workshop.tweets(tweets_file, n)]
def find_date_range(filename):
    """
    Find the earliest and latest creation dates in a tweet dataset

    Parameters:
    filename - Dataset filename

    Each tweet's "created_at" field is parsed with the format
    "%a %b %d %H:%M:%S +0000 %Y", producing a naive datetime.

    Returns: Tuple (n, min_date, max_date), where n is the number of
    tweets read. If no tweets are read, min_date is datetime.max and
    max_date is datetime.min.
    """
    # Seed with the extremes of the datetime range rather than the local
    # clock: the parsed timestamps come from "+0000" strings, so a dataset
    # whose tweets are all later than the naive local "now" would otherwise
    # report the clock reading as its earliest date.
    min_date = datetime.max
    max_date = datetime.min
    n = 0
    for tweet in workshop.tweets(filename):
        d = datetime.strptime(tweet["created_at"],
                              "%a %b %d %H:%M:%S +0000 %Y")
        max_date = max(max_date, d)
        min_date = min(min_date, d)
        n += 1
    return n, min_date, max_date
#!/usr/bin/python
#
# hack@uchicago Introduction to Python Workshop
# Borja Sotomayor, 2013-2014

"""
Prints some information for each tweet in the dataset

Command-line arguments:
1 - Dataset filename
2 - Number of tweets, passed to workshop.tweets as its second argument
"""

import sys

import workshop

# Fail with a readable message instead of an IndexError traceback when
# the two required arguments are missing.
if len(sys.argv) < 3:
    sys.exit("Usage: %s TWEETS_FILE N" % sys.argv[0])

tfile = sys.argv[1]
n = int(sys.argv[2])

# Each print call takes a single pre-formatted string, so the output is the
# same whether this runs under Python 2 (print statement) or Python 3
# (print function). Values are wrapped in one-element tuples so that a
# tuple-valued field cannot be misread as multiple format arguments.
for tweet in workshop.tweets(tfile, n):
    print("User: %s" % (tweet["user"]["screen_name"],))
    print("Text: %s" % (tweet["text"],))
    print("Date: %s" % (tweet["created_at"],))
    print("Retweets: %s" % (tweet["retweet_count"],))
    print("Hashtags: %s" % (tweet["entities"]["hashtags"],))
    print("")
def extract_lengths(tweets_file, n):
    """
    Return the length of the "text" field of each tweet in a dataset

    Parameters:
    tweets_file - Dataset filename, passed to workshop.tweets
    n - Number of tweets, passed to workshop.tweets as its second argument

    Returns: List with one len(tweet["text"]) value per tweet produced
    """
    loaded = workshop.tweets(tweets_file, n)
    return [len(entry["text"]) for entry in loaded]
def extract_values(tweets_file, n, field):
    """
    Collect the value of one field from each tweet in a dataset

    Parameters:
    tweets_file - Dataset filename, passed to workshop.tweets
    n - Number of tweets, passed to workshop.tweets as its second argument
    field - Key looked up in every tweet (tweet[field])

    Returns: List with one tweet[field] value per tweet produced
    """
    return [entry[field] for entry in workshop.tweets(tweets_file, n)]