def parse_tweets(result_dict):
    """Save every tweet of a search response and return the tweet texts.

    For each entry of ``result_dict['results']`` the function:

    * parses the ``created_at`` string into a ``datetime`` and writes the
      parsed value back into the entry (the caller's dict is mutated);
    * builds a ``Tweet`` from the text, id, author and avatar fields, plus
      ``lat``/``long`` when the entry carries a non-empty ``geo`` value;
    * saves the tweet, downloads the images linked from its text and saves
      one ``Photo`` per downloaded file name.

    Args:
        result_dict: mapping with a ``'results'`` list of per-tweet dicts.

    Returns:
        A list with the ``text`` of every entry, in input order, including
        entries whose save was skipped because of a database error.

    Raises:
        KeyError: if ``'results'`` or a required per-tweet field is missing.
        ValueError: if ``created_at`` does not match the expected format.
    """
    copied_fields = ('text', 'id_str', 'profile_image_url', 'from_user')
    timestamp_format = '%a, %d %b %Y %H:%M:%S +0000 %Z'

    messages = []
    for result in result_dict['results']:
        # The "+0000" offset is matched literally by the format; " UTC" is
        # appended so that %Z has a zone name to consume (+0000 = UTC).
        created_at = datetime.strptime(result['created_at'] + ' UTC',
                                       timestamp_format)
        result['created_at'] = created_at

        # Collect the model fields before touching the database so that the
        # try block below only wraps the calls that can hit it.
        tweet_fields = dict((key, result[key]) for key in copied_fields)
        tweet_fields['created_at'] = created_at

        # Entries without a 'geo' key are treated like entries whose geo
        # value is empty instead of raising KeyError.
        geo = result.get('geo')
        if geo:
            tweet_fields['lat'] = geo['coordinates'][0]
            tweet_fields['long'] = geo['coordinates'][1]

        try:
            tweet = Tweet(**tweet_fields)
            tweet.save()

            # download photos in the tweet
            urls = find_url_in_tweet(tweet.text)
            image_urls = extract_urls_from_tweet(urls)
            for photo_name in download_all(image_urls):
                Photo(name=photo_name, tweet=tweet).save()
        except (IntegrityError, DatabaseError):
            # Deliberate best effort: treated as "tweet already saved".
            # NOTE(review): DatabaseError is broad and may also hide
            # unrelated database failures - confirm this is intended.
            pass

        messages.append(result['text'])
    return messages
def test_download_all_returns_list_of_image_paths(self):
    """download_all() should return one path per image URL, in order.

    The expected values below are the source URLs with the ':' and '/'
    characters removed; for the third URL the query string is absent too.
    """
    expected = [
        u'httpp.twimg.comAZh5jT2CEAAXyR-.jpglarge',
        u'httpdistillery.s3.amazonaws.commedia20110917bf5bb54387ca4dd98573128bb02f5c0b_7.jpg',
        u'https3.amazonaws.comtwitpicphotosfull400298248.jpg',
    ]
    sources = [
        'http://p.twimg.com/AZh5jT2CEAAXyR-.jpg:large',
        'http://distillery.s3.amazonaws.com/media/2011/09/17/bf5bb54387ca4dd98573128bb02f5c0b_7.jpg',
        'http://s3.amazonaws.com/twitpic/photos/full/400298248.jpg?AWSAccessKeyId=AKIAJF3XCCKACR3QDMOA&Expires=1317580915&Signature=MByJjnEyjvdQZnP40RjbrPHDUOM%3D',
    ]

    actual = download_all(sources)

    self.assertEqual(expected, actual)