Esempio n. 1
0
def parse_tweets(result_dict):
    """Store the tweets of a search response and return their texts.

    For every entry of ``result_dict['results']`` the ``created_at`` string
    is parsed into a ``datetime`` and written back into the entry, so the
    caller's dictionaries are modified in place. A ``Tweet`` is then built
    from the entry and saved; the URLs found in its text are handed to
    ``extract_urls_from_tweet`` and ``download_all``, and one ``Photo`` is
    saved for each name ``download_all`` returns.

    An ``IntegrityError`` or ``DatabaseError`` raised anywhere in that
    save/download step is silently ignored (it is taken to mean the tweet
    was already saved); the entry's text is still included in the result.

    Args:
        result_dict: Mapping whose ``'results'`` value is an iterable of
            tweet dictionaries. Each one is read for the keys
            ``'created_at'``, ``'text'``, ``'id_str'``,
            ``'profile_image_url'``, ``'from_user'`` and ``'geo'``.

    Returns:
        A list with the ``'text'`` of every entry, in input order.

    Raises:
        KeyError: If ``'results'`` or one of the per-entry keys is missing.
        ValueError: If a ``created_at`` string does not match the expected
            ``'%a, %d %b %Y %H:%M:%S +0000'`` layout.
    """
    copied_keys = ('text', 'created_at', 'id_str', 'profile_image_url',
                   'from_user')
    texts = []
    for entry in result_dict['results']:
        # The raw timestamp ends in a literal "+0000" (UTC); " UTC" is
        # appended so the trailing %Z directive has a zone name to match.
        stamp = entry['created_at'] + ' UTC'
        entry['created_at'] = datetime.strptime(
            stamp, '%a, %d %b %Y %H:%M:%S +0000 %Z')

        try:
            fields = dict((key, entry[key]) for key in copied_keys)
            geo = entry['geo']
            if geo:
                # NOTE(review): assumes coordinates are ordered
                # (latitude, longitude) -- confirm against the API.
                point = geo['coordinates']
                fields['lat'] = point[0]
                fields['long'] = point[1]
            tweet = Tweet(**fields)
            tweet.save()

            # Fetch the images linked from the tweet and attach each one
            # to the tweet that was just saved.
            links = find_url_in_tweet(tweet.text)
            for file_name in download_all(extract_urls_from_tweet(links)):
                Photo(name=file_name, tweet=tweet).save()
        except (IntegrityError, DatabaseError):
            # Deliberately ignored: treated as "tweet already saved".
            pass

        texts.append(entry['text'])
    return texts
Esempio n. 2
0
 def test_download_all_returns_list_of_image_paths(self):
     # download_all should hand back one name per URL, in the same order.
     # Each expected name below is its URL with the ':' and '/' characters
     # removed and, for the third URL, the query string dropped.
     # NOTE(review): presumably download_all fetches these URLs for real,
     # which would make this test depend on the network -- confirm.
     wanted_names = [
         u'httpp.twimg.comAZh5jT2CEAAXyR-.jpglarge',
         u'httpdistillery.s3.amazonaws.commedia20110917bf5bb54387ca4dd98573128bb02f5c0b_7.jpg',
         u'https3.amazonaws.comtwitpicphotosfull400298248.jpg',
     ]
     source_urls = [
         'http://p.twimg.com/AZh5jT2CEAAXyR-.jpg:large',
         'http://distillery.s3.amazonaws.com/media/2011/09/17/bf5bb54387ca4dd98573128bb02f5c0b_7.jpg',
         'http://s3.amazonaws.com/twitpic/photos/full/400298248.jpg?AWSAccessKeyId=AKIAJF3XCCKACR3QDMOA&Expires=1317580915&Signature=MByJjnEyjvdQZnP40RjbrPHDUOM%3D',
     ]
     self.assertEqual(wanted_names, download_all(source_urls))