def image_posts_with_tags(blogname):
    '''
    Given the url for a blog, returns a list of all the
    Image posts that have tags.
    '''
    # Restrict the read to photo posts, then keep only those that
    # actually carry a 'tags' entry.
    photo_posts = Api(blogname).read(type='photo')
    return [post for post in photo_posts if 'tags' in post]
def populate_models(self):
    ''' Import tumblr entries for the defined user in settings '''
    user = settings.TUMBLR_USER_NAME
    email = settings.TUMBLR_USER_EMAIL
    password = settings.TUMBLR_USER_PASSWORD

    # An authenticated read is only attempted when both credentials
    # are configured; otherwise fall back to the public read.
    if email != '' and password != '':
        self.log.info('email/pwd specified - attempting Authenticated read')
        api = Api(name=user, email=email, password=password)
        tumbls = api.authenticated_read(type=self.type)
    else:
        self.log.info('email/pwd *NOT* specified - attempting unauthenticated read')
        api = Api(user)
        tumbls = api.read(type=self.type)

    for entry in tumbls:
        self.log.debug(entry['type'])
        self.log.debug(entry['id'])
        self.log.debug(datetime.datetime.strptime(entry['date-gmt'], '%Y-%m-%d %H:%M:%S %Z'))
        if entry['type'] == 'regular':
            self.log.debug(entry['regular-title'])
        self.log.debug('--'*10)
        # use the class method to triage content-types
        TumbleItem.create_new_item(entry['type'], entry)

    self.log.info('import complete')
def force_update(self, request, tumblr_queryset):
    '''
    Re-import each checked tumblr item from the tumblr API.

    Instead of an in-place update, every selected item is deleted and
    recreated from fresh API data.  This sidesteps manual tag
    reconciliation (clearing and re-adding the tag relations), at the
    cost of extra write/delete traffic and of bumping the auto-increment
    id on the sub-content models -- the tumblr_id itself is stable, but
    anything keyed on the django id will shift.  Handles multiple
    checked items by iterating the queryset.
    '''
    # There is only one configured user for now.
    api = Api(settings.TUMBLR_USER_NAME + ".tumblr.com")
    for item in tumblr_queryset:
        # Fetch the replacement data first, then delete the stale row so
        # the re-create below starts from a clean slate.
        fresh = api.read(id=item.tumblr_id)
        item.delete()
        # Class-method triage picks the right content model.
        TumbleItem.create_new_item(fresh['type'], fresh)
def read_blogs(blogs, download_dir): global config # Check if the download dir exists; if not create it if not os.path.exists(download_dir): os.mkdir(download_dir) # Process all given blogs for blog in blogs: # Check if the target dir exists; if not create it target_dir = os.path.join(download_dir, blog) if not os.path.exists(target_dir): os.mkdir(target_dir) print "Downloading images from " + blog + " to " + target_dir + "..." try: site_url = blog api = Api(site_url) posts = api.read(start=0, max=config.max_number_of_images) #posts = api.read(start=0) except: print "error" imageCounter = 1 for post in posts: try: url = post['photo-url-1280'] photo_caption = post['photo-caption'] slug = post['slug'] post_date_str = post['date-gmt'] except: print "error" image_name = url.rsplit('/', 1)[1] # Check if a file extension is given supported_file_types = ['jpg', 'jpeg', 'png', 'gif'] if not image_name[-3:] in supported_file_types: # Add an extension to the image name image_name = image_name + ".jpg" image_file_name = blog + "_-_" + image_name target_file_name = os.path.join(target_dir, image_file_name) # Check if file already exists if os.path.exists(target_file_name): print "Image already exists." imageCounter += 1 continue if imageCounter > config.max_number_of_images: break print "Downloading image Nr " + str(imageCounter) + ": \"" + image_file_name + "\" ..." download_image(url, target_file_name) imageCounter += 1
def blog(request):
    """
    Blog page. Returns the embedded tumblr page.
    """
    # Hard-coded source blog for the embedded page.
    BLOG = 'jpglab.tumblr.com'
    api = Api(BLOG)
    # NOTE(review): post_list is fetched but never passed into the
    # template context below -- either the template embeds tumblr
    # directly and this read is dead work, or the context is missing
    # {'post_list': post_list}.  Confirm intent before changing.
    post_list = api.read()
    return render_to_response('blog.html', RequestContext(request))
def testWrite(self):
    # Exercise every write_* helper and confirm the created post can be
    # read back under the id the API reported.
    api = Api(BLOG, USER, PASSWORD)
    cases = [
        (api.write_regular, ('title', 'body')),
        (api.write_link, ('http://www.google.com',)),
        (api.write_quote, ('it was the best of times...',)),
        (api.write_conversation, ('me: wow\nyou: double wow!',)),
        (api.write_video, ('http://www.youtube.com/watch?v=60og9gwKh1o',)),
        (api.write_photo, ('http://www.google.com/intl/en_ALL/images/logo.gif',)),
    ]
    for write, args in cases:
        newpost = write(*args)
        post = api.read(newpost['id'])
        assert newpost['id'] == post['id']
def testWrite(self):
    # Round-trip check: each freshly written post must be readable
    # again via its reported id.
    api = Api(BLOG, USER, PASSWORD)

    def roundtrip(newpost):
        post = api.read(newpost['id'])
        assert newpost['id'] == post['id']

    roundtrip(api.write_regular('title', 'body'))
    roundtrip(api.write_link('http://www.google.com'))
    roundtrip(api.write_quote('it was the best of times...'))
    roundtrip(api.write_conversation('me: wow\nyou: double wow!'))
    roundtrip(api.write_video('http://www.youtube.com/watch?v=60og9gwKh1o'))
    roundtrip(api.write_photo('http://www.google.com/intl/en_ALL/images/logo.gif'))
def handle(self, *args, **options): tumblr_model = get_model('djumblr', 'Tumblr') tumblr_name = 'diegueus9' tumblr_email = '*****@*****.**' print 'working...' tumblr_api = Api(tumblr_name, tumblr_email, getpass.getpass('Your tumblr password:')) t, created = tumblr_model.objects.get_or_create( name=tumblr_name, email=tumblr_email) tumblr_response = tumblr_api.read() for post in tumblr_response: _(post)
def taglist(username):
    # Tally how often each tag appears across the blog's posts,
    # remembering the date of the first post seen carrying that tag.
    counts = {}
    for post in Api(username).read():
        for tag in post.get('tags', []):
            if tag in counts:
                counts[tag][0] += 1
            else:
                counts[tag] = [1, post.get('date-gmt', '')]
    # Emit (count, first-date, tag) tuples, busiest tags first.
    return sorted((tuple(info + [tag]) for tag, info in counts.items()),
                  reverse=True)
def testRead(self):
    '''A full read must yield posts, and the per-type tally must match countType().'''
    api = Api(BLOG)
    freq = {}
    posts = api.read()
    total = 0
    for post in posts:
        total += 1
        post_type = post['type']
        # BUGFIX: a bare `except:` previously guarded the increment and
        # could mask unrelated errors; a get-with-default needs no
        # exception handling.  Also stops shadowing the builtin `type`.
        freq[post_type] = freq.get(post_type, 0) + 1
    assert total > 0
    for post_type in freq:
        assert self.countType(api, post_type) == freq[post_type]
def testRead(self):
    '''A full read must yield posts, and the per-type tally must match countType().'''
    from collections import Counter
    api = Api(BLOG)
    # Materialise the types once so we can both count the total and
    # tally frequencies from a single pass over the API results.
    # BUGFIX: replaces a bare `except:` (which swallowed *all*
    # exceptions around the increment) and the shadowing of the
    # builtin `type`.
    post_types = [post['type'] for post in api.read()]
    freq = Counter(post_types)
    assert len(post_types) > 0
    for post_type, count in freq.items():
        assert self.countType(api, post_type) == count
def update_backchannel():
    """
    Update data for the backchannel from Tumblr.

    Fetches the latest posts, writes them (oldest first) to a local
    JSON file, and -- when deploy settings are loaded in `env` --
    pushes that file to the configured S3 bucket(s).
    """
    TUMBLR_FILENAME = 'www/tumblr.json'
    TUMBLR_BLOG_ID = 'nprbackchannel'
    TUMBLR_MAX_POSTS = 10

    api = Api(TUMBLR_BLOG_ID)
    posts = list(api.read(max=TUMBLR_MAX_POSTS))
    posts.reverse()  # oldest first

    with open(TUMBLR_FILENAME, 'w') as f:
        f.write(json.dumps(posts))

    if 'settings' in env:
        def _upload(bucket_name):
            # One upload per bucket; short-lived caching headers keep
            # polling clients fresh.
            conn = boto.connect_s3()
            bucket = conn.get_bucket(bucket_name)
            key = Key(bucket)
            key.key = TUMBLR_FILENAME
            key.set_contents_from_filename(
                TUMBLR_FILENAME,
                policy='public-read',
                headers={
                    # BUGFIX: directives were space-separated, which is
                    # not a valid Cache-Control value; they must be
                    # comma-separated (RFC 7234).
                    'Cache-Control': 'max-age=5, no-cache, no-store, must-revalidate'
                })

        # De-duplicated: both branches previously repeated this block.
        _upload(env.s3_bucket)
        if env.alt_s3_bucket:
            _upload(env.alt_s3_bucket)
def update_backchannel():
    """
    Update data for the backchannel from Tumblr.

    Fetches the latest posts, writes them (oldest first) to a local
    JSON file, and -- when deploy settings are loaded in `env` --
    pushes that file to the primary (and optional alternate) S3 bucket.
    """
    TUMBLR_FILENAME = 'www/tumblr.json'
    TUMBLR_BLOG_ID = 'nprbackchannel'
    TUMBLR_MAX_POSTS = 10

    api = Api(TUMBLR_BLOG_ID)
    posts = list(api.read(max=TUMBLR_MAX_POSTS))
    posts.reverse()  # oldest first

    with open(TUMBLR_FILENAME, 'w') as f:
        f.write(json.dumps(posts))

    if 'settings' in env:
        conn = boto.connect_s3()
        bucket = conn.get_bucket(env.s3_bucket)
        key = Key(bucket)
        key.key = TUMBLR_FILENAME
        key.set_contents_from_filename(
            TUMBLR_FILENAME,
            policy='public-read',
            # BUGFIX: Cache-Control directives must be comma-separated;
            # the space-separated form is not a valid header value.
            headers={'Cache-Control': 'max-age=5, no-cache, no-store, must-revalidate'}
        )
        if env.alt_s3_bucket:
            conn = boto.connect_s3()
            bucket = conn.get_bucket(env.alt_s3_bucket)
            key = Key(bucket)
            key.key = TUMBLR_FILENAME
            key.set_contents_from_filename(
                TUMBLR_FILENAME,
                policy='public-read',
                headers={'Cache-Control': 'max-age=5, no-cache, no-store, must-revalidate'}
            )
def testDelete(self):
    # Create a throwaway post, confirm it is readable, then remove it.
    api = Api(BLOG, USER, PASSWORD)
    created = api.write_regular('title', 'body')
    fetched = api.read(created['id'])
    api.delete(fetched['id'])
#!/usr/bin/env python
"""Chart the distribution of post types on staff.tumblr.com as a 3D pie."""
from tumblr import Api
from pygooglechart import PieChart3D

chart = PieChart3D(400, 200)
api = Api('staff.tumblr.com')

# Tally post counts per type.
freq = {}
posts = api.read()
for post in posts:
    # BUGFIX: the original wrapped `freq[type] += 1` in a bare
    # `except:`, which masked any error, and shadowed the builtin
    # `type`; get-with-default needs no exception handling.
    kind = post['type']
    freq[kind] = freq.get(kind, 0) + 1

chart.add_data(freq.values())
chart.set_pie_labels(freq.keys())
chart.set_title('staff.tumblr.com')
chart.download('staff.png')
def populate_models(self, tumblr_user, user):
    """
    Takes a tumblr username (string), and a User model.
    Populates the tumblr models with data from
    'tumblr_user'.tumblr.com, and associates the entries with 'user'.

    Already-imported items (matched by tumblr_id) are skipped; any
    per-item failure is logged and the import continues.
    """
    # Per-type construction table replacing seven copy-paste branches:
    # model class plus (kwarg, tumblr key, required) triples.  Required
    # keys raise KeyError when missing -- caught and logged below,
    # exactly as the per-branch lookups did -- while optional keys
    # default to "".
    type_specs = {
        "regular": (Regular, (("title", "regular-title", False),
                              ("body", "regular-body", True))),
        "photo": (Photo, (("source", "photo-url-500", True),
                          ("caption", "photo-caption", False))),
        "quote": (Quote, (("quote_text", "quote-text", False),
                          ("source", "quote-source", False))),
        "link": (Link, (("name", "link-text", False),
                        ("url", "link-url", True),
                        ("description", "link-description", False))),
        "conversation": (Conversation, (("title", "conversation-title", False),
                                        ("conversation_text", "conversation-text", True))),
        "video": (Video, (("embed", "video-player", True),
                          ("caption", "video-caption", False))),
        "audio": (Audio, (("embed", "audio-player", True),
                          ("caption", "audio-caption", False))),
    }

    tumbls = Api(tumblr_user + ".tumblr.com")
    for tumbl in tumbls.read():
        # Common to all models
        tumblr_id = tumbl["id"]
        pub_date = datetime.datetime.strptime(tumbl["date-gmt"],
                                              "%Y-%m-%d %H:%M:%S %Z")
        fmt = tumbl["format"]  # renamed: `format` shadowed the builtin
        tags = tumbl.get("tags", [])
        self.log.debug("%s (%s)" % (tumblr_id, tumbl["type"]))
        try:
            # Skip items we have already imported.
            TumbleItem.objects.get(tumblr_id=tumblr_id)
        except TumbleItem.DoesNotExist:
            try:
                spec = type_specs.get(tumbl["type"])
                if spec is None:
                    # TODO: Raise error.
                    self.log.error("Type does not exist: %s" % (tumbl["type"]))
                    continue
                model_cls, fields = spec
                kwargs = {
                    "tumblr_id": tumblr_id,
                    "pub_date": pub_date,
                    "user": user,
                    "format": fmt,
                }
                for attr, key, required in fields:
                    kwargs[attr] = tumbl[key] if required else tumbl.get(key, "")
                m = model_cls(**kwargs)
                m.save()
                m.tags.add(*tags)
            except Exception as e:
                self.log.exception(e)
def write_mr_president_json():
    """
    Writes the JSON for Dear Mr. President to www.
    """
    #
    # First, handle stuff from the V1 API. This is fetching the posts by tag.
    #
    print "V1: Starting."
    TUMBLR_FILENAME = 'www/live-data/misterpresident.json'
    TUMBLR_MAX_POSTS = 10000
    MAX_PER_CATEGORY = 100
    api = Api(app_config.TUMBLR_BLOG_ID)
    print "V1: API call made."
    posts = list(api.read(max=TUMBLR_MAX_POSTS))
    print "V1: Fetched %s posts." % len(posts)
    print "V1: Starting to render."
    # One bucket per known tag, plus the V2-sourced 'mostpopular' list.
    output = {
        'idrathernotsayhowivoted': [],
        'ivotedforyou': [],
        'ididntvoteforyou': [],
        'ididntvote': [],
        'mostpopular': []
    }
    for post in posts:
        # Flatten the V1 post dict down to the fields the page needs.
        simple_post = {
            'id': post['id'],
            'url': post['url'],
            'text': post['photo-caption'],
            'photo_url': post['photo-url-100'],
            'photo_url_250': post['photo-url-250'],
            'photo_url_500': post['photo-url-500'],
            'photo_url_1280': post['photo-url-1280'],
            'timestamp': post['unix-timestamp']
        }
        # Unknown tags fall through the KeyError and are ignored.
        # NOTE(review): `<=` lets each bucket grow to MAX_PER_CATEGORY+1
        # entries -- off-by-one if exactly 100 was intended; confirm.
        for tag in post['tags']:
            try:
                if len(output[tag]) <= MAX_PER_CATEGORY:
                    output[tag].append(simple_post)
            except KeyError:
                pass
    print "V1: Rendering finished."
    #
    # Now, fetch the most popular posts using the V2 API.
    #
    print "V2: Starting."
    # Set constants
    base_url = 'http://api.tumblr.com/v2/blog/inauguration2013.tumblr.com/posts/photo'
    key_param = '?api_key=Cxp2JzyA03QxmQixf7Fee0oIYaFtBTTHKzRA0AveHlh094bwDH'
    limit_param = '&limit=20'
    limit = 20
    new_limit = limit
    post_list = []
    # Figure out the total number of posts.
    r = requests.get(base_url + key_param)
    total_count = int(json.loads(r.content)['response']['total_posts'])
    print "V2: %s total posts available." % total_count
    # Do the pagination math.
    # (Python 2 integer division; the remainder check below adds the
    # final partial page.)
    pages_count = (total_count / limit)
    pages_remainder = (total_count % limit)
    if pages_remainder > 0:
        pages_count += 1
    pages = range(0, pages_count)
    print "V2: %s pages required." % len(pages)
    # Start requesting pages.
    # Note: Maximum of 20 posts per page.
    print "V2: Requesting pages."
    for page in pages:
        # Update all of the pagination shenanigans.
        start_number = new_limit - limit
        end_number = new_limit
        if end_number > total_count:
            end_number = total_count
        new_limit = new_limit + limit
        page_param = '&offset=%s' % start_number
        page_url = base_url + key_param + limit_param + page_param
        # Actually fetch the page URL.
        r = requests.get(page_url)
        posts = json.loads(r.content)
        for post in posts['response']['posts']:
            # The lookup doubles as a filter: posts without a
            # 'note_count' key are dropped (note_count itself is unused).
            try:
                note_count = post['note_count']
                post_list.append(post)
            except KeyError:
                pass
    # Sort the results first.
    print "V2: Finished requesting pages."
    print "V2: Sorting list."
    post_list = sorted(post_list, key=lambda post: post['note_count'], reverse=True)
    # Render the sorted list, but slice to just 24 objects per bb.
    print "V2: Rendering posts from sorted list."
    for post in post_list[0:24]:
        # Use the original-size image as the fallback for every slot.
        default_photo_url = post['photos'][0]['original_size']['url']
        simple_post = {
            'id': post['id'],
            'url': post['post_url'],
            'text': post['caption'],
            'timestamp': post['timestamp'],
            'note_count': post['note_count'],
            'photo_url': default_photo_url,
            'photo_url_250': default_photo_url,
            'photo_url_500': default_photo_url,
            'photo_url_1280': default_photo_url
        }
        # Handle the new photo assignment.
        # NOTE(review): the 100px branch writes 'photo-url-100'
        # (dash-style) while every other slot -- and the V1 section --
        # uses underscore keys ('photo_url'); this looks like a typo
        # that leaves 'photo_url' stuck at the default. Confirm against
        # the front-end before changing.
        for photo in post['photos'][0]['alt_sizes']:
            if int(photo['width']) == 100:
                simple_post['photo-url-100'] = photo['url']
            if int(photo['width']) == 250:
                simple_post['photo_url_250'] = photo['url']
            if int(photo['width']) == 500:
                simple_post['photo_url_500'] = photo['url']
            if int(photo['width']) == 1280:
                simple_post['photo_url_1280'] = photo['url']
        output['mostpopular'].append(simple_post)
    # Ensure the proper sort on our output list.
    print "V2: Ordering output."
    output['mostpopular'] = sorted(output['mostpopular'], key=lambda post: post['note_count'], reverse=True)
    # Write the JSON file.
    print "All: Producing JSON file at %s." % TUMBLR_FILENAME
    json_output = json.dumps(output)
    with open(TUMBLR_FILENAME, 'w') as f:
        f.write(json_output)
    print "All: JSON file written."
    if app_config.DEPLOYMENT_TARGET:
        # Gzip once locally, upload the same artifact to every bucket.
        with gzip.open(TUMBLR_FILENAME + '.gz', 'wb') as f:
            f.write(json_output)
        for bucket in app_config.S3_BUCKETS:
            conn = boto.connect_s3()
            bucket = conn.get_bucket(bucket)
            key = boto.s3.key.Key(bucket)
            key.key = '%s/live-data/misterpresident.json' % app_config.DEPLOYED_NAME
            # NOTE(review): Cache-Control directives here are
            # space-separated; a valid header value is comma-separated
            # ('max-age=5, no-cache, ...'). Verify and fix.
            key.set_contents_from_filename(
                TUMBLR_FILENAME + '.gz',
                policy='public-read',
                headers={
                    'Cache-Control': 'max-age=5 no-cache no-store must-revalidate',
                    'Content-Encoding': 'gzip'
                }
            )
        os.remove(TUMBLR_FILENAME + '.gz')
def populate_models(tumblr_user, user): """ Takes a tumblr username (string), and a User model. Populates the tumblr models with data from 'tumblr_user'.tumblr.com, and associates the entries with 'user'. """ tumbls = Api(tumblr_user + ".tumblr.com") for tumbl in tumbls.read(): # Common to all models id = tumbl["id"] pub_date = datetime.datetime.strptime(tumbl["date-gmt"], "%Y-%m-%d %H:%M:%S %Z") # 'Regular' objects. if tumbl["type"] == "regular": if tumbl["regular-title"]: title = tumbl["regular-title"] else: title = "" body = tumbl["regular-body"] m = Regular(id=id, pub_date=pub_date, user=user, title=title, body=body) # 'Photo' objects. elif tumbl["type"] == "photo": source = tumbl["photo-url-250"] if tumbl["photo-caption"]: caption = tumbl["photo-caption"] else: caption = "" m = Photo(id=id, pub_date=pub_date, user=user, source=source, caption=caption) # 'Quote' objects. elif tumbl["type"] == "quote": quote = tumbl["quote-text"] if tumbl["quote-source"]: source = tumbl["quote-source"] else: source = "" m = Quote(id=id, pub_date=pub_date, user=user, quote=quote, source=source) # 'Link' objects. elif tumbl["type"] == "link": if tumbl["link-text"]: name = tumbl["link-text"] else: name = "" url = tumbl["link-url"] if tumbl["link-description"]: description = tumbl["link-description"] else: description = "" m = Link(id=id, pub_date=pub_date, user=user, name=name, url=url, description=description) # 'Conversation' objects. elif tumbl["type"] == "conversation": if tumbl["conversation-title"]: title = tumbl["conversation-title"] else: title = "" m = Conversation( id=id, pub_date=pub_date, user=user, title=title, conversation_text=tumbl["conversation-text"] ) m.save() # 'Video' objects. elif tumbl["type"] == "video": embed = tumbl["video-player"] if tumbl["video-caption"]: caption = tumbl["video-caption"] else: caption = "" m = Video(id=id, pub_date=pub_date, user=user, embed=embed, caption=caption) # 'Audio' objects. 
elif tumbl["type"] == "audio": embed = tumbl["audio-player"] if tumbl["audio-caption"]: caption = tumbl["audio-caption"] else: caption = "" m = Audio(id=id, pub_date=pub_date, user=user, embed=embed, caption=caption) # TODO: Raise error. else: print "ERROR!", tumbl return "" m.save()
def populate_models(tumblr_user, user): ''' Takes a tumblr username (string), and a User model. Populates the tumblr models with data from 'tumblr_user'.tumblr.com, and associates the entries with 'user'. ''' tumbls = Api(tumblr_user + ".tumblr.com") for tumbl in tumbls.read(): # Common to all models id = tumbl['id'] pub_date = datetime.datetime.strptime(tumbl['date-gmt'], '%Y-%m-%d %H:%M:%S %Z') # 'Regular' objects. if tumbl['type'] == "regular": if tumbl['regular-title']: title = tumbl['regular-title'] else: title = "" body = tumbl['regular-body'] m = Regular(id=id, pub_date=pub_date, user=user, title=title, body=body) # 'Photo' objects. elif tumbl['type'] == "photo": source = tumbl['photo-url-250'] if tumbl['photo-caption']: caption = tumbl['photo-caption'] else: caption = "" m = Photo(id=id, pub_date=pub_date, user=user, source=source, caption=caption) # 'Quote' objects. elif tumbl['type'] == "quote": quote = tumbl['quote-text'] if tumbl['quote-source']: source = tumbl['quote-source'] else: source = "" m = Quote(id=id, pub_date=pub_date, user=user, quote=quote, source=source) # 'Link' objects. elif tumbl['type'] == "link": if tumbl['link-text']: name = tumbl['link-text'] else: name = "" url = tumbl['link-url'] if tumbl['link-description']: description = tumbl['link-description'] else: description = "" m = Link(id=id, pub_date=pub_date, user=user, name=name, url=url, description=description) # 'Conversation' objects. elif tumbl['type'] == "conversation": if tumbl['conversation-title']: title = tumbl['conversation-title'] else: title = "" m = Conversation(id=id, pub_date=pub_date, user=user, title=title, conversation_text=tumbl['conversation-text']) m.save() # 'Video' objects. elif tumbl['type'] == "video": embed = tumbl['video-player'] if tumbl['video-caption']: caption = tumbl['video-caption'] else: caption = "" m = Video(id=id, pub_date=pub_date, user=user, embed=embed, caption=caption) # 'Audio' objects. 
elif tumbl['type'] == "audio": embed = tumbl['audio-player'] if tumbl['audio-caption']: caption = tumbl['audio-caption'] else: caption = "" m = Audio(id=id, pub_date=pub_date, user=user, embed=embed, caption=caption) # TODO: Raise error. else: print "ERROR!", tumbl return '' m.save()