def object_from_photo_url(url, width, height):
    """Return the Object that represents a bare photo URL, creating it if needed.

    An Object with an empty service and ``foreign_id`` equal to ``url`` is
    reused when one exists. Otherwise a Media row holding the URL and the
    given dimensions is saved, followed by an image-mode Object that points
    at it and uses ``url`` as its permalink.

    :param url: address of the image; also used as the foreign id and permalink.
    :param width: image width stored on the Media.
    :param height: image height stored on the Media.
    :returns: the existing or newly saved Object.
    """
    try:
        existing = Object.objects.get(service='', foreign_id=url)
    except Object.DoesNotExist:
        pass
    else:
        return existing

    log.debug("Treating %s as a photo URL and making an image object from it", url)

    media_fields = {'image_url': url, 'width': width, 'height': height}
    photo = Media(**media_fields)
    photo.save()

    # There is no author information for an arbitrary image URL, and the
    # creation time stands in for a publication time.
    photo_obj = Object(
        service='',
        foreign_id=url,
        render_mode='image',
        title='',
        image=photo,
        author=None,
        time=datetime.utcnow(),
        permalink_url=url,
    )
    photo_obj.save()
    return photo_obj
def object_from_video_data(videodata):
    """Return the Object for a Vimeo video described by ``videodata``.

    An Object already stored for service ``vimeo.com`` with this video id is
    returned unchanged. Otherwise a new mixed-mode Object is saved whose body
    is an ``<iframe>`` player embed, scaled down proportionally when the
    video is wider than 660 pixels.

    :param videodata: mapping read for ``id``, ``owner.id``, ``urls.url``,
        ``width``, ``height``, ``title`` and ``upload_date``.
    :returns: the existing or newly saved Object.
    :raises IndexError: if no entry in ``urls.url`` has type ``video``.
    """
    video_id = videodata['id']
    try:
        known = Object.objects.get(service='vimeo.com', foreign_id=video_id)
    except Object.DoesNotExist:
        pass
    else:
        return known

    author = account_for_vimeo_id(videodata['owner']['id'])

    # The permalink is the first URL entry flagged as the video page.
    video_page_urls = [entry['_content'] for entry in videodata['urls']['url']
                       if entry['type'] == 'video']
    permalink_url = video_page_urls[0]

    width = int(videodata['width'])
    height = int(videodata['height'])
    if width > 660:
        # Cap the player at 660 pixels wide, keeping the aspect ratio.
        height = 660 * height / width
        width = 660

    embed_template = """<iframe src="http://player.vimeo.com/video/%s" width="%d" height="%d"></iframe>"""
    body = embed_template % (video_id, width, height)

    video_obj = Object(
        service='vimeo.com',
        foreign_id=video_id,
        render_mode='mixed',
        title=videodata['title'],
        body=body,
        time=datetime.strptime(videodata['upload_date'], '%Y-%m-%d %H:%M:%S'),
        permalink_url=permalink_url,
        author=author,
    )
    video_obj.save()
    return video_obj
def make_object_from_photo_data(photodata):
    """Create and save an Object (plus its Media) for a Flickr photo.

    :param photodata: mapping describing the photo. ``owner`` and ``title``
        may each be either a plain value or a nested mapping (``owner.nsid``,
        ``title._content``); both shapes are accepted.
    :returns: the newly saved Object.
    :raises ValueError: if neither ``dateupload`` nor ``dateuploaded`` is
        present in ``photodata``.
    """
    log.debug("Creating new object for %s's Flickr photo #%s", photodata['owner'], photodata['id'])

    # We aren't supposed to be able to ask for the dimensions, but we can, so use 'em.
    try:
        height = int(photodata['o_height'])
        width = int(photodata['o_width'])
    except KeyError:
        # Didn't get those, so we need to get the biggest size we can see.
        def pixel_area(size):
            return int(size['width']) * int(size['height'])

        photosizes = call_flickr('flickr.photos.getSizes', photo_id=photodata['id'])
        largest = max(photosizes['sizes']['size'], key=pixel_area)
        height = int(largest['height'])
        width = int(largest['width'])
        photourl = largest['source']
    else:
        photourl = photo_url_for_photo(photodata)
        # Rescale the original dimensions so the longer side is 1024.
        # NOTE(review): presumably photo_url_for_photo() points at a
        # 1024-pixel rendition -- confirm against that helper.
        if height > width:
            width = int(1024 * width / height)
            height = 1024
        else:
            height = int(1024 * height / width)
            width = 1024

    image = Media(image_url=photourl, width=width, height=height)
    image.save()

    # Indexing a plain string with a string key raises TypeError, which is
    # how the flat and nested response shapes are told apart.
    try:
        owner_nsid = photodata['owner']['nsid']
    except TypeError:
        owner_nsid = photodata['owner']
    try:
        phototitle = photodata['title']['_content']
    except TypeError:
        phototitle = photodata['title']

    alternate_stamp = photodata.get('dateuploaded')
    timestr = photodata.get('dateupload', alternate_stamp)
    if timestr is None:
        raise ValueError("Couldn't find an upload date (neither dateupload nor dateuploaded) in photodata %r" % photodata)

    photo_obj = Object(
        service='flickr.com',
        foreign_id=photodata['id'],
        render_mode='image',
        title=phototitle,
        public=bool(photodata.get('ispublic')),
        image=image,
        time=datetime.utcfromtimestamp(int(timestr)),
        permalink_url='http://www.flickr.com/photos/%s/%s/' % (owner_nsid, photodata['id']),
        author=account_for_flickr_id(owner_nsid),
    )
    photo_obj.save()
    return photo_obj
def object_from_twitpic_url(url):
    """Return the Object for a TwitPic page URL, creating it from the API if new.

    The TwitPic id is taken from the URL. If an Object for service
    ``twitpic.com`` with that id already exists it is returned; otherwise the
    TwitPic ``media/show`` API is queried and a Media plus an image-mode
    Object are saved from the response.

    :param url: a ``http://twitpic.com/<id>`` page URL.
    :returns: the existing or newly saved Object, or ``None`` when ``url`` is
        not a TwitPic page URL, when the API response is not valid JSON, or
        when the response reports errors.
    """
    mo = re.match(r'http://twitpic\.com/(\w+)', url)
    if mo is None:
        # re.match() yields None for anything that is not a TwitPic page URL;
        # report "no object" like the other failure paths below instead of
        # crashing with AttributeError on mo.group().
        return None
    twitpic_id = mo.group(1)

    try:
        return Object.objects.get(service='twitpic.com', foreign_id=twitpic_id)
    except Object.DoesNotExist:
        pass

    h = httplib2.Http()
    resp, content = h.request(
        'http://api.twitpic.com/2/media/show.json?id=%s' % twitpic_id)

    try:
        picdata = json.loads(content)
    except ValueError:
        # Couldn't get twitpic infos... probably because we're banned.
        return None
    if picdata.get('errors'):
        # Hmm, well, guess that didn't work.
        return None

    # Copy TwitPic's user fields onto the key names that
    # account_for_twitter_user() is handed below.
    userdata = picdata['user']
    userdata['id'] = userdata['twitter_id']
    userdata['screen_name'] = userdata['username']
    userdata['profile_image_url'] = userdata['avatar_url']

    pic = Media(
        image_url='http://twitpic.com/show/large/%s' % twitpic_id,
        width=int(picdata['width']),
        height=int(picdata['height']),
    )
    pic.save()

    obj = Object(
        service='twitpic.com',
        foreign_id=twitpic_id,
        render_mode='image',
        title=picdata['message'],
        image=pic,
        author=account_for_twitter_user(userdata),
        time=datetime.strptime(picdata['timestamp'], '%Y-%m-%d %H:%M:%S'),
        permalink_url=url,
    )
    obj.save()
    return obj
def object_from_twitpic_url(url):
    """Return the Object for a TwitPic page URL, creating it from the API if new.

    The TwitPic id is taken from the URL. If an Object for service
    ``twitpic.com`` with that id already exists it is returned; otherwise the
    TwitPic ``media/show`` API is queried and a Media plus an image-mode
    Object are saved from the response.

    :param url: a ``http://twitpic.com/<id>`` page URL.
    :returns: the existing or newly saved Object, or ``None`` when ``url`` is
        not a TwitPic page URL, when the API response is not valid JSON, or
        when the response reports errors.
    """
    mo = re.match(r'http://twitpic\.com/(\w+)', url)
    if mo is None:
        # re.match() yields None for anything that is not a TwitPic page URL;
        # report "no object" like the other failure paths below instead of
        # crashing with AttributeError on mo.group().
        return None
    twitpic_id = mo.group(1)

    try:
        return Object.objects.get(service='twitpic.com', foreign_id=twitpic_id)
    except Object.DoesNotExist:
        pass

    h = httplib2.Http()
    resp, content = h.request('http://api.twitpic.com/2/media/show.json?id=%s' % twitpic_id)

    try:
        picdata = json.loads(content)
    except ValueError:
        # Couldn't get twitpic infos... probably because we're banned.
        return None
    if picdata.get('errors'):
        # Hmm, well, guess that didn't work.
        return None

    # Copy TwitPic's user fields onto the key names that
    # account_for_twitter_user() is handed below.
    userdata = picdata['user']
    userdata['id'] = userdata['twitter_id']
    userdata['screen_name'] = userdata['username']
    userdata['profile_image_url'] = userdata['avatar_url']

    pic = Media(
        image_url='http://twitpic.com/show/large/%s' % twitpic_id,
        width=int(picdata['width']),
        height=int(picdata['height']),
    )
    pic.save()

    obj = Object(
        service='twitpic.com',
        foreign_id=twitpic_id,
        render_mode='image',
        title=picdata['message'],
        image=pic,
        author=account_for_twitter_user(userdata),
        time=datetime.strptime(picdata['timestamp'], '%Y-%m-%d %H:%M:%S'),
        permalink_url=url,
    )
    obj.save()
    return obj
def object_for_typepad_object(tp_obj): try: obj = Object.objects.get(service='typepad.com', foreign_id=tp_obj.url_id) except Object.DoesNotExist: pass else: log.debug("Reusing typepad object %r for asset %s", obj, tp_obj.url_id) return False, obj log.debug("Making new object for TypePad post %s by %s", tp_obj.url_id, tp_obj.author.display_name) author = account_for_typepad_user(tp_obj.author) body = tp_obj.rendered_content if not body and tp_obj.content: if tp_obj.text_format == 'html_convert_linebreaks': body = '\n\n'.join(u'<p>%s</p>' % t for t in tp_obj.content.split('\n\n')) else: body = tp_obj.content if body: body, errors = tidy_fragment(body) else: body = '' obj = Object( service='typepad.com', foreign_id=tp_obj.url_id, render_mode='mixed', title=tp_obj.title, body=body, time=tp_obj.published, permalink_url=tp_obj.permalink_url, author=author, ) if getattr(tp_obj, 'in_reply_to', None) is not None: # This post is in reply, so we don't care if our referent was # really a share. Be transitively in reply to the shared obj. really_a_share, obj.in_reply_to = object_for_typepad_object( tp_obj.in_reply_to) elif getattr(tp_obj, 'reblog_of', None) is not None: # Assets are public so it's okay if we use an anonymous typd here. t = typd.TypePad(endpoint='http://api.typepad.com/') reblog_of = t.assets.get(tp_obj.reblog_of.url_id) really_a_share, obj.in_reply_to = object_for_typepad_object(reblog_of) remove_reblog_boilerplate_from_obj(obj) if not obj.body: return True, obj.in_reply_to elif getattr(tp_obj, 'reblog_of_url', None) is not None: reblog_url = tp_obj.reblog_of_url try: in_reply_to = leapfrog.poll.embedlam.object_for_url(reblog_url) except leapfrog.poll.embedlam.RequestError, exc: in_reply_to = None except ValueError, exc: in_reply_to = None log.error("Error making object from referent %s of %s's post %s", reblog_url, author.display_name, tp_obj.url_id) log.exception(exc)
# URL (if it's an autolinked domain name, we'd break how the # tweet reads). tweet_text = tweetdata['text'] start, end = urldata['indices'] if tweet_text[start:end] == urldata['url']: # Mark links we change the text of as aboutlinks. urldata['text'] = url_page.title urldata['class'] = 'aboutlink' tweet = Object( service='twitter.com', foreign_id=str(tweetdata['id']), render_mode='status', body=tweet_html(tweetdata), time=datetime.strptime(tweetdata['created_at'], '%a %b %d %H:%M:%S +0000 %Y'), public=not tweetdata['user']['protected'], permalink_url='http://twitter.com/%s/status/%d' % (tweetdata['user']['screen_name'], tweetdata['id']), author=account_for_twitter_user(tweetdata['user']), in_reply_to=in_reply_to, ) tweet.save() return False, tweet def poll_twitter(account): user = account.person.user if user is None: return
def _widest_within(candidates, limit=700):
    """Return the widest candidate whose ``'width'`` is at most ``limit``.

    When every candidate is wider than ``limit`` the narrowest one is
    returned instead, so an oversized-only list still yields a usable entry
    rather than making ``max()`` raise ValueError on an empty sequence.
    An entirely empty ``candidates`` still raises ValueError.
    """
    candidates = list(candidates)
    fitting = [item for item in candidates if item['width'] <= limit]
    if fitting:
        return max(fitting, key=lambda item: item['width'])
    return min(candidates, key=lambda item: item['width'])


def object_from_postdata(postdata):
    """Turn a Tumblr post (as a decoded API mapping) into an Object.

    :param postdata: mapping for one post; ``id``, ``post_url``, ``date``,
        ``blog_name`` and ``type`` are always read, the rest depends on the
        post type.
    :returns: a ``(flag, object)`` pair:

        * ``(False, obj)`` -- the Object already stored for this post id, or
          a newly saved one;
        * ``(True, referent)`` -- for a link post without a description, the
          Object for the linked page (the post itself is not saved);
        * ``(None, None)`` -- the post type is not handled.
    """
    tumblr_id = postdata['id']
    try:
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=postdata['post_url'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(postdata['date'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblr_shortname(postdata['blog_name']),
    )

    post_type = postdata['type']
    if post_type == 'regular':
        obj.title = postdata.get('title', '')
        obj.body = postdata.get('body', '')
    elif post_type == 'video':
        # Widest player that fits the 700px column (narrowest if none fit).
        player = _widest_within(postdata['player'])
        body = player['embed_code']
        caption = postdata.get('caption', None)
        if caption:
            body = '\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'audio':
        obj.title = postdata.get('track_name', '')
        artist = postdata.get('artist', '')
        if artist and obj.title:
            obj.title = u'%s \u2013 %s' % (artist, obj.title)
        elif artist:
            obj.title = artist

        body = postdata.get('player', '')
        album_art = postdata.get('album_art', '')
        if album_art:
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % album_art, body))
        caption = postdata.get('caption', '')
        if caption:
            body = u'\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'photo' and len(postdata['photos']) > 1:
        # Photoset: every photo is inlined into the body as an <img>.
        photobodies = []
        for photo in postdata['photos']:
            photosize = _widest_within(photo['alt_sizes'])
            body = u'<p><img src="%(url)s" width="%(width)s" height="%(height)s"></p>' % photosize
            photobodies.append(body)
            caption = photo.get('caption', '')
            if caption:
                photobodies.append(u'<p>%s</p>' % caption)
        caption = postdata.get('caption', '')
        if caption:
            photobodies.append(caption)
        obj.body = u'\n\n'.join(photobodies)
    elif post_type == 'photo':
        # Single photo: stored as the object's image, not inlined in the body.
        photo = postdata['photos'][0]
        photosize = _widest_within(photo['alt_sizes'])
        image = Media(
            image_url=photosize['url'],
            width=photosize['width'],
            height=photosize['height'],
        )
        image.save()
        obj.image = image
        obj.render_mode = 'image'
        obj.body = postdata.get('caption', '')
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = postdata['url']
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # The page gave no richer object, so record a plain link.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()
            obj.in_reply_to = in_reply_to

        obj.title = postdata.get('title', link_url)
        desc = postdata.get('description', '')
        if desc:
            obj.body = desc
        # If we added no description, make this a share instead.
        elif obj.in_reply_to:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = postdata.get('quote', '')
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,)
        quote_source = postdata.get('source', '')
        if quote_source:
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))
        obj.body = body
    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    # TODO: make reblogs into replies

    obj.save()
    return False, obj
def object_from_post_element(post_el, tumblelog_el): tumblr_id = post_el.attrib['id'] try: return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id) except Object.DoesNotExist: pass obj = Object( service='tumblr.com', foreign_id=tumblr_id, permalink_url=post_el.attrib['url-with-slug'], title='', body='', render_mode='mixed', time=datetime.strptime(post_el.attrib['date-gmt'], '%Y-%m-%d %H:%M:%S GMT'), author=account_for_tumblelog_element(tumblelog_el), ) post_type = post_el.attrib['type'] if post_type == 'regular': title_el = post_el.find('./regular-title') if title_el is not None: obj.title = title_el.text body_el = post_el.find('./regular-body') if body_el is not None: obj.body = body_el.text elif post_type == 'video': body = post_el.find('./video-player').text video_caption_el = post_el.find('./video-caption') if video_caption_el is not None: video_caption = video_caption_el.text body = '\n\n'.join((body, video_caption)) obj.body = body elif post_type == 'audio': title_el = post_el.find('./id3-title') if title_el is not None: obj.title = title_el.text artist_el = post_el.find('./id3-artist') if artist_el is not None: obj.title = u'%s \u2013 %s' % (artist_el.text, obj.title) body = post_el.find('./audio-player').text audio_art_el = post_el.find('./id3-album-art') if audio_art_el is not None: audio_art_url = audio_art_el.text body = u'\n\n'.join((u'<p><img src="%s"></p>' % audio_art_url, body)) audio_caption_el = post_el.find('./audio-caption') if audio_caption_el is not None: audio_caption = audio_caption_el.text body = u'\n\n'.join((body, audio_caption)) obj.body = body elif post_type == 'photo': # TODO: if there's a photo-link-url, is this really a "photo reply"? 
photo_el = sorted(post_el.findall('./photo-url'), key=lambda x: int(x.attrib['max-width']), reverse=True)[0] photo_el_width = int(photo_el.attrib['max-width']) try: width, height = post_el.attrib['width'], post_el.attrib['height'] except KeyError: width, height = None, None else: width, height = int(width), int(height) if width > photo_el_width: height = photo_el_width * height / width width = photo_el_width image = Media( image_url=photo_el.text, width=width, height=height, ) image.save() obj.image = image obj.render_mode = 'image' caption_el = post_el.find('./photo-caption') if caption_el is not None: obj.body = caption_el.text elif post_type == 'link': # TODO: display the link if we can't make an in_reply_to object. # handle the Page manually to always provide an in_reply_to? # should this just be a render_mode=link object itself instead # of a reply? link_url = post_el.find('./link-url').text try: in_reply_to_page = leapfrog.poll.embedlam.Page(link_url) except ValueError: pass else: try: in_reply_to = in_reply_to_page.to_object() except ValueError: in_reply_to = None if in_reply_to is None: in_reply_to = Object( service='', foreign_id=in_reply_to_page.url, render_mode='link', title=in_reply_to_page.title, permalink_url=in_reply_to_page.url, time=datetime.utcnow(), ) in_reply_to.save() obj.in_reply_to = in_reply_to title_el = post_el.find('./link-text') obj.title = link_url if title_el is None else title_el.text desc_el = post_el.find('./link-description') if desc_el is not None: obj.body = desc_el.text # If we added no description, make this a share. 
if obj.in_reply_to and not obj.body: return True, obj.in_reply_to elif post_type == 'quote': quote_text = post_el.find('./quote-text').text body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,) quote_source_el = post_el.find('./quote-source') if quote_source_el is not None: quote_source = quote_source_el.text body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source)) obj.body = body # TODO: handle chat posts (i guess) else: log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id) return None, None try: orig_url = post_el.attrib['reblogged-root-url'] except KeyError: log.debug("Post #%s is not a reblog, leave it alone", tumblr_id) else: log.debug("Post #%s is a reblog of %s; let's try walking up", tumblr_id, orig_url) really_a_share, orig_obj = False, None try: really_a_share, orig_obj = object_from_url(orig_url) except ValueError, exc: # meh log.debug("Couldn't walk up to reblog reference %s: %s", orig_url, str(exc)) if not really_a_share and orig_obj is not None: # Patch up the upstream author's userpic if necessary, since we # don't get those from /api/read, evidently. if orig_obj.author.person.avatar is None and 'reblogged-root-avatar-url-64' in post_el.attrib: avatar = Media( image_url=post_el.attrib['reblogged-root-avatar-url-64'], width=64, height=64, ) avatar.save() orig_obj.author.person.avatar = avatar orig_obj.author.person.save() log.debug("Fixed up post #%s's author's avatar to %s", orig_obj.foreign_id, avatar.image_url) remove_reblog_boilerplate_from_obj(obj, orig_obj) if not obj.body: return True, orig_obj obj.in_reply_to = orig_obj
def object_from_post_element(post_el, tumblelog_el): tumblr_id = post_el.attrib['id'] try: return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id) except Object.DoesNotExist: pass obj = Object( service='tumblr.com', foreign_id=tumblr_id, permalink_url=post_el.attrib['url-with-slug'], title='', body='', render_mode='mixed', time=datetime.strptime(post_el.attrib['date-gmt'], '%Y-%m-%d %H:%M:%S GMT'), author=account_for_tumblelog_element(tumblelog_el), ) post_type = post_el.attrib['type'] if post_type == 'regular': title_el = post_el.find('./regular-title') if title_el is not None: obj.title = title_el.text body_el = post_el.find('./regular-body') if body_el is not None: obj.body = body_el.text elif post_type == 'video': body = post_el.find('./video-player').text video_caption_el = post_el.find('./video-caption') if video_caption_el is not None: video_caption = video_caption_el.text body = '\n\n'.join((body, video_caption)) obj.body = body elif post_type == 'audio': title_el = post_el.find('./id3-title') if title_el is not None: obj.title = title_el.text artist_el = post_el.find('./id3-artist') if artist_el is not None: obj.title = u'%s \u2013 %s' % (artist_el.text, obj.title) body = post_el.find('./audio-player').text audio_art_el = post_el.find('./id3-album-art') if audio_art_el is not None: audio_art_url = audio_art_el.text body = u'\n\n'.join( (u'<p><img src="%s"></p>' % audio_art_url, body)) audio_caption_el = post_el.find('./audio-caption') if audio_caption_el is not None: audio_caption = audio_caption_el.text body = u'\n\n'.join((body, audio_caption)) obj.body = body elif post_type == 'photo': # TODO: if there's a photo-link-url, is this really a "photo reply"? 
photo_el = sorted(post_el.findall('./photo-url'), key=lambda x: int(x.attrib['max-width']), reverse=True)[0] photo_el_width = int(photo_el.attrib['max-width']) try: width, height = post_el.attrib['width'], post_el.attrib['height'] except KeyError: width, height = None, None else: width, height = int(width), int(height) if width > photo_el_width: height = photo_el_width * height / width width = photo_el_width image = Media( image_url=photo_el.text, width=width, height=height, ) image.save() obj.image = image obj.render_mode = 'image' caption_el = post_el.find('./photo-caption') if caption_el is not None: obj.body = caption_el.text elif post_type == 'link': # TODO: display the link if we can't make an in_reply_to object. # handle the Page manually to always provide an in_reply_to? # should this just be a render_mode=link object itself instead # of a reply? link_url = post_el.find('./link-url').text try: in_reply_to_page = leapfrog.poll.embedlam.Page(link_url) except ValueError: pass else: try: in_reply_to = in_reply_to_page.to_object() except ValueError: in_reply_to = None if in_reply_to is None: in_reply_to = Object( service='', foreign_id=in_reply_to_page.url, render_mode='link', title=in_reply_to_page.title, permalink_url=in_reply_to_page.url, time=datetime.utcnow(), ) in_reply_to.save() obj.in_reply_to = in_reply_to title_el = post_el.find('./link-text') obj.title = link_url if title_el is None else title_el.text desc_el = post_el.find('./link-description') if desc_el is not None: obj.body = desc_el.text # If we added no description, make this a share. 
if obj.in_reply_to and not obj.body: return True, obj.in_reply_to elif post_type == 'quote': quote_text = post_el.find('./quote-text').text body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text, ) quote_source_el = post_el.find('./quote-source') if quote_source_el is not None: quote_source = quote_source_el.text body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source)) obj.body = body # TODO: handle chat posts (i guess) else: log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id) return None, None try: orig_url = post_el.attrib['reblogged-root-url'] except KeyError: log.debug("Post #%s is not a reblog, leave it alone", tumblr_id) else: log.debug("Post #%s is a reblog of %s; let's try walking up", tumblr_id, orig_url) really_a_share, orig_obj = False, None try: really_a_share, orig_obj = object_from_url(orig_url) except ValueError, exc: # meh log.debug("Couldn't walk up to reblog reference %s: %s", orig_url, str(exc)) if not really_a_share and orig_obj is not None: # Patch up the upstream author's userpic if necessary, since we # don't get those from /api/read, evidently. if orig_obj.author.person.avatar is None and 'reblogged-root-avatar-url-64' in post_el.attrib: avatar = Media( image_url=post_el.attrib['reblogged-root-avatar-url-64'], width=64, height=64, ) avatar.save() orig_obj.author.person.avatar = avatar orig_obj.author.person.save() log.debug("Fixed up post #%s's author's avatar to %s", orig_obj.foreign_id, avatar.image_url) remove_reblog_boilerplate_from_obj(obj, orig_obj) if not obj.body: return True, orig_obj obj.in_reply_to = orig_obj
# If "message" is included then this becomes a reply. # Otherwise, it's just a share. if "message" in item: # Facebook doesn't return the URL on Facebook in any predictable way, # so we need to synthesize it from the id. id_parts = fb_id.split("_") if len(id_parts) != 2: log.error("id %s is not in the expected format, so skipping", fb_id) return referent obj = Object( service='facebook.com', foreign_id=fb_id, render_mode='status', body=cgi.escape(item["message"]), time=datetime.strptime(item['created_time'], '%Y-%m-%dT%H:%M:%S+0000'), permalink_url="http://www.facebook.com/%s/posts/%s" % (id_parts[0], id_parts[1]), author=author, in_reply_to=referent ) obj.save() return (obj, author) else: return (referent, author)
def object_from_post(post, authtoken=None, authsecret=None):
    """Turn one MLKSHK post into an Object.

    :param post: mapping for the post; ``permalink_page``, ``user`` and
        ``posted_at`` are always read.
    :param authtoken: optional API token; together with ``authsecret`` it
        allows fetching the author's avatar when none is stored yet.
    :param authsecret: optional API secret, see ``authtoken``.
    :returns: a ``(flag, object)`` pair:

        * ``(True, referent)`` -- the post has a ``url`` and no description,
          so only the Object for that URL is returned;
        * ``(False, reply)`` -- the post has a ``url`` and a description; the
          reply Object is fetched or created in reply to the URL's Object;
        * ``(False, obj)`` -- an image post, fetched or created, then updated
          and saved.
    """
    sharekey = post['permalink_page'].split('/')[-1]
    author = account_for_mlkshk_userinfo(post['user'])

    # Only look up an avatar when the person has none from any source and we
    # hold credentials for the API call.
    lacks_avatar = not author.person.avatar_source and author.person.avatar is None
    if lacks_avatar and authtoken and authsecret:
        userinfo = call_mlkshk('https://mlkshk.com/api/user_id/%s' % author.ident,
                               authtoken=authtoken, authsecret=authsecret)
        avatar_url = userinfo['profile_image_url']
        # URLs containing 'default-icon' are skipped rather than stored.
        if 'default-icon' not in avatar_url:
            avatar = Media(width=100, height=100, image_url=avatar_url)
            avatar.save()
            author.person.avatar = avatar
            author.person.save()

    posted_at = datetime.strptime(post['posted_at'], '%Y-%m-%dT%H:%M:%SZ')

    description = post.get('description') or ''
    linked_text = u''.join(urlized_words(description))
    body = re.sub(r'\r?\n', '<br>', linked_text)

    if 'url' in post:
        referent = leapfrog.poll.embedlam.object_for_url(post['url'])
        if not post.get('description'):
            return True, referent

        try:
            reply = Object.objects.get(service='mlkshk.com', foreign_id=sharekey)
        except Object.DoesNotExist:
            reply = Object(
                service='mlkshk.com',
                foreign_id=sharekey,
                author=author,
                in_reply_to=referent,
                title=post['title'],
                permalink_url=post['permalink_page'],
                render_mode='mixed',
                body=body,
                time=posted_at,
            )
            reply.save()
        return False, reply

    try:
        obj = Object.objects.get(service='mlkshk.com', foreign_id=sharekey)
    except Object.DoesNotExist:
        photo = Media(
            image_url=post['original_image_url'],
            width=post['width'],
            height=post['height'],
            sfw=not post['nsfw'],
        )
        photo.save()
        obj = Object(service='mlkshk.com', foreign_id=sharekey, image=photo)

    # These fields are refreshed even when the Object already existed.
    obj.title = post['title']
    obj.author = author
    obj.permalink_url = post['permalink_page']
    obj.render_mode = 'image'
    obj.body = body
    obj.time = posted_at
    obj.save()

    # TODO: consider a "save" a share instead of a post?
    return False, obj
def _widest_within(candidates, limit=700):
    """Return the widest candidate whose ``'width'`` is at most ``limit``.

    When every candidate is wider than ``limit`` the narrowest one is
    returned instead, so an oversized-only list still yields a usable entry
    rather than making ``max()`` raise ValueError on an empty sequence.
    An entirely empty ``candidates`` still raises ValueError.
    """
    candidates = list(candidates)
    fitting = [item for item in candidates if item['width'] <= limit]
    if fitting:
        return max(fitting, key=lambda item: item['width'])
    return min(candidates, key=lambda item: item['width'])


def object_from_postdata(postdata):
    """Turn a Tumblr post (as a decoded API mapping) into an Object.

    :param postdata: mapping for one post; ``id``, ``post_url``, ``date``,
        ``blog_name`` and ``type`` are always read, the rest depends on the
        post type.
    :returns: a ``(flag, object)`` pair:

        * ``(False, obj)`` -- the Object already stored for this post id, or
          a newly saved one;
        * ``(True, referent)`` -- for a link post without a description, the
          Object for the linked page (the post itself is not saved);
        * ``(None, None)`` -- the post type is not handled.
    """
    tumblr_id = postdata['id']
    try:
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=postdata['post_url'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(postdata['date'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblr_shortname(postdata['blog_name']),
    )

    post_type = postdata['type']
    if post_type == 'regular':
        obj.title = postdata.get('title', '')
        obj.body = postdata.get('body', '')
    elif post_type == 'video':
        # Widest player that fits the 700px column (narrowest if none fit).
        player = _widest_within(postdata['player'])
        body = player['embed_code']
        caption = postdata.get('caption', None)
        if caption:
            body = '\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'audio':
        obj.title = postdata.get('track_name', '')
        artist = postdata.get('artist', '')
        if artist and obj.title:
            obj.title = u'%s \u2013 %s' % (artist, obj.title)
        elif artist:
            obj.title = artist

        body = postdata.get('player', '')
        album_art = postdata.get('album_art', '')
        if album_art:
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % album_art, body))
        caption = postdata.get('caption', '')
        if caption:
            body = u'\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'photo' and len(postdata['photos']) > 1:
        # Photoset: every photo is inlined into the body as an <img>.
        photobodies = []
        for photo in postdata['photos']:
            photosize = _widest_within(photo['alt_sizes'])
            body = u'<p><img src="%(url)s" width="%(width)s" height="%(height)s"></p>' % photosize
            photobodies.append(body)
            caption = photo.get('caption', '')
            if caption:
                photobodies.append(u'<p>%s</p>' % caption)
        caption = postdata.get('caption', '')
        if caption:
            photobodies.append(caption)
        obj.body = u'\n\n'.join(photobodies)
    elif post_type == 'photo':
        # Single photo: stored as the object's image, not inlined in the body.
        photo = postdata['photos'][0]
        photosize = _widest_within(photo['alt_sizes'])
        image = Media(
            image_url=photosize['url'],
            width=photosize['width'],
            height=photosize['height'],
        )
        image.save()
        obj.image = image
        obj.render_mode = 'image'
        obj.body = postdata.get('caption', '')
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = postdata['url']
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # The page gave no richer object, so record a plain link.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()
            obj.in_reply_to = in_reply_to

        obj.title = postdata.get('title', link_url)
        desc = postdata.get('description', '')
        if desc:
            obj.body = desc
        # If we added no description, make this a share instead.
        elif obj.in_reply_to:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = postdata.get('quote', '')
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text, )
        quote_source = postdata.get('source', '')
        if quote_source:
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))
        obj.body = body
    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    # TODO: make reblogs into replies

    obj.save()
    return False, obj
except ValueError, exc: log.debug("Couldn't decode JSON from OEmbed endpoint %r, loooooooool", endpoint_url, exc_info=exc) return try: resource_type = resource['type'] except KeyError: log.debug("wtf is %r", resource) raise RequestError("Resource from OEmbed request %s has no 'type'" % (endpoint_url,)) if resource_type in ('video', 'rich'): obj = Object( service='', foreign_id=target_url, render_mode='mixed', title=resource.get('title', ''), body=resource.get('html', ''), author=account_for_embed_resource(resource), time=datetime.utcnow(), permalink_url=target_url, ) obj.save() return obj elif resource_type in ('photo', 'image'): image = Media( image_url=resource['url'], width=resource.get('width'), height=resource.get('height'), ) image.save() obj = Object( service='',
def object_from_html_head(url, orig_url, head):
    """Build and save a generic Object from the metadata in a page's head.

    Reads the ``video_src`` link, the ``og:image`` meta / ``image_src`` link
    and the ``og:description`` meta from ``head``. A video link takes
    precedence over an image and is stored as an ``<embed>`` snippet on the
    Media.

    :param url: page address, used as the Object's foreign id and permalink
        and to recognise Instagram and yfrog pages.
    :param orig_url: passed to ``value_for_meta_elems`` as ``base_url`` for
        the video and image values.
    :param head: parsed head element; only its ``find()`` method is used.
    :returns: the saved Object, or ``None`` when the head yields no video
        URL, no image URL and no description.
    """
    title = title_from_html_head(head)

    video_link = head.find('link', rel='video_src')
    video_url = value_for_meta_elems((video_link,), base_url=orig_url)

    og_image_meta = head.find("meta", property="og:image")
    image_src_link = head.find("link", rel="image_src")
    image_url = value_for_meta_elems((og_image_meta, image_src_link), base_url=orig_url)

    description_meta = head.find("meta", property="og:description")
    summary = value_for_meta_elems((description_meta,), "")

    if not (video_url or image_url or summary):
        log.debug("Found neither an image URL nor a summary for %s, so returning no object", url)
        return None

    image = None
    if video_url:
        embed_code_parts = ["<embed", 'src="%s"' % video_url, 'allowfullscreen="true" wmode="transparent"']

        def named_meta_value(meta_name):
            elem = head.find('meta', attrs={'name': meta_name})
            return value_for_meta_elems((elem,), '')

        video_height = named_meta_value('video_height')
        video_width = named_meta_value('video_width')
        video_type = named_meta_value('video_type')

        if video_height:
            embed_code_parts.append('height="%s"' % video_height)
        if video_width:
            embed_code_parts.append('width="%s"' % video_width)
        # Add type and closing bracket always.
        embed_code_parts.append('type="%s">' % (video_type or 'application/x-shockwave-flash'))

        embed_code = ' '.join(embed_code_parts)
        media_width = int(video_width) if video_width else None
        media_height = int(video_height) if video_height else None
        image = Media(embed_code=embed_code, width=media_width, height=media_height)
        image.save()
    elif image_url:
        image = Media()
        image.image_url = image_url
        # TODO: how big is this image?
        image.save()

    url_flags = re.MULTILINE | re.DOTALL | re.VERBOSE
    render_mode = 'link'
    if re.match(r'http://instagr\.am/', url, url_flags):
        render_mode = 'image'
        # Use the same text as the Twitter crosspost for the title.
        if summary and ' at ' in title:
            place = title.split(' at ', 1)[1]
            title = '%s @ %s' % (summary, place)
        elif summary:
            title = summary
        # NOTE(review): the summary is assumed to be cleared for every
        # Instagram page since it now lives in the title -- confirm.
        summary = ''
    elif re.match(r'http://yfrog\.com/', url, url_flags):
        render_mode = 'image'
        title = ''
        # TODO: use yfrog xmlInfo call to get the poster's twitter username (if any)

    page_obj = Object(
        service='',
        foreign_id=url,
        render_mode=render_mode,
        title=title,
        body=summary,
        permalink_url=url,
        time=datetime.utcnow(),
        image=image,
    )
    page_obj.save()
    return page_obj
# Don't replace the if the link text is not identical to the # URL (if it's an autolinked domain name, we'd break how the # tweet reads). tweet_text = tweetdata['text'] start, end = urldata['indices'] if tweet_text[start:end] == urldata['url']: # Mark links we change the text of as aboutlinks. urldata['text'] = url_page.title urldata['class'] = 'aboutlink' tweet = Object( service='twitter.com', foreign_id=str(tweetdata['id']), render_mode='status', body=tweet_html(tweetdata), time=datetime.strptime(tweetdata['created_at'], '%a %b %d %H:%M:%S +0000 %Y'), public=not tweetdata['user']['protected'], permalink_url='http://twitter.com/%s/status/%d' % (tweetdata['user']['screen_name'], tweetdata['id']), author=account_for_twitter_user(tweetdata['user']), in_reply_to=in_reply_to, ) tweet.save() return False, tweet def poll_twitter(account): user = account.person.user if user is None: return
def object_from_html_head(url, orig_url, head):
    """Build and save a generic Object from the metadata in a page's head.

    Reads the ``video_src`` link, the ``og:image`` meta / ``image_src`` link
    and the ``og:description`` meta from ``head``. A video link takes
    precedence over an image and is stored as an ``<embed>`` snippet on the
    Media.

    :param url: page address, used as the Object's foreign id and permalink
        and to recognise Instagram and yfrog pages.
    :param orig_url: passed to ``value_for_meta_elems`` as ``base_url`` for
        the video and image values.
    :param head: parsed head element; only its ``find()`` method is used.
    :returns: the saved Object, or ``None`` when the head yields no video
        URL, no image URL and no description.
    """
    title = title_from_html_head(head)

    video_link = head.find('link', rel='video_src')
    video_url = value_for_meta_elems((video_link, ), base_url=orig_url)

    og_image_meta = head.find("meta", property="og:image")
    image_src_link = head.find("link", rel="image_src")
    image_url = value_for_meta_elems((og_image_meta, image_src_link), base_url=orig_url)

    description_meta = head.find("meta", property="og:description")
    summary = value_for_meta_elems((description_meta, ), "")

    if not (video_url or image_url or summary):
        log.debug(
            "Found neither an image URL nor a summary for %s, so returning no object",
            url)
        return None

    image = None
    if video_url:
        embed_code_parts = [
            "<embed",
            'src="%s"' % video_url,
            'allowfullscreen="true" wmode="transparent"',
        ]

        def named_meta_value(meta_name):
            elem = head.find('meta', attrs={'name': meta_name})
            return value_for_meta_elems((elem, ), '')

        video_height = named_meta_value('video_height')
        video_width = named_meta_value('video_width')
        video_type = named_meta_value('video_type')

        if video_height:
            embed_code_parts.append('height="%s"' % video_height)
        if video_width:
            embed_code_parts.append('width="%s"' % video_width)
        # Add type and closing bracket always.
        embed_code_parts.append(
            'type="%s">' % (video_type or 'application/x-shockwave-flash'))

        embed_code = ' '.join(embed_code_parts)
        media_width = int(video_width) if video_width else None
        media_height = int(video_height) if video_height else None
        image = Media(embed_code=embed_code, width=media_width, height=media_height)
        image.save()
    elif image_url:
        image = Media()
        image.image_url = image_url
        # TODO: how big is this image?
        image.save()

    url_flags = re.MULTILINE | re.DOTALL | re.VERBOSE
    render_mode = 'link'
    if re.match(r'http://instagr\.am/', url, url_flags):
        render_mode = 'image'
        # Use the same text as the Twitter crosspost for the title.
        if summary and ' at ' in title:
            place = title.split(' at ', 1)[1]
            title = '%s @ %s' % (summary, place)
        elif summary:
            title = summary
        # NOTE(review): the summary is assumed to be cleared for every
        # Instagram page since it now lives in the title -- confirm.
        summary = ''
    elif re.match(r'http://yfrog\.com/', url, url_flags):
        render_mode = 'image'
        title = ''
        # TODO: use yfrog xmlInfo call to get the poster's twitter username (if any)

    page_obj = Object(
        service='',
        foreign_id=url,
        render_mode=render_mode,
        title=title,
        body=summary,
        permalink_url=url,
        time=datetime.utcnow(),
        image=image,
    )
    page_obj.save()
    return page_obj
exc_info=exc) return try: resource_type = resource['type'] except KeyError: log.debug("wtf is %r", resource) raise RequestError("Resource from OEmbed request %s has no 'type'" % (endpoint_url, )) if resource_type in ('video', 'rich'): obj = Object( service='', foreign_id=target_url, render_mode='mixed', title=resource.get('title', ''), body=resource.get('html', ''), author=account_for_embed_resource(resource), time=datetime.utcnow(), permalink_url=target_url, ) obj.save() return obj elif resource_type in ('photo', 'image'): image = Media( image_url=resource['url'], width=resource.get('width'), height=resource.get('height'), ) image.save() obj = Object( service='',