def object_from_post_element(post_el, tumblelog_el):
    """Build (or look up) the Object for one Tumblr post XML element.

    ``post_el`` is the post element; its ``id``, ``url-with-slug``,
    ``date-gmt`` and ``type`` attributes are required.  ``tumblelog_el`` is
    passed to ``account_for_tumblelog_element`` to resolve the author.

    Returns a 2-tuple ``(really_a_share, obj)``:

    * ``(False, obj)`` -- the Object already stored for this post id, or a
      newly built and saved one.
    * ``(True, other)`` -- the post adds no body of its own to a link target
      or to the reblogged original, so that other Object is returned instead.
    * ``(None, None)`` -- the post type is not handled.
    """
    def text_of(path):
        # ElementTree reports the text of an empty element as None; treat
        # that like a missing element so we never join or format a None.
        el = post_el.find(path)
        if el is None or el.text is None:
            return ''
        return el.text

    tumblr_id = post_el.attrib['id']
    try:
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=post_el.attrib['url-with-slug'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(post_el.attrib['date-gmt'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblelog_element(tumblelog_el),
    )

    post_type = post_el.attrib['type']
    if post_type == 'regular':
        obj.title = text_of('./regular-title')
        obj.body = text_of('./regular-body')
    elif post_type == 'video':
        body = post_el.find('./video-player').text
        video_caption = text_of('./video-caption')
        if video_caption:
            body = '\n\n'.join((body, video_caption))
        obj.body = body
    elif post_type == 'audio':
        title = text_of('./id3-title')
        artist = text_of('./id3-artist')
        # Only use the "artist - title" form when both halves exist, so an
        # untitled track does not end up with a dangling dash.
        if artist and title:
            title = u'%s \u2013 %s' % (artist, title)
        elif artist:
            title = artist
        obj.title = title

        body = post_el.find('./audio-player').text
        audio_art_url = text_of('./id3-album-art')
        if audio_art_url:
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % audio_art_url, body))
        audio_caption = text_of('./audio-caption')
        if audio_caption:
            body = u'\n\n'.join((body, audio_caption))
        obj.body = body
    elif post_type == 'photo':
        # TODO: if there's a photo-link-url, is this really a "photo reply"?
        # Pick the photo-url variant with the largest max-width.
        photo_el = sorted(post_el.findall('./photo-url'),
                          key=lambda x: int(x.attrib['max-width']),
                          reverse=True)[0]
        photo_el_width = int(photo_el.attrib['max-width'])
        try:
            width, height = post_el.attrib['width'], post_el.attrib['height']
        except KeyError:
            width, height = None, None
        else:
            width, height = int(width), int(height)
            if width > photo_el_width:
                # Scale the height to the chosen variant's width.  Floor
                # division keeps it an integer under true division too.
                height = photo_el_width * height // width
                width = photo_el_width

        image = Media(
            image_url=photo_el.text,
            width=width,
            height=height,
        )
        image.save()
        obj.image = image
        obj.render_mode = 'image'

        obj.body = text_of('./photo-caption')
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = post_el.find('./link-url').text
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # No richer object could be made, so store a bare link.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        # Fall back to the URL when the link text is missing or empty.
        obj.title = text_of('./link-text') or link_url
        obj.body = text_of('./link-description')

        # If we added no description, make this a share.
        if obj.in_reply_to and not obj.body:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = post_el.find('./quote-text').text
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,)
        quote_source = text_of('./quote-source')
        if quote_source:
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))
        obj.body = body
    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    try:
        orig_url = post_el.attrib['reblogged-root-url']
    except KeyError:
        log.debug("Post #%s is not a reblog, leave it alone", tumblr_id)
    else:
        log.debug("Post #%s is a reblog of %s; let's try walking up", tumblr_id, orig_url)

        really_a_share, orig_obj = False, None
        try:
            really_a_share, orig_obj = object_from_url(orig_url)
        except ValueError as exc:
            # meh
            log.debug("Couldn't walk up to reblog reference %s: %s", orig_url, str(exc))
        if not really_a_share and orig_obj is not None:
            # Patch up the upstream author's userpic if necessary, since we
            # don't get those from /api/read, evidently.
            if orig_obj.author.person.avatar is None and 'reblogged-root-avatar-url-64' in post_el.attrib:
                avatar = Media(
                    image_url=post_el.attrib['reblogged-root-avatar-url-64'],
                    width=64,
                    height=64,
                )
                avatar.save()
                orig_obj.author.person.avatar = avatar
                orig_obj.author.person.save()
                log.debug("Fixed up post #%s's author's avatar to %s", orig_obj.foreign_id, avatar.image_url)

            remove_reblog_boilerplate_from_obj(obj, orig_obj)
            # Nothing left once the boilerplate is gone: a pure reblog.
            if not obj.body:
                return True, orig_obj

            obj.in_reply_to = orig_obj

    # Persist the new post and report it as an original, keeping the
    # (really_a_share, obj) shape that every other exit returns.
    obj.save()
    return False, obj
def object_from_post_element(post_el, tumblelog_el):
    """Build (or look up) the Object for one Tumblr post XML element.

    ``post_el`` is the post element; its ``id``, ``url-with-slug``,
    ``date-gmt`` and ``type`` attributes are required.  ``tumblelog_el`` is
    passed to ``account_for_tumblelog_element`` to resolve the author.

    Returns a 2-tuple ``(really_a_share, obj)``:

    * ``(False, obj)`` -- the Object already stored for this post id, or a
      newly built and saved one.
    * ``(True, other)`` -- the post adds no body of its own to a link target
      or to the reblogged original, so that other Object is returned instead.
    * ``(None, None)`` -- the post type is not handled.
    """
    def text_of(path):
        # ElementTree reports the text of an empty element as None; treat
        # that like a missing element so we never join or format a None.
        el = post_el.find(path)
        if el is None or el.text is None:
            return ''
        return el.text

    tumblr_id = post_el.attrib['id']
    try:
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=post_el.attrib['url-with-slug'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(post_el.attrib['date-gmt'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblelog_element(tumblelog_el),
    )

    post_type = post_el.attrib['type']
    if post_type == 'regular':
        obj.title = text_of('./regular-title')
        obj.body = text_of('./regular-body')
    elif post_type == 'video':
        body = post_el.find('./video-player').text
        video_caption = text_of('./video-caption')
        if video_caption:
            body = '\n\n'.join((body, video_caption))
        obj.body = body
    elif post_type == 'audio':
        title = text_of('./id3-title')
        artist = text_of('./id3-artist')
        # Only use the "artist - title" form when both halves exist, so an
        # untitled track does not end up with a dangling dash.
        if artist and title:
            title = u'%s \u2013 %s' % (artist, title)
        elif artist:
            title = artist
        obj.title = title

        body = post_el.find('./audio-player').text
        audio_art_url = text_of('./id3-album-art')
        if audio_art_url:
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % audio_art_url, body))
        audio_caption = text_of('./audio-caption')
        if audio_caption:
            body = u'\n\n'.join((body, audio_caption))
        obj.body = body
    elif post_type == 'photo':
        # TODO: if there's a photo-link-url, is this really a "photo reply"?
        # Pick the photo-url variant with the largest max-width.
        photo_el = sorted(post_el.findall('./photo-url'),
                          key=lambda x: int(x.attrib['max-width']),
                          reverse=True)[0]
        photo_el_width = int(photo_el.attrib['max-width'])
        try:
            width, height = post_el.attrib['width'], post_el.attrib['height']
        except KeyError:
            width, height = None, None
        else:
            width, height = int(width), int(height)
            if width > photo_el_width:
                # Scale the height to the chosen variant's width.  Floor
                # division keeps it an integer under true division too.
                height = photo_el_width * height // width
                width = photo_el_width

        image = Media(
            image_url=photo_el.text,
            width=width,
            height=height,
        )
        image.save()
        obj.image = image
        obj.render_mode = 'image'

        obj.body = text_of('./photo-caption')
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = post_el.find('./link-url').text
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # No richer object could be made, so store a bare link.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        # Fall back to the URL when the link text is missing or empty.
        obj.title = text_of('./link-text') or link_url
        obj.body = text_of('./link-description')

        # If we added no description, make this a share.
        if obj.in_reply_to and not obj.body:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = post_el.find('./quote-text').text
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,)
        quote_source = text_of('./quote-source')
        if quote_source:
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))
        obj.body = body
    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    try:
        orig_url = post_el.attrib['reblogged-root-url']
    except KeyError:
        log.debug("Post #%s is not a reblog, leave it alone", tumblr_id)
    else:
        log.debug("Post #%s is a reblog of %s; let's try walking up", tumblr_id, orig_url)

        really_a_share, orig_obj = False, None
        try:
            really_a_share, orig_obj = object_from_url(orig_url)
        except ValueError as exc:
            # meh
            log.debug("Couldn't walk up to reblog reference %s: %s", orig_url, str(exc))
        if not really_a_share and orig_obj is not None:
            # Patch up the upstream author's userpic if necessary, since we
            # don't get those from /api/read, evidently.
            if orig_obj.author.person.avatar is None and 'reblogged-root-avatar-url-64' in post_el.attrib:
                avatar = Media(
                    image_url=post_el.attrib['reblogged-root-avatar-url-64'],
                    width=64,
                    height=64,
                )
                avatar.save()
                orig_obj.author.person.avatar = avatar
                orig_obj.author.person.save()
                log.debug("Fixed up post #%s's author's avatar to %s", orig_obj.foreign_id, avatar.image_url)

            remove_reblog_boilerplate_from_obj(obj, orig_obj)
            # Nothing left once the boilerplate is gone: a pure reblog.
            if not obj.body:
                return True, orig_obj

            obj.in_reply_to = orig_obj

    # Persist the new post and report it as an original, keeping the
    # (really_a_share, obj) shape that every other exit returns.
    obj.save()
    return False, obj
def _widest_within(candidates, max_width=700):
    """Return the widest candidate whose ``'width'`` is at most ``max_width``.

    When every candidate is wider than ``max_width`` the narrowest one is
    returned instead of failing.  Raises ValueError if ``candidates`` is
    empty.
    """
    candidates = list(candidates)
    fitting = [item for item in candidates if item['width'] <= max_width]
    if fitting:
        return max(fitting, key=lambda item: item['width'])
    return min(candidates, key=lambda item: item['width'])


def object_from_postdata(postdata):
    """Build (or look up) the Object for one Tumblr post given as a dict.

    ``postdata`` must carry ``id``, ``post_url``, ``date``, ``blog_name`` and
    ``type``; the remaining keys read depend on the post type.

    Returns a 2-tuple ``(really_a_share, obj)``:

    * ``(False, obj)`` -- the Object already stored for this post id, or a
      newly built and saved one.
    * ``(True, target)`` -- a link post without a description; the linked
      Object is returned instead of a new post.
    * ``(None, None)`` -- the post type is not handled.
    """
    tumblr_id = postdata['id']
    try:
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=postdata['post_url'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(postdata['date'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblr_shortname(postdata['blog_name']),
    )

    # NOTE: values are read as ``postdata.get(key) or default`` because a key
    # that is present with a null value would slip past ``.get(key, default)``.
    post_type = postdata['type']
    if post_type == 'regular':
        obj.title = postdata.get('title') or ''
        obj.body = postdata.get('body') or ''
    elif post_type == 'video':
        player = _widest_within(postdata['player'])
        body = player['embed_code']
        caption = postdata.get('caption')
        if caption:
            body = '\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'audio':
        title = postdata.get('track_name') or ''
        artist = postdata.get('artist') or ''
        if artist and title:
            title = u'%s \u2013 %s' % (artist, title)
        elif artist:
            title = artist
        obj.title = title

        body = postdata.get('player') or ''
        album_art = postdata.get('album_art')
        if album_art:
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % album_art, body))
        caption = postdata.get('caption')
        if caption:
            body = u'\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'photo' and len(postdata['photos']) > 1:  # photoset
        # A photoset becomes one "mixed" body: each image, then its caption
        # if it has one, then the caption of the whole set.
        photobodies = []
        for photo in postdata['photos']:
            photosize = _widest_within(photo['alt_sizes'])
            photobodies.append(
                u'<p><img src="%(url)s" width="%(width)s" height="%(height)s"></p>' % photosize)
            caption = photo.get('caption')
            if caption:
                photobodies.append(u'<p>%s</p>' % caption)
        caption = postdata.get('caption')
        if caption:
            photobodies.append(caption)
        obj.body = u'\n\n'.join(photobodies)
    elif post_type == 'photo':  # single photo
        photo = postdata['photos'][0]
        photosize = _widest_within(photo['alt_sizes'])

        image = Media(
            image_url=photosize['url'],
            width=photosize['width'],
            height=photosize['height'],
        )
        image.save()

        obj.image = image
        obj.render_mode = 'image'
        obj.body = postdata.get('caption') or ''
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = postdata['url']
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # No richer object could be made, so store a bare link.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        # Fall back to the URL when the title is missing or empty.
        obj.title = postdata.get('title') or link_url
        desc = postdata.get('description')
        if desc:
            obj.body = desc
        # If we added no description, make this a share instead.
        elif obj.in_reply_to:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = postdata.get('quote') or ''
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,)
        quote_source = postdata.get('source')
        if quote_source:
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))
        obj.body = body
    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    # TODO: make reblogs into replies

    obj.save()
    return False, obj
def _widest_within(candidates, max_width=700):
    """Return the widest candidate whose ``'width'`` is at most ``max_width``.

    When every candidate is wider than ``max_width`` the narrowest one is
    returned instead of failing.  Raises ValueError if ``candidates`` is
    empty.
    """
    candidates = list(candidates)
    fitting = [item for item in candidates if item['width'] <= max_width]
    if fitting:
        return max(fitting, key=lambda item: item['width'])
    return min(candidates, key=lambda item: item['width'])


def object_from_postdata(postdata):
    """Build (or look up) the Object for one Tumblr post given as a dict.

    ``postdata`` must carry ``id``, ``post_url``, ``date``, ``blog_name`` and
    ``type``; the remaining keys read depend on the post type.

    Returns a 2-tuple ``(really_a_share, obj)``:

    * ``(False, obj)`` -- the Object already stored for this post id, or a
      newly built and saved one.
    * ``(True, target)`` -- a link post without a description; the linked
      Object is returned instead of a new post.
    * ``(None, None)`` -- the post type is not handled.
    """
    tumblr_id = postdata['id']
    try:
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=postdata['post_url'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(postdata['date'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblr_shortname(postdata['blog_name']),
    )

    # NOTE: values are read as ``postdata.get(key) or default`` because a key
    # that is present with a null value would slip past ``.get(key, default)``.
    post_type = postdata['type']
    if post_type == 'regular':
        obj.title = postdata.get('title') or ''
        obj.body = postdata.get('body') or ''
    elif post_type == 'video':
        player = _widest_within(postdata['player'])
        body = player['embed_code']
        caption = postdata.get('caption')
        if caption:
            body = '\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'audio':
        title = postdata.get('track_name') or ''
        artist = postdata.get('artist') or ''
        if artist and title:
            title = u'%s \u2013 %s' % (artist, title)
        elif artist:
            title = artist
        obj.title = title

        body = postdata.get('player') or ''
        album_art = postdata.get('album_art')
        if album_art:
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % album_art, body))
        caption = postdata.get('caption')
        if caption:
            body = u'\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'photo' and len(postdata['photos']) > 1:  # photoset
        # A photoset becomes one "mixed" body: each image, then its caption
        # if it has one, then the caption of the whole set.
        photobodies = []
        for photo in postdata['photos']:
            photosize = _widest_within(photo['alt_sizes'])
            photobodies.append(
                u'<p><img src="%(url)s" width="%(width)s" height="%(height)s"></p>' % photosize)
            caption = photo.get('caption')
            if caption:
                photobodies.append(u'<p>%s</p>' % caption)
        caption = postdata.get('caption')
        if caption:
            photobodies.append(caption)
        obj.body = u'\n\n'.join(photobodies)
    elif post_type == 'photo':  # single photo
        photo = postdata['photos'][0]
        photosize = _widest_within(photo['alt_sizes'])

        image = Media(
            image_url=photosize['url'],
            width=photosize['width'],
            height=photosize['height'],
        )
        image.save()

        obj.image = image
        obj.render_mode = 'image'
        obj.body = postdata.get('caption') or ''
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = postdata['url']
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # No richer object could be made, so store a bare link.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        # Fall back to the URL when the title is missing or empty.
        obj.title = postdata.get('title') or link_url
        desc = postdata.get('description')
        if desc:
            obj.body = desc
        # If we added no description, make this a share instead.
        elif obj.in_reply_to:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = postdata.get('quote') or ''
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,)
        quote_source = postdata.get('source')
        if quote_source:
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))
        obj.body = body
    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    # TODO: make reblogs into replies

    obj.save()
    return False, obj
entry = matching_entries[0] else: return None obj = Object( service='', foreign_id=item_url, title=entry.title, permalink_url=item_url, ) # If we have full content then this becomes a "mixed". Otherwise, we # marshall it as a link. if "content" in entry and len(entry.content) > 0: obj.render_mode = 'mixed' obj.body = entry.content[0].value else: obj.render_mode = 'link' obj.body = entry.summary if "summary" in entry else "" object_time = None if "published_parsed" in entry: object_time = entry.published_parsed elif "updated_parsed" in entry: object_time = entry.updated_parsed if object_time is None: log.debug("Feed item %s has no timestamp, so making no object", item_url) return None obj.time = datetime(*object_time[:6])
entry = matching_entries[0] else: return None obj = Object( service='', foreign_id=item_url, title=entry.title, permalink_url=item_url, ) # If we have full content then this becomes a "mixed". Otherwise, we # marshall it as a link. if "content" in entry and len(entry.content) > 0: obj.render_mode = 'mixed' obj.body = entry.content[0].value else: obj.render_mode = 'link' obj.body = entry.summary if "summary" in entry else "" object_time = None if "published_parsed" in entry: object_time = entry.published_parsed elif "updated_parsed" in entry: object_time = entry.updated_parsed if object_time is None: log.debug("Feed item %s has no timestamp, so making no object", item_url) return None
def object_from_post(post, authtoken=None, authsecret=None):
    """Turn one mlkshk post dict into a stored Object.

    ``post`` is the post dict; ``authtoken`` and ``authsecret`` are only
    used to fetch the author's profile image when the author has no avatar
    yet.

    Returns ``(True, target)`` when the post is a bare link (it has a
    ``url`` and no description), otherwise ``(False, obj)`` with the reply
    or image Object for the post.
    """
    permalink = post['permalink_page']
    # The share key is the last path segment of the permalink.
    sharekey = permalink.rsplit('/', 1)[-1]

    author = account_for_mlkshk_userinfo(post['user'])
    lacks_avatar = not author.person.avatar_source and author.person.avatar is None
    if lacks_avatar and authtoken and authsecret:
        profile = call_mlkshk('https://mlkshk.com/api/user_id/%s' % author.ident,
                              authtoken=authtoken, authsecret=authsecret)
        profile_image = profile['profile_image_url']
        # Skip the image whose URL marks it as the default icon.
        if 'default-icon' not in profile_image:
            userpic = Media(
                width=100,
                height=100,
                image_url=profile_image,
            )
            userpic.save()
            author.person.avatar = userpic
            author.person.save()

    posted_at = datetime.strptime(post['posted_at'], '%Y-%m-%dT%H:%M:%SZ')

    # Run the description through urlized_words, then turn line breaks
    # into <br> tags.
    description = post.get('description')
    body = u''.join(urlized_words(description or ''))
    body = re.sub(r'\r?\n', '<br>', body)

    if 'url' in post:
        target = leapfrog.poll.embedlam.object_for_url(post['url'])
        if not description:
            return True, target

        # A described link is stored as a reply to the linked object.
        try:
            reply = Object.objects.get(service='mlkshk.com', foreign_id=sharekey)
        except Object.DoesNotExist:
            reply = Object(
                service='mlkshk.com',
                foreign_id=sharekey,
                author=author,
                in_reply_to=target,
                title=post['title'],
                permalink_url=permalink,
                render_mode='mixed',
                body=body,
                time=posted_at,
            )
            reply.save()
        return False, reply

    # Otherwise it is an image post: reuse the stored Object if there is
    # one, else create it together with its Media.
    try:
        obj = Object.objects.get(service='mlkshk.com', foreign_id=sharekey)
    except Object.DoesNotExist:
        photo = Media(
            image_url=post['original_image_url'],
            width=post['width'],
            height=post['height'],
            sfw=not post['nsfw'],
        )
        photo.save()
        obj = Object(
            service='mlkshk.com',
            foreign_id=sharekey,
            image=photo,
        )

    # These fields are set from the post whether the Object is new or not.
    fields = (
        ('title', post['title']),
        ('author', author),
        ('permalink_url', permalink),
        ('render_mode', 'image'),
        ('body', body),
        ('time', posted_at),
    )
    for field_name, field_value in fields:
        setattr(obj, field_name, field_value)
    obj.save()

    # TODO: consider a "save" a share instead of a post?
    return False, obj