Exemple #1
0
def _best_fit_by_width(candidates, max_width=700):
    """Return the widest candidate whose ``'width'`` is at most ``max_width``.

    ``candidates`` is an iterable of mappings that each have a ``'width'``
    key. When none of them fits, the narrowest candidate is returned
    instead, so that an oversized rendition is used rather than failing.

    Raises ``ValueError`` when ``candidates`` is empty.
    """
    candidates = list(candidates)
    fitting = [item for item in candidates if item['width'] <= max_width]
    if fitting:
        return max(fitting, key=lambda item: item['width'])
    # Nothing fits: take the one that overshoots the least. min() raises
    # ValueError on an empty list, which keeps the "no candidates at all"
    # failure mode explicit.
    return min(candidates, key=lambda item: item['width'])


def object_from_postdata(postdata):
    """Build (or look up) the ``Object`` for one Tumblr post dict.

    ``postdata`` is a mapping describing a post; the keys read here are
    ``'id'``, ``'post_url'``, ``'date'``, ``'blog_name'`` and ``'type'``,
    plus type-specific keys (presumably the shape of Tumblr's JSON API
    post records -- confirm against the caller).

    Returns a 2-tuple:

    * ``(False, obj)`` -- an already stored object for this post id, or a
      newly built and saved one.
    * ``(True, target)`` -- for a link post with no description, the linked
      object itself, so the caller can treat the post as a share.
    * ``(None, None)`` -- the post type is not handled.

    Raises ``KeyError`` for missing required keys and ``ValueError`` if the
    date does not match ``'%Y-%m-%d %H:%M:%S GMT'`` or a video/photo post
    lists no players/sizes at all.
    """
    tumblr_id = postdata['id']
    try:
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=postdata['post_url'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(postdata['date'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblr_shortname(postdata['blog_name']),
    )

    post_type = postdata['type']
    if post_type == 'regular':
        obj.title = postdata.get('title', '')
        obj.body = postdata.get('body', '')
    elif post_type == 'video':
        # Prefer the largest player that fits a 700px column; fall back to
        # the smallest one when every player is wider than that.
        player = _best_fit_by_width(postdata['player'])
        body = player['embed_code']
        caption = postdata.get('caption', None)
        if caption:
            body = '\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'audio':
        # Title becomes "artist - track" (en dash) when both are known,
        # otherwise whichever one is present.
        obj.title = postdata.get('track_name', '')
        artist = postdata.get('artist', '')
        if artist and obj.title:
            obj.title = u'%s \u2013 %s' % (artist, obj.title)
        elif artist:
            obj.title = artist

        # Body order: album art, then the player, then the caption.
        body = postdata.get('player', '')
        album_art = postdata.get('album_art', '')
        if album_art:
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % album_art, body))
        caption = postdata.get('caption', '')
        if caption:
            body = u'\n\n'.join((body, caption))

        obj.body = body
    elif post_type == 'photo' and len(postdata['photos']) > 1:  # photoset
        photobodies = []

        for photo in postdata['photos']:
            photosize = _best_fit_by_width(photo['alt_sizes'])
            photobodies.append(
                u'<p><img src="%(url)s" width="%(width)s" height="%(height)s"></p>' % photosize)
            caption = photo.get('caption', '')
            if caption:
                photobodies.append(u'<p>%s</p>' % caption)

        # The post-level caption goes after all of the photos.
        caption = postdata.get('caption', '')
        if caption:
            photobodies.append(caption)

        obj.body = u'\n\n'.join(photobodies)
    elif post_type == 'photo':  # single photo
        photo = postdata['photos'][0]
        photosize = _best_fit_by_width(photo['alt_sizes'])

        image = Media(
            image_url=photosize['url'],
            width=photosize['width'],
            height=photosize['height'],
        )
        image.save()

        obj.image = image
        obj.render_mode = 'image'

        obj.body = postdata.get('caption', '')
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = postdata['url']
        # Keep the target in a local so the share check below never has to
        # read obj.in_reply_to when it was not assigned.
        in_reply_to = None
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            # Best effort: without a page the post simply has no reply target.
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # The page could not be turned into an object, so store a
                # bare link object for it instead.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        obj.title = postdata.get('title', link_url)
        desc = postdata.get('description', '')
        if desc:
            obj.body = desc
        # If we added no description, make this a share instead.
        elif in_reply_to:
            return True, in_reply_to
    elif post_type == 'quote':
        quote_text = postdata.get('quote', '')
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,)

        quote_source = postdata.get('source', '')
        if quote_source:
            # Attribution line, introduced by an em dash.
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))

        obj.body = body

    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    # TODO: make reblogs into replies

    obj.save()
    return False, obj
Exemple #2
0
def object_from_post_element(post_el, tumblelog_el):
    """Build (or look up) the ``Object`` for one Tumblr post element.

    ``post_el`` is an element exposing ``.attrib``, ``.find()`` and
    ``.findall()`` (presumably an ElementTree element from Tumblr's XML
    read API -- confirm against the caller); ``tumblelog_el`` is passed
    straight to ``account_for_tumblelog_element`` to resolve the author.

    Return values visible in this definition, all 2-tuples:

    * ``(False, existing)`` -- an object with this post id is already stored.
    * ``(True, target)`` -- the post is really a share: a link post with no
      body, or a reblog whose body is empty after boilerplate removal.
    * ``(None, None)`` -- the post type is not handled.

    NOTE(review): the definition as visible here stops after the reblog
    handling without saving ``obj`` or returning it; it looks truncated --
    confirm against the full file.
    """
    tumblr_id = post_el.attrib['id']
    try:
        # Already imported: hand back the stored object untouched.
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=post_el.attrib['url-with-slug'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(post_el.attrib['date-gmt'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblelog_element(tumblelog_el),
    )

    post_type = post_el.attrib['type']
    if post_type == 'regular':
        title_el = post_el.find('./regular-title')
        if title_el is not None:
            obj.title = title_el.text
        body_el = post_el.find('./regular-body')
        if body_el is not None:
            obj.body = body_el.text
    elif post_type == 'video':
        # The video-player element is required here; a missing one raises
        # AttributeError on .text.
        body = post_el.find('./video-player').text
        video_caption_el = post_el.find('./video-caption')
        if video_caption_el is not None:
            video_caption = video_caption_el.text
            body = '\n\n'.join((body, video_caption))
        obj.body = body
    elif post_type == 'audio':
        title_el = post_el.find('./id3-title')
        if title_el is not None:
            obj.title = title_el.text
        artist_el = post_el.find('./id3-artist')
        if artist_el is not None:
            # NOTE(review): when there is an artist but no title this yields
            # "artist \u2013 " with an empty right-hand side -- confirm intended.
            obj.title = u'%s \u2013 %s' % (artist_el.text, obj.title)

        # Body order: album art, then the player, then the caption.
        body = post_el.find('./audio-player').text
        audio_art_el = post_el.find('./id3-album-art')
        if audio_art_el is not None:
            audio_art_url = audio_art_el.text
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % audio_art_url, body))
        audio_caption_el = post_el.find('./audio-caption')
        if audio_caption_el is not None:
            audio_caption = audio_caption_el.text
            body = u'\n\n'.join((body, audio_caption))
        obj.body = body
    elif post_type == 'photo':
        # TODO: if there's a photo-link-url, is this really a "photo reply"?

        # Use the photo-url rendition with the largest max-width attribute.
        photo_el = sorted(post_el.findall('./photo-url'), key=lambda x: int(x.attrib['max-width']), reverse=True)[0]
        photo_el_width = int(photo_el.attrib['max-width'])
        try:
            width, height = post_el.attrib['width'], post_el.attrib['height']
        except KeyError:
            # No dimensions on the post: store the image without a size.
            width, height = None, None
        else:
            width, height = int(width), int(height)
            if width > photo_el_width:
                # Scale the height down in proportion to the capped width.
                # With int operands this is integer division under Python 2
                # (the except syntax further down is Python 2-only).
                height = photo_el_width * height / width
                width = photo_el_width

        image = Media(
            image_url=photo_el.text,
            width=width,
            height=height,
        )
        image.save()

        obj.image = image
        obj.render_mode = 'image'

        caption_el = post_el.find('./photo-caption')
        if caption_el is not None:
            obj.body = caption_el.text
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = post_el.find('./link-url').text
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            # Best effort: without a page the post gets no reply target.
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # Fall back to a bare link object built from the page.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        title_el = post_el.find('./link-text')
        obj.title = link_url if title_el is None else title_el.text
        desc_el = post_el.find('./link-description')
        if desc_el is not None:
            obj.body = desc_el.text

        # If we added no description, make this a share.
        if obj.in_reply_to and not obj.body:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = post_el.find('./quote-text').text
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,)

        quote_source_el = post_el.find('./quote-source')
        if quote_source_el is not None:
            quote_source = quote_source_el.text
            # Attribution line, introduced by an em dash.
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))

        obj.body = body

    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    # Reblog handling: the presence of reblogged-root-url marks a reblog.
    try:
        orig_url = post_el.attrib['reblogged-root-url']
    except KeyError:
        log.debug("Post #%s is not a reblog, leave it alone", tumblr_id)
    else:
        log.debug("Post #%s is a reblog of %s; let's try walking up", tumblr_id, orig_url)

        really_a_share, orig_obj = False, None
        try:
            really_a_share, orig_obj = object_from_url(orig_url)
        # Python 2-only except syntax; a ValueError here is logged and the
        # post is then treated as if it had no resolvable original.
        except ValueError, exc:
            # meh
            log.debug("Couldn't walk up to reblog reference %s: %s", orig_url, str(exc))
        if not really_a_share and orig_obj is not None:
            # Patch up the upstream author's userpic if necessary, since we
            # don't get those from /api/read, evidently.
            if orig_obj.author.person.avatar is None and 'reblogged-root-avatar-url-64' in post_el.attrib:
                avatar = Media(
                    image_url=post_el.attrib['reblogged-root-avatar-url-64'],
                    width=64,
                    height=64,
                )
                avatar.save()

                orig_obj.author.person.avatar = avatar
                orig_obj.author.person.save()

                log.debug("Fixed up post #%s's author's avatar to %s", orig_obj.foreign_id, avatar.image_url)

            # Presumably strips the quoted original out of obj's body --
            # helper is defined elsewhere; verify.
            remove_reblog_boilerplate_from_obj(obj, orig_obj)
            if not obj.body:
                # Nothing of the reblogger's own left: report a plain share
                # of the original.
                return True, orig_obj

            obj.in_reply_to = orig_obj
Exemple #3
0
            foreign_id=target_url,
            render_mode='link',
            title=resource.get('title', ''),
            body=resource.get('html', ''),
            author=account_for_embed_resource(resource),
            permalink_url=resource.get('url') or target_url,  # might be given anyway
            time=datetime.utcnow(),
        )
        if 'thumbnail_url' in resource:
            image = Media(
                image_url=resource['thumbnail_url'],
                width=resource.get('thumbnail_width'),
                height=resource.get('thumbnail_height'),
            )
            image.save()
            obj.image = image
        obj.save()
        return obj

    raise ValueError('Unknown OEmbed resource type %r' % resource_type)


def title_from_html_head(head):
    """Return a title for a page, given its parsed ``<head>``.

    Three candidate elements are looked up on ``head`` -- the ``og:title``
    meta tag, a meta tag named ``title`` and the ``<title>`` element -- and
    handed, in that order, to ``value_for_meta_elems`` with ``""`` as the
    default. Whatever that helper returns is the result (presumably the
    value of the first candidate that has one -- the helper is defined
    elsewhere; verify).
    """
    candidates = (
        head.find("meta", property="og:title"),
        head.find("meta", {"name": "title"}),
        head.find("title"),
    )
    return value_for_meta_elems(candidates, "")


def object_from_html_head(url, orig_url, head):