Beispiel #1
0
def object_from_photo_url(url, width, height):
    """Return the image ``Object`` that stands for a bare photo URL.

    An object stored with an empty ``service`` and ``foreign_id`` equal to
    ``url`` is reused when one exists.  Otherwise a ``Media`` row carrying
    the given ``width`` and ``height`` is saved, an image-mode ``Object``
    pointing at it is saved, and that new object is returned.
    """
    try:
        known = Object.objects.get(service='', foreign_id=url)
    except Object.DoesNotExist:
        pass
    else:
        return known

    log.debug("Treating %s as a photo URL and making an image object from it",
              url)

    # The media row has to exist before the object can reference it.
    photo = Media(image_url=url, width=width, height=height)
    photo.save()

    fields = dict(
        service='',
        foreign_id=url,
        render_mode='image',
        title='',
        image=photo,
        author=None,
        time=datetime.utcnow(),
        permalink_url=url,
    )
    created = Object(**fields)
    created.save()
    return created
Beispiel #2
0
def object_from_photo_url(url, width, height):
    """Fetch, or create on first sight, the image ``Object`` for a photo URL.

    Lookup is by empty ``service`` plus ``foreign_id=url``.  When nothing
    matches, a ``Media`` with the supplied dimensions and an ``Object`` with
    ``render_mode='image'`` are saved; the object has no author, an empty
    title, the current UTC time and ``url`` as its permalink.
    """
    try:
        existing = Object.objects.get(service='', foreign_id=url)
    except Object.DoesNotExist:
        pass
    else:
        return existing

    log.debug("Treating %s as a photo URL and making an image object from it", url)

    media = Media(image_url=url, width=width, height=height)
    media.save()

    photo_obj = Object(
        service='',
        foreign_id=url,
        render_mode='image',
        title='',
        image=media,
        author=None,
        time=datetime.utcnow(),
        permalink_url=url,
    )
    photo_obj.save()
    return photo_obj
Beispiel #3
0
def object_from_video_data(videodata):
    """Return the ``Object`` for a Vimeo video description, creating it if new.

    ``videodata`` is indexed for ``id``, ``owner.id``, ``urls.url`` (a list
    of entries with ``type`` and ``_content``), ``width``, ``height``,
    ``title`` and ``upload_date``.  An already stored object with service
    ``vimeo.com`` and the same id is returned as is.  Otherwise a
    mixed-mode object is saved whose body is an ``<iframe>`` player, scaled
    down proportionally when the video is wider than 660 pixels.
    """
    vimeo_id = videodata['id']
    try:
        stored = Object.objects.get(service='vimeo.com', foreign_id=vimeo_id)
    except Object.DoesNotExist:
        pass
    else:
        return stored

    owner_account = account_for_vimeo_id(videodata['owner']['id'])

    # The permalink is the first URL entry flagged as the video page.
    video_pages = [entry['_content']
                   for entry in videodata['urls']['url']
                   if entry['type'] == 'video']
    video_page = video_pages[0]

    width = int(videodata['width'])
    height = int(videodata['height'])
    max_width = 660
    if width > max_width:
        # Shrink to the maximum width, keeping the aspect ratio.
        height = max_width * height / width
        width = max_width

    player_markup = ("""<iframe src="http://player.vimeo.com/video/%s" width="%d" height="%d"></iframe>"""
        % (vimeo_id, width, height))

    video_obj = Object(
        service='vimeo.com',
        foreign_id=vimeo_id,
        render_mode='mixed',
        title=videodata['title'],
        body=player_markup,
        time=datetime.strptime(videodata['upload_date'], '%Y-%m-%d %H:%M:%S'),
        permalink_url=video_page,
        author=owner_account,
    )
    video_obj.save()

    return video_obj
Beispiel #4
0
def make_object_from_photo_data(photodata):
    """Create and save a new image ``Object`` (and its ``Media``) for a Flickr photo.

    ``photodata`` is a Flickr photo dictionary.  Two shapes are accepted for
    some fields: ``owner`` may be a dict carrying ``nsid`` or the bare id,
    and ``title`` may be a dict carrying ``_content`` or the bare title.
    The upload time is read from ``dateupload`` or, failing that,
    ``dateuploaded``.

    The image URL comes from ``photo_url_for_photo`` when the original
    dimensions (``o_height``/``o_width``) are present; otherwise the
    largest size reported by ``flickr.photos.getSizes`` is used.  The
    stored dimensions are always rescaled so the longer side is 1024.

    Returns the saved ``Object``.

    Raises:
        ValueError: if ``photodata`` has neither ``dateupload`` nor
            ``dateuploaded`` (or the value is not an integer timestamp).
    """
    log.debug("Creating new object for %s's Flickr photo #%s", photodata['owner'], photodata['id'])

    # Validate and parse everything that can be rejected *before* anything
    # is saved, so bad photodata cannot leave an orphaned Media row behind.
    timestr = photodata.get('dateupload', photodata.get('dateuploaded'))
    if timestr is None:
        raise ValueError("Couldn't find an upload date (neither dateupload nor dateuploaded) in photodata %r" % photodata)
    upload_time = datetime.utcfromtimestamp(int(timestr))

    # Indexing a plain string with a string key raises TypeError, which is
    # how the bare-value shape is told apart from the dict shape.
    try:
        owner_nsid = photodata['owner']['nsid']
    except TypeError:
        owner_nsid = photodata['owner']
    try:
        phototitle = photodata['title']['_content']
    except TypeError:
        phototitle = photodata['title']

    # We aren't supposed to be able to ask for the dimensions, but we can, so use 'em.
    try:
        height, width = [int(photodata[key]) for key in ('o_height', 'o_width')]
    except KeyError:
        # Didn't get those, so we need to get the biggest size we can see.
        photosizes = call_flickr('flickr.photos.getSizes', photo_id=photodata['id'])
        largest = max(photosizes['sizes']['size'], key=lambda x: int(x['width']) * int(x['height']))
        height, width = [int(largest[key]) for key in ('height', 'width')]
        photourl = largest['source']
    else:
        photourl = photo_url_for_photo(photodata)

    # Normalise so the longer side is exactly 1024, keeping the aspect ratio.
    if height > width:
        width = int(1024 * width / height)
        height = 1024
    else:
        height = int(1024 * height / width)
        width = 1024

    image = Media(
        image_url=photourl,
        width=width,
        height=height,
    )
    image.save()

    obj = Object(
        service='flickr.com',
        foreign_id=photodata['id'],
        render_mode='image',
        title=phototitle,
        public=bool(photodata.get('ispublic')),
        image=image,
        time=upload_time,
        permalink_url='http://www.flickr.com/photos/%s/%s/' % (owner_nsid, photodata['id']),
        author=account_for_flickr_id(owner_nsid),
    )
    obj.save()

    return obj
Beispiel #5
0
def object_from_twitpic_url(url):
    """Return the image ``Object`` for a twitpic page URL, creating it if new.

    The twitpic id is taken from ``url``; an already stored object with
    service ``twitpic.com`` and that id is returned as is.  Otherwise the
    twitpic API is queried and a ``Media`` plus an image-mode ``Object``
    are saved from the response.

    Returns ``None`` when ``url`` is not an ``http://twitpic.com/<id>``
    URL, when the API response is not valid JSON, or when the response
    reports errors.
    """
    mo = re.match(r'http://twitpic\.com/(\w+)', url)
    if mo is None:
        # Not a twitpic URL: report "no object" like the other failure
        # paths instead of crashing on mo.group().
        return None
    twitpic_id = mo.group(1)

    try:
        return Object.objects.get(service='twitpic.com', foreign_id=twitpic_id)
    except Object.DoesNotExist:
        pass

    h = httplib2.Http()
    resp, content = h.request(
        'http://api.twitpic.com/2/media/show.json?id=%s' % twitpic_id)

    try:
        picdata = json.loads(content)
    except ValueError:
        # Couldn't get twitpic infos... probably because we're banned.
        return None
    if picdata.get('errors'):
        # Hmm, well, guess that didn't work.
        return None

    userdata = picdata['user']
    # twitpic names these fields differently; copy them to the keys that
    # account_for_twitter_user is handed below.
    userdata['id'] = userdata['twitter_id']
    userdata['screen_name'] = userdata['username']
    userdata['profile_image_url'] = userdata['avatar_url']

    pic = Media(
        image_url='http://twitpic.com/show/large/%s' % twitpic_id,
        width=int(picdata['width']),
        height=int(picdata['height']),
    )
    pic.save()
    obj = Object(
        service='twitpic.com',
        foreign_id=twitpic_id,
        render_mode='image',
        title=picdata['message'],
        image=pic,
        author=account_for_twitter_user(userdata),
        time=datetime.strptime(picdata['timestamp'], '%Y-%m-%d %H:%M:%S'),
        permalink_url=url,
    )
    obj.save()
    return obj
Beispiel #6
0
def object_from_twitpic_url(url):
    """Fetch, or build from the twitpic API, the ``Object`` for a twitpic URL.

    A stored object with service ``twitpic.com`` and the id parsed from
    ``url`` is reused.  Otherwise the picture's metadata is requested from
    the twitpic API and a ``Media`` and an image-mode ``Object`` are saved.

    Returns ``None`` when ``url`` does not look like
    ``http://twitpic.com/<id>``, when the API reply cannot be decoded as
    JSON, or when the reply contains errors.
    """
    mo = re.match(r'http://twitpic\.com/(\w+)', url)
    if mo is None:
        # re.match gives None for a non-matching URL; treat that as
        # "nothing to make" rather than raising AttributeError below.
        return None
    twitpic_id = mo.group(1)

    try:
        return Object.objects.get(service='twitpic.com', foreign_id=twitpic_id)
    except Object.DoesNotExist:
        pass

    h = httplib2.Http()
    resp, content = h.request('http://api.twitpic.com/2/media/show.json?id=%s' % twitpic_id)

    try:
        picdata = json.loads(content)
    except ValueError:
        # Couldn't get twitpic infos... probably because we're banned.
        return None
    if picdata.get('errors'):
        # Hmm, well, guess that didn't work.
        return None

    userdata = picdata['user']
    # The twitpic field names differ, so mirror them under the keys passed
    # on to account_for_twitter_user.
    userdata['id'] = userdata['twitter_id']
    userdata['screen_name'] = userdata['username']
    userdata['profile_image_url'] = userdata['avatar_url']

    pic = Media(
        image_url='http://twitpic.com/show/large/%s' % twitpic_id,
        width=int(picdata['width']),
        height=int(picdata['height']),
    )
    pic.save()
    obj = Object(
        service='twitpic.com',
        foreign_id=twitpic_id,
        render_mode='image',
        title=picdata['message'],
        image=pic,
        author=account_for_twitter_user(userdata),
        time=datetime.strptime(picdata['timestamp'], '%Y-%m-%d %H:%M:%S'),
        permalink_url=url,
    )
    obj.save()
    return obj
Beispiel #7
0
def object_for_typepad_object(tp_obj):
    try:
        obj = Object.objects.get(service='typepad.com',
                                 foreign_id=tp_obj.url_id)
    except Object.DoesNotExist:
        pass
    else:
        log.debug("Reusing typepad object %r for asset %s", obj, tp_obj.url_id)
        return False, obj

    log.debug("Making new object for TypePad post %s by %s", tp_obj.url_id,
              tp_obj.author.display_name)

    author = account_for_typepad_user(tp_obj.author)
    body = tp_obj.rendered_content
    if not body and tp_obj.content:
        if tp_obj.text_format == 'html_convert_linebreaks':
            body = '\n\n'.join(u'<p>%s</p>' % t
                               for t in tp_obj.content.split('\n\n'))
        else:
            body = tp_obj.content
    if body:
        body, errors = tidy_fragment(body)
    else:
        body = ''

    obj = Object(
        service='typepad.com',
        foreign_id=tp_obj.url_id,
        render_mode='mixed',
        title=tp_obj.title,
        body=body,
        time=tp_obj.published,
        permalink_url=tp_obj.permalink_url,
        author=author,
    )

    if getattr(tp_obj, 'in_reply_to', None) is not None:
        # This post is in reply, so we don't care if our referent was
        # really a share. Be transitively in reply to the shared obj.
        really_a_share, obj.in_reply_to = object_for_typepad_object(
            tp_obj.in_reply_to)
    elif getattr(tp_obj, 'reblog_of', None) is not None:
        # Assets are public so it's okay if we use an anonymous typd here.
        t = typd.TypePad(endpoint='http://api.typepad.com/')
        reblog_of = t.assets.get(tp_obj.reblog_of.url_id)

        really_a_share, obj.in_reply_to = object_for_typepad_object(reblog_of)
        remove_reblog_boilerplate_from_obj(obj)
        if not obj.body:
            return True, obj.in_reply_to
    elif getattr(tp_obj, 'reblog_of_url', None) is not None:
        reblog_url = tp_obj.reblog_of_url
        try:
            in_reply_to = leapfrog.poll.embedlam.object_for_url(reblog_url)
        except leapfrog.poll.embedlam.RequestError, exc:
            in_reply_to = None
        except ValueError, exc:
            in_reply_to = None
            log.error("Error making object from referent %s of %s's post %s",
                      reblog_url, author.display_name, tp_obj.url_id)
            log.exception(exc)
Beispiel #8
0
                # URL (if it's an autolinked domain name, we'd break how the
                # tweet reads).
                tweet_text = tweetdata['text']
                start, end = urldata['indices']
                if tweet_text[start:end] == urldata['url']:
                    # Mark links we change the text of as aboutlinks.
                    urldata['text'] = url_page.title
                    urldata['class'] = 'aboutlink'

    tweet = Object(
        service='twitter.com',
        foreign_id=str(tweetdata['id']),
        render_mode='status',
        body=tweet_html(tweetdata),
        time=datetime.strptime(tweetdata['created_at'],
                               '%a %b %d %H:%M:%S +0000 %Y'),
        public=not tweetdata['user']['protected'],
        permalink_url='http://twitter.com/%s/status/%d' %
        (tweetdata['user']['screen_name'], tweetdata['id']),
        author=account_for_twitter_user(tweetdata['user']),
        in_reply_to=in_reply_to,
    )
    tweet.save()

    return False, tweet


def poll_twitter(account):
    user = account.person.user
    if user is None:
        return
Beispiel #9
0
def _largest_within_width(candidates, max_width=700):
    """Return the widest candidate no wider than ``max_width``, else the narrowest.

    Each candidate is a dict with a ``width`` key.  When nothing fits, the
    narrowest candidate is returned instead of failing, so a post whose
    every size exceeds the limit is still imported.  Raises ``ValueError``
    only when ``candidates`` is empty.
    """
    candidates = list(candidates)
    fitting = [item for item in candidates if item['width'] <= max_width]
    if fitting:
        return max(fitting, key=lambda item: item['width'])
    return min(candidates, key=lambda item: item['width'])


def object_from_postdata(postdata):
    """Return the ``Object`` for one Tumblr post dictionary, creating it if new.

    The result is a ``(really_a_share, obj)`` pair:

    * ``(False, obj)`` -- the already stored object for this post id, or a
      newly saved one built from ``postdata``.
    * ``(True, referent)`` -- a ``link`` post with no description whose
      target could be turned into an object; the target is returned and
      nothing is saved for the post itself.
    * ``(None, None)`` -- the post type is not handled.

    Handled types are ``regular``, ``video``, ``audio``, ``photo`` (single
    photo or photoset), ``link`` and ``quote``.
    """
    tumblr_id = postdata['id']
    try:
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=postdata['post_url'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(postdata['date'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblr_shortname(postdata['blog_name']),
    )

    post_type = postdata['type']
    if post_type == 'regular':
        obj.title = postdata.get('title', '')
        obj.body = postdata.get('body', '')
    elif post_type == 'video':
        # Use the biggest embed that still fits the 700px limit.
        player = _largest_within_width(postdata['player'])
        body = player['embed_code']
        caption = postdata.get('caption', None)
        if caption:
            body = '\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'audio':
        obj.title = postdata.get('track_name', '')
        artist = postdata.get('artist', '')
        if artist and obj.title:
            obj.title = u'%s \u2013 %s' % (artist, obj.title)
        elif artist:
            obj.title = artist

        body = postdata.get('player', '')
        album_art = postdata.get('album_art', '')
        if album_art:
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % album_art, body))
        caption = postdata.get('caption', '')
        if caption:
            body = u'\n\n'.join((body, caption))

        obj.body = body
    elif post_type == 'photo' and len(postdata['photos']) > 1:  # photoset
        photobodies = []

        for photo in postdata['photos']:
            photosize = _largest_within_width(photo['alt_sizes'])
            body = u'<p><img src="%(url)s" width="%(width)s" height="%(height)s"></p>' % photosize
            photobodies.append(body)
            caption = photo.get('caption', '')
            if caption:
                photobodies.append(u'<p>%s</p>' % caption)

        # The caption of the set as a whole goes after all the photos.
        caption = postdata.get('caption', '')
        if caption:
            photobodies.append(caption)

        obj.body = u'\n\n'.join(photobodies)
    elif post_type == 'photo':  # single photo
        photo = postdata['photos'][0]
        photosize = _largest_within_width(photo['alt_sizes'])

        image = Media(
            image_url=photosize['url'],
            width=photosize['width'],
            height=photosize['height'],
        )
        image.save()

        obj.image = image
        obj.render_mode = 'image'

        obj.body = postdata.get('caption', '')
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = postdata['url']
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            # No page could be made for the link; the post simply gets no
            # in_reply_to.
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # Fall back to a plain link object for the target page.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        obj.title = postdata.get('title', link_url)
        desc = postdata.get('description', '')
        if desc:
            obj.body = desc
        # If we added no description, make this a share instead.
        elif obj.in_reply_to:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = postdata.get('quote', '')
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,)

        quote_source = postdata.get('source', '')
        if quote_source:
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))

        obj.body = body

    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    # TODO: make reblogs into replies

    obj.save()
    return False, obj
Beispiel #10
0
def object_from_post_element(post_el, tumblelog_el):
    tumblr_id = post_el.attrib['id']
    try:
        return False, Object.objects.get(service='tumblr.com', foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=post_el.attrib['url-with-slug'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(post_el.attrib['date-gmt'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblelog_element(tumblelog_el),
    )

    post_type = post_el.attrib['type']
    if post_type == 'regular':
        title_el = post_el.find('./regular-title')
        if title_el is not None:
            obj.title = title_el.text
        body_el = post_el.find('./regular-body')
        if body_el is not None:
            obj.body = body_el.text
    elif post_type == 'video':
        body = post_el.find('./video-player').text
        video_caption_el = post_el.find('./video-caption')
        if video_caption_el is not None:
            video_caption = video_caption_el.text
            body = '\n\n'.join((body, video_caption))
        obj.body = body
    elif post_type == 'audio':
        title_el = post_el.find('./id3-title')
        if title_el is not None:
            obj.title = title_el.text
        artist_el = post_el.find('./id3-artist')
        if artist_el is not None:
            obj.title = u'%s \u2013 %s' % (artist_el.text, obj.title)

        body = post_el.find('./audio-player').text
        audio_art_el = post_el.find('./id3-album-art')
        if audio_art_el is not None:
            audio_art_url = audio_art_el.text
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % audio_art_url, body))
        audio_caption_el = post_el.find('./audio-caption')
        if audio_caption_el is not None:
            audio_caption = audio_caption_el.text
            body = u'\n\n'.join((body, audio_caption))
        obj.body = body
    elif post_type == 'photo':
        # TODO: if there's a photo-link-url, is this really a "photo reply"?

        photo_el = sorted(post_el.findall('./photo-url'), key=lambda x: int(x.attrib['max-width']), reverse=True)[0]
        photo_el_width = int(photo_el.attrib['max-width'])
        try:
            width, height = post_el.attrib['width'], post_el.attrib['height']
        except KeyError:
            width, height = None, None
        else:
            width, height = int(width), int(height)
            if width > photo_el_width:
                height = photo_el_width * height / width
                width = photo_el_width

        image = Media(
            image_url=photo_el.text,
            width=width,
            height=height,
        )
        image.save()

        obj.image = image
        obj.render_mode = 'image'

        caption_el = post_el.find('./photo-caption')
        if caption_el is not None:
            obj.body = caption_el.text
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = post_el.find('./link-url').text
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        title_el = post_el.find('./link-text')
        obj.title = link_url if title_el is None else title_el.text
        desc_el = post_el.find('./link-description')
        if desc_el is not None:
            obj.body = desc_el.text

        # If we added no description, make this a share.
        if obj.in_reply_to and not obj.body:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = post_el.find('./quote-text').text
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text,)

        quote_source_el = post_el.find('./quote-source')
        if quote_source_el is not None:
            quote_source = quote_source_el.text
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))

        obj.body = body

    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping", post_type, tumblr_id)
        return None, None

    try:
        orig_url = post_el.attrib['reblogged-root-url']
    except KeyError:
        log.debug("Post #%s is not a reblog, leave it alone", tumblr_id)
    else:
        log.debug("Post #%s is a reblog of %s; let's try walking up", tumblr_id, orig_url)

        really_a_share, orig_obj = False, None
        try:
            really_a_share, orig_obj = object_from_url(orig_url)
        except ValueError, exc:
            # meh
            log.debug("Couldn't walk up to reblog reference %s: %s", orig_url, str(exc))
        if not really_a_share and orig_obj is not None:
            # Patch up the upstream author's userpic if necessary, since we
            # don't get those from /api/read, evidently.
            if orig_obj.author.person.avatar is None and 'reblogged-root-avatar-url-64' in post_el.attrib:
                avatar = Media(
                    image_url=post_el.attrib['reblogged-root-avatar-url-64'],
                    width=64,
                    height=64,
                )
                avatar.save()

                orig_obj.author.person.avatar = avatar
                orig_obj.author.person.save()

                log.debug("Fixed up post #%s's author's avatar to %s", orig_obj.foreign_id, avatar.image_url)

            remove_reblog_boilerplate_from_obj(obj, orig_obj)
            if not obj.body:
                return True, orig_obj

            obj.in_reply_to = orig_obj
Beispiel #11
0
def object_from_post_element(post_el, tumblelog_el):
    tumblr_id = post_el.attrib['id']
    try:
        return False, Object.objects.get(service='tumblr.com',
                                         foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=post_el.attrib['url-with-slug'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(post_el.attrib['date-gmt'],
                               '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblelog_element(tumblelog_el),
    )

    post_type = post_el.attrib['type']
    if post_type == 'regular':
        title_el = post_el.find('./regular-title')
        if title_el is not None:
            obj.title = title_el.text
        body_el = post_el.find('./regular-body')
        if body_el is not None:
            obj.body = body_el.text
    elif post_type == 'video':
        body = post_el.find('./video-player').text
        video_caption_el = post_el.find('./video-caption')
        if video_caption_el is not None:
            video_caption = video_caption_el.text
            body = '\n\n'.join((body, video_caption))
        obj.body = body
    elif post_type == 'audio':
        title_el = post_el.find('./id3-title')
        if title_el is not None:
            obj.title = title_el.text
        artist_el = post_el.find('./id3-artist')
        if artist_el is not None:
            obj.title = u'%s \u2013 %s' % (artist_el.text, obj.title)

        body = post_el.find('./audio-player').text
        audio_art_el = post_el.find('./id3-album-art')
        if audio_art_el is not None:
            audio_art_url = audio_art_el.text
            body = u'\n\n'.join(
                (u'<p><img src="%s"></p>' % audio_art_url, body))
        audio_caption_el = post_el.find('./audio-caption')
        if audio_caption_el is not None:
            audio_caption = audio_caption_el.text
            body = u'\n\n'.join((body, audio_caption))
        obj.body = body
    elif post_type == 'photo':
        # TODO: if there's a photo-link-url, is this really a "photo reply"?

        photo_el = sorted(post_el.findall('./photo-url'),
                          key=lambda x: int(x.attrib['max-width']),
                          reverse=True)[0]
        photo_el_width = int(photo_el.attrib['max-width'])
        try:
            width, height = post_el.attrib['width'], post_el.attrib['height']
        except KeyError:
            width, height = None, None
        else:
            width, height = int(width), int(height)
            if width > photo_el_width:
                height = photo_el_width * height / width
                width = photo_el_width

        image = Media(
            image_url=photo_el.text,
            width=width,
            height=height,
        )
        image.save()

        obj.image = image
        obj.render_mode = 'image'

        caption_el = post_el.find('./photo-caption')
        if caption_el is not None:
            obj.body = caption_el.text
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = post_el.find('./link-url').text
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        title_el = post_el.find('./link-text')
        obj.title = link_url if title_el is None else title_el.text
        desc_el = post_el.find('./link-description')
        if desc_el is not None:
            obj.body = desc_el.text

        # If we added no description, make this a share.
        if obj.in_reply_to and not obj.body:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = post_el.find('./quote-text').text
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text, )

        quote_source_el = post_el.find('./quote-source')
        if quote_source_el is not None:
            quote_source = quote_source_el.text
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))

        obj.body = body

    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping",
                  post_type, tumblr_id)
        return None, None

    try:
        orig_url = post_el.attrib['reblogged-root-url']
    except KeyError:
        log.debug("Post #%s is not a reblog, leave it alone", tumblr_id)
    else:
        log.debug("Post #%s is a reblog of %s; let's try walking up",
                  tumblr_id, orig_url)

        really_a_share, orig_obj = False, None
        try:
            really_a_share, orig_obj = object_from_url(orig_url)
        except ValueError, exc:
            # meh
            log.debug("Couldn't walk up to reblog reference %s: %s", orig_url,
                      str(exc))
        if not really_a_share and orig_obj is not None:
            # Patch up the upstream author's userpic if necessary, since we
            # don't get those from /api/read, evidently.
            if orig_obj.author.person.avatar is None and 'reblogged-root-avatar-url-64' in post_el.attrib:
                avatar = Media(
                    image_url=post_el.attrib['reblogged-root-avatar-url-64'],
                    width=64,
                    height=64,
                )
                avatar.save()

                orig_obj.author.person.avatar = avatar
                orig_obj.author.person.save()

                log.debug("Fixed up post #%s's author's avatar to %s",
                          orig_obj.foreign_id, avatar.image_url)

            remove_reblog_boilerplate_from_obj(obj, orig_obj)
            if not obj.body:
                return True, orig_obj

            obj.in_reply_to = orig_obj
Beispiel #12
0
    # If "message" is included then this becomes a reply.
    # Otherwise, it's just a share.
    if "message" in item:
        # Facebook doesn't return the URL on Facebook in any predictable way,
        # so we need to synthesize it from the id.
        id_parts = fb_id.split("_")
        if len(id_parts) != 2:
            log.error("id %s is not in the expected format, so skipping", fb_id)
            return referent

        obj = Object(
            service='facebook.com',
            foreign_id=fb_id,
            render_mode='status',
            body=cgi.escape(item["message"]),
            time=datetime.strptime(item['created_time'], '%Y-%m-%dT%H:%M:%S+0000'),
            permalink_url="http://www.facebook.com/%s/posts/%s" % (id_parts[0], id_parts[1]),
            author=author,
            in_reply_to=referent
        )
        obj.save()

        return (obj, author)

    else:

        return (referent, author)



Beispiel #13
0
def object_from_post(post, authtoken=None, authsecret=None):
    """Turn one mlkshk post dictionary into a ``(really_a_share, obj)`` pair.

    The post's share key is the last path segment of ``permalink_page``.
    When the author has neither an avatar source nor an avatar and both
    ``authtoken`` and ``authsecret`` are given, the author's profile image
    is fetched through ``call_mlkshk`` and stored, unless its URL contains
    ``default-icon``.

    * A post with a ``url`` and no description yields ``(True, referent)``,
      where the referent is the object made for that URL.
    * A post with a ``url`` and a description yields ``(False, reply)``: a
      mixed-mode object in reply to the referent, created once per share
      key.
    * Any other post yields ``(False, obj)``: an image-mode object whose
      title, author, permalink, body and time are refreshed from ``post``
      and saved on every call; the ``Media`` is only created the first
      time.
    """
    sharekey = post['permalink_page'].split('/')[-1]

    author = account_for_mlkshk_userinfo(post['user'])
    lacks_avatar = not author.person.avatar_source and author.person.avatar is None
    if lacks_avatar and authtoken and authsecret:
        userinfo = call_mlkshk('https://mlkshk.com/api/user_id/%s' %
                               author.ident,
                               authtoken=authtoken,
                               authsecret=authsecret)
        avatar_url = userinfo['profile_image_url']
        # Skip the placeholder icon; only a real picture is worth storing.
        if 'default-icon' not in avatar_url:
            avatar = Media(width=100, height=100, image_url=avatar_url)
            avatar.save()
            author.person.avatar = avatar
            author.person.save()
    posted_at = datetime.strptime(post['posted_at'], '%Y-%m-%dT%H:%M:%SZ')

    # Linkify the description, then turn line breaks into <br> tags.
    description = post.get('description') or ''
    linked = u''.join(urlized_words(description))
    body = re.sub(r'\r?\n', '<br>', linked)

    if 'url' in post:
        referent = leapfrog.poll.embedlam.object_for_url(post['url'])
        if not post.get('description'):
            # Nothing was said about the link, so this is a plain share.
            return True, referent

        try:
            reply = Object.objects.get(service='mlkshk.com',
                                       foreign_id=sharekey)
        except Object.DoesNotExist:
            reply = Object(
                service='mlkshk.com',
                foreign_id=sharekey,
                author=author,
                in_reply_to=referent,
                title=post['title'],
                permalink_url=post['permalink_page'],
                render_mode='mixed',
                body=body,
                time=posted_at,
            )
            reply.save()

        return False, reply

    try:
        image_obj = Object.objects.get(service='mlkshk.com', foreign_id=sharekey)
    except Object.DoesNotExist:
        photo = Media(
            image_url=post['original_image_url'],
            width=post['width'],
            height=post['height'],
            sfw=not post['nsfw'],
        )
        photo.save()
        image_obj = Object(
            service='mlkshk.com',
            foreign_id=sharekey,
            image=photo,
        )

    # Refresh the mutable fields whether the object is new or reused.
    image_obj.title = post['title']
    image_obj.author = author
    image_obj.permalink_url = post['permalink_page']
    image_obj.render_mode = 'image'
    image_obj.body = body
    image_obj.time = posted_at
    image_obj.save()

    # TODO: consider a "save" a share instead of a post?
    return False, image_obj
Beispiel #14
0
def _best_fit_by_width(candidates, max_width=700):
    """Return the widest candidate that is no wider than ``max_width``.

    ``candidates`` is an iterable of dicts that each have a ``'width'`` key
    (the entries of a Tumblr post's ``player`` list or a photo's
    ``alt_sizes`` list). If every candidate is wider than ``max_width``,
    the narrowest one is returned instead, so a post that only offers
    oversized renditions still produces an object instead of failing.

    Raises ``ValueError`` if ``candidates`` is empty.
    """
    def width_of(item):
        return item['width']

    candidates = list(candidates)
    fitting = [item for item in candidates if width_of(item) <= max_width]
    if fitting:
        return max(fitting, key=width_of)
    # Nothing fits: degrade to the smallest rendition rather than letting
    # max() blow up on an empty sequence.
    return min(candidates, key=width_of)


def object_from_postdata(postdata):
    """Return the ``Object`` for one Tumblr post, creating it if necessary.

    ``postdata`` is the post dict; this function reads its ``id``,
    ``post_url``, ``date``, ``blog_name`` and ``type`` keys plus the keys
    specific to each post type.

    Returns a 2-tuple:

    * ``(False, obj)`` -- the already-stored or newly saved object for the
      post itself.
    * ``(True, in_reply_to)`` -- for a link post without a description, the
      linked object is returned as a share instead of saving a new object.
    * ``(None, None)`` -- the post type is not handled.
    """
    tumblr_id = postdata['id']
    try:
        return False, Object.objects.get(service='tumblr.com',
                                         foreign_id=tumblr_id)
    except Object.DoesNotExist:
        pass

    obj = Object(
        service='tumblr.com',
        foreign_id=tumblr_id,
        permalink_url=postdata['post_url'],
        title='',
        body='',
        render_mode='mixed',
        time=datetime.strptime(postdata['date'], '%Y-%m-%d %H:%M:%S GMT'),
        author=account_for_tumblr_shortname(postdata['blog_name']),
    )

    post_type = postdata['type']
    if post_type == 'regular':
        obj.title = postdata.get('title', '')
        obj.body = postdata.get('body', '')
    elif post_type == 'video':
        player = _best_fit_by_width(postdata['player'])
        body = player['embed_code']
        caption = postdata.get('caption', None)
        if caption:
            body = '\n\n'.join((body, caption))
        obj.body = body
    elif post_type == 'audio':
        obj.title = postdata.get('track_name', '')
        artist = postdata.get('artist', '')
        if artist and obj.title:
            obj.title = u'%s \u2013 %s' % (artist, obj.title)
        elif artist:
            obj.title = artist

        # Body is assembled as: album art, then player, then caption.
        body = postdata.get('player', '')
        album_art = postdata.get('album_art', '')
        if album_art:
            body = u'\n\n'.join((u'<p><img src="%s"></p>' % album_art, body))
        caption = postdata.get('caption', '')
        if caption:
            body = u'\n\n'.join((body, caption))

        obj.body = body
    elif post_type == 'photo' and len(postdata['photos']) > 1:  # photoset
        photobodies = []

        for photo in postdata['photos']:
            photosize = _best_fit_by_width(photo['alt_sizes'])
            body = u'<p><img src="%(url)s" width="%(width)s" height="%(height)s"></p>' % photosize
            photobodies.append(body)
            caption = photo.get('caption', '')
            if caption:
                photobodies.append(u'<p>%s</p>' % caption)

        # The post-level caption goes after all the per-photo markup.
        caption = postdata.get('caption', '')
        if caption:
            photobodies.append(caption)

        obj.body = u'\n\n'.join(photobodies)
    elif post_type == 'photo':  # single photo
        photo = postdata['photos'][0]
        photosize = _best_fit_by_width(photo['alt_sizes'])

        image = Media(
            image_url=photosize['url'],
            width=photosize['width'],
            height=photosize['height'],
        )
        image.save()

        obj.image = image
        obj.render_mode = 'image'

        obj.body = postdata.get('caption', '')
    elif post_type == 'link':
        # TODO: display the link if we can't make an in_reply_to object.
        # handle the Page manually to always provide an in_reply_to?
        # should this just be a render_mode=link object itself instead
        # of a reply?
        link_url = postdata['url']
        try:
            in_reply_to_page = leapfrog.poll.embedlam.Page(link_url)
        except ValueError:
            # No usable page for the link; the post is kept without a
            # referent.
            pass
        else:
            try:
                in_reply_to = in_reply_to_page.to_object()
            except ValueError:
                in_reply_to = None
            if in_reply_to is None:
                # Fall back to a plain link object for the page.
                in_reply_to = Object(
                    service='',
                    foreign_id=in_reply_to_page.url,
                    render_mode='link',
                    title=in_reply_to_page.title,
                    permalink_url=in_reply_to_page.url,
                    time=datetime.utcnow(),
                )
                in_reply_to.save()

            obj.in_reply_to = in_reply_to

        obj.title = postdata.get('title', link_url)
        desc = postdata.get('description', '')
        if desc:
            obj.body = desc
        # If we added no description, make this a share instead.
        elif obj.in_reply_to:
            return True, obj.in_reply_to
    elif post_type == 'quote':
        quote_text = postdata.get('quote', '')
        body = u"""<blockquote><p>%s</p></blockquote>""" % (quote_text, )

        quote_source = postdata.get('source', '')
        if quote_source:
            body = u'\n\n'.join((body, u"<p>\u2014%s</p>" % quote_source))

        obj.body = body

    # TODO: handle chat posts (i guess)
    else:
        log.debug("Unhandled Tumblr post type %r for post #%s; skipping",
                  post_type, tumblr_id)
        return None, None

    # TODO: make reblogs into replies

    obj.save()
    return False, obj
Beispiel #15
0
    except ValueError, exc:
        log.debug("Couldn't decode JSON from OEmbed endpoint %r, loooooooool", endpoint_url, exc_info=exc)
        return

    try:
        resource_type = resource['type']
    except KeyError:
        log.debug("wtf is %r", resource)
        raise RequestError("Resource from OEmbed request %s has no 'type'" % (endpoint_url,))

    if resource_type in ('video', 'rich'):
        obj = Object(
            service='',
            foreign_id=target_url,
            render_mode='mixed',
            title=resource.get('title', ''),
            body=resource.get('html', ''),
            author=account_for_embed_resource(resource),
            time=datetime.utcnow(),
            permalink_url=target_url,
        )
        obj.save()
        return obj
    elif resource_type in ('photo', 'image'):
        image = Media(
            image_url=resource['url'],
            width=resource.get('width'),
            height=resource.get('height'),
        )
        image.save()
        obj = Object(
            service='',
Beispiel #16
0
def object_from_html_head(url, orig_url, head):
    """Build and save an ``Object`` from the metadata in a page's head.

    ``head`` is searched for a ``video_src`` link, an ``og:image`` meta or
    ``image_src`` link, and an ``og:description`` meta. If none of them
    yields a value, nothing is saved and ``None`` is returned. Otherwise a
    ``Media`` is saved for the video embed (preferred) or the image, and a
    new ``Object`` keyed on ``url`` is saved and returned.

    ``orig_url`` is passed to ``value_for_meta_elems`` as ``base_url`` for
    the video and image lookups.
    """
    def named_meta_value(name):
        # Value of <meta name="..."> in the head, or '' when there is none.
        elem = head.find('meta', attrs={'name': name})
        return value_for_meta_elems((elem,), '')

    title = title_from_html_head(head)

    video_url = value_for_meta_elems(
        (head.find('link', rel='video_src'),), base_url=orig_url)
    image_url = value_for_meta_elems(
        (head.find("meta", property="og:image"),
         head.find("link", rel="image_src")),
        base_url=orig_url)
    summary = value_for_meta_elems(
        (head.find("meta", property="og:description"),), "")

    if not (video_url or image_url or summary):
        log.debug("Found neither an image URL nor a summary for %s, so returning no object", url)
        return None

    image = None
    if video_url:
        # NOTE(review): the URL and the video_* values are interpolated
        # into the markup as-is -- confirm value_for_meta_elems returns
        # attribute-safe text.
        pieces = ["<embed", 'src="%s"' % video_url, 'allowfullscreen="true" wmode="transparent"']

        height_text = named_meta_value('video_height')
        width_text = named_meta_value('video_width')
        type_text = named_meta_value('video_type')

        if height_text:
            pieces.append('height="%s"' % height_text)
        if width_text:
            pieces.append('width="%s"' % width_text)

        # The type attribute also carries the tag's closing bracket, so it
        # is always appended last.
        pieces.append('type="%s">' % (type_text or 'application/x-shockwave-flash'))
        embed_code = ' '.join(pieces)

        media_width = None
        if width_text:
            media_width = int(width_text)
        media_height = None
        if height_text:
            media_height = int(height_text)

        image = Media(
            embed_code=embed_code,
            width=media_width,
            height=media_height,
        )
        image.save()
    elif image_url:
        image = Media()
        image.image_url = image_url
        # TODO: how big is this image?
        image.save()

    match_flags = re.MULTILINE | re.DOTALL | re.VERBOSE
    render_mode = 'link'
    if re.match(r'http://instagr\.am/', url, match_flags):
        render_mode = 'image'
        # Use the same text as the Twitter crosspost for the title.
        if summary:
            if ' at ' in title:
                title = '%s @ %s' % (summary, title.split(' at ', 1)[1])
            else:
                title = summary
        summary = ''
    elif re.match(r'http://yfrog\.com/', url, match_flags):
        render_mode = 'image'
        title = ''
        # TODO: use yfrog xmlInfo call to get the poster's twitter username (if any)

    obj = Object(
        service='',
        foreign_id=url,
        render_mode=render_mode,
        title=title,
        body=summary,
        permalink_url=url,
        time=datetime.utcnow(),
        image=image,
    )
    obj.save()

    return obj
Beispiel #17
0
def object_from_post(post, authtoken=None, authsecret=None):
    """Return the ``Object`` for one mlkshk post, creating or updating it.

    ``post`` is the post dict. When both ``authtoken`` and ``authsecret``
    are given and the author's person has neither an avatar source nor an
    avatar, the author's profile image is fetched via ``call_mlkshk`` and
    stored (unless it is the default icon).

    Returns a 2-tuple:

    * ``(True, obj)`` -- the post has a ``url`` and no description; ``obj``
      is whatever ``object_for_url`` returned for that URL.
    * ``(False, reply)`` -- the post has a ``url`` and a description; the
      reply object is fetched, or created in reply to the URL's object.
    * ``(False, obj)`` -- an image post; the object is fetched or created,
      then its fields are refreshed from ``post`` and saved.
    """
    sharekey = post['permalink_page'].split('/')[-1]

    author = account_for_mlkshk_userinfo(post['user'])
    person = author.person
    lacks_avatar = not person.avatar_source and person.avatar is None
    if lacks_avatar and authtoken and authsecret:
        userinfo = call_mlkshk(
            'https://mlkshk.com/api/user_id/%s' % author.ident,
            authtoken=authtoken, authsecret=authsecret)
        avatar_url = userinfo['profile_image_url']
        if 'default-icon' not in avatar_url:
            avatar = Media(width=100, height=100, image_url=avatar_url)
            avatar.save()
            person.avatar = avatar
            person.save()

    posted_at = datetime.strptime(post['posted_at'], '%Y-%m-%dT%H:%M:%SZ')

    # Linkify the description, then turn newlines into <br> tags.
    description = post.get('description') or ''
    body = re.sub(r'\r?\n', '<br>', u''.join(urlized_words(description)))

    if 'url' in post:
        target = leapfrog.poll.embedlam.object_for_url(post['url'])
        if not description:
            # Nothing was said about the link, so it is a plain share.
            return True, target

        try:
            reply = Object.objects.get(service='mlkshk.com',
                                       foreign_id=sharekey)
        except Object.DoesNotExist:
            reply_fields = dict(
                service='mlkshk.com',
                foreign_id=sharekey,
                author=author,
                in_reply_to=target,
                title=post['title'],
                permalink_url=post['permalink_page'],
                render_mode='mixed',
                body=body,
                time=posted_at,
            )
            reply = Object(**reply_fields)
            reply.save()

        return False, reply

    try:
        obj = Object.objects.get(service='mlkshk.com', foreign_id=sharekey)
    except Object.DoesNotExist:
        photo = Media(
            image_url=post['original_image_url'],
            width=post['width'],
            height=post['height'],
            sfw=not post['nsfw'],
        )
        photo.save()
        obj = Object(service='mlkshk.com', foreign_id=sharekey, image=photo)

    # Refresh these fields whether the object is new or already stored.
    refreshed = (
        ('title', post['title']),
        ('author', author),
        ('permalink_url', post['permalink_page']),
        ('render_mode', 'image'),
        ('body', body),
        ('time', posted_at),
    )
    for field_name, value in refreshed:
        setattr(obj, field_name, value)
    obj.save()

    # TODO: consider a "save" a share instead of a post?
    return False, obj
Beispiel #18
0
                # Don't replace the if the link text is not identical to the
                # URL (if it's an autolinked domain name, we'd break how the
                # tweet reads).
                tweet_text = tweetdata['text']
                start, end = urldata['indices']
                if tweet_text[start:end] == urldata['url']:
                    # Mark links we change the text of as aboutlinks.
                    urldata['text'] = url_page.title
                    urldata['class'] = 'aboutlink'

    tweet = Object(
        service='twitter.com',
        foreign_id=str(tweetdata['id']),
        render_mode='status',
        body=tweet_html(tweetdata),
        time=datetime.strptime(tweetdata['created_at'], '%a %b %d %H:%M:%S +0000 %Y'),
        public=not tweetdata['user']['protected'],
        permalink_url='http://twitter.com/%s/status/%d'
            % (tweetdata['user']['screen_name'], tweetdata['id']),
        author=account_for_twitter_user(tweetdata['user']),
        in_reply_to=in_reply_to,
    )
    tweet.save()

    return False, tweet


def poll_twitter(account):
    user = account.person.user
    if user is None:
        return
Beispiel #19
0
def object_from_html_head(url, orig_url, head):
    """Create and save an ``Object`` describing the page at ``url``.

    The page's ``head`` supplies a title (via ``title_from_html_head``), an
    optional ``video_src`` link, an optional image (``og:image`` meta or
    ``image_src`` link) and an optional ``og:description`` summary.
    ``orig_url`` is handed to ``value_for_meta_elems`` as ``base_url`` for
    the video and image values.

    Returns ``None`` without saving anything when there is no video URL,
    image URL or summary; otherwise returns the saved ``Object``.
    """
    title = title_from_html_head(head)

    video_link = head.find('link', rel='video_src')
    video_url = value_for_meta_elems((video_link, ), base_url=orig_url)

    image_candidates = (
        head.find("meta", property="og:image"),
        head.find("link", rel="image_src"),
    )
    image_url = value_for_meta_elems(image_candidates, base_url=orig_url)

    description_meta = head.find("meta", property="og:description")
    summary = value_for_meta_elems((description_meta, ), "")

    if not any((video_url, image_url, summary)):
        log.debug(
            "Found neither an image URL nor a summary for %s, so returning no object",
            url)
        return None

    image = None
    if video_url:
        embed_attrs = [
            "<embed",
            'src="%s"' % video_url,
            'allowfullscreen="true" wmode="transparent"',
        ]

        # Collect the video_* meta values in one pass; each is '' when the
        # head has no such element.
        video_meta = {}
        for meta_name in ('video_height', 'video_width', 'video_type'):
            meta_elem = head.find('meta', attrs={'name': meta_name})
            video_meta[meta_name] = value_for_meta_elems((meta_elem, ), '')
        video_height = video_meta['video_height']
        video_width = video_meta['video_width']
        video_type = video_meta['video_type'] or 'application/x-shockwave-flash'

        if video_height:
            embed_attrs.append('height="%s"' % video_height)
        if video_width:
            embed_attrs.append('width="%s"' % video_width)

        # Add type and closing bracket always.
        embed_attrs.append('type="%s">' % video_type)

        media_fields = {'embed_code': ' '.join(embed_attrs)}
        media_fields['width'] = int(video_width) if video_width else None
        media_fields['height'] = int(video_height) if video_height else None
        image = Media(**media_fields)
        image.save()
    elif image_url:
        image = Media()
        image.image_url = image_url
        # TODO: how big is this image?
        image.save()

    regex_flags = re.MULTILINE | re.DOTALL | re.VERBOSE
    if re.match(r'http://instagr\.am/', url, regex_flags):
        render_mode = 'image'
        # Use the same text as the Twitter crosspost for the title.
        if summary:
            if ' at ' in title:
                place = title.split(' at ', 1)[1]
                title = '%s @ %s' % (summary, place)
            else:
                title = summary
        summary = ''
    elif re.match(r'http://yfrog\.com/', url, regex_flags):
        render_mode = 'image'
        title = ''
        # TODO: use yfrog xmlInfo call to get the poster's twitter username (if any)
    else:
        render_mode = 'link'

    object_fields = dict(
        service='',
        foreign_id=url,
        render_mode=render_mode,
        title=title,
        body=summary,
        permalink_url=url,
        time=datetime.utcnow(),
        image=image,
    )
    obj = Object(**object_fields)
    obj.save()

    return obj
Beispiel #20
0
                  exc_info=exc)
        return

    try:
        resource_type = resource['type']
    except KeyError:
        log.debug("wtf is %r", resource)
        raise RequestError("Resource from OEmbed request %s has no 'type'" %
                           (endpoint_url, ))

    if resource_type in ('video', 'rich'):
        obj = Object(
            service='',
            foreign_id=target_url,
            render_mode='mixed',
            title=resource.get('title', ''),
            body=resource.get('html', ''),
            author=account_for_embed_resource(resource),
            time=datetime.utcnow(),
            permalink_url=target_url,
        )
        obj.save()
        return obj
    elif resource_type in ('photo', 'image'):
        image = Media(
            image_url=resource['url'],
            width=resource.get('width'),
            height=resource.get('height'),
        )
        image.save()
        obj = Object(
            service='',
Beispiel #21
0
    # If "message" is included then this becomes a reply.
    # Otherwise, it's just a share.
    if "message" in item:
        # Facebook doesn't return the URL on Facebook in any predictable way,
        # so we need to synthesize it from the id.
        id_parts = fb_id.split("_")
        if len(id_parts) != 2:
            log.error("id %s is not in the expected format, so skipping", fb_id)
            return referent

        obj = Object(
            service='facebook.com',
            foreign_id=fb_id,
            render_mode='status',
            body=cgi.escape(item["message"]),
            time=datetime.strptime(item['created_time'], '%Y-%m-%dT%H:%M:%S+0000'),
            permalink_url="http://www.facebook.com/%s/posts/%s" % (id_parts[0], id_parts[1]),
            author=author,
            in_reply_to=referent
        )
        obj.save()

        return (obj, author)

    else:

        return (referent, author)