Example #1
    def upload_media(self, media):
        """Uploads one or more images or videos from web URLs.

    https://docs.joinmastodon.org/api/rest/media/

    Args:
      media: sequence of AS image or stream objects, eg:
        [{'url': 'http://picture', 'displayName': 'a thing'}, ...]

    Returns: list of string media ids for uploaded files
    """
        uploaded = set()  # URLs uploaded so far; for de-duping
        ids = []

        for obj in media:
            url = util.get_url(obj, key='stream') or util.get_url(obj)
            if not url or url in uploaded:
                continue

            data = {}
            alt = obj.get('displayName')
            if alt:
                data['description'] = util.ellipsize(alt, chars=MAX_ALT_LENGTH)

            # TODO: mime type check?
            with util.requests_get(url, stream=True) as fetch:
                fetch.raise_for_status()
                upload = self._post(API_MEDIA, files={'file': fetch.raw})

            logging.info('Got: %s', upload)
            media_id = upload['id']
            ids.append(media_id)
            uploaded.add(url)

        return ids
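
A minimal usage sketch (not from the original source): it assumes `masto` is an instance of the class that defines upload_media() above, and that API_STATUSES is the statuses endpoint constant used elsewhere in the same module.

# Hypothetical: upload one image and one video, then attach the returned
# media ids to a new status.
media = [
    {'url': 'https://example.com/cat.jpg', 'displayName': 'a cat'},
    {'stream': {'url': 'https://example.com/clip.mp4'}, 'displayName': 'a clip'},
]
ids = masto.upload_media(media)  # eg ['109349', '109350']
if ids:
    masto._post(API_STATUSES, json={'status': 'hello world', 'media_ids': ids})
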
Example #2
def send_webmentions(handler, activity, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

    Args:
      handler: RequestHandler
      activity: dict, AS1 activity
      response_props: passed through to the newly created Responses
    """
    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(handler, '%s activities are not supported yet.' % verb)

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source:
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))
    if verb in ('like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error(handler, "Couldn't find original post URL")
    if not targets:
        error(handler, "Couldn't find target URLs (inReplyTo or object)")

    # send webmentions and store Responses
    errors = []
    for target in targets:
        if not target:
            continue

        response = Response(source=source, target=target, direction='in',
                            **response_props)
        response.put()
        wm_source = response.proxy_url() if verb in ('like', 'share') else source
        logging.info('Sending webmention from %s to %s', wm_source, target)

        wm = send.WebmentionSend(wm_source, target)
        if wm.send(headers=HEADERS):
            logging.info('Success: %s', wm.response)
            response.status = 'complete'
        else:
            logging.warning('Failed: %s', wm.error)
            errors.append(wm.error)
            response.status = 'error'
        response.put()

    if errors:
        msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
        error(handler, msg, status=errors[0].get('http_status'))
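
A hedged usage sketch (not from the original source): `handler` is assumed to be the enclosing RequestHandler, and the keyword arguments are illustrative Response properties like the protocol values used in the other examples.

# Hypothetical AS1 'like' activity. send_webmentions() stores a Response per
# target and, because the verb is 'like', uses the Response proxy URL as the
# webmention source.
like = {
    'verb': 'like',
    'id': 'https://mastodon.example/users/alice/statuses/123/activity',
    'object': {'url': 'https://indieweb.example/2019/01/a-post'},
}
send_webmentions(handler, like, protocol='activitypub')
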
Example #3
    def _single_target(self):
        """
        Returns: string URL, the source's inReplyTo or object (if appropriate)
        """
        target = util.get_first(self.source_obj, 'inReplyTo')
        if target:
            return util.get_url(target)

        if self.source_obj.get('verb') in source.VERBS_WITH_OBJECT:
            return util.get_url(util.get_first(self.source_obj, 'object'))
Example #4
    def base_object(self, obj):
        """Returns the 'base' Mastodon object that an object operates on.

        If the object is a reply, boost, or favorite of a Mastodon post - on any
        instance - this returns that post object. The id in the returned object is
        the id of that remote post *on the local instance*. (As a Mastodon style id,
        ie an integer in a string, *not* a tag URI.)

        Uses Mastodon's search API on the local instance to determine whether a URL
        is a Mastodon post, and if it is, to find or generate an id for it on the
        local instance.

        Discovered via https://mastodon.social/@jkreeftmeijer/101245063526942536

        Args:
          obj: ActivityStreams object

        Returns:
          dict, minimal ActivityStreams object. Usually has at least id; may
          also have url, author, etc.
        """
        for field in ('inReplyTo', 'object', 'target'):
            for base in util.get_list(obj, field):
                # first, check if it's on local instance
                url = util.get_url(base)
                if url.startswith(self.instance):
                    return self._postprocess_base_object(base)

                # nope; try mastodon's search API
                try:
                    results = self._get(API_SEARCH,
                                        params={
                                            'q': url,
                                            'resolve': True
                                        })
                except requests.RequestException as e:
                    logging.info("%s URL %s doesn't look like Mastodon: %s",
                                 field, url, e)
                    continue

                for status in results.get('statuses', []):
                    if url in (status.get('url'), status.get('uri')):
                        # found it!
                        base = self.status_to_object(status)
                        base['id'] = status['id']
                        return self._postprocess_base_object(base)

        return {}
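
An illustrative sketch (not from the original source), assuming `masto` is an instance of this Mastodon class with `instance` set to the local instance's base URL and credentials for its search API.

# A reply to a post that lives on another instance: base_object() runs the
# URL through the local instance's search API and returns a minimal object
# whose 'id' is the post's id *on the local instance*.
reply = {
    'objectType': 'comment',
    'content': 'nice post!',
    'inReplyTo': [{'url': 'https://other.instance/@bob/102254421922215970'}],
}
base = masto.base_object(reply)
print(base.get('id'), base.get('url'))
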
Example #5
def slap(acct):
    """Accepts POSTs to /[ACCT]/salmon and converts to outbound webmentions."""
    # TODO: unify with activitypub
    body = request.get_data(as_text=True)
    logging.info(f'Got: {body}')

    try:
        parsed = utils.parse_magic_envelope(body)
    except ParseError as e:
        error('Could not parse POST body as XML', exc_info=True)
    data = parsed['data']
    logging.info(f'Decoded: {data}')

    # check that we support this activity type
    try:
        activity = atom.atom_to_activity(data)
    except ParseError as e:
        error('Could not parse envelope data as XML', exc_info=True)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(f'Sorry, {verb} activities are not supported yet.', status=501)

    # verify author and signature
    author = util.get_url(activity.get('actor'))
    if ':' not in author:
        author = f'acct:{author}'
    elif not author.startswith('acct:'):
        error(f'Author URI {author} has unsupported scheme; expected acct:')

    logging.info(f'Fetching Salmon key for {author}')
    if not magicsigs.verify(data, parsed['sig'], author_uri=author):
        error('Could not verify magic signature.')
    logging.info('Verified magic signature.')

    # Verify that the timestamp is recent. Required by spec.
    # I get that this helps prevent spam, but in practice it's a bit silly,
    # and other major implementations don't (e.g. Mastodon), so forget it.
    #
    # updated = utils.parse_updated_from_atom(data)
    # if not utils.verify_timestamp(updated):
    #     error('Timestamp is more than 1h old.')

    # send webmentions to each target
    activity = atom.atom_to_activity(data)
    common.send_webmentions(activity, protocol='ostatus', source_atom=data)
    return ''
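
A hedged sketch of exercising this endpoint with Flask's test client (not from the original source). The route path and content type are assumptions based on the docstring; `app` is the Flask app the view is registered on, and `envelope` stands in for a signed magic envelope.

# Hypothetical: POST a Salmon slap to /[ACCT]/salmon.
envelope = """<?xml version='1.0' encoding='UTF-8'?>
<me:env xmlns:me='http://salmon-protocol.org/ns/magic-env'>...</me:env>"""

with app.test_client() as client:
    resp = client.post('/alice@mastodon.example/salmon', data=envelope,
                       content_type='application/magic-envelope+xml')
    print(resp.status_code, resp.get_data(as_text=True))
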
Example #6
    def post(self, username, domain):
        logging.info('Got: %s', self.request.body)

        try:
            parsed = utils.parse_magic_envelope(self.request.body)
        except ParseError as e:
            self.error('Could not parse POST body as XML', exc_info=True)
        data = parsed['data']
        logging.info('Decoded: %s', data)

        # check that we support this activity type
        try:
            activity = atom.atom_to_activity(data)
        except ParseError as e:
            self.error('Could not parse envelope data as XML', exc_info=True)

        verb = activity.get('verb')
        if verb and verb not in SUPPORTED_VERBS:
            self.error('Sorry, %s activities are not supported yet.' % verb,
                       status=501)

        # verify author and signature
        author = util.get_url(activity.get('actor'))
        if ':' not in author:
            author = 'acct:%s' % author
        elif not author.startswith('acct:'):
            self.error('Author URI %s has unsupported scheme; expected acct:' %
                       author)

        logging.info('Fetching Salmon key for %s' % author)
        if not magicsigs.verify(author, data, parsed['sig']):
            self.error('Could not verify magic signature.')
        logging.info('Verified magic signature.')

        # Verify that the timestamp is recent. Required by spec.
        # I get that this helps prevent spam, but in practice it's a bit silly,
        # and other major implementations don't (e.g. Mastodon), so forget it.
        #
        # updated = utils.parse_updated_from_atom(data)
        # if not utils.verify_timestamp(updated):
        #     self.error('Timestamp is more than 1h old.')

        # send webmentions to each target
        activity = atom.atom_to_activity(data)
        self.send_webmentions(activity, protocol='ostatus', source_atom=data)
Example #7
    def _create(self,
                obj,
                preview=None,
                include_link=source.OMIT_LINK,
                ignore_formatting=False):
        """Creates or previews a status (aka toot), reply, boost (aka reblog), or favorite.

        https://docs.joinmastodon.org/api/rest/statuses/

        Based on :meth:`Twitter._create`.

        Args:
          obj: ActivityStreams object
          preview: boolean
          include_link: string
          ignore_formatting: boolean

        Returns: CreationResult. If preview is True, the content will be a unicode
          string HTML snippet. If False, it will be a dict with 'id' and 'url' keys
          for the newly created object.
        """
        assert preview in (False, True)
        type = obj.get('objectType')
        verb = obj.get('verb')

        base_obj = self.base_object(obj)
        base_id = base_obj.get('id')
        base_url = base_obj.get('url')

        is_reply = type == 'comment' or obj.get('inReplyTo')
        is_rsvp = (verb and verb.startswith('rsvp-')) or verb == 'invite'
        atts = obj.get('attachments', [])
        images = util.dedupe_urls(
            util.get_list(obj, 'image') +
            [a for a in atts if a.get('objectType') == 'image'])
        videos = util.dedupe_urls(
            [obj] + [a for a in atts if a.get('objectType') == 'video'],
            key='stream')
        has_media = (images or videos) and (type in ('note', 'article')
                                            or is_reply)

        # prefer displayName over content for articles
        #
        # TODO: handle activities as well as objects? ie pull out ['object'] here if
        # necessary?
        type = obj.get('objectType')
        prefer_content = type == 'note' or (base_url and is_reply)
        preview_description = ''
        content = self._content_for_create(obj,
                                           ignore_formatting=ignore_formatting,
                                           prefer_name=not prefer_content)

        if not content:
            if type == 'activity' and not is_rsvp:
                content = verb
            elif has_media:
                content = ''
            else:
                return source.creation_result(
                    abort=False,  # keep looking for things to publish,
                    error_plain='No content text found.',
                    error_html='No content text found.')

        post_label = '%s %s' % (self.NAME, self.TYPE_LABELS['post'])
        if is_reply and not base_url:
            return source.creation_result(
                abort=True,
                error_plain='Could not find a %s to reply to.' % post_label,
                error_html=
                'Could not find a %s to <a href="http://indiewebcamp.com/reply">reply to</a>. Check that your post has the right <a href="http://indiewebcamp.com/comment">in-reply-to</a> link.'
                % post_label)

        # truncate and ellipsize content if necessary
        # TODO: don't count domains in remote mentions.
        # https://docs.joinmastodon.org/usage/basics/#text
        content = self.truncate(content, obj.get('url'), include_link, type)

        # linkify user mentions
        def linkify_mention(match):
            split = match.group(1).split('@')
            username = split[0]
            instance = ('https://' +
                        split[1]) if len(split) > 1 else self.instance
            url = urllib.parse.urljoin(instance, '/@' + username)
            return '<a href="%s">@%s</a>' % (url, username)

        preview_content = MENTION_RE.sub(linkify_mention, content)

        # linkify (defaults to twitter's behavior)
        preview_content = util.linkify(preview_content,
                                       pretty=True,
                                       skip_bare_cc_tlds=True)
        tags_url = urllib.parse.urljoin(self.instance, '/tags')
        preview_content = HASHTAG_RE.sub(
            r'\1<a href="%s/\2">#\2</a>' % tags_url, preview_content)

        # switch on activity type
        if type == 'activity' and verb == 'like':
            if not base_url:
                return source.creation_result(
                    abort=True,
                    error_plain='Could not find a %s to %s.' %
                    (post_label, self.TYPE_LABELS['like']),
                    error_html=
                    'Could not find a %s to <a href="http://indiewebcamp.com/like">%s</a>. Check that your post has the right <a href="http://indiewebcamp.com/like">u-like-of link</a>.'
                    % (post_label, self.TYPE_LABELS['like']))

            if preview:
                preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
                    self.TYPE_LABELS['like'], base_url,
                    self.TYPE_LABELS['post'], self.embed_post(base_obj))
                return source.creation_result(description=preview_description)
            else:
                resp = self._post(API_FAVORITE % base_id)
                resp['type'] = 'like'

        elif type == 'activity' and verb == 'share':
            if not base_url:
                return source.creation_result(
                    abort=True,
                    error_plain='Could not find a %s to %s.' %
                    (post_label, self.TYPE_LABELS['repost']),
                    error_html=
                    'Could not find a %s to <a href="http://indiewebcamp.com/repost">%s</a>. Check that your post has the right <a href="http://indiewebcamp.com/repost">repost-of</a> link.'
                    % (post_label, self.TYPE_LABELS['repost']))

            if preview:
                preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
                    self.TYPE_LABELS['repost'], base_url,
                    self.TYPE_LABELS['post'], self.embed_post(base_obj))
                return source.creation_result(description=preview_description)
            else:
                resp = self._post(API_REBLOG % base_id)
                resp['type'] = 'repost'

        elif type in ('note', 'article') or is_reply or is_rsvp:  # a post
            data = {'status': content}

            if is_reply:
                preview_description += 'add a <span class="verb">%s</span> to <a href="%s">this %s</a>: %s' % (
                    self.TYPE_LABELS['comment'], base_url,
                    self.TYPE_LABELS['post'], self.embed_post(base_obj))
                data['in_reply_to_id'] = base_id
            else:
                preview_description += '<span class="verb">%s</span>:' % self.TYPE_LABELS[
                    'post']

            num_media = len(videos) + len(images)
            if num_media > MAX_MEDIA:
                videos = videos[:MAX_MEDIA]
                images = images[:max(MAX_MEDIA - len(videos), 0)]
                logging.warning('Found %d media! Only using the first %d: %r',
                                num_media, MAX_MEDIA, videos + images)

            if preview:
                media_previews = [
                    '<video controls src="%s"><a href="%s">%s</a></video>' %
                    (util.get_url(vid, key='stream'),
                     util.get_url(vid, key='stream'), vid.get('displayName')
                     or 'this video') for vid in videos
                ] + [
                    '<img src="%s" alt="%s" />' %
                    (util.get_url(img), img.get('displayName') or '')
                    for img in images
                ]
                if media_previews:
                    preview_content += '<br /><br />' + ' &nbsp; '.join(
                        media_previews)
                return source.creation_result(content=preview_content,
                                              description=preview_description)

            else:
                ids = self.upload_media(videos + images)
                if ids:
                    data['media_ids'] = ids
                resp = self._post(API_STATUSES, json=data)

        else:
            return source.creation_result(
                abort=False,
                error_plain='Cannot publish type=%s, verb=%s to Mastodon' %
                (type, verb),
                error_html='Cannot publish type=%s, verb=%s to Mastodon' %
                (type, verb))

        if 'url' not in resp:
            resp['url'] = base_url

        return source.creation_result(resp)
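
A usage sketch (not from the original source), assuming the public create() and preview_create() wrappers on the source class delegate to _create() with preview=False and preview=True respectively, and that `masto` is a configured instance.

# Hypothetical: preview, then publish, a note with one attached image.
note = {
    'objectType': 'note',
    'content': 'Hello #fediverse!',
    'image': [{'url': 'https://example.com/pic.jpg', 'displayName': 'a pic'}],
}

preview = masto.preview_create(note)
print(preview.description)
print(preview.content)

result = masto.create(note)
print(result.content.get('url'))  # URL of the newly created toot
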
Example #8
    def try_activitypub(self):
        source = util.get_required_param(self, 'source')

        # fetch source page, convert to ActivityStreams
        source_resp = common.requests_get(source)
        source_url = source_resp.url or source
        source_mf2 = mf2py.parse(source_resp.text, url=source_url)
        # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

        entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
        logging.info('First entry: %s', json.dumps(entry, indent=2))
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [source_url]

        source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

        # fetch target page as AS object. target is first in-reply-to, like-of,
        # or repost-of, *not* target query param.)
        target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                              util.get_first(source_obj, 'object'))
        if not target:
            common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                         'found in %s' % source_url)

        try:
            target_resp = common.get_as2(target)
        except (requests.HTTPError, exc.HTTPBadGateway) as e:
            if (e.response.status_code // 100 == 2 and
                common.content_type(e.response).startswith('text/html')):
                self.resp = Response.get_or_create(
                    source=source_url, target=e.response.url or target,
                    direction='out', source_mf2=json.dumps(source_mf2))
                return self.send_salmon(source_obj, target_resp=e.response)
            raise

        target_url = target_resp.url or target
        self.resp = Response.get_or_create(
            source=source_url, target=target_url, direction='out',
            protocol='activitypub', source_mf2=json.dumps(source_mf2))

        # find actor's inbox
        target_obj = target_resp.json()
        inbox_url = target_obj.get('inbox')

        if not inbox_url:
            # TODO: test actor/attributedTo and not, with/without inbox
            actor = target_obj.get('actor') or target_obj.get('attributedTo')
            if isinstance(actor, dict):
                inbox_url = actor.get('inbox')
                actor = actor.get('url')
            if not inbox_url and not actor:
                common.error(self, 'Target object has no actor or attributedTo URL')

        if not inbox_url:
            # fetch actor as AS object
            actor = common.get_as2(actor).json()
            inbox_url = actor.get('inbox')

        if not inbox_url:
            # TODO: probably need a way to save errors like this so that we can
            # return them if ostatus fails too.
            # common.error(self, 'Target actor has no inbox')
            return self.send_salmon(source_obj, target_resp=target_resp)

        # convert to AS2
        source_domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(source_domain)
        source_activity = common.postprocess_as2(
            as2.from_as1(source_obj), target=target_obj, key=key)

        if self.resp.status == 'complete':
            source_activity['type'] = 'Update'

        # prepare HTTP Signature (required by Mastodon)
        # https://w3c.github.io/activitypub/#authorization-lds
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07
        # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
        acct = 'acct:%s@%s' % (source_domain, source_domain)
        auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                                 algorithm='rsa-sha256')

        # deliver source object to target actor's inbox.
        headers = {
            'Content-Type': common.CONTENT_TYPE_AS2,
            # required for HTTP Signature
            # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
            'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        }
        inbox_url = urlparse.urljoin(target_url, inbox_url)
        resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                                    headers=headers)
        self.response.status_int = resp.status_code
        if resp.status_code == 202:
            self.response.write('202 response! If this is Mastodon 1.x, their '
                                'signature verification probably failed. :(\n')
        self.response.write(resp.text)
Example #9
def postprocess_as2(activity, target=None, key=None):
    """Prepare an AS2 object to be served or sent via ActivityPub.

    Args:
      activity: dict, AS2 object or activity
      target: dict, AS2 object, optional. The target of activity's inReplyTo or
        Like/Announce/etc object, if any.
      key: :class:`models.MagicKey`, optional. populated into publicKey field
        if provided.
    """
    type = activity.get('type')

    # actor objects
    if type == 'Person':
        postprocess_as2_actor(activity)
        if not activity.get('publicKey'):
            # underspecified, inferred from this issue and Mastodon's implementation:
            # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
            # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
            activity.update({
                'publicKey': {
                    'id': activity.get('preferredUsername'),
                    'publicKeyPem': key.public_pem().decode(),
                },
                '@context': (util.get_list(activity, '@context') +
                             ['https://w3id.org/security/v1']),
            })
        return activity

    for actor in (util.get_list(activity, 'attributedTo') +
                  util.get_list(activity, 'actor')):
        postprocess_as2_actor(actor)

    # inReplyTo: singly valued, prefer id over url
    target_id = target.get('id') if target else None
    in_reply_to = activity.get('inReplyTo')
    if in_reply_to:
        if target_id:
            activity['inReplyTo'] = target_id
        elif isinstance(in_reply_to, list):
            if len(in_reply_to) > 1:
                logging.warning("AS2 doesn't support multiple inReplyTo URLs! "
                                'Only using the first: %s' % in_reply_to[0])
            activity['inReplyTo'] = in_reply_to[0]

        # Mastodon evidently requires a Mention tag for replies to generate a
        # notification to the original post's author. not required for likes,
        # reposts, etc. details:
        # https://github.com/snarfed/bridgy-fed/issues/34
        if target:
            for to in (util.get_list(target, 'attributedTo') +
                       util.get_list(target, 'actor')):
                if isinstance(to, dict):
                    to = to.get('url') or to.get('id')
                if to:
                    activity.setdefault('tag', []).append({
                        'type': 'Mention',
                        'href': to,
                    })

    # activity objects (for Like, Announce, etc): prefer id over url
    obj = activity.get('object')
    if obj:
        if isinstance(obj, dict) and not obj.get('id'):
            obj['id'] = target_id or obj.get('url')
        elif target_id and obj != target_id:
            activity['object'] = target_id

    # id is required for most things. default to url if it's not set.
    if not activity.get('id'):
        activity['id'] = activity.get('url')

    # TODO: find a better way to check this, sometimes or always?
    # removed for now since it fires on posts without u-id or u-url, eg
    # https://chrisbeckstrom.com/2018/12/27/32551/
    # assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

    activity['id'] = redirect_wrap(activity.get('id'))
    activity['url'] = redirect_wrap(activity.get('url'))

    # copy image(s) into attachment(s). may be Mastodon-specific.
    # https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
    obj_or_activity = obj if isinstance(obj, dict) else activity
    obj_or_activity.setdefault('attachment',
                               []).extend(obj_or_activity.get('image', []))

    # cc public and target's author(s) and recipients
    # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
    # https://w3c.github.io/activitypub/#delivery
    if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
        recips = [AS2_PUBLIC_AUDIENCE]
        if target:
            recips += itertools.chain(*(util.get_list(target, field)
                                        for field in ('actor', 'attributedTo',
                                                      'to', 'cc')))
        activity['cc'] = util.dedupe_urls(
            util.get_url(recip) or recip.get('id') for recip in recips)

    # wrap articles and notes in a Create activity
    if type in ('Article', 'Note'):
        activity = {
            '@context': as2.CONTEXT,
            'type': 'Create',
            'id': f'{activity["id"]}#bridgy-fed-create',
            'object': activity,
        }

    return util.trim_nulls(activity)
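
An illustrative input/output sketch (not from the original source), assuming redirect_wrap() and the module constants behave as in the code above.

# A bare AS2 Note: its id defaults to its url, the public audience is added
# to cc, and the whole thing is wrapped in a Create activity.
note = {
    'type': 'Note',
    'url': 'https://example.com/2019/01/hello',
    'content': 'hello world',
}
create = postprocess_as2(note)
# create['type'] == 'Create'
# create['id'].endswith('#bridgy-fed-create')
# create['object']['cc'] includes AS2_PUBLIC_AUDIENCE
print(create)
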
Example #10
def send_webmentions(activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.
    Args:
      activity_wrapped: dict, AS1 activity
      response_props: passed through to the newly created Responses
    """
    activity = redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(f'{verb} activities are not supported yet.')

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(request.host_url):
                targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error("Couldn't find original post URL")
    if not targets:
        error(
            "Couldn't find any target URLs in inReplyTo, object, or mention tags"
        )

    # send webmentions and store Responses
    errors = []  # stores (code, body) tuples
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info(
                f'Skipping same-domain webmention from {source} to {target}')
            continue

        response = Response(source=source,
                            target=target,
                            direction='in',
                            **response_props)
        response.put()
        wm_source = (response.proxy_url() if
                     verb in ('follow', 'like', 'share') or proxy else source)
        logging.info(f'Sending webmention from {wm_source} to {target}')

        try:
            endpoint = webmention.discover(target, headers=HEADERS).endpoint
            if endpoint:
                webmention.send(endpoint, wm_source, target, headers=HEADERS)
                response.status = 'complete'
                logging.info('Success!')
            else:
                response.status = 'ignored'
                logging.info('Ignoring.')
        except BaseException as e:
            errors.append(util.interpret_http_exception(e))
        response.put()

    if errors:
        msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
        error(msg, status=int(errors[0][0] or 502))
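
A hedged sketch of an inbox delivery this version handles (not from the original source): a Follow. It assumes a Flask request context and the Response model used above; the URLs are illustrative.

# For 'follow' (like 'like' and 'share'), the object URL itself becomes the
# webmention target, and the Response proxy URL is used as the source.
follow = {
    'objectType': 'activity',
    'verb': 'follow',
    'id': 'https://mastodon.example/users/bob#follows/123',
    'actor': 'https://mastodon.example/users/bob',
    'object': 'https://indieweb.example/',
}
send_webmentions(follow, proxy=True, protocol='activitypub')
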
Example #11
def from_activities(activities,
                    actor=None,
                    title=None,
                    feed_url=None,
                    home_page_url=None,
                    hfeed=None):
    """Converts ActivityStreams activities to an RSS 2.0 feed.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: string, the URL for this RSS feed
      home_page_url: string, the home page URL
      hfeed: dict, parsed mf2 h-feed, if available

    Returns:
      unicode string with RSS 2.0 XML
    """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, str)):
        raise TypeError('activities may not be a dict or string')

    fg = FeedGenerator()
    fg.id(feed_url)
    assert feed_url
    fg.link(href=feed_url, rel='self')
    if home_page_url:
        fg.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    fg.language('en')
    fg.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}
    image = (util.get_url(hfeed.get('properties', {}), 'photo')
             or util.get_url(actor, 'image'))
    if image:
        fg.image(image)

    props = hfeed.get('properties') or {}
    content = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    desc = content or summary or '-'
    fg.description(desc)  # required
    fg.title(title or util.ellipsize(desc))  # required

    latest = None
    feed_has_enclosure = False
    for activity in activities:
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue

        item = fg.add_entry()
        url = obj.get('url')
        id = obj.get('id') or url
        item.id(id)
        item.link(href=url)
        item.guid(url, permalink=True)

        # title (required)
        title = (obj.get('title') or obj.get('displayName')
                 or util.ellipsize(obj.get('content', '-')))
        # strip HTML tags
        title = util.parse_html(title).get_text('').strip()
        item.title(title)

        content = microformats2.render_content(obj,
                                               include_location=True,
                                               render_attachments=True,
                                               render_image=True)
        if not content:
            content = obj.get('summary')
        if content:
            item.content(content, type='CDATA')

        categories = [
            {
                'term': t['displayName']
            } for t in obj.get('tags', [])
            if t.get('displayName') and t.get('verb') not in ('like', 'react',
                                                              'share')
            and t.get('objectType') not in ('article', 'person', 'mention')
        ]
        item.category(categories)

        author = obj.get('author', {})
        author = {
            'name': author.get('displayName') or author.get('username'),
            'uri': author.get('url'),
            'email': author.get('email') or '-',
        }
        item.author(author)

        published = obj.get('published') or obj.get('updated')
        if published and isinstance(published, str):
            try:
                dt = mf2util.parse_datetime(published)
                if not isinstance(dt, datetime):
                    dt = datetime.combine(dt, time.min)
                if not dt.tzinfo:
                    dt = dt.replace(tzinfo=util.UTC)
                item.published(dt)
                if not latest or dt > latest:
                    latest = dt
            except ValueError:  # bad datetime string
                pass

        item_has_enclosure = False
        for att in obj.get('attachments', []):
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            url = stream.get('url') or ''
            mime = mimetypes.guess_type(url)[0] or ''
            if (att.get('objectType') in ENCLOSURE_TYPES
                    or mime and mime.split('/')[0] in ENCLOSURE_TYPES):
                if item_has_enclosure:
                    logging.info(
                        'Warning: item %s already has an RSS enclosure, skipping additional enclosure %s',
                        id, url)
                    continue

                item_has_enclosure = feed_has_enclosure = True
                item.enclosure(url=url,
                               type=mime,
                               length=str(stream.get('size', '')))
                item.load_extension('podcast')
                duration = stream.get('duration')
                if duration:
                    item.podcast.itunes_duration(duration)

    if feed_has_enclosure:
        fg.load_extension('podcast')
        fg.podcast.itunes_author(
            actor.get('displayName') or actor.get('username'))
        if summary:
            fg.podcast.itunes_summary(summary)
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_block(False)
        name = author.get('name')
        if name:
            fg.podcast.itunes_author(name)
        if image:
            fg.podcast.itunes_image(image)
        fg.podcast.itunes_category(categories)

    if latest:
        fg.lastBuildDate(latest)

    return fg.rss_str(pretty=True).decode('utf-8')
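
A usage sketch (not from the original source): convert a single AS1 activity into an RSS 2.0 feed. The URLs and values are illustrative; feed_url is required by the assertion above.

activities = [{
    'object': {
        'objectType': 'article',
        'id': 'https://example.com/2019/01/hello',
        'url': 'https://example.com/2019/01/hello',
        'displayName': 'Hello world',
        'content': '<p>First post!</p>',
        'published': '2019-01-15T12:00:00+00:00',
        'author': {'displayName': 'Alice', 'url': 'https://example.com/'},
    },
}]

print(from_activities(activities,
                      title='Example feed',
                      feed_url='https://example.com/feed.rss',
                      home_page_url='https://example.com/'))
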
Example #12
def postprocess_as2(activity, target=None, key=None):
    """Prepare an AS2 object to be served or sent via ActivityPub.

    Args:
      activity: dict, AS2 object or activity
      target: dict, AS2 object, optional. The target of activity's inReplyTo or
        Like/Announce/etc object, if any.
      key: MagicKey, optional. populated into publicKey field if provided.
    """
    type = activity.get('type')

    # actor objects
    if type == 'Person':
        postprocess_as2_actor(activity)
        if not activity.get('publicKey'):
            # underspecified, inferred from this issue and Mastodon's implementation:
            # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
            # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
            activity['publicKey'] = {
                'publicKeyPem': key.public_pem(),
            }
        return activity

    for actor in (util.get_list(activity, 'attributedTo') +
                  util.get_list(activity, 'actor')):
        postprocess_as2_actor(actor)

    # inReplyTo: singly valued, prefer id over url
    target_id = target.get('id') if target else None
    in_reply_to = activity.get('inReplyTo')
    if in_reply_to:
        if target_id:
            activity['inReplyTo'] = target_id
        elif isinstance(in_reply_to, list):
            if len(in_reply_to) > 1:
                logging.warning(
                    "AS2 doesn't support multiple inReplyTo URLs! "
                    'Only using the first: %s' % in_reply_to[0])
            activity['inReplyTo'] = in_reply_to[0]

    # activity objects (for Like, Announce, etc): prefer id over url
    obj = activity.get('object', {})
    if obj:
        if isinstance(obj, dict) and not obj.get('id'):
            obj['id'] = target_id or obj.get('url')
        elif target_id and obj != target_id:
            activity['object'] = target_id

    # id is required for most things. default to url if it's not set.
    if not activity.get('id'):
        activity['id'] = activity.get('url')

    assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

    # cc public and target's author(s) and recipients
    # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
    # https://w3c.github.io/activitypub/#delivery
    if type in as2.TYPE_TO_VERB or type in ('Article', 'Note'):
        recips = [AS2_PUBLIC_AUDIENCE]
        if target:
            recips += itertools.chain(*(util.get_list(target, field) for field in
                                        ('actor', 'attributedTo', 'to', 'cc')))
        activity['cc'] = util.dedupe_urls(util.get_url(recip) for recip in recips)

    # wrap articles and notes in a Create activity
    if type in ('Article', 'Note'):
        activity = {
            '@context': as2.CONTEXT,
            'type': 'Create',
            'object': activity,
        }

    return util.trim_nulls(activity)
Example #13
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False, render_image=False,
                   white_space_pre=True):
  """Renders the content of an ActivityStreams object as HTML.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then rendered
  in json_to_html.)

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: boolean, whether to render location, if provided
    synthesize_content: boolean, whether to generate synthetic content if the
      object doesn't have its own, e.g. 'likes this.' or 'shared this.'
    render_attachments: boolean, whether to render attachments, eg links,
      images, audio, and video
    render_image: boolean, whether to render the object's image(s)
    white_space_pre: boolean, whether to wrap in CSS white-space: pre. If False,
      newlines will be converted to <br> tags instead. Background:
      https://indiewebcamp.com/note#Indieweb_whitespace_thinking

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    id = t.get('id')
    if id and id in seen_ids:
      continue
    seen_ids.add(id)

    if 'startIndex' in t and 'length' in t and 'url' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = util.WideUnicode(content)
    content = util.WideUnicode('')
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content = util.WideUnicode('%s%s<a href="%s">%s</a>' % (
        content, orig[last_end:start], tag['url'], orig[start:end]))
      last_end = end

    content += orig[last_end:]

  # is whitespace in this content meaningful? standard heuristic: if there are
  # no HTML tags in it, and it has a newline, then assume yes.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  # https://github.com/snarfed/granary/issues/80
  if content and not obj.get('content_is_html') and '\n' in content:
    if white_space_pre:
      content = '<div style="white-space: pre">%s</div>' % content
    else:
      content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/tests/test_util.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # the image field. may be multiply valued.
  rendered_urls = set()
  if render_image:
    urls = get_urls(obj, 'image')
    content += _render_attachments([{
      'objectType': 'image',
      'image': {'url': url},
    } for url in urls], obj)
    rendered_urls = set(urls)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  if render_attachments:
    atts = [a for a in obj.get('attachments', [])
            if a.get('objectType') not in ('note', 'article')
            and get_url(a, 'image') not in rendered_urls]
    content += _render_attachments(atts + tags.pop('article', []), obj)

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in (
      ('favorite', 'Favorites'), ('like', 'Likes'), ('share', 'Shared')):
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())

      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        if obj_type == 'share' and 'url' in obj and re.search(
                r'^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content,
                                  white_space_pre=white_space_pre)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  if render_attachments and obj.get('verb') == 'share':
    atts = [att for att in itertools.chain.from_iterable(
              o.get('attachments', []) for o in util.get_list(obj, 'object'))
            if att.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts, obj)

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n<p>%s</p>' % hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for type in 'like', 'share', 'react', 'person':
    tags.pop(type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention', visible=False)
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
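
A usage sketch (not from the original source): render an AS1 note with an inline mention tag and a hashtag. The startIndex/length values point at the '@alice' substring in the content.

note = {
  'objectType': 'note',
  'content': 'Great writeup by @alice on #indieweb stuff',
  'tags': [{
    'objectType': 'mention',
    'url': 'https://a.example/alice',
    'startIndex': 17,
    'length': 6,
  }, {
    'objectType': 'hashtag',
    'displayName': 'indieweb',
  }],
}
print(render_content(note))
# the mention is linkified in place; the hashtag is rendered as a p-category tag
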
Example #14
def send_webmentions(handler, activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.
    Args:
      handler: RequestHandler
      activity_wrapped: dict, AS1 activity
      response_props: passed through to the newly created Responses
    """
    activity = common.redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(handler, '%s activities are not supported yet.' % verb)

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(appengine_config.HOST_URL):
                targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error(handler, "Couldn't find original post URL")
    if not targets:
        error(handler, "Couldn't find any target URLs in inReplyTo, object, or mention tags")

    # send webmentions and store Responses
    errors = []
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info('Skipping same-domain webmention from %s to %s',
                         source, target)
            continue

        response = Response(source=source, target=target, direction='in',
                            **response_props)
        response.put()
        wm_source = (response.proxy_url()
                     if verb in ('follow', 'like', 'share') or proxy
                     else source)
        logging.info('Sending webmention from %s to %s', wm_source, target)

        wm = send.WebmentionSend(wm_source, target)
        if wm.send(headers=HEADERS):
            logging.info('Success: %s', wm.response)
            response.status = 'complete'
        else:
            logging.warning('Failed: %s', wm.error)
            errors.append(wm.error)
            response.status = 'error'
        response.put()

    if errors:
        msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
        error(handler, msg, status=errors[0].get('http_status'))
Example #15
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
  """Converts ActivityStreams activities to an RSS 2.0 feed.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    feed_url: string, the URL for this RSS feed
    home_page_url: string, the home page URL
    hfeed: dict, parsed mf2 h-feed, if available

  Returns:
    unicode string with RSS 2.0 XML
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  if isinstance(activities, (dict, basestring)):
    raise TypeError('activities may not be a dict or string')

  fg = FeedGenerator()
  fg.id(feed_url)
  assert feed_url
  fg.link(href=feed_url, rel='self')
  if home_page_url:
    fg.link(href=home_page_url, rel='alternate')
  # TODO: parse language from lang attribute:
  # https://github.com/microformats/mf2py/issues/150
  fg.language('en')
  fg.generator('granary', uri='https://granary.io/')

  hfeed = hfeed or {}
  actor = actor or {}
  image = util.get_url(hfeed, 'image') or util.get_url(actor, 'image')
  if image:
    fg.image(image)

  props = hfeed.get('properties') or {}
  content = microformats2.get_text(util.get_first(props, 'content', ''))
  summary = util.get_first(props, 'summary', '')
  desc = content or summary or '-'
  fg.description(desc)  # required
  fg.title(title or util.ellipsize(desc))  # required

  latest = None
  enclosures = False
  for activity in activities:
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue

    item = fg.add_entry()
    url = obj.get('url')
    item.id(obj.get('id') or url)
    item.link(href=url)
    item.guid(url, permalink=True)

    item.title(obj.get('title') or obj.get('displayName') or '-')  # required
    content = microformats2.render_content(
      obj, include_location=True, render_attachments=False) or obj.get('summary')
    if content:
      item.content(content, type='CDATA')

    item.category(
      [{'term': t['displayName']} for t in obj.get('tags', [])
       if t.get('displayName') and t.get('verb') not in ('like', 'react', 'share')])

    author = obj.get('author', {})
    item.author({
      'name': author.get('displayName') or author.get('username'),
      'uri': author.get('url'),
    })

    published = obj.get('published') or obj.get('updated')
    if published:
      try:
        dt = mf2util.parse_datetime(published)
        if not isinstance(dt, datetime):
          dt = datetime.combine(dt, time.min)
        if not dt.tzinfo:
          dt = dt.replace(tzinfo=util.UTC)
        item.published(dt)
        if not latest or dt > latest:
          latest = dt
      except ValueError:  # bad datetime string
        pass


    for att in obj.get('attachments', []):
      stream = util.get_first(att, 'stream') or att
      if not stream:
        continue

      url = stream.get('url') or ''
      mime = mimetypes.guess_type(url)[0] or ''
      if (att.get('objectType') in ENCLOSURE_TYPES or
          mime and mime.split('/')[0] in ENCLOSURE_TYPES):
        enclosures = True
        item.enclosure(url=url, type=mime, length='REMOVEME') # TODO: length (bytes)

        item.load_extension('podcast')
        duration = stream.get('duration')
        if duration:
          item.podcast.itunes_duration(duration)

  if enclosures:
    fg.load_extension('podcast')
    fg.podcast.itunes_author(actor.get('displayName') or actor.get('username'))
    if summary:
      fg.podcast.itunes_summary(summary)
    fg.podcast.itunes_explicit('no')
    fg.podcast.itunes_block(False)

  if latest:
    fg.lastBuildDate(latest)

  return fg.rss_str(pretty=True).decode('utf-8').replace(' length="REMOVEME"', '')