Exemple #1
0
def send_webmentions(handler, activity, **response_props):
    """Delivers webmentions for an inbound Salmon slap or ActivityPub activity.

    The webmention source is the activity's url or id, falling back to its
    object's. Targets are the inReplyTo values of the activity and of its
    object, plus the object itself for likes and shares. One Response is stored
    per target, and its status is updated once the webmention has been sent.

    Args:
      handler: RequestHandler
      activity: dict, AS1 activity
      response_props: passed through to the newly created Responses
    """
    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(handler, '%s activities are not supported yet.' % verb)

    # likes and shares target the object itself and are sent from a proxy URL
    is_like_or_share = verb in ('like', 'share')

    # work out the source and the candidate targets
    source = activity.get('url') or activity.get('id')
    inner = activity.get('object')
    inner_url = util.get_url(inner)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(inner, dict):
        source = source or inner_url or inner.get('id')
        targets.extend(util.get_list(inner, 'inReplyTo'))
    if is_like_or_share:
        targets.append(inner_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error(handler, "Couldn't find original post URL")
    if not targets:
        error(handler, "Couldn't find target URLs (inReplyTo or object)")

    # deliver one webmention per non-empty target, recording a Response for each
    failures = []
    for target in filter(None, targets):
        response = Response(source=source, target=target, direction='in',
                            **response_props)
        response.put()

        if is_like_or_share:
            wm_source = response.proxy_url()
        else:
            wm_source = source
        logging.info('Sending webmention from %s to %s', wm_source, target)

        wm = send.WebmentionSend(wm_source, target)
        if not wm.send(headers=HEADERS):
            logging.warning('Failed: %s', wm.error)
            failures.append(wm.error)
            response.status = 'error'
        else:
            logging.info('Success: %s', wm.response)
            response.status = 'complete'
        response.put()

    if failures:
        dumped = [json.dumps(failure, indent=2) for failure in failures]
        msg = 'Errors:\n' + '\n'.join(dumped)
        error(handler, msg, status=failures[0].get('http_status'))
Exemple #2
0
    def append_in_reply_to(before, after):
        """Merges before's inReplyTo values into after's, modifying after in place.

        Each argument's inner 'object' is used when it has one, otherwise the
        argument itself. after's own inReplyTos come first in the merged list,
        which is passed through util.dedupe_urls. Nothing happens if either
        side is empty.

        Args:
          before, after: dicts, ActivityStreams activities or objects
        """
        source_obj = before.get('object', before)
        dest_obj = after.get('object', after)
        if not (source_obj and dest_obj):
            return

        from_before = util.get_list(source_obj, 'inReplyTo')
        from_after = util.get_list(dest_obj, 'inReplyTo')
        dest_obj['inReplyTo'] = util.dedupe_urls(from_after + from_before)
Exemple #3
0
    def base_object(self, obj):
        """Returns the 'base' Mastodon object that an object operates on.

        If the object is a reply, boost, or favorite of a Mastodon post - on any
        instance - this returns that post object. The id in the returned object
        is the id of that remote post *on the local instance*. (As a Mastodon
        style id, ie an integer in a string, *not* a tag URI.)

        Uses Mastodon's search API on the local instance to determine whether a
        URL is a Mastodon post, and if it is, to find or generate an id for it
        on the local instance.

        Discovered via https://mastodon.social/@jkreeftmeijer/101245063526942536

        Candidates that have no URL are skipped, as are candidates whose search
        request fails.

        Args:
          obj: ActivityStreams object

        Returns:
          dict, minimal ActivityStreams object. Usually has at least id; may
          also have url, author, etc. Empty if no base object was found.
        """
        for field in ('inReplyTo', 'object', 'target'):
            for base in util.get_list(obj, field):
                url = util.get_url(base)
                if not url:
                    # without a URL there's nothing to compare against the
                    # local instance or to search for
                    continue

                # first, check if it's on local instance
                if url.startswith(self.instance):
                    return self._postprocess_base_object(base)

                # nope; try mastodon's search API
                try:
                    results = self._get(API_SEARCH,
                                        params={
                                            'q': url,
                                            'resolve': True
                                        })
                except requests.RequestException as e:
                    # best effort: a failed search just means this candidate
                    # isn't usable, so log why and move on to the next one
                    logging.info("%s URL %s doesn't look like Mastodon: %s",
                                 field, url, e)
                    continue

                for status in results.get('statuses', []):
                    if url in (status.get('url'), status.get('uri')):
                        # found it! use the local instance's id for the post
                        found = self.status_to_object(status)
                        found['id'] = status['id']
                        return self._postprocess_base_object(found)

        return {}
Exemple #4
0
  def base_object(self, obj):
    """Finds the 'base' silo object that the given object responds to.

    For example, for a comment this is the post it comments on, and for an RSVP
    it is the event. Candidates are taken from obj's inReplyTo, object, and
    target fields, in that order; the first one whose tag URI id - or, failing
    that, URL - is on this silo's domain is copied and returned. The id in the
    returned object is silo-specific, ie not a tag URI.

    Subclasses may override this.

    Args:
      obj: ActivityStreams object

    Returns:
      dict, minimal ActivityStreams object. Usually has at least id; may
      also have url, author, etc. Empty if no candidate is on this silo.
    """
    # in-reply-tos come first, then objects (for likes and reposts). the
    # ActivityStreams 'object' field is technically always singular, but
    # microformats2.json_to_object() sometimes returns a list there, e.g. for
    # likes or reposts of multiple objects.
    candidates = [candidate
                  for field in ('inReplyTo', 'object', 'target')
                  for candidate in util.get_list(obj, field)]

    for candidate in candidates:
      tag = util.parse_tag_uri(candidate.get('id', ''))
      if tag:
        domain = tag[0]
      else:
        domain = util.domain_from_link(candidate.get('url', ''))
      if domain == self.DOMAIN:
        # copy so that the caller's object is left untouched
        base = copy.deepcopy(candidate)
        break
    else:
      return {}

    # convert the id to its silo-specific form
    silo_id = base.get('id')
    if silo_id:
      tag = util.parse_tag_uri(silo_id)
      if tag:
        base['id'] = tag[1]
    else:
      link = base.get('url')
      if link:
        base['id'] = self.base_id(link)

    return base
Exemple #5
0
  def base_object(self, obj):
    """Finds the 'base' silo object that the given object responds to.

    For example, for a comment this is the post it comments on, and for an RSVP
    it is the event. Candidates are taken from obj's inReplyTo, object, and
    target fields, in that order; the first one whose tag URI id - or, failing
    that, URL - is on this silo's domain is copied and returned. The id in the
    returned object is silo-specific, ie not a tag URI.

    Subclasses may override this.

    Args:
      obj: ActivityStreams object

    Returns:
      dict, minimal ActivityStreams object. Usually has at least id; may
      also have url, author, etc. Empty if no candidate is on this silo.
    """
    # in-reply-tos come first, then objects (for likes and reposts). the
    # ActivityStreams 'object' field is technically always singular, but
    # microformats2.json_to_object() sometimes returns a list there, e.g. for
    # likes or reposts of multiple objects.
    candidates = [candidate
                  for field in ('inReplyTo', 'object', 'target')
                  for candidate in util.get_list(obj, field)]

    for candidate in candidates:
      tag = util.parse_tag_uri(candidate.get('id', ''))
      if tag:
        domain = tag[0]
      else:
        domain = util.domain_from_link(candidate.get('url', ''))
      if domain == self.DOMAIN:
        # copy so that the caller's object is left untouched
        base = copy.deepcopy(candidate)
        break
    else:
      return {}

    # convert the id to its silo-specific form
    silo_id = base.get('id')
    if silo_id:
      tag = util.parse_tag_uri(silo_id)
      if tag:
        base['id'] = tag[1]
    else:
      link = base.get('url')
      if link:
        base['id'] = self.base_id(link)

    return base
Exemple #6
0
def to_as1(obj, use_type=True):
  """Translates an ActivityStreams 2 activity or object into ActivityStreams 1.

  Falsy input becomes an empty dict and a bare string becomes {'url': string}.
  Nested actors, objects, attachments, tags, images, locations, and authors are
  converted recursively. The input is deep copied, not modified.

  Args:
    obj: dict, AS2 activity or object
    use_type: boolean, whether to include objectType and verb

  Returns: dict, AS1 activity or object

  Raises:
    ValueError: if obj is truthy but is neither a string nor a dict
  """
  if not obj:
    return {}
  if isinstance(obj, str):
    return {'url': obj}
  if not isinstance(obj, dict):
    raise ValueError('Expected dict, got %r' % obj)

  as1 = copy.deepcopy(obj)
  as1.pop('@context', None)
  as2_type = as1.pop('type', None)

  if use_type:
    object_type = TYPE_TO_OBJECT_TYPE.get(as2_type)
    verb = TYPE_TO_VERB.get(as2_type)
    if as1.get('inReplyTo') and object_type in ('note', 'article'):
      # notes and articles that reply to something are comments in AS1
      object_type = 'comment'
    elif verb and not object_type:
      object_type = 'activity'
    as1['objectType'] = object_type
    as1['verb'] = verb

  def url_or_as1(val):
    if isinstance(val, str):
      return {'url': val}
    return to_as1(val)

  def all_to_as1(field):
    return [to_as1(elem) for elem in util.pop_list(as1, field)]

  # icon goes first since e.g. Mastodon uses icon for profile picture and
  # image for featured photo. duplicates are dropped.
  images = []
  for as2_img in util.pop_list(as1, 'icon') + util.pop_list(as1, 'image'):
    converted = to_as1(as2_img, use_type=False)
    if converted not in images:
      images.append(converted)

  # inner objects. a Create's actor is also the author of what it creates.
  inner_objs = all_to_as1('object')
  actor = to_as1(as1.get('actor', {}))
  if as2_type == 'Create':
    for inner in inner_objs:
      inner.setdefault('author', {}).update(actor)
  if len(inner_objs) == 1:
    inner_objs = inner_objs[0]

  display_name = as1.pop('name', None)
  username = as1.pop('preferredUsername', None)
  attachments = all_to_as1('attachment')
  in_reply_to = [url_or_as1(orig) for orig in util.get_list(as1, 'inReplyTo')]
  location = url_or_as1(as1.get('location'))
  tags = all_to_as1('tag')
  as1.update({
    'displayName': display_name,
    'username': username,
    'actor': actor,
    'attachments': attachments,
    'image': images,
    'inReplyTo': in_reply_to,
    'location': location,
    'object': inner_objs,
    'tags': tags,
  })

  # media
  if as2_type in ('Audio', 'Video'):
    parsed = util.parse_iso8601_duration(as1.pop('duration', None))
    seconds = parsed.total_seconds() if parsed else parsed
    as1['stream'] = {
      'url': as1.pop('url', None),
      # file size in bytes. nonstandard, not in AS1 proper
      'size': as1.pop('size', None),
      'duration': seconds or None,
    }
  elif as2_type == 'Mention':
    as1['url'] = as1.pop('href', None)

  # object author. AS1 only has room for one, so extras are dropped.
  authors = util.pop_list(as1, 'attributedTo')
  if authors:
    if len(authors) > 1:
      logging.warning('ActivityStreams 1 only supports single author; '
                      'dropping extra attributedTo values: %s' % authors[1:])
    as1.setdefault('author', {}).update(to_as1(authors[0]))

  return util.trim_nulls(as1)
Exemple #7
0
def to_as1(obj, use_type=True):
    """Translates an ActivityStreams 2 activity or object into ActivityStreams 1.

    Falsy input becomes an empty dict and a bare string becomes
    {'url': string}. Nested actors, objects, attachments, tags, images,
    locations, and authors are converted recursively. The input is deep
    copied, not modified.

    Args:
      obj: dict, AS2 activity or object
      use_type: boolean, whether to include objectType and verb

    Returns: dict, AS1 activity or object

    Raises:
      ValueError: if obj is truthy but is neither a string nor a dict
    """
    if not obj:
        return {}
    if isinstance(obj, basestring):
        return {'url': obj}
    if not isinstance(obj, dict):
        raise ValueError('Expected dict, got %r' % obj)

    as1 = copy.deepcopy(obj)
    as1.pop('@context', None)
    as2_type = as1.pop('type', None)

    if use_type:
        object_type = TYPE_TO_OBJECT_TYPE.get(as2_type)
        verb = TYPE_TO_VERB.get(as2_type)
        if as1.get('inReplyTo') and object_type in ('note', 'article'):
            # notes and articles that reply to something are comments in AS1
            object_type = 'comment'
        elif verb and not object_type:
            object_type = 'activity'
        as1['objectType'] = object_type
        as1['verb'] = verb

    def url_or_as1(val):
        if isinstance(val, basestring):
            return {'url': val}
        return to_as1(val)

    def all_to_as1(field):
        return [to_as1(elem) for elem in util.pop_list(as1, field)]

    # icon goes first since e.g. Mastodon uses icon for profile picture and
    # image for featured photo. duplicates are dropped.
    images = []
    for as2_img in util.pop_list(as1, 'icon') + util.pop_list(as1, 'image'):
        converted = to_as1(as2_img, use_type=False)
        if converted not in images:
            images.append(converted)

    display_name = as1.pop('name', None)
    actor = to_as1(as1.get('actor'))
    attachments = all_to_as1('attachment')
    in_reply_to = [url_or_as1(orig)
                   for orig in util.get_list(as1, 'inReplyTo')]
    location = url_or_as1(as1.get('location'))
    inner_obj = to_as1(as1.get('object'))
    tags = all_to_as1('tag')
    as1.update({
        'displayName': display_name,
        'actor': actor,
        'attachments': attachments,
        'image': images,
        'inReplyTo': in_reply_to,
        'location': location,
        'object': inner_obj,
        'tags': tags,
    })

    # media: AS1 keeps the media URL in a stream object
    if as2_type in ('Audio', 'Video'):
        as1['stream'] = {'url': as1.pop('url', None)}

    # object author. AS1 only has room for one, so extras are dropped.
    authors = util.pop_list(as1, 'attributedTo')
    if authors:
        if len(authors) > 1:
            logging.warning('ActivityStreams 1 only supports single author; '
                            'dropping extra attributedTo values: %s' %
                            authors[1:])
        as1['author'] = to_as1(authors[0])

    return util.trim_nulls(as1)
Exemple #8
0
  def _create(self, obj, preview=None, include_link=False, ignore_formatting=False):
    """Creates or previews creating a tweet, reply tweet, retweet, or favorite.

    https://dev.twitter.com/docs/api/1.1/post/statuses/update
    https://dev.twitter.com/docs/api/1.1/post/statuses/retweet/:id
    https://dev.twitter.com/docs/api/1.1/post/favorites/create

    Args:
      obj: ActivityStreams object
      preview: boolean. Must be passed explicitly as True or False; the default
        None fails the assertion below.
      include_link: boolean. If True, obj's url is passed to _truncate along
        with the content.
      ignore_formatting: boolean, passed through to _content_for_create

    Returns:
      a CreationResult

      If preview is True, the content will be a unicode string HTML
      snippet. If False, it will be a dict with 'id' and 'url' keys
      for the newly created Twitter object.

    Raises:
      ValueError: if this is a reply and no username can be extracted from the
        path of the in-reply-to URL
    """
    assert preview in (False, True)
    type = obj.get('objectType')
    verb = obj.get('verb')

    # the tweet that this object replies to, likes, or retweets, if any
    base_obj = self.base_object(obj)
    base_id = base_obj.get('id')
    base_url = base_obj.get('url')

    is_reply = type == 'comment' or 'inReplyTo' in obj
    image_urls = [image.get('url') for image in util.get_list(obj, 'image')]
    video_url = util.get_first(obj, 'stream', {}).get('url')
    # media is only attached to original tweets and replies
    has_media = (image_urls or video_url) and (type in ('note', 'article') or is_reply)
    lat = obj.get('location', {}).get('latitude')
    lng = obj.get('location', {}).get('longitude')

    # prefer displayName over content for articles
    # NOTE(review): type and base_url were already computed above; the next two
    # lines recompute them, including a second base_object() call.
    type = obj.get('objectType')
    base_url = self.base_object(obj).get('url')
    prefer_content = type == 'note' or (base_url and (type == 'comment'
                                                      or obj.get('inReplyTo')))
    content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                       prefer_name=not prefer_content,
                                       strip_first_video_tag=bool(video_url))
    if not content:
      if type == 'activity':
        # likes and retweets don't need text, so fall back to the verb
        content = verb
      elif has_media:
        # media-only tweets are allowed to have no text
        content = ''
      else:
        return source.creation_result(
          abort=False,  # keep looking for things to publish,
          error_plain='No content text found.',
          error_html='No content text found.')

    if is_reply and base_url:
      # extract username from in-reply-to URL so we can @-mention it, if it's
      # not already @-mentioned, since Twitter requires that to make our new
      # tweet a reply.
      # https://dev.twitter.com/docs/api/1.1/post/statuses/update#api-param-in_reply_to_status_id
      # TODO: this doesn't handle an in-reply-to username that's a prefix of
      # another username already mentioned, e.g. in reply to @foo when content
      # includes @foobar.
      parsed = urlparse.urlparse(base_url)
      # the path starts with '/', so parts[0] is '' and parts[1] is the username
      parts = parsed.path.split('/')
      if len(parts) < 2 or not parts[1]:
        raise ValueError('Could not determine author of in-reply-to URL %s' % base_url)
      mention = '@' + parts[1]
      if mention.lower() not in content.lower():
        content = mention + ' ' + content

      # the embed URL in the preview can't start with mobile. or www., so just
      # hard-code it to twitter.com. index #1 is netloc.
      parsed = list(parsed)
      parsed[1] = self.DOMAIN
      base_url = urlparse.urlunparse(parsed)

    # need a base_url with the tweet id for the embed HTML below. do this
    # *after* checking the real base_url for in-reply-to author username.
    if base_id and not base_url:
      base_url = 'https://twitter.com/-/statuses/' + base_id

    if is_reply and not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to reply to.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/reply">reply to</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
        'link a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    # truncate and ellipsize content if it's over the character
    # count. URLs will be t.co-wrapped, so include that when counting.
    include_url = obj.get('url') if include_link else None
    content = self._truncate(content, include_url, has_media)

    # linkify defaults to Twitter's link shortening behavior
    preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)

    # dispatch on what kind of thing we're publishing. each branch either
    # returns a CreationResult itself (errors and previews) or sets resp for
    # the shared post-processing at the bottom.
    if type == 'activity' and verb == 'like':
      if not base_url:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a tweet to like.',
          error_html='Could not find a tweet to <a href="http://indiewebcamp.com/favorite">favorite</a>. '
          'Check that your post has a like-of link to a Twitter URL or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

      if preview:
        return source.creation_result(
          description='<span class="verb">favorite</span> <a href="%s">'
                      'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
      else:
        data = urllib.urlencode({'id': base_id})
        # the favorite API response is discarded; resp is built by hand and
        # gets its url from base_url below
        self.urlopen(API_POST_FAVORITE, data=data)
        resp = {'type': 'like'}

    elif type == 'activity' and verb == 'share':
      if not base_url:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a tweet to retweet.',
          error_html='Could not find a tweet to <a href="http://indiewebcamp.com/repost">retweet</a>. '
          'Check that your post has a repost-of link to a Twitter URL or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

      if preview:
        return source.creation_result(
          description='<span class="verb">retweet</span> <a href="%s">'
                      'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
      else:
        data = urllib.urlencode({'id': base_id})
        resp = self.urlopen(API_POST_RETWEET % base_id, data=data)
        resp['type'] = 'repost'

    elif type in ('note', 'article') or is_reply:  # a tweet
      content = unicode(content).encode('utf-8')
      data = {'status': content}

      if is_reply:
        description = \
          '<span class="verb">@-reply</span> to <a href="%s">this tweet</a>:\n%s' % (
            base_url, self.embed_post(base_obj))
        data['in_reply_to_status_id'] = base_id
      else:
        description = '<span class="verb">tweet</span>:'

      # a video takes precedence over images; only one kind is attached
      if video_url:
        preview_content += ('<br /><br /><video controls src="%s"><a href="%s">'
                            'this video</a></video>' % (video_url, video_url))
        if not preview:
          ret = self.upload_video(video_url)
          # upload_video may hand back a CreationResult instead of media ids;
          # if so, return it as is
          if isinstance(ret, source.CreationResult):
            return ret
          data['media_ids'] = ret

      elif image_urls:
        num_urls = len(image_urls)
        if num_urls > MAX_MEDIA:
          image_urls = image_urls[:MAX_MEDIA]
          logging.warning('Found %d photos! Only using the first %d: %r',
                          num_urls, MAX_MEDIA, image_urls)
        preview_content += '<br /><br />' + ' &nbsp; '.join(
          '<img src="%s" />' % url for url in image_urls)
        if not preview:
          data['media_ids'] = ','.join(self.upload_images(image_urls))

      # NOTE(review): this is a truthiness check, so a latitude or longitude of
      # 0 is treated as missing
      if lat and lng:
        preview_content += (
          '<div>at <a href="https://maps.google.com/maps?q=%s,%s">'
          '%s, %s</a></div>' % (lat, lng, lat, lng))
        data['lat'] = lat
        data['long'] = lng

      if preview:
        return source.creation_result(content=preview_content, description=description)
      else:
        resp = self.urlopen(API_POST_TWEET, data=urllib.urlencode(data))
        resp['type'] = 'comment' if is_reply else 'post'

    elif (verb and verb.startswith('rsvp-')) or verb == 'invite':
      return source.creation_result(
        abort=True,
        error_plain='Cannot publish RSVPs to Twitter.',
        error_html='This looks like an <a href="http://indiewebcamp.com/rsvp">RSVP</a>. '
        'Publishing events or RSVPs to Twitter is not supported.')

    else:
      return source.creation_result(
        abort=False,
        error_plain='Cannot publish type=%s, verb=%s to Twitter' % (type, verb),
        error_html='Cannot publish type=%s, verb=%s to Twitter' % (type, verb))

    # only reached after a real (non-preview) API call. fill in id and url
    # from the API response, or fall back to the base tweet's URL.
    id_str = resp.get('id_str')
    if id_str:
      resp.update({'id': id_str, 'url': self.tweet_url(resp)})
    elif 'url' not in resp:
      resp['url'] = base_url

    return source.creation_result(resp)
Exemple #9
0
def object_to_json(obj,
                   trim_nulls=True,
                   entry_class='h-entry',
                   default_object_type=None,
                   synthesize_content=True):
    """Converts an ActivityStreams object to microformats2 JSON.

    The input object is not modified.

    Args:
      obj: dict, a decoded JSON ActivityStreams object
      trim_nulls: boolean, whether to remove elements with null or empty values
      entry_class: string or sequence, the mf2 class(es) that entries should be
        given (e.g. 'h-cite' when parsing a reference to a foreign entry).
        defaults to 'h-entry'
      default_object_type: string, the ActivityStreams objectType to use if one
        is not present. defaults to None
      synthesize_content: whether to generate synthetic content if the object
        doesn't have its own, e.g. 'likes this.' or 'shared this.'

    Returns:
      dict, decoded microformats2 JSON. Empty if obj is empty or not a dict.
    """
    if not obj or not isinstance(obj, dict):
        return {}

    obj_type = source.object_type(obj) or default_object_type
    # if the activity type is a post, then it's really just a conduit
    # for the object. for other verbs, the activity itself is the
    # interesting thing
    if obj_type == 'post':
        primary = obj.get('object', {})
        obj_type = source.object_type(primary) or default_object_type
    else:
        primary = obj

    # TODO: extract snippet
    name = primary.get('displayName', primary.get('title'))
    summary = primary.get('summary')
    author = obj.get('author', obj.get('actor', {}))

    # copy the list: it's extended below for RSVPs and reacts, and extending
    # obj's own inReplyTo list in place would change the caller's object and
    # add more entries every time it was converted
    in_reply_tos = list(
        obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', [])))
    is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
    if (is_rsvp or obj_type == 'react') and obj.get('object'):
        # RSVPs and reacts are also rendered as replies to their objects
        objs = obj['object']
        in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

    # maps objectType to list of objects
    attachments = defaultdict(list)
    for prop in 'attachments', 'tags':
        for elem in get_list(primary, prop):
            attachments[elem.get('objectType')].append(elem)

    # construct mf2!
    ret = {
        # NOTE(review): the conditional expression binds more loosely than
        # `or`, so when entry_class is not a string, AS_TO_MF2_TYPE is ignored
        # and the type is just list(entry_class). confirm that's intended.
        'type': (AS_TO_MF2_TYPE.get(obj_type) or [entry_class] if isinstance(
            entry_class, basestring) else list(entry_class)),
        'properties': {
            'uid': [obj.get('id') or ''],
            'numeric-id': [obj.get('numeric_id') or ''],
            'name': [name],
            'nickname': [obj.get('username') or ''],
            'summary': [summary],
            'url': (list(object_urls(obj) or object_urls(primary)) +
                    obj.get('upstreamDuplicates', [])),
            'photo':
            dedupe_urls(
                get_urls(attachments, 'image', 'image') +
                get_urls(primary, 'image')),
            'video':
            dedupe_urls(
                get_urls(attachments, 'video', 'stream') +
                get_urls(primary, 'stream')),
            'audio':
            get_urls(attachments, 'audio', 'stream'),
            'published': [obj.get('published', primary.get('published', ''))],
            'updated': [obj.get('updated', primary.get('updated', ''))],
            'content': [{
                'value':
                xml.sax.saxutils.unescape(primary.get('content', '')),
                'html':
                render_content(primary,
                               include_location=False,
                               synthesize_content=synthesize_content),
            }],
            'in-reply-to':
            util.trim_nulls([o.get('url') for o in in_reply_tos]),
            'author': [
                object_to_json(author,
                               trim_nulls=False,
                               default_object_type='person')
            ],
            'location': [
                object_to_json(primary.get('location', {}),
                               trim_nulls=False,
                               default_object_type='place')
            ],
            'comment': [
                object_to_json(c, trim_nulls=False, entry_class='h-cite')
                for c in obj.get('replies', {}).get('items', [])
            ],
            'start': [primary.get('startTime')],
            'end': [primary.get('endTime')],
        },
        # note and article attachments become quoted child entries
        'children': [
            object_to_json(a,
                           trim_nulls=False,
                           entry_class=['u-quotation-of', 'h-cite'])
            for a in attachments['note'] + attachments['article']
        ]
    }

    # hashtags and person tags
    tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
    ret['properties']['category'] = []
    for tag in tags:
        if tag.get('objectType') == 'person':
            ret['properties']['category'].append(
                object_to_json(tag, entry_class='u-category h-card'))
        elif tag.get('objectType') == 'hashtag':
            name = tag.get('displayName')
            if name:
                ret['properties']['category'].append(name)

    # rsvp
    if is_rsvp:
        # e.g. 'rsvp-yes' => 'yes'
        ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
    elif obj_type == 'invite':
        invitee = object_to_json(obj.get('object'),
                                 trim_nulls=False,
                                 default_object_type='person')
        ret['properties']['invitee'] = [invitee]

    # like and repost mentions
    for type, prop in ('favorite', 'like'), ('like', 'like'), ('share',
                                                               'repost'):
        if obj_type == type:
            # The ActivityStreams spec says the object property should always be a
            # single object, but it's useful to let it be a list, e.g. when a like has
            # multiple targets, e.g. a like of a post with original post URLs in it,
            # which brid.gy does.
            objs = get_list(obj, 'object')
            ret['properties'][prop + '-of'] = [
                # flatten contexts that are just a url
                o['url']
                if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
                else object_to_json(o, trim_nulls=False, entry_class='h-cite')
                for o in objs
            ]
        else:
            # received likes and reposts
            ret['properties'][prop] = [
                object_to_json(t, trim_nulls=False, entry_class='h-cite')
                for t in tags if source.object_type(t) == type
            ]

    # latitude & longitude. an ISO 6709 position string takes precedence over
    # separate latitude and longitude fields.
    lat = long = None
    position = ISO_6709_RE.match(primary.get('position') or '')
    if position:
        lat, long = position.groups()
    if not lat:
        lat = primary.get('latitude')
    if not long:
        long = primary.get('longitude')

    if lat:
        ret['properties']['latitude'] = [str(lat)]
    if long:
        ret['properties']['longitude'] = [str(long)]

    if trim_nulls:
        ret = util.trim_nulls(ret)
    return ret
Exemple #10
0
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False, render_image=False,
                   white_space_pre=True):
  """Renders the content of an ActivityStreams object as HTML.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then rendered
  in json_to_html.)

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: boolean, whether to render location, if provided
    synthesize_content: boolean, whether to generate synthetic content if the
      object doesn't have its own, e.g. 'likes this.' or 'shared this.'
    render_attachments: boolean, whether to render attachments, eg links,
      images, audio, and video
    render_image: boolean, whether to render the object's image(s)
    white_space_pre: boolean, whether to wrap in CSS white-space: pre. If False,
      newlines will be converted to <br> tags instead. Background:
      https://indiewebcamp.com/note#Indieweb_whitespace_thinking

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once. tags with a startIndex, length, and url are inline mentions that get
  # linkified inside the content; everything else is grouped by object type.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    tag_id = t.get('id')
    if tag_id and tag_id in seen_ids:
      continue
    seen_ids.add(tag_id)

    if 'startIndex' in t and 'length' in t and 'url' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = util.WideUnicode(content)
    content = util.WideUnicode('')
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content = util.WideUnicode('%s%s<a href="%s">%s</a>' % (
        content, orig[last_end:start], tag['url'], orig[start:end]))
      last_end = end

    content += orig[last_end:]

  # is whitespace in this content meaningful? standard heuristic: if there are
  # no HTML tags in it, and it has a newline, then assume yes.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  # https://github.com/snarfed/granary/issues/80
  if content and not obj.get('content_is_html') and '\n' in content:
    if white_space_pre:
      content = '<div style="white-space: pre">%s</div>' % content
    else:
      content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/tests/test_util.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # the image field. may be multiply valued.
  rendered_urls = set()
  if render_image:
    urls = get_urls(obj, 'image')
    content += _render_attachments([{
      'objectType': 'image',
      'image': {'url': url},
    } for url in urls], obj)
    rendered_urls = set(urls)

  # attachments, e.g. links (aka articles). skip attachments whose image was
  # already rendered above from the image field.
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  if render_attachments:
    atts = [a for a in obj.get('attachments', [])
            if a.get('objectType') not in ('note', 'article')
            and get_url(a, 'image') not in rendered_urls]
    content += _render_attachments(atts + tags.pop('article', []), obj)

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in (
      ('favorite', 'Favorites'), ('like', 'Likes'), ('share', 'Shared')):
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())

      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's. the pattern is a raw string so that
        # \. is a regex escape, not an (invalid) string escape, and a present
        # but empty url is treated as a non-match instead of raising.
        if obj_type == 'share' and 'url' in obj and re.search(
                r'^https?://(?:www\.|mobile\.)?twitter\.com/',
                obj.get('url') or ''):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content,
                                  white_space_pre=white_space_pre)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  # for shares, also render the non-note/article attachments of the shared
  # object(s)
  if render_attachments and obj.get('verb') == 'share':
    atts = [att for att in itertools.chain.from_iterable(
              o.get('attachments', []) for o in util.get_list(obj, 'object'))
            if att.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts, obj)

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n<p>%s</p>' % hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for tag_type in 'like', 'share', 'react', 'person':
    tags.pop(tag_type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention', visible=False)
  # flatten the remaining per-type tag lists in linear time
  content += tags_to_html(
    list(itertools.chain.from_iterable(tags.values())), 'tag')

  return content
Exemple #11
0
  def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
              ignore_formatting=False):
    """Creates or previews creating a tweet, reply tweet, retweet, or favorite.

    https://dev.twitter.com/docs/api/1.1/post/statuses/update
    https://dev.twitter.com/docs/api/1.1/post/statuses/retweet/:id
    https://dev.twitter.com/docs/api/1.1/post/favorites/create

    Args:
      obj: ActivityStreams object
      preview: boolean. Must be passed explicitly as True or False; the
        assertion at the top of the body rejects the default None.
      include_link: string, passed through to self._truncate()
      ignore_formatting: boolean, passed through to self._content_for_create()

    Returns:
      a CreationResult

      If preview is True, the content will be a unicode string HTML
      snippet. If False, it will be a dict with 'id' and 'url' keys
      for the newly created Twitter object.

    Raises:
      ValueError: if this is a reply and the author can't be extracted from
        the path of the in-reply-to URL
    """
    assert preview in (False, True)
    type = obj.get('objectType')
    verb = obj.get('verb')

    base_obj = self.base_object(obj)
    base_id = base_obj.get('id')
    base_url = base_obj.get('url')

    is_reply = type == 'comment' or 'inReplyTo' in obj
    image_urls = [image.get('url') for image in util.get_list(obj, 'image')]
    video_url = util.get_first(obj, 'stream', {}).get('url')
    # media is only attached to original tweets and replies, not likes/retweets
    has_media = (image_urls or video_url) and (type in ('note', 'article') or is_reply)
    lat = obj.get('location', {}).get('latitude')
    lng = obj.get('location', {}).get('longitude')

    # prefer displayName over content for articles
    # NOTE(review): type and base_url were already computed above from the same
    # inputs; this recomputation looks redundant. confirm base_object() has no
    # side effects before removing it.
    type = obj.get('objectType')
    base_url = self.base_object(obj).get('url')
    prefer_content = type == 'note' or (base_url and (type == 'comment'
                                                      or obj.get('inReplyTo')))
    content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                       prefer_name=not prefer_content,
                                       strip_first_video_tag=bool(video_url))
    if not content:
      if type == 'activity':
        # likes and retweets don't need text; fall back to the verb
        content = verb
      elif has_media:
        # media-only tweets are allowed to have empty text
        content = ''
      else:
        return source.creation_result(
          abort=False,  # keep looking for things to publish,
          error_plain='No content text found.',
          error_html='No content text found.')

    if is_reply and base_url:
      # Twitter *used* to require replies to include an @-mention of the
      # original tweet's author
      # https://dev.twitter.com/docs/api/1.1/post/statuses/update#api-param-in_reply_to_status_id
      # ...but now we use the auto_populate_reply_metadata query param instead:
      # https://dev.twitter.com/overview/api/upcoming-changes-to-tweets

      # the embed URL in the preview can't start with mobile. or www., so just
      # hard-code it to twitter.com. index #1 is netloc.
      parsed = urlparse.urlparse(base_url)
      # the first path segment of the in-reply-to URL is taken as the author's
      # username
      parts = parsed.path.split('/')
      if len(parts) < 2 or not parts[1]:
        raise ValueError('Could not determine author of in-reply-to URL %s' % base_url)
      # strip a leading @-mention of that author from the content, since the
      # reply metadata is auto-populated (see above)
      reply_to_prefix = '@%s ' % parts[1].lower()
      if content.lower().startswith(reply_to_prefix):
        content = content[len(reply_to_prefix):]

      parsed = list(parsed)
      parsed[1] = self.DOMAIN
      base_url = urlparse.urlunparse(parsed)

    # need a base_url with the tweet id for the embed HTML below. do this
    # *after* checking the real base_url for in-reply-to author username.
    if base_id and not base_url:
      base_url = 'https://twitter.com/-/statuses/' + base_id

    if is_reply and not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to reply to.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/reply">reply to</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
        'link a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    # truncate and ellipsize content if it's over the character
    # count. URLs will be t.co-wrapped, so include that when counting.
    content = self._truncate(
      content, obj.get('url'), include_link, type)

    # linkify defaults to Twitter's link shortening behavior
    preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)

    # dispatch on what kind of thing we're publishing. each branch either
    # returns a CreationResult directly (errors and previews) or sets resp,
    # which is post-processed at the bottom.
    if type == 'activity' and verb == 'like':
      if not base_url:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a tweet to like.',
          error_html='Could not find a tweet to <a href="http://indiewebcamp.com/favorite">favorite</a>. '
          'Check that your post has a like-of link to a Twitter URL or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

      if preview:
        return source.creation_result(
          description='<span class="verb">favorite</span> <a href="%s">'
                      'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
      else:
        data = urllib.urlencode({'id': base_id})
        # the favorite API response is discarded; resp only carries the type,
        # and its url is filled in with base_url at the bottom
        self.urlopen(API_POST_FAVORITE, data=data)
        resp = {'type': 'like'}

    elif type == 'activity' and verb == 'share':
      if not base_url:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a tweet to retweet.',
          error_html='Could not find a tweet to <a href="http://indiewebcamp.com/repost">retweet</a>. '
          'Check that your post has a repost-of link to a Twitter URL or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

      if preview:
        return source.creation_result(
          description='<span class="verb">retweet</span> <a href="%s">'
                      'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
      else:
        data = urllib.urlencode({'id': base_id})
        resp = self.urlopen(API_POST_RETWEET % base_id, data=data)
        resp['type'] = 'repost'

    elif type in ('note', 'article') or is_reply:  # a tweet
      content = unicode(content).encode('utf-8')
      data = {'status': content}

      if is_reply:
        description = \
          '<span class="verb">@-reply</span> to <a href="%s">this tweet</a>:\n%s' % (
            base_url, self.embed_post(base_obj))
        data.update({
          'in_reply_to_status_id': base_id,
          'auto_populate_reply_metadata': 'true',
        })
      else:
        description = '<span class="verb">tweet</span>:'

      # a video takes precedence over images; only one kind of media is attached
      if video_url:
        preview_content += ('<br /><br /><video controls src="%s"><a href="%s">'
                            'this video</a></video>' % (video_url, video_url))
        if not preview:
          ret = self.upload_video(video_url)
          # upload helpers return a CreationResult on failure; pass it through
          if isinstance(ret, source.CreationResult):
            return ret
          data['media_ids'] = ret

      elif image_urls:
        num_urls = len(image_urls)
        if num_urls > MAX_MEDIA:
          image_urls = image_urls[:MAX_MEDIA]
          logging.warning('Found %d photos! Only using the first %d: %r',
                          num_urls, MAX_MEDIA, image_urls)
        preview_content += '<br /><br />' + ' &nbsp; '.join(
          '<img src="%s" />' % url for url in image_urls)
        if not preview:
          ret = self.upload_images(image_urls)
          # upload helpers return a CreationResult on failure; pass it through
          if isinstance(ret, source.CreationResult):
            return ret
          data['media_ids'] = ','.join(ret)

      if lat and lng:
        preview_content += (
          '<div>at <a href="https://maps.google.com/maps?q=%s,%s">'
          '%s, %s</a></div>' % (lat, lng, lat, lng))
        data['lat'] = lat
        data['long'] = lng

      if preview:
        return source.creation_result(content=preview_content, description=description)
      else:
        resp = self.urlopen(API_POST_TWEET, data=urllib.urlencode(data))
        resp['type'] = 'comment' if is_reply else 'post'

    elif (verb and verb.startswith('rsvp-')) or verb == 'invite':
      return source.creation_result(
        abort=True,
        error_plain='Cannot publish RSVPs to Twitter.',
        error_html='This looks like an <a href="http://indiewebcamp.com/rsvp">RSVP</a>. '
        'Publishing events or RSVPs to Twitter is not supported.')

    else:
      return source.creation_result(
        abort=False,
        error_plain='Cannot publish type=%s, verb=%s to Twitter' % (type, verb),
        error_html='Cannot publish type=%s, verb=%s to Twitter' % (type, verb))

    # only reached after a real (non-preview) API call. normalize the response:
    # use the string id and derive the tweet URL from it when present,
    # otherwise fall back to the URL of the tweet that was acted on.
    id_str = resp.get('id_str')
    if id_str:
      resp.update({'id': id_str, 'url': self.tweet_url(resp)})
    elif 'url' not in resp:
      resp['url'] = base_url

    return source.creation_result(resp)
Exemple #12
0
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  The input object is not modified.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON. Empty if obj is empty or not a dict.
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  # copy the list so that the extend() below doesn't mutate the caller's
  # obj['inReplyTo'] (or context.inReplyTo) in place. otherwise, converting
  # the same RSVP or reaction more than once would accumulate duplicates.
  in_reply_tos = list(
    obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', [])))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    # RSVPs and reactions are also replies to their target object(s)
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
    # NOTE: a conditional expression binds more loosely than `or`, so this
    # parses as (mapped_type or [entry_class]) if <string> else
    # list(entry_class). ie a non-string entry_class always wins over the
    # AS_TO_MF2_TYPE mapping.
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, basestring)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
          'value': xml.sax.saxutils.unescape(primary.get('content', '')),
          'html': render_content(primary, include_location=False,
                                 synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False, entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # photos, including alt text. de-dupe by URL, preserving order.
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        alt = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': alt} if alt else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      tag_name = tag.get('displayName')
      if tag_name:
        ret['properties']['category'].append(tag_name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  # NOTE(review): 'favorite' and 'like' both map to the 'like' property, so
  # in the "received" branch, the later 'like' pass overwrites whatever the
  # earlier 'favorite' pass stored (unless obj itself is a like). confirm
  # whether received favorites are meant to be dropped.
  for as_type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == as_type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == as_type]

  # latitude & longitude. prefer the ISO 6709 position string, then fall back
  # to the separate latitude and longitude fields.
  lat = lng = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, lng = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not lng:
    lng = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if lng:
    ret['properties']['longitude'] = [str(lng)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
Exemple #13
0
  def original_post_discovery(activity, domains=None, cache=None,
                              include_redirect_sources=True, **kwargs):
    """Discovers original post links.

    This is a variation on http://indiewebcamp.com/original-post-discovery . It
    differs in that it finds multiple candidate links instead of one, and it
    doesn't bother looking for MF2 (etc) markup because the silos don't let you
    input it. More background:
    https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

    Original post candidates come from the upstreamDuplicates, attachments, and
    tags fields, as well as links and permashortlinks/permashortcitations in the
    text content.

    Args:
      activity: activity dict
      domains: optional sequence of domains. If provided, only links to these
        domains will be considered original and stored in upstreamDuplicates.
        (Permashortcitations are exempt.)
      include_redirect_sources: boolean, whether to include URLs that redirect
        as well as their final destination URLs
      cache: deprecated, unused
      kwargs: passed to requests.head() when following redirects

    Returns:
      ([string original post URLs], [string mention URLs]) tuple
    """
    obj = activity.get('object') or activity
    content = obj.get('content', '').strip()

    # find all candidate URLs
    tags = [t.get('url') for t in obj.get('attachments', []) + obj.get('tags', [])
            if t.get('objectType') in ('article', 'mention', 'note', None)]
    candidates = (tags + util.extract_links(content) +
                  obj.get('upstreamDuplicates', []) +
                  util.get_list(obj, 'targetUrl'))

    # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
    # references to canonical copies of a given (usually syndicated) post, of
    # the form (DOMAIN PATH). We consider them an explicit original post link.
    candidates += [match.expand(r'http://\1/\2') for match in
                   Source._PERMASHORTCITATION_RE.finditer(content)]

    candidates = set(util.dedupe_urls(
      util.clean_url(url) for url in candidates
      # heuristic: ellipsized URLs are probably incomplete, so omit them.
      if url and not url.endswith('...') and not url.endswith('…')))

    # check for redirect and add their final urls. only redirects that end at
    # an HTML page are recorded.
    redirects = {}  # maps final URL to original URL for redirects
    for url in candidates:
      resolved = util.follow_redirects(url, **kwargs)
      if (resolved.url != url and
          resolved.headers.get('content-type', '').startswith('text/html')):
        redirects[resolved.url] = url

    candidates.update(redirects)

    # build the set of redirecting (source) URLs once, so that the membership
    # check in the loop below is O(1) instead of a scan over redirects.values()
    redirect_sources = set(redirects.values())

    # use domains to determine which URLs are original post links vs mentions
    originals = set()
    mentions = set()
    for url in util.dedupe_urls(candidates):
      if url in redirect_sources:
        # this is a redirected original URL. postpone and handle it when we hit
        # its final URL so that we know the final domain.
        continue
      domain = util.domain_from_link(url)
      which = (originals if not domains or util.domain_or_parent_in(domain, domains)
               else mentions)
      which.add(url)
      redirected_from = redirects.get(url)
      if redirected_from and include_redirect_sources:
        which.add(redirected_from)

    logging.info('Original post discovery found original posts %s, mentions %s',
                 originals, mentions)
    return originals, mentions
Exemple #14
0
def _prepare_activity(a, reader=True):
  """Gets an activity ready to be rendered as an Atom entry.

  Mutates a (and its nested object) in place: adds rendered_content and
  rendered_children to the object, fills in and sanitizes the activity title,
  collapses multiply-valued actor images and attachment streams/images to
  their first value, and normalizes the object's timestamps.

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.
  """
  act_type = source.object_type(a)
  obj = util.get_first(a, 'object', default={})
  # for posts (and untyped activities), the inner object is what gets
  # rendered; for anything else, the activity itself is.
  primary = a if (act_type and act_type != 'post') else obj

  # Render content as HTML; escape &s
  rendered = microformats2.render_content(
    primary, include_location=reader, render_attachments=True)
  obj['rendered_content'] = _encode_ampersands(rendered)

  # Atom <entry> requires a title element, so synthesize one if it's missing.
  if not a.get('title'):
    fallback = (a.get('displayName') or a.get('content') or obj.get('title') or
                obj.get('displayName') or obj.get('content') or 'Untitled')
    a['title'] = util.ellipsize(_encode_ampersands(fallback))

  # the Atom spec says title is plain text, so strip HTML tags:
  # http://atomenabled.org/developers/syndication/#requiredEntryElements
  plain_title = BeautifulSoup(a['title']).get_text('')
  a['title'] = xml.sax.saxutils.escape(plain_title)

  image_atts = []
  image_urls_seen = set()
  children = []

  # normalize actor images to a single value
  for elem in (a, obj):
    actor = elem.get('actor')
    if actor:
      actor['image'] = util.get_first(actor, 'image')

  # normalize attachments, render attached notes/articles
  for att in a.get('attachments') or obj.get('attachments') or []:
    att['stream'] = util.get_first(att, 'stream')
    att_type = att.get('objectType')

    if att_type == 'image':
      # image attachments are collected here and rendered further down
      first_image = util.get_first(att, 'image')
      att['image'] = first_image
      image_atts.append(first_image)
      continue

    image_urls_seen.update(util.get_urls(att, 'image'))
    if att_type not in ('note', 'article'):
      continue

    html = microformats2.render_content(att, include_location=reader,
                                        render_attachments=True)
    author = att.get('author')
    if author:
      author_props = microformats2.object_to_json(author).get('properties') or {}
      name = microformats2.maybe_linked_name(author_props)
      html = '%s: %s' % (name.strip(), html)
    children.append(html)

  # render image(s) that we haven't already seen
  for image in image_atts + util.get_list(obj, 'image'):
    if not image:
      continue

    url = image.get('url')
    parsed = urllib.parse.urlparse(url)
    # everything after the scheme and host, used to match <img> tags whose src
    # is absolute, scheme-relative, or host-relative
    rest = urllib.parse.urlunparse(('', '') + parsed[2:])
    img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
                            (re.escape(parsed.netloc), re.escape(rest)))
    if (not url or url in image_urls_seen or
        img_src_re.search(obj['rendered_content'])):
      continue

    children.append(microformats2.img(url))
    image_urls_seen.add(url)

  obj['rendered_children'] = list(map(_encode_ampersands, children))

  # make sure published and updated are strict RFC 3339 timestamps
  for prop in ('published', 'updated'):
    val = obj.get(prop)
    if not val:
      continue

    obj[prop] = util.maybe_iso8601_to_rfc3339(val)
    # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
    # time zone unaware. They must have either an offset or the Z suffix.
    # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
    if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
      obj[prop] += 'Z'
Exemple #15
0
  def _create(self, obj, preview, include_link=source.OMIT_LINK,
              ignore_formatting=False):
    """Creates or previews creating for the previous two methods.

    https://www.flickr.com/services/api/upload.api.html
    https://www.flickr.com/services/api/flickr.photos.comments.addComment.html
    https://www.flickr.com/services/api/flickr.favorites.add.html
    https://www.flickr.com/services/api/flickr.photos.people.add.html

    Args:
      obj: ActivityStreams object
      preview: boolean
      include_link: string
      ignore_formatting: boolean

    Return:
      a CreationResult
    """
    # photo, comment, or like
    type = source.object_type(obj)
    logging.debug('publishing object type %s to Flickr', type)
    link_text = '(Originally published at: %s)' % obj.get('url')

    image_url = util.get_first(obj, 'image', {}).get('url')
    video_url = util.get_first(obj, 'stream', {}).get('url')
    content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                       strip_first_video_tag=bool(video_url))

    if (video_url or image_url) and type in ('note', 'article'):
      name = obj.get('displayName')
      people = self._get_person_tags(obj)
      hashtags = [t.get('displayName') for t in obj.get('tags', [])
                  if t.get('objectType') == 'hashtag' and t.get('displayName')]
      lat = obj.get('location', {}).get('latitude')
      lng = obj.get('location', {}).get('longitude')

      # if name does not represent an explicit title, then we'll just
      # use it as the title and wipe out the content
      if name and content and not mf2util.is_name_a_title(name, content):
        name = content
        content = None

      # add original post link
      if include_link == source.INCLUDE_LINK:
        content = ((content + '\n\n') if content else '') + link_text

      if preview:
        preview_content = ''
        if name:
          preview_content += '<h4>%s</h4>' % name
        if content:
          preview_content += '<div>%s</div>' % content
        if hashtags:
          preview_content += '<div> %s</div>' % ' '.join('#' + t for t in hashtags)
        if people:
          preview_content += '<div> with %s</div>' % ', '.join(
            ('<a href="%s">%s</a>' % (
              p.get('url'), p.get('displayName') or 'User %s' % p.get('id'))
             for p in people))
        if lat and lng:
          preview_content += '<div> at <a href="https://maps.google.com/maps?q=%s,%s">%s, %s</a></div>' % (lat, lng, lat, lng)

        if video_url:
          preview_content += ('<video controls src="%s"><a href="%s">this video'
                              '</a></video>' % (video_url, video_url))
        else:
          preview_content += '<img src="%s" />' % image_url

        return source.creation_result(content=preview_content, description='post')

      params = []
      if name:
        params.append(('title', name))
      if content:
        params.append(('description', content.encode('utf-8')))
      if hashtags:
        params.append(('tags', ','.join(('"%s"' % t if ' ' in t else t)
                                        for t in hashtags)))

      file = util.urlopen(video_url or image_url)
      try:
        resp = self.upload(params, file)
      except requests.exceptions.ConnectionError as e:
        if str(e.args[0]).startswith('Request exceeds 10 MiB limit'):
          msg = 'Sorry, photos and videos must be under 10MB.'
          return source.creation_result(error_plain=msg, error_html=msg)
        else:
          raise

      photo_id = resp.get('id')
      resp.update({
        'type': 'post',
        'url': self.photo_url(self.path_alias() or self.user_id(), photo_id),
      })
      if video_url:
        resp['granary_message'] = \
          "Note that videos take time to process before they're visible."

      # add person tags
      for person_id in sorted(p.get('id') for p in people):
        self.call_api_method('flickr.photos.people.add', {
          'photo_id': photo_id,
          'user_id': person_id,
        })

      # add location
      if lat and lng:
        self.call_api_method('flickr.photos.geo.setLocation', {
            'photo_id': photo_id,
            'lat': lat,
            'lon': lng,
        })

      return source.creation_result(resp)

    base_obj = self.base_object(obj)
    base_id = base_obj.get('id')
    base_url = base_obj.get('url')

    if type == 'tag':
      if not base_id:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a photo to tag.',
          error_html='Could not find a photo to <a href="http://indiewebcamp.com/tag-reply">tag</a>. '
          'Check that your post has a <a href="http://indiewebcamp.com/https://indieweb.org/tag-reply#How_to_post_a_tag-reply">tag-of</a> '
          'link to a Flickr photo or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')

      tags = sorted(set(util.trim_nulls(t.get('displayName', '').strip()
                                        for t in util.get_list(obj, 'object'))))
      if not tags:
        return source.creation_result(
          abort=True,
          error_plain='No tags found (with p-category) in tag-of post.',
          error_html='No <a href="https://indieweb.org/tags">tags</a> found (with p-category) in <a href="https://indieweb.org/tag-reply#How_to_post_a_tag-reply">tag-of post</a>.')

      if preview:
        return source.creation_result(
          content=content,
          description='add the tag%s %s to <a href="%s">this photo</a>.' %
            ('s' if len(tags) > 1 else '',
             ', '.join('<em>%s</em>' % tag for tag in tags), base_url))

      resp = self.call_api_method('flickr.photos.addTags', {
        'photo_id': base_id,
        # multiply valued fields are space separated. not easy to find in the
        # Flickr API docs, this is the closest I found:
        # https://www.flickr.com/services/api/upload.api.html#yui_3_11_0_1_1606756373916_317
        'tags': ' '.join(tags),
      })
      if not resp:
        resp = {}
      resp.update({
        'type': 'tag',
        'url': '%s#tagged-by-%s' % (base_url, self.user_id()),
        'tags': tags,
      })
      return source.creation_result(resp)

    # maybe a comment on a flickr photo?
    if type == 'comment' or obj.get('inReplyTo'):
      if not base_id:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a photo to comment on.',
          error_html='Could not find a photo to <a href="http://indiewebcamp.com/reply">comment on</a>. '
          'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
          'link to a Flickr photo or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')

      if include_link == source.INCLUDE_LINK:
        content += '\n\n' + link_text
      if preview:
        return source.creation_result(
          content=content,
          description='comment on <a href="%s">this photo</a>.' % base_url)

      resp = self.call_api_method('flickr.photos.comments.addComment', {
        'photo_id': base_id,
        'comment_text': content.encode('utf-8'),
      })
      resp = resp.get('comment', {})
      resp.update({
        'type': 'comment',
        'url': resp.get('permalink'),
      })
      return source.creation_result(resp)

    if type == 'like':
      if not base_id:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a photo to favorite.',
          error_html='Could not find a photo to <a href="http://indiewebcamp.com/like">favorite</a>. '
          'Check that your post has an <a href="http://indiewebcamp.com/like">like-of</a> '
          'link to a Flickr photo or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')
      if preview:
        return source.creation_result(
          description='favorite <a href="%s">this photo</a>.' % base_url)

      # this method doesn't return any data
      self.call_api_method('flickr.favorites.add', {
        'photo_id': base_id,
      })
      # TODO should we canonicalize the base_url (e.g. removing trailing path
      # info like "/in/contacts/")
      return source.creation_result({
        'type': 'like',
        'url': '%s#favorited-by-%s' % (base_url, self.user_id()),
      })

    return source.creation_result(
      abort=False,
      error_plain='Cannot publish type=%s to Flickr.' % type,
      error_html='Cannot publish type=%s to Flickr.' % type)
Exemple #16
0
def postprocess_as2(activity, target=None, key=None):
    """Prepare an AS2 object to be served or sent via ActivityPub.

    Note that ``activity`` (and any actor objects nested inside it) is modified
    in place.

    Args:
      activity: dict, AS2 object or activity
      target: dict, AS2 object, optional. The target of activity's inReplyTo or
        Like/Announce/etc object, if any.
      key: MagicKey, optional. populated into publicKey field if provided.

    Returns:
      dict. For Person objects, the (modified) input itself. Otherwise the
      result of util.trim_nulls() on the processed activity; Articles and Notes
      are wrapped in a Create activity first.
    """
    as2_type = activity.get('type')

    # actor objects
    if as2_type == 'Person':
        postprocess_as2_actor(activity)
        # key is optional, so only populate publicKey when we actually have one
        if key is not None and not activity.get('publicKey'):
            # underspecified, inferred from this issue and Mastodon's implementation:
            # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
            # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
            activity['publicKey'] = {
                'publicKeyPem': key.public_pem(),
            }
        return activity

    for actor in (util.get_list(activity, 'attributedTo') +
                  util.get_list(activity, 'actor')):
        postprocess_as2_actor(actor)

    # inReplyTo: singly valued, prefer id over url
    target_id = target.get('id') if target else None
    in_reply_to = activity.get('inReplyTo')
    if in_reply_to:
        if target_id:
            activity['inReplyTo'] = target_id
        elif isinstance(in_reply_to, list):
            if len(in_reply_to) > 1:
                logging.warning(
                    "AS2 doesn't support multiple inReplyTo URLs! "
                    'Only using the first: %s', in_reply_to[0])
            activity['inReplyTo'] = in_reply_to[0]

    # activity objects (for Like, Announce, etc): prefer id over url
    obj = activity.get('object', {})
    if obj:
        if isinstance(obj, dict) and not obj.get('id'):
            obj['id'] = target_id or obj.get('url')
        elif target_id and obj != target_id:
            # only replace the object when we know the target's id; otherwise
            # we'd overwrite a perfectly good object with None.
            activity['object'] = target_id

    # id is required for most things. default to url if it's not set.
    if not activity.get('id'):
        activity['id'] = activity.get('url')

    assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

    # cc public and target's author(s) and recipients
    # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
    # https://w3c.github.io/activitypub/#delivery
    wrap_in_create = as2_type in ('Article', 'Note')
    if as2_type in as2.TYPE_TO_VERB or wrap_in_create:
        recips = [AS2_PUBLIC_AUDIENCE]
        if target:
            recips += itertools.chain(*(util.get_list(target, field) for field in
                                        ('actor', 'attributedTo', 'to', 'cc')))
        activity['cc'] = util.dedupe_urls(util.get_url(recip) for recip in recips)

    # wrap articles and notes in a Create activity
    if wrap_in_create:
        activity = {
            '@context': as2.CONTEXT,
            'type': 'Create',
            'object': activity,
        }

    return util.trim_nulls(activity)
Exemple #17
0
def send_webmentions(handler, activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

    A Response is stored for every target that a webmention is attempted for,
    and its status is updated to 'complete' or 'error' afterward. Failures are
    collected and reported together via error() once all targets were tried.

    Args:
      handler: RequestHandler
      activity_wrapped: dict, AS1 activity
      proxy: optional. If truthy, the webmention source is always the stored
        Response's proxy URL instead of the original post URL. (Follows, likes,
        and shares use the proxy URL regardless.)
      response_props: passed through to the newly created Responses
    """
    activity = common.redirect_unwrap(activity_wrapped)

    # NOTE(review): the code below assumes error() aborts the request, e.g. by
    # raising - confirm against its definition.
    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(handler, '%s activities are not supported yet.' % verb)

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    # mention tags are read from the still-wrapped activity so that URLs
    # pointing at this host can be recognized, then unwrapped into targets.
    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(appengine_config.HOST_URL):
                targets.append(common.redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error(handler, "Couldn't find original post URL")
    if not targets:
        error(handler, "Couldn't find any target URLs in inReplyTo, object, or mention tags")

    # send webmentions and store Responses
    use_proxy = verb in ('follow', 'like', 'share') or proxy
    errors = []
    for target in targets:
        if not target:
            # e.g. a like/share/follow whose object had no URL
            continue

        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info('Skipping same-domain webmention from %s to %s',
                         source, target)
            continue

        response = Response(source=source, target=target, direction='in',
                            **response_props)
        # stored before sending; proxy_url() is only called after this put()
        response.put()
        wm_source = response.proxy_url() if use_proxy else source
        logging.info('Sending webmention from %s to %s', wm_source, target)

        wm = send.WebmentionSend(wm_source, target)
        if wm.send(headers=HEADERS):
            logging.info('Success: %s', wm.response)
            response.status = 'complete'
        else:
            logging.warning('Failed: %s', wm.error)
            errors.append(wm.error)
            response.status = 'error'
        response.put()

    if errors:
        msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
        error(handler, msg, status=errors[0].get('http_status'))
Exemple #18
0
  def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
              ignore_formatting=False):
    """Creates a new issue or comment.

    When creating a new issue, if the authenticated user is a collaborator on
    the repo, tags that match existing labels are converted to those labels and
    included.

    https://developer.github.com/v4/guides/forming-calls/#about-mutations
    https://developer.github.com/v4/mutation/addcomment/
    https://developer.github.com/v4/mutation/addreaction/
    https://developer.github.com/v3/issues/#create-an-issue

    Args:
      obj: ActivityStreams object
      preview: boolean
      include_link: string
      ignore_formatting: boolean

    Returns:
      a CreationResult

      If preview is True, the contents will be a unicode string HTML
      snippet. If False, it will be a dict with 'id' and 'url' keys
      for the newly created GitHub object.
    """
    assert preview in (False, True)

    type = source.object_type(obj)
    if type and type not in ('issue', 'comment', 'activity', 'note', 'article',
                             'like', 'tag'):
      return source.creation_result(
        abort=False, error_plain='Cannot publish %s to GitHub' % type)

    base_obj = self.base_object(obj)
    base_url = base_obj.get('url')
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='You need an in-reply-to GitHub repo, issue, PR, or comment URL.')

    content = orig_content = html.escape(
      self._content_for_create(obj, ignore_formatting=ignore_formatting),
      quote=False)
    url = obj.get('url')
    if include_link == source.INCLUDE_LINK and url:
      content += '\n\n(Originally published at: %s)' % url

    parsed = urllib.parse.urlparse(base_url)
    path = parsed.path.strip('/').split('/')
    owner, repo = path[:2]
    if len(path) == 4:
      number = path[3]

    comment_id = re.match(r'^issuecomment-([0-9]+)$', parsed.fragment)
    if comment_id:
      comment_id = comment_id.group(1)
    elif parsed.fragment:
      return source.creation_result(
        abort=True,
        error_plain='Please remove the fragment #%s from your in-reply-to URL.' %
          parsed.fragment)

    if type == 'comment':  # comment or reaction
      if not (len(path) == 4 and path[2] in ('issues', 'pull')):
        return source.creation_result(
          abort=True, error_plain='GitHub comment requires in-reply-to issue or PR URL.')

      is_reaction = orig_content in REACTIONS_GRAPHQL
      if preview:
        if comment_id:
          comment = self.rest(REST_API_COMMENT % (owner, repo, comment_id)).json()
          target_link = '<a href="%s">a comment on %s/%s#%s, <em>%s</em></a>' % (
            base_url, owner, repo, number, util.ellipsize(comment['body']))
        else:
          resp = self.graphql(GRAPHQL_ISSUE_OR_PR, locals())
          issue = (resp.get('repository') or {}).get('issueOrPullRequest')
          target_link = '<a href="%s">%s/%s#%s%s</a>' % (
            base_url, owner, repo, number,
            (', <em>%s</em>' % issue['title']) if issue else '')

        if is_reaction:
          preview_content = None
          desc = u'<span class="verb">react %s</span> to %s.' % (
            orig_content, target_link)
        else:
          preview_content = self.render_markdown(content, owner, repo)
          desc = '<span class="verb">comment</span> on %s:' % target_link
        return source.creation_result(content=preview_content, description=desc)

      else:  # create
        # we originally used the GraphQL API to create issue comments and
        # reactions, but it often gets rejected against org repos due to access
        # controls. oddly, the REST API works fine in those same cases.
        # https://github.com/snarfed/bridgy/issues/824
        if is_reaction:
          if comment_id:
            api_url = REST_API_COMMENT_REACTIONS % (owner, repo, comment_id)
            reacted = self.rest(api_url, data={
              'content': REACTIONS_REST.get(orig_content),
            }).json()
            url = base_url
          else:
            api_url = REST_API_REACTIONS % (owner, repo, number)
            reacted = self.rest(api_url, data={
              'content': REACTIONS_REST.get(orig_content),
            }).json()
            url = '%s#%s-by-%s' % (base_url, reacted['content'].lower(),
                                   reacted['user']['login'])

          return source.creation_result({
            'id': reacted.get('id'),
            'url': url,
            'type': 'react',
          })

        else:
          try:
            api_url = REST_API_COMMENTS % (owner, repo, number)
            commented = self.rest(api_url, data={'body': content}).json()
            return source.creation_result({
              'id': commented.get('id'),
              'url': commented.get('html_url'),
              'type': 'comment',
            })
          except ValueError as e:
            return source.creation_result(abort=True, error_plain=str(e))

    elif type == 'like':  # star
      if not (len(path) == 2 or (len(path) == 3 and path[2] == 'issues')):
        return source.creation_result(
          abort=True, error_plain='GitHub like requires in-reply-to repo URL.')

      if preview:
        return source.creation_result(
          description='<span class="verb">star</span> <a href="%s">%s/%s</a>.' %
            (base_url, owner, repo))
      else:
        issue = self.graphql(GRAPHQL_REPO, locals())
        resp = self.graphql(GRAPHQL_ADD_STAR, {
          'starrable_id': issue['repository']['id'],
        })
        return source.creation_result({
          'url': base_url + '/stargazers',
        })

    elif type == 'tag':  # add label
      if not (len(path) == 4 and path[2] in ('issues', 'pull')):
        return source.creation_result(
          abort=True, error_plain='GitHub tag post requires tag-of issue or PR URL.')

      tags = set(util.trim_nulls(t.get('displayName', '').strip()
                                 for t in util.get_list(obj, 'object')))
      if not tags:
        return source.creation_result(
          abort=True, error_plain='No tags found in tag post!')

      existing_labels = self.existing_labels(owner, repo)
      labels = sorted(tags & existing_labels)
      issue_link = '<a href="%s">%s/%s#%s</a>' % (base_url, owner, repo, number)
      if not labels:
        return source.creation_result(
          abort=True,
          error_html="No tags in [%s] matched %s's existing labels [%s]." %
            (', '.join(sorted(tags)), issue_link, ', '.join(sorted(existing_labels))))

      if preview:
        return source.creation_result(
          description='add label%s <span class="verb">%s</span> to %s.' % (
            ('s' if len(labels) > 1 else ''), ', '.join(labels), issue_link))
      else:
        resp = self.rest(REST_API_ISSUE_LABELS % (owner, repo, number), labels).json()
        return source.creation_result({
          'url': base_url,
          'type': 'tag',
          'tags': labels,
        })

    else:  # new issue
      if not (len(path) == 2 or (len(path) == 3 and path[2] == 'issues')):
        return source.creation_result(
          abort=True, error_plain='New GitHub issue requires in-reply-to repo URL')

      title = util.ellipsize(obj.get('displayName') or obj.get('title') or
                             orig_content)
      tags = set(util.trim_nulls(t.get('displayName', '').strip()
                                 for t in util.get_list(obj, 'tags')))
      labels = sorted(tags & self.existing_labels(owner, repo))

      if preview:
        preview_content = '<b>%s</b><hr>%s' % (
          title, self.render_markdown(content, owner, repo))
        preview_labels = ''
        if labels:
          preview_labels = ' and attempt to add label%s <span class="verb">%s</span>' % (
            's' if len(labels) > 1 else '', ', '.join(labels))
        return source.creation_result(content=preview_content, description="""\
<span class="verb">create a new issue</span> on <a href="%s">%s/%s</a>%s:""" %
            (base_url, owner, repo, preview_labels))
      else:
        resp = self.rest(REST_API_CREATE_ISSUE % (owner, repo), {
          'title': title,
          'body': content,
          'labels': labels,
        }).json()
        resp['url'] = resp.pop('html_url')
        return source.creation_result(resp)

    return source.creation_result(
      abort=False,
      error_plain="%s doesn't look like a GitHub repo, issue, or PR URL." % base_url)
Exemple #19
0
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  The input object is not modified.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string, the mf2 class that entries should be given (e.g.
      'h-cite' when parsing a reference to a foreign entry). defaults to
      'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON. Empty if obj is empty or not a dict.
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get(
    'inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  # always work on our own list: it may get extended below, and we don't want
  # that to leak into the caller's object. also tolerate a single object.
  if isinstance(in_reply_tos, dict):
    in_reply_tos = [in_reply_tos]
  else:
    in_reply_tos = list(in_reply_tos or [])

  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    # RSVPs and reactions are rendered as replies to their object(s)
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # TODO: more tags. most will be p-category?
  ret = {
    'type': (['h-card'] if obj_type == 'person'
             else ['h-card', 'p-location'] if obj_type == 'place'
             else [entry_class]),
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      'photo': [image.get('url') for image in
                (util.get_list(obj, 'image') or util.get_list(primary, 'image'))],
      'video': [obj.get('stream', primary.get('stream', {})).get('url')],
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
          'value': xml.sax.saxutils.unescape(primary.get('content', '')),
          'html': render_content(primary, include_location=False,
                                 synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'latitude': primary.get('latitude'),
      'longitude': primary.get('longitude'),
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
    },
    'children': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                 for c in primary.get('attachments', [])
                 if c.get('objectType') in ('note', 'article')],
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or util.get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      cls = 'u-category h-card'
    elif tag.get('objectType') == 'hashtag':
      cls = 'u-category'
    else:
      continue
    ret['properties']['category'].append(object_to_json(tag, entry_class=cls))

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for verb, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == verb:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = util.get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= {'url', 'objectType'}
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == verb]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
Exemple #20
0
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None, reader=True):
  """Renders a list of ActivityStreams activities as an Atom feed.

  The activities are annotated in place before rendering: each one gets an
  'object' dict (if missing) with 'rendered_content' and 'rendered_children',
  a plain text 'title', and attachment images normalized to lists.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed. May be None.
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      Defaults to host_url.
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.

  Returns:
    unicode string with Atom XML
  """
  # Drop query params from the host URL so that access tokens etc don't end up
  # in the feed.
  if host_url:
    host_url = _remove_query_params(host_url)
  else:
    host_url = 'https://github.com/snarfed/granary'
  if request_url is None:
    request_url = host_url

  for activity in activities:
    verb = source.object_type(activity)
    # posts (and untyped activities) are just wrappers around their object;
    # for everything else, the activity itself is what gets rendered.
    if verb and verb != 'post':
      primary = activity
    else:
      primary = activity.get('object', {})
    obj = activity.setdefault('object', {})

    # HTML content, with &s escaped
    rendered = microformats2.render_content(primary, include_location=reader)
    obj['rendered_content'] = _encode_ampersands(rendered)

    # Atom <entry> requires a <title>, so synthesize one if necessary.
    if not activity.get('title'):
      fallback = (activity.get('displayName') or activity.get('content') or
                  obj.get('title') or obj.get('displayName') or
                  obj.get('content') or 'Untitled')
      activity['title'] = util.ellipsize(_encode_ampersands(fallback))

    # The Atom spec says title is plain text, so strip any HTML tags:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    title_text = BeautifulSoup(activity['title']).get_text('')
    activity['title'] = xml.sax.saxutils.escape(title_text)

    # attachments.image should always be a list
    attachments = activity.get('attachments') or obj.get('attachments') or []
    for attachment in attachments:
      attachment['image'] = util.get_list(attachment, 'image')

    # note and article attachments are rendered separately, as children
    children = obj['rendered_children'] = []
    for attachment in attachments:
      if attachment.get('objectType') not in ('note', 'article'):
        continue

      child_html = microformats2.render_content(attachment,
                                                include_location=reader)
      child_author = attachment.get('author')
      if child_author:
        author_props = microformats2.object_to_json(child_author).get(
          'properties', [])
        author_name = microformats2.maybe_linked_name(author_props)
        child_html = '%s: %s' % (author_name.strip(), child_html)
      children.append(_encode_ampersands(child_html))

  # Mimics Django templates, where a failed attribute or item lookup returns a
  # special default value that can itself be dereferenced further. Saves lots
  # of conditionals in the template.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      # nested dicts are wrapped recursively; other values are kept as is
      wrapped = {}
      for key, val in kwargs.items():
        wrapped[key] = Defaulter(**val) if isinstance(val, dict) else val
      super(Defaulter, self).__init__(Defaulter, **wrapped)

    def __unicode__(self):
      if self:
        return super(Defaulter, self).__unicode__()
      return u''

  env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, 'templates'),
                           autoescape=True)
  if actor is None:
    actor = {}

  template = env.get_template(ATOM_TEMPLATE_FILE)
  items = [Defaulter(**activity) for activity in activities]
  feed_title = title or 'User feed for ' + source.Source.actor_name(actor)
  # the feed's updated time is the first activity's published time
  if activities:
    updated = activities[0]['object'].get('published', '')
  else:
    updated = ''

  return template.render(
    items=items,
    host_url=host_url,
    request_url=request_url,
    xml_base=xml_base,
    title=feed_title,
    updated=updated,
    actor=Defaulter(**actor),
    rels=rels or {},
    )
Exemple #21
0
def render_content(obj, include_location=True, synthesize_content=True):
    """Renders the content of an ActivityStreams object as HTML.

    Includes tags, mentions, and non-note/article attachments. (Note/article
    attachments are converted to mf2 children in object_to_json and then
    rendered in json_to_html.)

    Args:
      obj: decoded JSON ActivityStreams object
      include_location: whether to render location, if provided
      synthesize_content: whether to generate synthetic content if the object
        doesn't have its own, e.g. 'likes this.' or 'shared this.'

    Returns: string, rendered HTML
    """
    content = obj.get('content', '')

    # extract tags. preserve order but de-dupe, ie don't include a tag more than
    # once.
    seen_ids = set()
    mentions = []
    tags = {}  # maps string objectType to list of tag objects
    for t in obj.get('tags', []):
        tag_id = t.get('id')
        if tag_id and tag_id in seen_ids:
            continue
        seen_ids.add(tag_id)

        # only tags that have both a position and a URL can be linkified in
        # place; positioned tags without a URL fall through to the generic
        # tag rendering below instead of raising KeyError on tag['url'].
        if 'startIndex' in t and 'length' in t and 'url' in t:
            mentions.append(t)
        else:
            tags.setdefault(source.object_type(t), []).append(t)

    # linkify embedded mention tags inside content.
    if mentions:
        mentions.sort(key=lambda t: t['startIndex'])
        last_end = 0
        orig = content
        pieces = []
        for tag in mentions:
            start = tag['startIndex']
            end = start + tag['length']
            pieces.append(orig[last_end:start])
            pieces.append('<a href="%s">%s</a>' % (tag['url'],
                                                   orig[start:end]))
            last_end = end

        pieces.append(orig[last_end:])
        content = ''.join(pieces)

    # convert newlines to <br>s
    # do this *after* linkifying tags so we don't have to shuffle indices over
    content = content.replace('\n', '<br />\n')

    # linkify embedded links. ignore the "mention" tags that we added ourselves.
    # TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
    # uncomment this.
    # if content:
    #   content = util.linkify(content)

    # attachments, e.g. links (aka articles)
    # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
    attachments = [
        a for a in obj.get('attachments', [])
        if a.get('objectType') not in ('note', 'article')
    ]

    for tag in attachments + tags.pop('article', []):
        name = tag.get('displayName', '')
        open_a_tag = False
        if tag.get('objectType') == 'video':
            # fall back to the enclosing object's stream if the attachment
            # doesn't have its own
            video = util.get_first(tag, 'stream') or util.get_first(
                obj, 'stream')
            poster = util.get_first(tag, 'image', {})
            if video and video.get('url'):
                content += '\n<p>%s' % vid(video['url'], poster.get('url'),
                                           'thumbnail')
        else:
            content += '\n<p>'
            url = tag.get('url') or obj.get('url')
            if url:
                content += '\n<a class="link" href="%s">' % url
                open_a_tag = True
            image = util.get_first(tag, 'image') or util.get_first(
                obj, 'image')
            if image and image.get('url'):
                content += '\n' + img(image['url'], 'thumbnail', name)
        if name:
            content += '\n<span class="name">%s</span>' % name
        if open_a_tag:
            content += '\n</a>'
        summary = tag.get('summary')
        if summary and summary != name:
            content += '\n<span class="summary">%s</span>' % summary
        content += '\n</p>'

    # generate share/like contexts if the activity does not have content
    # of its own
    obj_type = source.object_type(obj)
    for as_type, verb in [('share', 'Shared'), ('like', 'Likes')]:
        if (not synthesize_content or obj_type != as_type
                or 'object' not in obj or 'content' in obj):
            continue

        targets = util.get_list(obj, 'object')
        if not targets:
            continue

        for target in targets:
            # sometimes likes don't have enough content to render anything
            # interesting
            if 'url' in target and set(target) <= set(['url', 'objectType']):
                content += '<a href="%s">%s this.</a>' % (target.get('url'),
                                                          verb.lower())

            else:
                author = target.get('author', target.get('actor', {}))
                # special case for twitter RT's
                if obj_type == 'share' and 'url' in obj and re.search(
                        r'^https?://(?:www\.|mobile\.)?twitter\.com/',
                        obj.get('url')):
                    content += 'RT <a href="%s">@%s</a> ' % (target.get(
                        'url', '#'), author.get('username'))
                else:
                    # image looks bad in the simplified rendering
                    author = {
                        k: v
                        for k, v in author.items() if k != 'image'
                    }
                    content += '%s <a href="%s">%s</a> by %s' % (
                        verb,
                        target.get('url', '#'),
                        target.get('displayName', target.get(
                            'title', 'a post')),
                        hcard_to_html(
                            object_to_json(author,
                                           default_object_type='person')),
                    )
                content += render_content(
                    target,
                    include_location=include_location,
                    synthesize_content=synthesize_content)
            # only include the first context in the content (if there are
            # others, they'll be included as separate properties)
            break
        break

    # location
    loc = obj.get('location')
    if include_location and loc:
        content += '\n' + hcard_to_html(object_to_json(
            loc, default_object_type='place'),
                                        parent_props=['p-location'])

    # these are rendered manually in json_to_html()
    for tag_type in 'like', 'share', 'react', 'person':
        tags.pop(tag_type, None)

    # render the rest
    content += tags_to_html(tags.pop('hashtag', []), 'p-category')
    content += tags_to_html(tags.pop('mention', []), 'u-mention')
    content += tags_to_html(sum(tags.values(), []), 'tag')

    return content
Exemple #22
0
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False):
  """Renders the content of an ActivityStreams object as HTML.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then rendered
  in json_to_html.)

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: whether to render location, if provided
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'
    render_attachments: whether to render the object's non-note/article
      attachments and article tags (and, for shares, the shared objects'
      attachments) into the returned HTML

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    tag_id = t.get('id')
    if tag_id and tag_id in seen_ids:
      continue
    seen_ids.add(tag_id)

    # only tags with a position and a URL can be linkified in place
    if 'startIndex' in t and 'length' in t and 'url' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = util.WideUnicode(content)
    content = util.WideUnicode('')
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content = util.WideUnicode('%s%s<a href="%s">%s</a>' % (
        content, orig[last_end:start], tag['url'], orig[start:end]))
      last_end = end

    content += orig[last_end:]

  if not obj.get('content_is_html'):
    # convert newlines to <br>s
    # do this *after* linkifying tags so we don't have to shuffle indices over
    content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/tests/test_util.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  if render_attachments:
    atts = [a for a in obj.get('attachments', [])
            if a.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts + tags.pop('article', []), obj)

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in (
      ('favorite', 'Favorites'), ('like', 'Likes'), ('share', 'Shared')):
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())

      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        if obj_type == 'share' and 'url' in obj and re.search(
                r'^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        # note: render_attachments is not passed through, so the nested
        # target is rendered with the default (False)
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  # for shares, also render the attachments of the shared object(s)
  if render_attachments and obj.get('verb') == 'share':
    atts = [att for att in itertools.chain.from_iterable(
              o.get('attachments', []) for o in util.get_list(obj, 'object'))
            if att.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts, obj)

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n<p>%s</p>' % hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for tag_type in 'like', 'share', 'react', 'person':
    tags.pop(tag_type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention')
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
Exemple #23
0
    def _create(self,
                obj,
                preview=None,
                include_link=source.OMIT_LINK,
                ignore_formatting=False):
        """Creates or previews a status (aka toot), reply, boost (aka reblog), or favorite.

        https://docs.joinmastodon.org/api/rest/statuses/

        Based on :meth:`Twitter._create`.

        Args:
          obj: ActivityStreams object
          preview: boolean
          include_link: string
          ignore_formatting: boolean

        Returns: CreationResult. If preview is True, the content will be a unicode
          string HTML snippet. If False, it will be a dict with 'id' and 'url' keys
          for the newly created object.
        """
        assert preview in (False, True)
        obj_type = obj.get('objectType')
        verb = obj.get('verb')

        base_obj = self.base_object(obj)
        base_id = base_obj.get('id')
        base_url = base_obj.get('url')

        is_reply = obj_type == 'comment' or obj.get('inReplyTo')
        is_rsvp = (verb and verb.startswith('rsvp-')) or verb == 'invite'
        atts = obj.get('attachments', [])
        images = util.dedupe_urls(
            util.get_list(obj, 'image') +
            [a for a in atts if a.get('objectType') == 'image'])
        videos = util.dedupe_urls(
            [obj] + [a for a in atts if a.get('objectType') == 'video'],
            key='stream')
        has_media = (images or videos) and (obj_type in ('note', 'article')
                                            or is_reply)

        # prefer displayName over content for articles
        #
        # TODO: handle activities as well as objects? ie pull out ['object'] here if
        # necessary?
        prefer_content = obj_type == 'note' or (base_url and is_reply)
        preview_description = ''
        content = self._content_for_create(obj,
                                           ignore_formatting=ignore_formatting,
                                           prefer_name=not prefer_content)

        if not content:
            if obj_type == 'activity' and not is_rsvp:
                content = verb
            elif has_media:
                content = ''
            else:
                return source.creation_result(
                    abort=False,  # keep looking for things to publish,
                    error_plain='No content text found.',
                    error_html='No content text found.')

        post_label = '%s %s' % (self.NAME, self.TYPE_LABELS['post'])
        if is_reply and not base_url:
            return source.creation_result(
                abort=True,
                error_plain='Could not find a %s to reply to.' % post_label,
                error_html=
                'Could not find a %s to <a href="http://indiewebcamp.com/reply">reply to</a>. Check that your post has the right <a href="http://indiewebcamp.com/comment">in-reply-to</a> link.'
                % post_label)

        # truncate and ellipsize content if necessary
        # TODO: don't count domains in remote mentions.
        # https://docs.joinmastodon.org/usage/basics/#text
        content = self.truncate(content, obj.get('url'), include_link,
                                obj_type)

        # linkify user mentions
        def linkify_mention(match):
            # a mention is either user or user@instance; without an explicit
            # instance, link to the user on this instance
            split = match.group(1).split('@')
            username = split[0]
            instance = ('https://' +
                        split[1]) if len(split) > 1 else self.instance
            url = urllib.parse.urljoin(instance, '/@' + username)
            return '<a href="%s">@%s</a>' % (url, username)

        preview_content = MENTION_RE.sub(linkify_mention, content)

        # linkify (defaults to twitter's behavior)
        preview_content = util.linkify(preview_content,
                                       pretty=True,
                                       skip_bare_cc_tlds=True)
        tags_url = urllib.parse.urljoin(self.instance, '/tags')
        preview_content = HASHTAG_RE.sub(
            r'\1<a href="%s/\2">#\2</a>' % tags_url, preview_content)

        # switch on activity type
        if obj_type == 'activity' and verb == 'like':
            if not base_url:
                return source.creation_result(
                    abort=True,
                    error_plain='Could not find a %s to %s.' %
                    (post_label, self.TYPE_LABELS['like']),
                    error_html=
                    'Could not find a %s to <a href="http://indiewebcamp.com/like">%s</a>. Check that your post has the right <a href="http://indiewebcamp.com/like">u-like-of link</a>.'
                    % (post_label, self.TYPE_LABELS['like']))

            if preview:
                preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
                    self.TYPE_LABELS['like'], base_url,
                    self.TYPE_LABELS['post'], self.embed_post(base_obj))
                return source.creation_result(description=preview_description)
            else:
                resp = self._post(API_FAVORITE % base_id)
                resp['type'] = 'like'

        elif obj_type == 'activity' and verb == 'share':
            if not base_url:
                return source.creation_result(
                    abort=True,
                    error_plain='Could not find a %s to %s.' %
                    (post_label, self.TYPE_LABELS['repost']),
                    error_html=
                    'Could not find a %s to <a href="http://indiewebcamp.com/repost">%s</a>. Check that your post has the right <a href="http://indiewebcamp.com/repost">repost-of</a> link.'
                    % (post_label, self.TYPE_LABELS['repost']))

            if preview:
                preview_description += '<span class="verb">%s</span> <a href="%s">this %s</a>: %s' % (
                    self.TYPE_LABELS['repost'], base_url,
                    self.TYPE_LABELS['post'], self.embed_post(base_obj))
                return source.creation_result(description=preview_description)
            else:
                resp = self._post(API_REBLOG % base_id)
                resp['type'] = 'repost'

        elif obj_type in ('note', 'article') or is_reply or is_rsvp:  # a post
            data = {'status': content}

            if is_reply:
                preview_description += 'add a <span class="verb">%s</span> to <a href="%s">this %s</a>: %s' % (
                    self.TYPE_LABELS['comment'], base_url,
                    self.TYPE_LABELS['post'], self.embed_post(base_obj))
                data['in_reply_to_id'] = base_id
            else:
                preview_description += '<span class="verb">%s</span>:' % self.TYPE_LABELS[
                    'post']

            # cap the total number of media, keeping videos ahead of images
            num_media = len(videos) + len(images)
            if num_media > MAX_MEDIA:
                videos = videos[:MAX_MEDIA]
                images = images[:max(MAX_MEDIA - len(videos), 0)]
                logging.warning('Found %d media! Only using the first %d: %r',
                                num_media, MAX_MEDIA, videos + images)

            if preview:
                media_previews = [
                    '<video controls src="%s"><a href="%s">%s</a></video>' %
                    (util.get_url(vid, key='stream'),
                     util.get_url(vid, key='stream'), vid.get('displayName')
                     or 'this video') for vid in videos
                ] + [
                    '<img src="%s" alt="%s" />' %
                    (util.get_url(img), img.get('displayName') or '')
                    for img in images
                ]
                if media_previews:
                    preview_content += '<br /><br />' + ' &nbsp; '.join(
                        media_previews)
                return source.creation_result(content=preview_content,
                                              description=preview_description)

            else:
                ids = self.upload_media(videos + images)
                if ids:
                    data['media_ids'] = ids
                resp = self._post(API_STATUSES, json=data)

        else:
            return source.creation_result(
                abort=False,
                error_plain='Cannot publish type=%s, verb=%s to Mastodon' %
                (obj_type, verb),
                error_html='Cannot publish type=%s, verb=%s to Mastodon' %
                (obj_type, verb))

        # fall back to the base object's URL if the API response has none
        if 'url' not in resp:
            resp['url'] = base_url

        return source.creation_result(resp)
Exemple #24
0
def _prepare_activity(a, reader=True):
    """Preprocesses an activity to prepare it to be rendered as Atom.

    Modifies a in place: sets a['title'], a['object']['rendered_content'],
    a['object']['rendered_children'], normalizes each attachment's 'stream',
    and rewrites the object's published/updated timestamps.

    Args:
      a: ActivityStreams 1 activity dict
      reader: boolean, whether the output will be rendered in a feed reader.
        Currently just includes location if True, not otherwise.
    """
    act_type = source.object_type(a)
    if not act_type or act_type == 'post':
        primary = a.get('object', {})
    else:
        primary = a
    obj = a.setdefault('object', {})

    # Render content as HTML; escape &s
    obj['rendered_content'] = _encode_ampersands(
        microformats2.render_content(primary,
                                     include_location=reader,
                                     render_attachments=True))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
        a['title'] = util.ellipsize(
            _encode_ampersands(
                a.get('displayName') or a.get('content') or obj.get('title')
                or obj.get('displayName') or obj.get('content') or 'Untitled'))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    a['title'] = xml.sax.saxutils.escape(
        BeautifulSoup(a['title']).get_text(''))

    children = []
    image_urls_seen = set()
    image_atts = []

    # normalize attachments, render attached notes/articles
    attachments = a.get('attachments') or obj.get('attachments') or []
    for att in attachments:
        att['stream'] = util.get_first(att, 'stream')
        att_type = att.get('objectType')

        if att_type == 'image':
            # image attachments are rendered below, after de-duping
            image_atts.append(util.get_first(att, 'image'))
            continue

        image_urls_seen |= set(util.get_urls(att, 'image'))
        if att_type in ('note', 'article'):
            html = microformats2.render_content(att,
                                                include_location=reader,
                                                render_attachments=True)
            author = att.get('author')
            if author:
                name = microformats2.maybe_linked_name(
                    microformats2.object_to_json(author).get('properties', []))
                html = '%s: %s' % (name.strip(), html)
            children.append(html)

    # render image(s) that we haven't already seen
    for image in image_atts + util.get_list(obj, 'image'):
        if not image:
            continue
        url = image.get('url')
        # skip before parsing: there's nothing to render without a URL, and
        # urlparse/re.compile on a missing URL is wasted (and fragile) work
        if not url or url in image_urls_seen:
            continue

        # match the image in already rendered content whether it's referenced
        # with an http(s) scheme, protocol-relative, or by path only
        parsed = urllib.parse.urlparse(url)
        rest = urllib.parse.urlunparse(('', '') + parsed[2:])
        img_src_re = re.compile(
            r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
            (re.escape(parsed.netloc), re.escape(rest)))
        if not img_src_re.search(obj['rendered_content']):
            children.append(microformats2.img(url))
            image_urls_seen.add(url)

    obj['rendered_children'] = [
        _encode_ampersands(child) for child in children
    ]

    # make sure published and updated are strict RFC 3339 timestamps
    for prop in 'published', 'updated':
        val = obj.get(prop)
        if val:
            obj[prop] = util.maybe_iso8601_to_rfc3339(val)
            # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
            # time zone unaware. They must have either an offset or the Z suffix.
            # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
            if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
                obj[prop] += 'Z'
Exemple #25
0
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Does not modify obj.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON. Empty if obj is empty or not a dict.
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  # copy so that the extend() below never mutates the caller's inReplyTo list
  in_reply_tos = list(obj.get('inReplyTo') or [])
  if not in_reply_tos:
    context = obj.get('context')
    if context and isinstance(context, dict):
      in_reply_tos = list(context.get('inReplyTo') or [])

  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # prefer duration and size from object's stream, then first video, then first
  # audio. take the *first* non-empty stream in that order; default to {} so
  # the lookups below are always safe.
  stream = next(
    (s for candidate in [obj] + attachments['video'] + attachments['audio']
     for s in get_list(candidate, 'stream') if s),
    {})

  duration = stream.get('duration')
  if duration is not None:
    if util.is_int(duration):
      duration = str(duration)
    else:
      logging.warning('Ignoring duration %r; expected int, got %s',
                      duration, duration.__class__)
      duration = None

  sizes = []
  size = stream.get('size')
  if size:
    sizes = [str(size)]

  # construct mf2!
  ret = {
    # note the precedence: the AS-derived type is only consulted when
    # entry_class is a string; a sequence entry_class is used as is.
    'type': ((AS_TO_MF2_TYPE.get(obj_type) or [entry_class])
             if isinstance(entry_class, str)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'duration': [duration],
      'size': sizes,
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False, entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # content. emulate e- vs p- microformats2 parsing: e- if there are HTML tags,
  # otherwise p-.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  text = xml.sax.saxutils.unescape(primary.get('content', ''))
  html = render_content(primary, include_location=False,
                        synthesize_content=synthesize_content)
  if '<' in html:
    ret['properties']['content'] = [{'value': text, 'html': html}]
  else:
    ret['properties']['content'] = [text]

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        alt = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': alt} if alt else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      tag_name = tag.get('displayName')
      if tag_name:
        ret['properties']['category'].append(tag_name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for as_type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == as_type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == as_type]

  # latitude & longitude. prefer the ISO 6709 position string, then fall back
  # to the separate latitude and longitude fields.
  lat = lng = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, lng = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not lng:
    lng = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if lng:
    ret['properties']['longitude'] = [str(lng)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
Exemple #26
0
def render_content(obj, include_location=True, synthesize_content=True):
  """Renders the content of an ActivityStreams object as HTML.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then rendered
  in json_to_html.)

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: whether to render location, if provided
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    string, rendered HTML
  """
  # an explicit null content is treated the same as a missing one
  content = obj.get('content') or ''

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    tag_id = t.get('id')
    if tag_id and tag_id in seen_ids:
      continue
    seen_ids.add(tag_id)

    # tags that carry a position are inline mentions, linkified below
    if 'startIndex' in t and 'length' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = content
    content = ''
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content += orig[last_end:start]
      content += '<a href="%s">%s</a>' % (tag['url'], orig[start:end])
      last_end = end

    content += orig[last_end:]

  # convert newlines to <br>s
  # do this *after* linkifying tags so we don't have to shuffle indices over
  content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  attachments = [a for a in obj.get('attachments', [])
                 if a.get('objectType') not in ('note', 'article')]

  for tag in attachments + tags.pop('article', []):
    name = tag.get('displayName', '')
    open_a_tag = False
    if tag.get('objectType') == 'video':
      video = util.get_first(tag, 'stream') or util.get_first(obj, 'stream')
      poster = util.get_first(tag, 'image', {})
      if video and video.get('url'):
        content += '\n<p>%s' % vid(video['url'], poster.get('url'), 'thumbnail')
    else:
      content += '\n<p>'
      url = tag.get('url') or obj.get('url')
      if url:
        content += '\n<a class="link" href="%s">' % url
        open_a_tag = True
      image = util.get_first(tag, 'image') or util.get_first(obj, 'image')
      if image and image.get('url'):
        content += '\n' + img(image['url'], 'thumbnail', name)
    if name:
      content += '\n<span class="name">%s</span>' % name
    if open_a_tag:
      content += '\n</a>'
    summary = tag.get('summary')
    if summary and summary != name:
      content += '\n<span class="summary">%s</span>' % summary
    content += '\n</p>'

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in [('share', 'Shared'), ('like', 'Likes')]:
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = util.get_list(obj, 'object')
    if not targets:
      continue

    # only include the first context in the content (if there are
    # others, they'll be included as separate properties)
    target = targets[0]

    # sometimes likes don't have enough content to render anything
    # interesting
    if 'url' in target and set(target) <= set(['url', 'objectType']):
      content += '<a href="%s">%s this.</a>' % (
        target.get('url'), verb.lower())

    else:
      author = target.get('author', target.get('actor', {}))
      # special case for twitter RT's
      if obj_type == 'share' and 'url' in obj and re.search(
              r'^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
        content += 'RT <a href="%s">@%s</a> ' % (
          target.get('url', '#'), author.get('username'))
      else:
        # image looks bad in the simplified rendering
        author = {k: v for k, v in author.items() if k != 'image'}
        content += '%s <a href="%s">%s</a> by %s' % (
          verb, target.get('url', '#'),
          target.get('displayName', target.get('title', 'a post')),
          hcard_to_html(object_to_json(author, default_object_type='person')),
        )
      content += render_content(target, include_location=include_location,
                                synthesize_content=synthesize_content)
    break

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n' + hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for skipped_type in 'like', 'share', 'react', 'person':
    tags.pop(skipped_type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention')
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
Exemple #27
0
def to_as1(obj, use_type=True):
  """Translates an ActivityStreams 2 activity or object into ActivityStreams 1.

  Falsy input yields an empty dict and a bare string is treated as a URL. The
  input is deep-copied first, so the caller's object is left untouched.

  Args:
    obj: dict, AS2 activity or object
    use_type: boolean, whether to include objectType and verb

  Returns: dict, AS1 activity or object

  Raises:
    ValueError: if obj is truthy but neither a string nor a dict
  """
  if not obj:
    return {}
  if isinstance(obj, basestring):
    return {'url': obj}
  if not isinstance(obj, dict):
    raise ValueError('Expected dict, got %r' % obj)

  as1 = copy.deepcopy(obj)
  as1.pop('@context', None)
  as2_type = as1.pop('type', None)

  if use_type:
    object_type = TYPE_TO_OBJECT_TYPE.get(as2_type)
    verb = TYPE_TO_VERB.get(as2_type)
    if as1.get('inReplyTo') and object_type in ('note', 'article'):
      # replies are comments in AS1
      object_type = 'comment'
    elif verb and not object_type:
      object_type = 'activity'
    as1['objectType'] = object_type
    as1['verb'] = verb

  def url_or_as1(val):
    # bare strings become {'url': ...}; anything else is converted recursively
    if isinstance(val, basestring):
      return {'url': val}
    return to_as1(val)

  def all_to_as1(field):
    # removes the field from as1 and converts each of its values
    return [to_as1(item) for item in util.pop_list(as1, field)]

  # icon first since e.g. Mastodon uses icon for profile picture,
  # image for featured photo.
  as2_images = util.pop_list(as1, 'icon') + util.pop_list(as1, 'image')
  images = []
  for as2_image in as2_images:
    converted = to_as1(as2_image, use_type=False)
    if converted not in images:
      images.append(converted)

  # a single inner object is unwrapped from its list
  inner = all_to_as1('object')
  if len(inner) == 1:
    inner = inner[0]

  as1['displayName'] = as1.pop('name', None)
  as1['actor'] = to_as1(as1.get('actor'))
  as1['attachments'] = all_to_as1('attachment')
  as1['image'] = images
  as1['inReplyTo'] = [url_or_as1(reply)
                      for reply in util.get_list(as1, 'inReplyTo')]
  as1['location'] = url_or_as1(as1.get('location'))
  as1['object'] = inner
  as1['tags'] = all_to_as1('tag')

  if as2_type in ('Audio', 'Video'):
    as1['stream'] = {'url': as1.pop('url', None)}
  elif as2_type == 'Mention':
    as1['url'] = as1.pop('href', None)

  authors = util.pop_list(as1, 'attributedTo')
  if authors:
    if len(authors) > 1:
      logging.warning('ActivityStreams 1 only supports single author; '
                      'dropping extra attributedTo values: %s' % authors[1:])
    as1['author'] = to_as1(authors[0])

  return util.trim_nulls(as1)
Exemple #28
0
def object_to_json(obj,
                   trim_nulls=True,
                   entry_class='h-entry',
                   default_object_type=None,
                   synthesize_content=True):
    """Converts an ActivityStreams object to microformats2 JSON.

    The object's inReplyTo list is copied before RSVP/reaction targets are
    appended to it, so the caller's object is not modified by that step.

    Args:
      obj: dict, a decoded JSON ActivityStreams object
      trim_nulls: boolean, whether to remove elements with null or empty values
      entry_class: string, the mf2 class that entries should be given (e.g.
        'h-cite' when parsing a reference to a foreign entry). defaults to
        'h-entry'
      default_object_type: string, the ActivityStreams objectType to use if one
        is not present. defaults to None
      synthesize_content: whether to generate synthetic content if the object
        doesn't have its own, e.g. 'likes this.' or 'shared this.'

    Returns: dict, decoded microformats2 JSON
    """
    if not obj:
        return {}

    obj_type = source.object_type(obj) or default_object_type
    # if the activity type is a post, then it's really just a conduit
    # for the object. for other verbs, the activity itself is the
    # interesting thing
    if obj_type == 'post':
        primary = obj.get('object', {})
        obj_type = source.object_type(primary) or default_object_type
    else:
        primary = obj

    # TODO: extract snippet
    name = primary.get('displayName', primary.get('title'))
    summary = primary.get('summary')
    author = obj.get('author', obj.get('actor', {}))

    # copy the list: it is extended below, and extending the original would
    # mutate the caller's object (and accumulate duplicates on repeat calls)
    in_reply_tos = list(
        obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', []))
        or [])
    is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
    if (is_rsvp or obj_type == 'react') and obj.get('object'):
        objs = obj['object']
        in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

    # TODO: more tags. most will be p-category?
    ret = {
        'type':
        (['h-card'] if obj_type == 'person' else
         ['h-card', 'p-location'] if obj_type == 'place' else [entry_class]),
        'properties': {
            'uid': [obj.get('id', '')],
            'name': [name],
            'summary': [summary],
            'url': (list(object_urls(obj) or object_urls(primary)) +
                    obj.get('upstreamDuplicates', [])),
            'photo': [
                image.get('url')
                for image in (util.get_list(obj, 'image')
                              or util.get_list(primary, 'image'))
            ],
            'video': [obj.get('stream', primary.get('stream', {})).get('url')],
            'published': [obj.get('published', primary.get('published', ''))],
            'updated': [obj.get('updated', primary.get('updated', ''))],
            'content': [{
                'value':
                xml.sax.saxutils.unescape(primary.get('content', '')),
                'html':
                render_content(primary,
                               include_location=False,
                               synthesize_content=synthesize_content),
            }],
            'in-reply-to':
            util.trim_nulls([o.get('url') for o in in_reply_tos]),
            'author': [
                object_to_json(author,
                               trim_nulls=False,
                               default_object_type='person')
            ],
            'location': [
                object_to_json(primary.get('location', {}),
                               trim_nulls=False,
                               default_object_type='place')
            ],
            'latitude':
            primary.get('latitude'),
            'longitude':
            primary.get('longitude'),
            'comment': [
                object_to_json(c, trim_nulls=False, entry_class='h-cite')
                for c in obj.get('replies', {}).get('items', [])
            ],
        },
        'children': [
            object_to_json(c, trim_nulls=False, entry_class='h-cite')
            for c in primary.get('attachments', [])
            if c.get('objectType') in ('note', 'article')
        ],
    }

    # hashtags and person tags. fall back to the inner object's tags if the
    # outer object has none.
    tags = obj.get('tags', []) or util.get_first(obj, 'object', {}).get(
        'tags', [])
    ret['properties']['category'] = []
    for tag in tags:
        if tag.get('objectType') == 'person':
            cls = 'u-category h-card'
        elif tag.get('objectType') == 'hashtag':
            cls = 'u-category'
        else:
            continue
        ret['properties']['category'].append(
            object_to_json(tag, entry_class=cls))

    # rsvp
    if is_rsvp:
        ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
    elif obj_type == 'invite':
        invitee = object_to_json(obj.get('object'),
                                 trim_nulls=False,
                                 default_object_type='person')
        ret['properties']['invitee'] = [invitee]

    # like and repost mentions
    for verb_type, prop in ('like', 'like'), ('share', 'repost'):
        if obj_type == verb_type:
            # The ActivityStreams spec says the object property should always be a
            # single object, but it's useful to let it be a list, e.g. when a like has
            # multiple targets, e.g. a like of a post with original post URLs in it,
            # which brid.gy does.
            objs = util.get_list(obj, 'object')
            ret['properties'][prop + '-of'] = [
                # flatten contexts that are just a url
                o['url']
                if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
                else object_to_json(o, trim_nulls=False, entry_class='h-cite')
                for o in objs
            ]
        else:
            # received likes and reposts
            ret['properties'][prop] = [
                object_to_json(t, trim_nulls=False, entry_class='h-cite')
                for t in tags if source.object_type(t) == verb_type
            ]

    if trim_nulls:
        ret = util.trim_nulls(ret)
    return ret
Exemple #29
0
def send_webmentions(activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.

    Targets are collected from inReplyTo (on the activity and its object),
    from mention tags whose URL starts with this request's host URL, and, for
    follows/likes/shares, from the object itself. A Response is stored for
    each target that is not on the same domain as the source.

    Args:
      activity_wrapped: dict, AS1 activity
      proxy: if truthy, the stored Response's proxy_url() is used as the
        webmention source instead of the original source URL. (That is always
        the case for follow, like, and share activities.)
      response_props: passed through to the newly created Responses
    """
    activity = redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(f'{verb} activities are not supported yet.')

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    # mention tags are read from the still-wrapped activity so that their URLs
    # can be compared against our own host URL before unwrapping
    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(request.host_url):
                targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error("Couldn't find original post URL")
    if not targets:
        error(
            "Couldn't find any target URLs in inReplyTo, object, or mention tags"
        )

    # send webmentions and store Responses
    errors = []  # stores (code, body) tuples
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info(
                f'Skipping same-domain webmention from {source} to {target}')
            continue

        response = Response(source=source,
                            target=target,
                            direction='in',
                            **response_props)
        response.put()
        wm_source = (response.proxy_url() if
                     verb in ('follow', 'like', 'share') or proxy else source)
        logging.info(f'Sending webmention from {wm_source} to {target}')

        try:
            endpoint = webmention.discover(target, headers=HEADERS).endpoint
            if endpoint:
                webmention.send(endpoint, wm_source, target, headers=HEADERS)
                response.status = 'complete'
                logging.info('Success!')
            else:
                response.status = 'ignored'
                logging.info('Ignoring.')
        except Exception as e:
            # Exception rather than BaseException, so that KeyboardInterrupt and
            # SystemExit still propagate. Keep going with the remaining targets
            # and report all failures together below.
            code, body = util.interpret_http_exception(e)
            logging.warning('Failed: %s %s', code, body)
            errors.append((code, body))
            response.status = 'error'
        response.put()

    if errors:
        msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
        # fall back to 502 when the first failure had no HTTP status code
        error(msg, status=int(errors[0][0] or 502))
Exemple #30
0
def postprocess_as2(activity, target=None, key=None):
    """Prepare an AS2 object to be served or sent via ActivityPub.

    Modifies activity (and its inner object, if that is a dict) in place.

    Args:
      activity: dict, AS2 object or activity
      target: dict, AS2 object, optional. The target of activity's inReplyTo or
        Like/Announce/etc object, if any.
      key: :class:`models.MagicKey`, optional. populated into publicKey field
        if provided.

    Returns:
      dict. For Person actors, the activity dict itself. Otherwise the
      activity passed through util.trim_nulls, with Articles and Notes wrapped
      in a Create activity.
    """
    as2_type = activity.get('type')

    # actor objects
    if as2_type == 'Person':
        postprocess_as2_actor(activity)
        # key is optional, so only synthesize publicKey when one was provided
        if key is not None and not activity.get('publicKey'):
            # underspecified, inferred from this issue and Mastodon's implementation:
            # https://github.com/w3c/activitypub/issues/203#issuecomment-297553229
            # https://github.com/tootsuite/mastodon/blob/bc2c263504e584e154384ecc2d804aeb1afb1ba3/app/services/activitypub/process_account_service.rb#L77
            activity.update({
                'publicKey': {
                    'id': activity.get('preferredUsername'),
                    'publicKeyPem': key.public_pem().decode(),
                },
                '@context': (util.get_list(activity, '@context') +
                             ['https://w3id.org/security/v1']),
            })
        return activity

    for actor in (util.get_list(activity, 'attributedTo') +
                  util.get_list(activity, 'actor')):
        postprocess_as2_actor(actor)

    # inReplyTo: singly valued, prefer id over url
    target_id = target.get('id') if target else None
    in_reply_to = activity.get('inReplyTo')
    if in_reply_to:
        if target_id:
            activity['inReplyTo'] = target_id
        elif isinstance(in_reply_to, list):
            if len(in_reply_to) > 1:
                logging.warning(
                    "AS2 doesn't support multiple inReplyTo URLs! "
                    'Only using the first: %s', in_reply_to[0])
            activity['inReplyTo'] = in_reply_to[0]

        # Mastodon evidently requires a Mention tag for replies to generate a
        # notification to the original post's author. not required for likes,
        # reposts, etc. details:
        # https://github.com/snarfed/bridgy-fed/issues/34
        if target:
            for to in (util.get_list(target, 'attributedTo') +
                       util.get_list(target, 'actor')):
                if isinstance(to, dict):
                    to = to.get('url') or to.get('id')
                if to:
                    activity.setdefault('tag', []).append({
                        'type': 'Mention',
                        'href': to,
                    })

    # activity objects (for Like, Announce, etc): prefer id over url
    obj = activity.get('object')
    if obj:
        if isinstance(obj, dict) and not obj.get('id'):
            obj['id'] = target_id or obj.get('url')
        elif target_id and obj != target_id:
            activity['object'] = target_id

    # id is required for most things. default to url if it's not set.
    if not activity.get('id'):
        activity['id'] = activity.get('url')

    # TODO: find a better way to check this, sometimes or always?
    # removed for now since it fires on posts without u-id or u-url, eg
    # https://chrisbeckstrom.com/2018/12/27/32551/
    # assert activity.get('id') or (isinstance(obj, dict) and obj.get('id'))

    activity['id'] = redirect_wrap(activity.get('id'))
    activity['url'] = redirect_wrap(activity.get('url'))

    # copy image(s) into attachment(s). may be Mastodon-specific.
    # https://github.com/snarfed/bridgy-fed/issues/33#issuecomment-440965618
    # image and attachment may each be a single object rather than a list, so
    # normalize both before combining them.
    obj_or_activity = obj if isinstance(obj, dict) else activity
    obj_or_activity['attachment'] = (
        util.get_list(obj_or_activity, 'attachment') +
        util.get_list(obj_or_activity, 'image'))

    # cc public and target's author(s) and recipients
    # https://www.w3.org/TR/activitystreams-vocabulary/#audienceTargeting
    # https://w3c.github.io/activitypub/#delivery
    if as2_type in as2.TYPE_TO_VERB or as2_type in ('Article', 'Note'):
        recips = [AS2_PUBLIC_AUDIENCE]
        if target:
            recips += itertools.chain(*(util.get_list(target, field)
                                        for field in ('actor', 'attributedTo',
                                                      'to', 'cc')))
        activity['cc'] = util.dedupe_urls(
            util.get_url(recip) or recip.get('id') for recip in recips)

    # wrap articles and notes in a Create activity
    if as2_type in ('Article', 'Note'):
        activity = {
            '@context': as2.CONTEXT,
            'type': 'Create',
            'id': f'{activity["id"]}#bridgy-fed-create',
            'object': activity,
        }

    return util.trim_nulls(activity)
Exemple #31
0
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None):
  """Renders ActivityStreams activities as an Atom feed.

  The activities are modified in place: each one gets a title, an object
  dict with rendered_content and rendered_children, and list-valued
  attachment images.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.

  Returns:
    unicode string with Atom XML
  """
  # Strip query params from URLs so that we don't include access tokens, etc
  if host_url:
    host_url = _remove_query_params(host_url)
  else:
    host_url = 'https://github.com/snarfed/granary'
  if request_url is None:
    request_url = host_url

  for activity in activities:
    # untyped activities and posts are just conduits for their object
    kind = source.object_type(activity)
    if kind and kind != 'post':
      primary = activity
    else:
      primary = activity.get('object', {})
    inner = activity.setdefault('object', {})

    # Render content as HTML; escape &s
    rendered = microformats2.render_content(primary)
    inner['rendered_content'] = _encode_ampersands(rendered)

    # Atom <entry> requires the title element, so synthesize one if necessary.
    if not activity.get('title'):
      fallback = (activity.get('displayName') or activity.get('content') or
                  inner.get('title') or inner.get('displayName') or
                  inner.get('content') or 'Untitled')
      activity['title'] = util.ellipsize(_encode_ampersands(fallback))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    plain_title = source.strip_html_tags(activity['title'])
    activity['title'] = xml.sax.saxutils.escape(plain_title)

    # Normalize attachments.image to always be a list.
    atts = activity.get('attachments') or inner.get('attachments') or []
    for att in atts:
      att['image'] = util.get_list(att, 'image')

    # note and article attachments are rendered separately, as children
    children = []
    for att in atts:
      if att.get('objectType') in ('note', 'article'):
        children.append(_encode_ampersands(microformats2.render_content(att)))
    inner['rendered_children'] = children

  # Emulate Django template behavior that returns a special default value that
  # can continue to be referenced when an attribute or item lookup fails. Helps
  # avoid conditionals in the template itself.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      # nested dicts are wrapped recursively so lookups can be chained
      wrapped = {}
      for key, val in kwargs.items():
        wrapped[key] = Defaulter(**val) if isinstance(val, dict) else val
      super(Defaulter, self).__init__(Defaulter, **wrapped)

    def __unicode__(self):
      if self:
        return super(Defaulter, self).__unicode__()
      return u''

  env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, 'templates'),
                           autoescape=True)
  if actor is None:
    actor = {}
  template = env.get_template(ATOM_TEMPLATE_FILE)
  return template.render(
    items=[Defaulter(**activity) for activity in activities],
    host_url=host_url,
    request_url=request_url,
    xml_base=xml_base,
    title=title or 'User feed for ' + source.Source.actor_name(actor),
    updated=activities[0]['object'].get('published', '') if activities else '',
    actor=Defaulter(**actor),
    rels=rels or {},
    )