コード例 #1
0
ファイル: webmention.py プロジェクト: iitians/bridgy-fed
    def _targets(self):
        """
        Returns: list of string URLs, the source's inReplyTos or objects
          (if appropriate)
        """
        targets = util.get_urls(self.source_obj, 'inReplyTo')
        if targets:
            return targets

        if self.source_obj.get('verb') in source.VERBS_WITH_OBJECT:
            return util.get_urls(self.source_obj, 'object')
コード例 #2
0
ファイル: atom.py プロジェクト: stan-alam/granary
def _prepare_activity(a, reader=True):
    """Preprocesses an activity to prepare it to be rendered as Atom.

  Modifies a in place.

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.
  """
    act_type = source.object_type(a)
    if not act_type or act_type == 'post':
        primary = a.get('object', {})
    else:
        primary = a
    obj = a.setdefault('object', {})

    # Render content as HTML; escape &s
    obj['rendered_content'] = _encode_ampersands(
        microformats2.render_content(primary,
                                     include_location=reader,
                                     render_attachments=True))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
        a['title'] = util.ellipsize(
            _encode_ampersands(
                a.get('displayName') or a.get('content') or obj.get('title')
                or obj.get('displayName') or obj.get('content') or 'Untitled'))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    a['title'] = xml.sax.saxutils.escape(
        BeautifulSoup(a['title']).get_text(''))

    children = []
    image_urls_seen = set()
    image_atts = []

    # normalize attachments, render attached notes/articles
    attachments = a.get('attachments') or obj.get('attachments') or []
    for att in attachments:
        att['stream'] = util.get_first(att, 'stream')
        type = att.get('objectType')

        if type == 'image':
            image_atts.append(util.get_first(att, 'image'))
            continue

        image_urls_seen |= set(util.get_urls(att, 'image'))
        if type in ('note', 'article'):
            html = microformats2.render_content(att,
                                                include_location=reader,
                                                render_attachments=True)
            author = att.get('author')
            if author:
                name = microformats2.maybe_linked_name(
                    microformats2.object_to_json(author).get('properties', []))
                html = '%s: %s' % (name.strip(), html)
            children.append(html)

    # render image(s) that we haven't already seen
    for image in image_atts + util.get_list(obj, 'image'):
        if not image:
            continue
        url = image.get('url')
        parsed = urllib.parse.urlparse(url)
        rest = urllib.parse.urlunparse(('', '') + parsed[2:])
        img_src_re = re.compile(
            r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
            (re.escape(parsed.netloc), re.escape(rest)))
        if (url and url not in image_urls_seen
                and not img_src_re.search(obj['rendered_content'])):
            children.append(microformats2.img(url))
            image_urls_seen.add(url)

    obj['rendered_children'] = [
        _encode_ampersands(child) for child in children
    ]

    # make sure published and updated are strict RFC 3339 timestamps
    for prop in 'published', 'updated':
        val = obj.get(prop)
        if val:
            obj[prop] = util.maybe_iso8601_to_rfc3339(val)
            # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
            # time zone unaware. They must have either an offset or the Z suffix.
            # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
            if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
                obj[prop] += 'Z'
コード例 #3
0
ファイル: microformats2.py プロジェクト: cacimatti/granary
def object_to_json(obj,
                   trim_nulls=True,
                   entry_class='h-entry',
                   default_object_type=None,
                   synthesize_content=True):
    """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
    if not obj or not isinstance(obj, dict):
        return {}

    obj_type = source.object_type(obj) or default_object_type
    # if the activity type is a post, then it's really just a conduit
    # for the object. for other verbs, the activity itself is the
    # interesting thing
    if obj_type == 'post':
        primary = obj.get('object', {})
        obj_type = source.object_type(primary) or default_object_type
    else:
        primary = obj

    # TODO: extract snippet
    name = primary.get('displayName', primary.get('title'))
    summary = primary.get('summary')
    author = obj.get('author', obj.get('actor', {}))

    in_reply_tos = obj.get('inReplyTo',
                           obj.get('context', {}).get('inReplyTo', []))
    is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
    if (is_rsvp or obj_type == 'react') and obj.get('object'):
        objs = obj['object']
        in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

    # maps objectType to list of objects
    attachments = defaultdict(list)
    for prop in 'attachments', 'tags':
        for elem in get_list(primary, prop):
            attachments[elem.get('objectType')].append(elem)

    # construct mf2!
    ret = {
        'type': (AS_TO_MF2_TYPE.get(obj_type) or [entry_class] if isinstance(
            entry_class, basestring) else list(entry_class)),
        'properties': {
            'uid': [obj.get('id') or ''],
            'numeric-id': [obj.get('numeric_id') or ''],
            'name': [name],
            'nickname': [obj.get('username') or ''],
            'summary': [summary],
            'url': (list(object_urls(obj) or object_urls(primary)) +
                    obj.get('upstreamDuplicates', [])),
            'photo':
            dedupe_urls(
                get_urls(attachments, 'image', 'image') +
                get_urls(primary, 'image')),
            'video':
            dedupe_urls(
                get_urls(attachments, 'video', 'stream') +
                get_urls(primary, 'stream')),
            'audio':
            get_urls(attachments, 'audio', 'stream'),
            'published': [obj.get('published', primary.get('published', ''))],
            'updated': [obj.get('updated', primary.get('updated', ''))],
            'content': [{
                'value':
                xml.sax.saxutils.unescape(primary.get('content', '')),
                'html':
                render_content(primary,
                               include_location=False,
                               synthesize_content=synthesize_content),
            }],
            'in-reply-to':
            util.trim_nulls([o.get('url') for o in in_reply_tos]),
            'author': [
                object_to_json(author,
                               trim_nulls=False,
                               default_object_type='person')
            ],
            'location': [
                object_to_json(primary.get('location', {}),
                               trim_nulls=False,
                               default_object_type='place')
            ],
            'comment': [
                object_to_json(c, trim_nulls=False, entry_class='h-cite')
                for c in obj.get('replies', {}).get('items', [])
            ],
            'start': [primary.get('startTime')],
            'end': [primary.get('endTime')],
        },
        'children': [
            object_to_json(a,
                           trim_nulls=False,
                           entry_class=['u-quotation-of', 'h-cite'])
            for a in attachments['note'] + attachments['article']
        ]
    }

    # hashtags and person tags
    tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
    ret['properties']['category'] = []
    for tag in tags:
        if tag.get('objectType') == 'person':
            ret['properties']['category'].append(
                object_to_json(tag, entry_class='u-category h-card'))
        elif tag.get('objectType') == 'hashtag':
            name = tag.get('displayName')
            if name:
                ret['properties']['category'].append(name)

    # rsvp
    if is_rsvp:
        ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
    elif obj_type == 'invite':
        invitee = object_to_json(obj.get('object'),
                                 trim_nulls=False,
                                 default_object_type='person')
        ret['properties']['invitee'] = [invitee]

    # like and repost mentions
    for type, prop in ('favorite', 'like'), ('like', 'like'), ('share',
                                                               'repost'):
        if obj_type == type:
            # The ActivityStreams spec says the object property should always be a
            # single object, but it's useful to let it be a list, e.g. when a like has
            # multiple targets, e.g. a like of a post with original post URLs in it,
            # which brid.gy does.
            objs = get_list(obj, 'object')
            ret['properties'][prop + '-of'] = [
                # flatten contexts that are just a url
                o['url']
                if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
                else object_to_json(o, trim_nulls=False, entry_class='h-cite')
                for o in objs
            ]
        else:
            # received likes and reposts
            ret['properties'][prop] = [
                object_to_json(t, trim_nulls=False, entry_class='h-cite')
                for t in tags if source.object_type(t) == type
            ]

    # latitude & longitude
    lat = long = None
    position = ISO_6709_RE.match(primary.get('position') or '')
    if position:
        lat, long = position.groups()
    if not lat:
        lat = primary.get('latitude')
    if not long:
        long = primary.get('longitude')

    if lat:
        ret['properties']['latitude'] = [str(lat)]
    if long:
        ret['properties']['longitude'] = [str(long)]

    if trim_nulls:
        ret = util.trim_nulls(ret)
    return ret
コード例 #4
0
ファイル: microformats2.py プロジェクト: snarfed/granary
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, basestring)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
          'value': xml.sax.saxutils.unescape(primary.get('content', '')),
          'html': render_content(primary, include_location=False,
                                 synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False, entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name} if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret
コード例 #5
0
ファイル: atom.py プロジェクト: snarfed/granary
def _prepare_activity(a, reader=True):
  """Preprocesses an activity to prepare it to be rendered as Atom.

  Modifies a in place.

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.
  """
  act_type = source.object_type(a)
  obj = util.get_first(a, 'object', default={})
  primary = obj if (not act_type or act_type == 'post') else a

  # Render content as HTML; escape &s
  obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
    primary, include_location=reader, render_attachments=True))

  # Make sure every activity has the title field, since Atom <entry> requires
  # the title element.
  if not a.get('title'):
    a['title'] = util.ellipsize(_encode_ampersands(
      a.get('displayName') or a.get('content') or obj.get('title') or
      obj.get('displayName') or obj.get('content') or 'Untitled'))

  # strip HTML tags. the Atom spec says title is plain text:
  # http://atomenabled.org/developers/syndication/#requiredEntryElements
  a['title'] = xml.sax.saxutils.escape(BeautifulSoup(a['title']).get_text(''))

  children = []
  image_urls_seen = set()
  image_atts = []

  # normalize actor images
  for elem in a, obj:
    actor = elem.get('actor')
    if actor:
      actor['image'] = util.get_first(actor, 'image')

  # normalize attachments, render attached notes/articles
  attachments = a.get('attachments') or obj.get('attachments') or []
  for att in attachments:
    att['stream'] = util.get_first(att, 'stream')
    type = att.get('objectType')

    if type == 'image':
      att['image'] = util.get_first(att, 'image')
      image_atts.append(att['image'])
      continue

    image_urls_seen |= set(util.get_urls(att, 'image'))
    if type in ('note', 'article'):
      html = microformats2.render_content(att, include_location=reader,
                                          render_attachments=True)
      author = att.get('author')
      if author:
        name = microformats2.maybe_linked_name(
          microformats2.object_to_json(author).get('properties') or {})
        html = '%s: %s' % (name.strip(), html)
      children.append(html)

  # render image(s) that we haven't already seen
  for image in image_atts + util.get_list(obj, 'image'):
    if not image:
      continue
    url = image.get('url')
    parsed = urllib.parse.urlparse(url)
    rest = urllib.parse.urlunparse(('', '') + parsed[2:])
    img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
                            (re.escape(parsed.netloc), re.escape(rest)))
    if (url and url not in image_urls_seen and
        not img_src_re.search(obj['rendered_content'])):
      children.append(microformats2.img(url))
      image_urls_seen.add(url)

  obj['rendered_children'] = [_encode_ampersands(child) for child in children]

  # make sure published and updated are strict RFC 3339 timestamps
  for prop in 'published', 'updated':
    val = obj.get(prop)
    if val:
      obj[prop] = util.maybe_iso8601_to_rfc3339(val)
      # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
      # time zone unaware. They must have either an offset or the Z suffix.
      # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
      if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
        obj[prop] += 'Z'
コード例 #6
0
def render_content(obj, include_location=True, synthesize_content=True,
                   render_attachments=False, render_image=False,
                   white_space_pre=True):
  """Renders the content of an ActivityStreams object as HTML.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then rendered
  in json_to_html.)

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: boolean, whether to render location, if provided
    synthesize_content: boolean, whether to generate synthetic content if the
      object doesn't have its own, e.g. 'likes this.' or 'shared this.'
    render_attachments: boolean, whether to render attachments, eg links,
      images, audio, and video
    render_image: boolean, whether to render the object's image(s)
    white_space_pre: boolean, whether to wrap in CSS white-space: pre. If False,
      newlines will be converted to <br> tags instead. Background:
      https://indiewebcamp.com/note#Indieweb_whitespace_thinking

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    id = t.get('id')
    if id and id in seen_ids:
      continue
    seen_ids.add(id)

    if 'startIndex' in t and 'length' in t and 'url' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = util.WideUnicode(content)
    content = util.WideUnicode('')
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content = util.WideUnicode('%s%s<a href="%s">%s</a>' % (
        content, orig[last_end:start], tag['url'], orig[start:end]))
      last_end = end

    content += orig[last_end:]

  # is whitespace in this content meaningful? standard heuristic: if there are
  # no HTML tags in it, and it has a newline, then assume yes.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  # https://github.com/snarfed/granary/issues/80
  if content and not obj.get('content_is_html') and '\n' in content:
    if white_space_pre:
      content = '<div style="white-space: pre">%s</div>' % content
    else:
      content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/tests/test_util.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # the image field. may be multiply valued.
  rendered_urls = set()
  if render_image:
    urls = get_urls(obj, 'image')
    content += _render_attachments([{
      'objectType': 'image',
      'image': {'url': url},
    } for url in urls], obj)
    rendered_urls = set(urls)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  if render_attachments:
    atts = [a for a in obj.get('attachments', [])
            if a.get('objectType') not in ('note', 'article')
            and get_url(a, 'image') not in rendered_urls]
    content += _render_attachments(atts + tags.pop('article', []), obj)

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in (
      ('favorite', 'Favorites'), ('like', 'Likes'), ('share', 'Shared')):
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())

      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        if obj_type == 'share' and 'url' in obj and re.search(
                '^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content,
                                  white_space_pre=white_space_pre)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  if render_attachments and obj.get('verb') == 'share':
    atts = [att for att in itertools.chain.from_iterable(
              o.get('attachments', []) for o in util.get_list(obj, 'object'))
            if att.get('objectType') not in ('note', 'article')]
    content += _render_attachments(atts, obj)

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n<p>%s</p>' % hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for type in 'like', 'share', 'react', 'person':
    tags.pop(type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention', visible=False)
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content
コード例 #7
0
def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo') or []
  if not in_reply_tos:
    context = obj.get('context')
    if context and isinstance(context, dict):
      in_reply_tos = context.get('inReplyTo') or []

  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # prefer duration and size from object's stream, then first video, then first
  # audio
  stream = {}
  for candidate in [obj] + attachments['video'] + attachments['audio']:
    for stream in get_list(candidate, 'stream'):
      if stream:
        break

  duration = stream.get('duration')
  if duration is not None:
    if util.is_int(duration):
      duration = str(duration)
    else:
      logging('Ignoring duration %r; expected int, got %s', duration.__class__)
      duration = None

  sizes = []
  size = stream.get('size')
  if size:
    sizes = [str(size)]

  # construct mf2!
  ret = {
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, str)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'duration': [duration],
      'size': sizes,
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False, entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # content. emulate e- vs p- microformats2 parsing: e- if there are HTML tags,
  # otherwise p-.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  text = xml.sax.saxutils.unescape(primary.get('content', ''))
  html = render_content(primary, include_location=False,
                        synthesize_content=synthesize_content)
  if '<' in html:
    ret['properties']['content'] = [{'value': text, 'html': html}]
  else:
    ret['properties']['content'] = [text]

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name} if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == type]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret