Python get_first Examples, oauth_dropins.webutil.util.get_first Python Examples

Example #1

0

Show file

File: blog_webmention.py Project: snarfed/bridgy

  def find_mention_item(self, items):
    """Finds the mf2 item that mentions (or replies to, likes, etc) the target.

    Walks items in order, descending into each item's children before moving
    on to the next one. Sets self.entity.type when a match is found.

    May modify the items arg, e.g. may set or replace content.html or
    content.value.

    Args:
      items: sequence of mf2 item dicts

    Returns:
      mf2 item dict or None
    """
    link_props = ('in-reply-to', 'like', 'like-of', 'repost', 'repost-of')

    for item in items:
      props = item.setdefault('properties', {})

      # only the first content element is examined
      content = props.setdefault('content', [{}])[0]
      text = content.get('html') or content.get('value')

      # first choice: one of the link properties points at a target
      kind = None
      for candidate in link_props:
        urls = [urllib.parse.urldefrag(u)[0] for u in
                microformats2.get_string_urls(props.get(candidate, []))]
        if self.any_target_in(urls):
          kind = candidate
          break

      # second choice: the content itself mentions a target
      if kind is None and text and self.any_target_in(text):
        kind = 'post'
        url = get_first(props, 'url') or self.source_url
        name = get_first(props, 'name') or get_first(props, 'summary')
        text = content['html'] = f'mentioned this in {util.pretty_link(url, text=name, max_length=280)}.'

      if not kind:
        # nothing here; check children in case this is eg an h-feed
        found = self.find_mention_item(item.get('children', []))
        if found:
          return found
        continue

      # found the target!
      rsvp = get_first(props, 'rsvp')
      if rsvp:
        self.entity.type = 'rsvp'
        if not text:
          content['value'] = f'RSVPed {rsvp}.'
        return item

      entity_types = {
        'in-reply-to': 'comment',
        'like-of': 'like',
        'repost-of': 'repost',
      }
      self.entity.type = entity_types.get(kind, kind)
      if not text:
        # synthesize placeholder text; looked up lazily since 'post' always
        # has text and has no entry here
        placeholders = {
          'comment': 'replied to this.',
          'like': 'liked this.',
          'repost': 'reposted this.',
        }
        content['value'] = placeholders[self.entity.type]
      return item

    return None

Example #2

0

Show file

    def _single_target(self):
        """Picks the one URL that the source object points at.

        Returns: string URL, taken from the source's inReplyTo if it has one,
          otherwise from its object when its verb is in
          source.VERBS_WITH_OBJECT, otherwise None
        """
        in_reply_to = util.get_first(self.source_obj, 'inReplyTo')
        if in_reply_to:
            return util.get_url(in_reply_to)

        if self.source_obj.get('verb') not in source.VERBS_WITH_OBJECT:
            return None

        return util.get_url(util.get_first(self.source_obj, 'object'))

Example #3

0

Show file

File: microformats2.py Project: sbarale/granary

def _render_attachments(attachments, obj):
    """Renders ActivityStreams attachments (or tags etc) as HTML.

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    attachments: sequence of decoded JSON ActivityStreams objects
    obj: top-level decoded JSON ActivityStreams object

  Returns:
    string, rendered HTML
  """
    content = ''

    for att in attachments:
        name = att.get('displayName', '')
        stream = get_first(att, 'stream', {}).get('url') or ''
        image = get_first(att, 'image', {}).get('url') or ''
        open_a_tag = False
        content += '\n<p>'

        type = att.get('objectType')
        if type == 'video':
            if stream:
                content += vid(stream, poster=image)
        elif type == 'audio':
            if stream:
                content += aud(stream)
        else:
            url = att.get('url') or obj.get('url')
            if url:
                content += '\n<a class="link" href="%s">' % url
                open_a_tag = True
            if image:
                content += '\n' + img(image, name)

        if name and type != 'image':
            content += '\n<span class="name">%s</span>' % name

        if open_a_tag:
            content += '\n</a>'

        summary = att.get('summary')
        if summary and summary != name:
            content += '\n<span class="summary">%s</span>' % summary
        content += '\n</p>'

    return content

Example #4

0

Show file

File: microformats2.py Project: snarfed/granary

def _render_attachments(attachments, obj):
  """Renders ActivityStreams attachments (or tags etc) as HTML.

  Note that the returned HTML is included in Atom as well as HTML documents,
  so it *must* be HTML4 / XHTML, not HTML5! All tags must be closed, etc.

  Args:
    attachments: sequence of decoded JSON ActivityStreams objects
    obj: top-level decoded JSON ActivityStreams object

  Returns:
    string, rendered HTML
  """
  content = ''

  for att in attachments:
    name = att.get('displayName', '')
    stream = get_first(att, 'stream', {}).get('url') or ''
    image = get_first(att, 'image', {}).get('url') or ''
    open_a_tag = False
    content += '\n<p>'

    if att.get('objectType') == 'video':
      if stream:
        content += vid(stream, poster=image)
    elif att.get('objectType') == 'audio':
      if stream:
        content += aud(stream)
    else:
      url = att.get('url') or obj.get('url')
      if url:
        content += '\n<a class="link" href="%s">' % url
        open_a_tag = True
      if image:
        content += '\n' + img(image, name)

    if name:
      content += '\n<span class="name">%s</span>' % name

    if open_a_tag:
      content += '\n</a>'

    summary = att.get('summary')
    if summary and summary != name:
      content += '\n<span class="summary">%s</span>' % summary
    content += '\n</p>'

  return content

Example #5

0

Show file

def first_props(props):
    """Flattens a multiply-valued dict into a singly valued one.

    Args:
      props: dict of properties, where each value is a sequence

    Returns: dict with the same keys, each mapped to the result of
      util.get_first for that key with '' as the default. An empty or None
      props gives {}.
    """
    if not props:
        return {}

    return {key: util.get_first(props, key, '') for key in props}

Example #6

0

Show file

def _prepare_actor(actor):
  """Preprocesses an AS1 actor to prepare it to be rendered as Atom.

  Modifies actor in place.

  Args:
    actor: ActivityStreams 1 actor dict
  """
  if actor:
    actor['image'] = util.get_first(actor, 'image')

Example #7

0

Show file

def first_props(props):
  """Reduces every value in a multiply-valued dict to a single value.

  Args:
    props: dict of properties, where each value is a sequence

  Returns:
    new dict with the same keys, each mapped to what util.get_first returns
    for that key, using '' as the default. {} if props is empty or None.
  """
  singles = {}
  for key in (props or ()):
    singles[key] = util.get_first(props, key, '')
  return singles

Example #8

0

Show file

File: atom.py Project: davidp94/granary

def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None, reader=True):
    """Renders ActivityStreams 1 activities as an Atom feed.

    Each activity is first passed through _prepare_activity.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self". Defaults to host_url.
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element. Defaults to the granary GitHub URL.
      xml_base: the base URL, if any. Used in the top-level xml:base attribute.
      rels: rel links to include. dict mapping string rel value to string URL.
      reader: boolean, whether the output will be rendered in a feed reader.
        Currently just includes location if True, not otherwise.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = 'https://github.com/snarfed/granary'

    if request_url is None:
        request_url = host_url

    for activity in activities:
        _prepare_activity(activity, reader=reader)

    # the feed's updated value is the first activity's object's published time
    updated = ''
    if activities:
        first_object = util.get_first(activities[0], 'object', default={})
        updated = first_object.get('published', '')

    if actor is None:
        actor = {}

    template = jinja_env.get_template(FEED_TEMPLATE)
    return template.render(
        actor=Defaulter(actor),
        host_url=host_url,
        items=[Defaulter(activity) for activity in activities],
        mimetypes=mimetypes,
        rels=rels or {},
        request_url=request_url,
        title=title or 'User feed for ' + source.Source.actor_name(actor),
        updated=updated,
        VERBS_WITH_OBJECT=source.VERBS_WITH_OBJECT,
        xml_base=xml_base,
    )

Example #9

0

Show file

File: atom.py Project: snarfed/granary

def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None, reader=True):
  """Builds an Atom feed document from ActivityStreams 1 activities.

  Runs _prepare_activity on every activity before rendering.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      Falls back to host_url.
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element. Falls back to the granary GitHub URL.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.

  Returns:
    unicode string with Atom XML
  """
  # Query params can carry access tokens etc, so strip them from the host URL
  host_url = ('https://github.com/snarfed/granary' if not host_url
              else _remove_query_params(host_url))
  request_url = host_url if request_url is None else request_url

  for activity in activities:
    _prepare_activity(activity, reader=reader)

  updated = ''
  if activities:
    updated = util.get_first(
      activities[0], 'object', default={}).get('published', '')

  actor = {} if actor is None else actor

  template = jinja_env.get_template(FEED_TEMPLATE)
  context = {
    'actor': Defaulter(actor),
    'host_url': host_url,
    'items': [Defaulter(activity) for activity in activities],
    'mimetypes': mimetypes,
    'rels': rels or {},
    'request_url': request_url,
    'title': title or 'User feed for ' + source.Source.actor_name(actor),
    'updated': updated,
    'VERBS_WITH_OBJECT': source.VERBS_WITH_OBJECT,
    'xml_base': xml_base,
  }
  return template.render(**context)

Example #10

0

Show file

File: flickr.py Project: qiweiyu/granary

  def _create(self, obj, preview, include_link=False, ignore_formatting=False):
    """Creates or previews creating for the previous two methods.

    Dispatches on the object's type, as reported by source.object_type:

    * note or article with an image or video: uploads it, then adds person
      tags and location
    * comment, or any object with inReplyTo: comments on the base photo
    * like: favorites the base photo

    Anything else gets an error result.

    https://www.flickr.com/services/api/upload.api.html
    https://www.flickr.com/services/api/flickr.photos.comments.addComment.html
    https://www.flickr.com/services/api/flickr.favorites.add.html
    https://www.flickr.com/services/api/flickr.photos.people.add.html

    Args:
      obj: ActivityStreams object
      preview: boolean. If True, each branch returns a preview result before
        it reaches its upload or API method call.
      include_link: boolean, whether to append an '(Originally published
        at: ...)' line with the object's url to the description or comment
      ignore_formatting: boolean, passed through to self._content_for_create

    Returns:
      a CreationResult
    """
    # photo, comment, or like
    type = source.object_type(obj)
    logging.debug('publishing object type %s to Flickr', type)
    link_text = '(Originally published at: %s)' % obj.get('url')

    image_url = util.get_first(obj, 'image', {}).get('url')
    video_url = util.get_first(obj, 'stream', {}).get('url')
    content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                       strip_first_video_tag=bool(video_url))

    # photo or video upload
    if (video_url or image_url) and type in ('note', 'article'):
      name = obj.get('displayName')
      people = self._get_person_tags(obj)
      hashtags = [t.get('displayName') for t in obj.get('tags', [])
                  if t.get('objectType') == 'hashtag' and t.get('displayName')]
      lat = obj.get('location', {}).get('latitude')
      lng = obj.get('location', {}).get('longitude')

      # if mf2util.is_name_a_title says name is not an explicit title, use
      # the content as the title instead and wipe out the content
      if name and content and not mf2util.is_name_a_title(name, content):
        name = content
        content = None

      # add original post link
      if include_link:
        content = ((content + '\n\n') if content else '') + link_text

      if preview:
        preview_content = ''
        if name:
          preview_content += '<h4>%s</h4>' % name
        if content:
          preview_content += '<div>%s</div>' % content
        if hashtags:
          preview_content += '<div> %s</div>' % ' '.join('#' + t for t in hashtags)
        if people:
          preview_content += '<div> with %s</div>' % ', '.join(
            ('<a href="%s">%s</a>' % (
              p.get('url'), p.get('displayName') or 'User %s' % p.get('id'))
             for p in people))
        if lat and lng:
          preview_content += '<div> at <a href="https://maps.google.com/maps?q=%s,%s">%s, %s</a></div>' % (lat, lng, lat, lng)

        # video takes precedence over image when both are present
        if video_url:
          preview_content += ('<video controls src="%s"><a href="%s">this video'
                              '</a></video>' % (video_url, video_url))
        else:
          preview_content += '<img src="%s" />' % image_url

        return source.creation_result(content=preview_content, description='post')

      params = []
      if name:
        params.append(('title', name))
      if content:
        params.append(('description', content))
      if hashtags:
        # hashtags are comma separated; ones containing spaces are quoted
        params.append(
          ('tags', ','.join('"%s"' % t if ' ' in t else t for t in hashtags)))

      # NOTE(review): the object returned by util.urlopen is never explicitly
      # closed here - confirm whether self.upload takes care of that
      file = util.urlopen(video_url or image_url)
      resp = self.upload(params, file)
      photo_id = resp.get('id')
      resp.update({
        'type': 'post',
        'url': self.photo_url(self.path_alias() or self.user_id(), photo_id),
      })
      if video_url:
        resp['granary_message'] = \
          "Note that videos take time to process before they're visible."

      # add person tags
      for person_id in sorted(p.get('id') for p in people):
        self.call_api_method('flickr.photos.people.add', {
          'photo_id': photo_id,
          'user_id': person_id,
        })

      # add location
      if lat and lng:
        self.call_api_method('flickr.photos.geo.setLocation', {
            'photo_id': photo_id,
            'lat': lat,
            'lon': lng,
        })

      return source.creation_result(resp)

    # comments and likes both act on the base object that self.base_object
    # returns for obj
    base_obj = self.base_object(obj)
    base_id = base_obj.get('id')
    base_url = base_obj.get('url')

    # maybe a comment on a flickr photo?
    if type == 'comment' or obj.get('inReplyTo'):
      if not base_id:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a photo to comment on.',
          error_html='Could not find a photo to <a href="http://indiewebcamp.com/reply">comment on</a>. '
          'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
          'link to a Flickr photo or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')

      if include_link:
        content += '\n\n' + link_text
      if preview:
        return source.creation_result(
          content=content,
          description='comment on <a href="%s">this photo</a>.' % base_url)

      resp = self.call_api_method('flickr.photos.comments.addComment', {
        'photo_id': base_id,
        'comment_text': content,
      })
      resp = resp.get('comment', {})
      resp.update({
        'type': 'comment',
        'url': resp.get('permalink'),
      })
      return source.creation_result(resp)

    if type == 'like':
      if not base_id:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a photo to favorite.',
          error_html='Could not find a photo to <a href="http://indiewebcamp.com/like">favorite</a>. '
          'Check that your post has an <a href="http://indiewebcamp.com/like">like-of</a> '
          'link to a Flickr photo or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.')
      if preview:
        return source.creation_result(
          description='favorite <a href="%s">this photo</a>.' % base_url)

      # this method doesn't return any data
      self.call_api_method('flickr.favorites.add', {
        'photo_id': base_id,
      })
      # TODO should we canonicalize the base_url (e.g. removing trailing path
      # info like "/in/contacts/")
      return source.creation_result({
        'type': 'like',
        'url': '%s#favorited-by-%s' % (base_url, self.user_id()),
      })

    # unsupported type. note that this error result is *not* an abort.
    return source.creation_result(
      abort=False,
      error_plain='Cannot publish type=%s to Flickr.' % type,
      error_html='Cannot publish type=%s to Flickr.' % type)

Example #11

0

Show file

File: microformats2.py Project: snarfed/granary

def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts a single microformats2 JSON item to an ActivityStreams object.

  Supports h-entry, h-event, h-card, and other single item types. Does *not* yet
  support h-feed.

  The input is shallow copied first; nested values are shared with the
  original.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
      e.g. to determine authorship: https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object. {} if mf2 is empty or not a dict.

  Raises:
    NotImplementedError: if the item is a tag-of that also has other targets,
      e.g. in-reply-to
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    def fetch(url):
      return mf2py.parse(util.requests_get(url).text, url=url, img_with_alt=True)
    author = mf2util.find_author(
      {'items': [mf2]}, hentry=mf2, fetch_mf2_func=fetch if fetch_mf2 else None)
    if author:
      author = {
        'objectType': 'person',
        'url': author.get('url'),
        'displayName': author.get('name'),
        'image': [{'url': author.get('photo')}],
      }

  if not author:
    author = actor

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  elif 'tag-of' in props:
    # TODO: remove once this is in mf2util
    # https://github.com/kylewm/mf2util/issues/18
    mf2_type = 'tag'
  elif 'follow-of' in props: # ditto
    mf2_type = 'follow'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  for url in in_reply_tos:
    if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
      as_type = 'issue'

  def is_absolute(url):
    """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
    return urllib.parse.urlparse(url).netloc

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
    json_to_object(quote)
    for quote in mf2.get('children', []) + props.get('quotation-of', [])
    if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  for media_type in 'audio', 'video':
    attachments.extend({'objectType': media_type, 'stream': {'url': url}}
                       for url in get_string_urls(props.get(media_type, [])))

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'startTime': prop.get('start'),
    'endTime': prop.get('end'),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    # image is special cased below, to handle alt
    'stream': [{'url': url} for url in get_string_urls(props.get('video', []))],
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, basestring)
             else json_to_object(cat)
             for cat in props.get('category', [])],
    'attachments': attachments,
  }

  # images, including alt text
  photo_urls = set()
  obj['image'] = []
  for photo in props.get('photo', []) + props.get('featured', []):
    url = photo
    alt = None
    if isinstance(photo, dict):
      photo = photo.get('properties') or photo
      url = get_first(photo, 'value') or get_first(photo, 'url')
      alt = get_first(photo, 'alt')
    if url and url not in photo_urls and is_absolute(url):
      photo_urls.add(url)
      obj['image'].append({'url': url, 'displayName': alt})

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      # obj['location'] is always a dict here: json_to_object returns {} for
      # missing or non-dict input
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
            'latitude': float(lat),
            'longitude': float(lng),
          })
        except ValueError:
          # logging.warn is a deprecated alias that newer Pythons removed
          logging.warning(
            'Could not convert latitude/longitude (%s, %s) to decimal', lat, lng)

  if as_type == 'activity':
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'follow-of', 'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
          'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
    if as_verb == 'tag':
      obj['target'] = {'url': prop['tag-of']}
      if obj.get('object'):
        raise NotImplementedError(
          'Combined in-reply-to and tag-of is not yet supported.')
      # for tag activities, the tags themselves are the activity's object
      obj['object'] = obj.pop('tags')
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in in_reply_tos],
      'author': author,
    })

  return source.Source.postprocess_object(obj)

Example #12

0

Show file

File: microformats2.py Project: snarfed/granary

def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  Does not modify obj.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON. {} if obj is empty or not a dict.
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    # build a new list instead of extending in place, since in_reply_tos may
    # be the caller's own obj['inReplyTo'] list
    in_reply_tos = in_reply_tos + (objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # construct mf2!
  ret = {
    # NOTE(review): `or` binds tighter than the conditional expression, so
    # this is (AS_TO_MF2_TYPE.get(...) or [entry_class]) if entry_class is a
    # string, else list(entry_class). i.e. a non-string entry_class always
    # wins over AS_TO_MF2_TYPE. confirm that's intended.
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, basestring)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
          'value': xml.sax.saxutils.unescape(primary.get('content', '')),
          'html': render_content(primary, include_location=False,
                                 synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False, entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        name = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append({'value': url, 'alt': name} if name else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      name = tag.get('displayName')
      if name:
        ret['properties']['category'].append(name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for as_type, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == as_type:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= {'url', 'objectType'}
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == as_type]

  # latitude & longitude. the position string, if it parses, takes precedence
  # over the separate latitude and longitude fields.
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret

Example #13

0

Show file

 def image_url(obj):
     """Returns the 'url' of the image that util.get_first picks from obj, if any."""
     picked = util.get_first(obj, 'image', {})
     return picked.get('url')

Example #14

0

Show file

def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts a single microformats2 JSON item to an ActivityStreams object.

  Supports h-entry, h-event, h-card, and other single item types. Does *not* yet
  support h-feed.

  Nested items (author, location, comments, categories, quotations, and
  activity targets) are converted by calling this function recursively.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
      e.g. to determine authorship: https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object. Empty dict if mf2 is empty or not a dict;
    otherwise whatever source.Source.postprocess_object() returns for the
    converted object.

  Raises:
    NotImplementedError: if the item converts to a 'tag' activity but also has
      other target objects (e.g. in-reply-to).
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  # shallow copy so that the setdefault() below doesn't add a 'properties' key
  # to the caller's dict. note that the nested values are still shared.
  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  # presumably maps each property name to its first value - confirm against
  # first_props(). props keeps the full lists; prop is used for single values.
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    author = mf2util.find_author({'items': [mf2]}, hentry=mf2,
                                 fetch_mf2_func=util.fetch_mf2 if fetch_mf2 else None)
    if author:
      author = {
        'objectType': 'person',
        'url': author.get('url'),
        'displayName': author.get('name'),
        'image': [{'url': author.get('photo')}],
      }

  # fall back to the actor passed in by the caller
  if not author:
    author = actor

  # determine the mf2 post type, which maps to the AS objectType and verb below
  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  elif 'tag-of' in props:
    # TODO: remove once this is in mf2util
    # https://github.com/kylewm/mf2util/issues/18
    mf2_type = 'tag'
  elif 'follow-of' in props: # ditto
    mf2_type = 'follow'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  # an rsvp property overrides whatever verb the post type mapped to
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  for url in in_reply_tos:
    if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
      as_type = 'issue'

  def is_absolute(url):
    """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
    # returns the netloc string itself, so the result is truthy/falsy rather
    # than a strict boolean
    return urllib.parse.urlparse(url).netloc

  # falsy (None or empty) when the item has no url property
  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  # only h-cite dicts among the children and quotation-of values are converted
  attachments = [
    json_to_object(quote)
    for quote in mf2.get('children', []) + props.get('quotation-of', [])
    if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  #
  # the duration mf2 property is still emerging. examples in the wild use both
  # integer seconds and ISO 8601 durations.
  # https://indieweb.org/duration
  # https://en.wikipedia.org/wiki/ISO_8601#Durations
  duration = prop.get('duration') or prop.get('length')
  if duration:
    if util.is_int(duration):
      duration = int(duration)
    else:
      parsed = util.parse_iso8601_duration(duration)
      if parsed:
        duration = int(parsed.total_seconds())
      else:
        logging.debug('Unknown format for length or duration %r', duration)
        duration = None


  # every audio and video URL becomes an attachment; they all share the same
  # duration and size values. stream ends up as the first stream of the last
  # type that had any, so video takes precedence over audio.
  stream = None
  bytes = size_to_bytes(prop.get('size'))
  for type in 'audio', 'video':
    atts = [{
      'objectType': type,
      'stream': {
        'url': url,
        # integer seconds: http://activitystrea.ms/specs/json/1.0/#media-link
        'duration': duration,
        # file size in bytes. nonstandard, not in AS1 or AS2
        'size': bytes,
      },
    } for url in get_string_urls(props.get(type, []))]
    attachments.extend(atts)
    if atts:
      stream = atts[0]['stream']

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'startTime': prop.get('start'),
    'endTime': prop.get('end'),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    # image is special cased below, to handle alt
    'stream': [stream],
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, str)
             else json_to_object(cat)
             for cat in props.get('category', [])],
    'attachments': attachments,
  }

  # images, including alt text
  # photo and featured values may be plain URL strings or dicts; dicts may be
  # nested mf2 items (with 'properties') or flat value/url/alt dicts.
  # duplicate and non-absolute URLs are dropped.
  photo_urls = set()
  obj['image'] = []
  for photo in props.get('photo', []) + props.get('featured', []):
    url = photo
    alt = None
    if isinstance(photo, dict):
      photo = photo.get('properties') or photo
      url = get_first(photo, 'value') or get_first(photo, 'url')
      alt = get_first(photo, 'alt')
    if url and url not in photo_urls and is_absolute(url):
      photo_urls.add(url)
      obj['image'].append({'url': url, 'displayName': alt})

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      # NOTE(review): assumes obj['location'] is a dict here, ie that the
      # recursive json_to_object() call above returned one - confirm against
      # postprocess_object()
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
            'latitude': float(lat),
            'longitude': float(lng),
          })
        except ValueError:
          logging.debug(
            'Could not convert latitude/longitude (%s, %s) to decimal', lat, lng)

  if as_type == 'activity':
    # collect the activity's target(s) from all of the properties that can
    # hold them. dict targets are converted recursively; anything else is
    # treated as a URL.
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'follow-of', 'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
          'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    # a single target is unwrapped; zero or multiple targets stay a list
    obj.update({
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
    if as_verb == 'tag':
      # for tag activities, the tagged post is the target and the tags
      # themselves become the object
      obj['target'] = {'url': prop['tag-of']}
      if obj.get('object'):
        raise NotImplementedError(
          'Combined in-reply-to and tag-of is not yet supported.')
      obj['object'] = obj.pop('tags')
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in in_reply_tos],
      'author': author,
    })

  return source.Source.postprocess_object(obj)

Example #15

0

Show file

def object_to_json(obj,
                   trim_nulls=True,
                   entry_class='h-entry',
                   default_object_type=None,
                   synthesize_content=True):
    """Converts an ActivityStreams object to microformats2 JSON.

    Nested objects (author, location, comments, attachments, tags, and
    like/repost targets) are converted by calling this function recursively.
    obj's inReplyTo list is copied before it's extended, so the caller's list
    is left untouched.

    Args:
      obj: dict, a decoded JSON ActivityStreams object
      trim_nulls: boolean, whether to remove elements with null or empty values
      entry_class: string, the mf2 class that entries should be given (e.g.
        'h-cite' when parsing a reference to a foreign entry). defaults to
        'h-entry'
      default_object_type: string, the ActivityStreams objectType to use if one
        is not present. defaults to None
      synthesize_content: whether to generate synthetic content if the object
        doesn't have its own, e.g. 'likes this.' or 'shared this.'

    Returns:
      dict, decoded microformats2 JSON. Empty dict if obj is empty.
    """
    if not obj:
        return {}

    obj_type = source.object_type(obj) or default_object_type
    # if the activity type is a post, then it's really just a conduit
    # for the object. for other verbs, the activity itself is the
    # interesting thing
    if obj_type == 'post':
        primary = obj.get('object', {})
        obj_type = source.object_type(primary) or default_object_type
    else:
        primary = obj

    # TODO: extract snippet
    name = primary.get('displayName', primary.get('title'))
    summary = primary.get('summary')
    author = obj.get('author', obj.get('actor', {}))

    # copy the list! it's extended below for RSVPs and reactions, and without
    # the copy it may be the very list stored in obj['inReplyTo'] (or
    # obj['context']['inReplyTo']), which would silently modify the caller's
    # object and grow it on every call.
    in_reply_tos = list(
        obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', [])))
    is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
    if (is_rsvp or obj_type == 'react') and obj.get('object'):
        # RSVPs and reactions are rendered as replies to their target object(s)
        objs = obj['object']
        in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

    # TODO: more tags. most will be p-category?
    ret = {
        'type':
        (['h-card'] if obj_type == 'person' else
         ['h-card', 'p-location'] if obj_type == 'place' else [entry_class]),
        'properties': {
            'uid': [obj.get('id', '')],
            'name': [name],
            'summary': [summary],
            'url': (list(object_urls(obj) or object_urls(primary)) +
                    obj.get('upstreamDuplicates', [])),
            'photo': [
                image.get('url')
                for image in (util.get_list(obj, 'image')
                              or util.get_list(primary, 'image'))
            ],
            'video': [obj.get('stream', primary.get('stream', {})).get('url')],
            'published': [obj.get('published', primary.get('published', ''))],
            'updated': [obj.get('updated', primary.get('updated', ''))],
            'content': [{
                'value':
                xml.sax.saxutils.unescape(primary.get('content', '')),
                'html':
                render_content(primary,
                               include_location=False,
                               synthesize_content=synthesize_content),
            }],
            'in-reply-to':
            util.trim_nulls([o.get('url') for o in in_reply_tos]),
            'author': [
                object_to_json(author,
                               trim_nulls=False,
                               default_object_type='person')
            ],
            'location': [
                object_to_json(primary.get('location', {}),
                               trim_nulls=False,
                               default_object_type='place')
            ],
            'latitude':
            primary.get('latitude'),
            'longitude':
            primary.get('longitude'),
            'comment': [
                object_to_json(c, trim_nulls=False, entry_class='h-cite')
                for c in obj.get('replies', {}).get('items', [])
            ],
        },
        # only attached notes and articles are rendered as children
        'children': [
            object_to_json(c, trim_nulls=False, entry_class='h-cite')
            for c in primary.get('attachments', [])
            if c.get('objectType') in ('note', 'article')
        ],
    }

    # hashtags and person tags. fall back to the inner object's tags if the
    # outer object has none. tags of any other type are skipped.
    tags = obj.get('tags', []) or util.get_first(obj, 'object', {}).get(
        'tags', [])
    ret['properties']['category'] = []
    for tag in tags:
        if tag.get('objectType') == 'person':
            cls = 'u-category h-card'
        elif tag.get('objectType') == 'hashtag':
            cls = 'u-category'
        else:
            continue
        ret['properties']['category'].append(
            object_to_json(tag, entry_class=cls))

    # rsvp
    if is_rsvp:
        ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
    elif obj_type == 'invite':
        invitee = object_to_json(obj.get('object'),
                                 trim_nulls=False,
                                 default_object_type='person')
        ret['properties']['invitee'] = [invitee]

    # like and repost mentions
    for verb, prop in ('like', 'like'), ('share', 'repost'):
        if obj_type == verb:
            # The ActivityStreams spec says the object property should always be a
            # single object, but it's useful to let it be a list, e.g. when a like has
            # multiple targets, e.g. a like of a post with original post URLs in it,
            # which brid.gy does.
            objs = util.get_list(obj, 'object')
            ret['properties'][prop + '-of'] = [
                # flatten contexts that are just a url
                o['url']
                if 'url' in o and set(o.keys()) <= {'url', 'objectType'}
                else object_to_json(o, trim_nulls=False, entry_class='h-cite')
                for o in objs
            ]
        else:
            # received likes and reposts
            ret['properties'][prop] = [
                object_to_json(t, trim_nulls=False, entry_class='h-cite')
                for t in tags if source.object_type(t) == verb
            ]

    if trim_nulls:
        ret = util.trim_nulls(ret)
    return ret

Example #16

0

Show file

File: atom.py Project: stan-alam/granary

def _prepare_activity(a, reader=True):
    """Preprocesses an activity to prepare it to be rendered as Atom.

    Modifies a in place:

    * sets a['title'] to escaped plain text, generating one if it's missing
    * sets rendered_content and rendered_children on a['object'] (which is
      created if necessary)
    * replaces each attachment's stream with its first stream value
    * normalizes a['object']'s published and updated timestamps

    Args:
      a: ActivityStreams 1 activity dict
      reader: boolean, whether the output will be rendered in a feed reader.
        Currently just includes location if True, not otherwise.
    """
    # for posts (and untyped activities), the inner object is what gets
    # rendered; for other verbs, the activity itself is
    act_type = source.object_type(a)
    if not act_type or act_type == 'post':
        primary = a.get('object', {})
    else:
        primary = a
    obj = a.setdefault('object', {})

    # Render content as HTML; escape &s
    obj['rendered_content'] = _encode_ampersands(
        microformats2.render_content(primary,
                                     include_location=reader,
                                     render_attachments=True))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
        a['title'] = util.ellipsize(
            _encode_ampersands(
                a.get('displayName') or a.get('content') or obj.get('title')
                or obj.get('displayName') or obj.get('content') or 'Untitled'))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    a['title'] = xml.sax.saxutils.escape(
        BeautifulSoup(a['title']).get_text(''))

    children = []
    image_urls_seen = set()
    image_atts = []

    # normalize attachments, render attached notes/articles
    attachments = a.get('attachments') or obj.get('attachments') or []
    for att in attachments:
        att['stream'] = util.get_first(att, 'stream')
        type = att.get('objectType')

        # image attachments are collected and rendered below, after everything
        # else, so that we can skip the ones that are already displayed
        if type == 'image':
            image_atts.append(util.get_first(att, 'image'))
            continue

        image_urls_seen |= set(util.get_urls(att, 'image'))
        if type in ('note', 'article'):
            html = microformats2.render_content(att,
                                                include_location=reader,
                                                render_attachments=True)
            author = att.get('author')
            if author:
                name = microformats2.maybe_linked_name(
                    microformats2.object_to_json(author).get('properties', []))
                html = '%s: %s' % (name.strip(), html)
            children.append(html)

    # render image(s) that we haven't already seen
    for image in image_atts + util.get_list(obj, 'image'):
        if not image:
            continue
        url = image.get('url')
        # nothing to render without a URL, or if the image is already shown.
        # check this first so that we never hand None to urlparse or build a
        # regexp that we won't use.
        if not url or url in image_urls_seen:
            continue

        # also skip the image if the rendered content already embeds it,
        # whether as an absolute, scheme-relative, or host-relative src
        parsed = urllib.parse.urlparse(url)
        rest = urllib.parse.urlunparse(('', '') + parsed[2:])
        img_src_re = re.compile(
            r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
            (re.escape(parsed.netloc), re.escape(rest)))
        if not img_src_re.search(obj['rendered_content']):
            children.append(microformats2.img(url))
            image_urls_seen.add(url)

    obj['rendered_children'] = [
        _encode_ampersands(child) for child in children
    ]

    # make sure published and updated are strict RFC 3339 timestamps
    for prop in 'published', 'updated':
        val = obj.get(prop)
        if val:
            obj[prop] = util.maybe_iso8601_to_rfc3339(val)
            # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
            # time zone unaware. They must have either an offset or the Z suffix.
            # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
            if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
                obj[prop] += 'Z'

Example #17

0

Show file

def activities_to_jsonfeed(activities, actor=None, title=None, feed_url=None,
                           home_page_url=None):
  """Builds a JSON Feed from ActivityStreams activities.

  Activities whose object is a person are left out of the feed.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    feed_url: the URL of the JSON Feed, if any. Included in the feed_url field.
    home_page_url: string, the home page URL

  Returns:
    dict, JSON Feed data, ready to be JSON-encoded

  Raises:
    TypeError: if activities isn't iterable, or is a dict or string
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  if isinstance(activities, (dict, basestring)):
    raise TypeError('activities may not be a dict or string')

  def first_image_url(thing):
    return util.get_first(thing, 'image', {}).get('url')

  def display_name(thing):
    return thing.get('displayName') or thing.get('username')

  actor = actor or {}

  feed_items = []
  for activity in activities:
    # use the inner object if there is one, otherwise the activity itself
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue

    author = obj.get('author', {})
    content = obj.get('content')
    obj_title = obj.get('title') or obj.get('displayName')
    # only emit a title when it's more than just a repeat of the content
    shown_title = obj_title if mf2util.is_name_a_title(obj_title, content) else None

    feed_item = {
      'id': obj.get('id') or obj.get('url'),
      'url': obj.get('url'),
      'image': first_image_url(obj),
      'title': shown_title,
      'summary': obj.get('summary'),
      'content_html': content,
      'date_published': obj.get('published'),
      'date_modified': obj.get('updated'),
      'author': {
        'name': display_name(author),
        'url': author.get('url'),
        'avatar': first_image_url(author),
      },
      'attachments': [],
    }

    for att in obj.get('attachments', []):
      # the URL comes from the first stream, else the first image, else the
      # attachment itself
      media = util.get_first(att, 'stream') or util.get_first(att, 'image') or att
      url = media.get('url')
      mime = mimetypes.guess_type(url)[0] if url else None

      # include it if either the object type or the guessed MIME type's top
      # level type is one that we support
      type_matches = att.get('objectType') in ATTACHMENT_TYPES
      if type_matches or (mime and mime.split('/')[0] in ATTACHMENT_TYPES):
        feed_item['attachments'].append({
          'url': url or '',
          'mime_type': mime,
          'title': att.get('title'),
        })

    # content_text is excluded from null trimming below, so items without
    # HTML content keep an empty content_text field
    if not content:
      feed_item['content_text'] = ''
    feed_items.append(feed_item)

  feed = {
    'version': 'https://jsonfeed.org/version/1',
    'title': title or display_name(actor) or 'JSON Feed',
    'feed_url': feed_url,
    'home_page_url': home_page_url or actor.get('url'),
    'author': {
      'name': display_name(actor),
      'url': actor.get('url'),
      'avatar': first_image_url(actor),
    },
    'items': feed_items,
  }
  return util.trim_nulls(feed, ignore='content_text')

Example #18

0

Show file

File: rss.py Project: whyouare111/granary

def from_activities(activities,
                    actor=None,
                    title=None,
                    feed_url=None,
                    home_page_url=None,
                    hfeed=None):
    """Renders ActivityStreams activities as an RSS 2.0 feed.

    Activities whose object is a person are left out. At most one enclosure
    is emitted per item; if any item has one, the podcast extension is loaded
    and iTunes metadata is added to the feed.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: string, the URL for this RSS feed
      home_page_url: string, the home page URL
      hfeed: dict, parsed mf2 h-feed, if available

    Returns:
      unicode string with RSS 2.0 XML

    Raises:
      TypeError: if activities isn't iterable, or is a dict or string
    """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, str)):
        raise TypeError('activities may not be a dict or string')

    feed = FeedGenerator()
    feed.id(feed_url)
    assert feed_url
    feed.link(href=feed_url, rel='self')
    if home_page_url:
        feed.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    feed.language('en')
    feed.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}

    # feed image: prefer the h-feed's photo, fall back to the actor's image
    feed_image = util.get_url(hfeed.get('properties', {}), 'photo')
    if not feed_image:
        feed_image = util.get_url(actor, 'image')
    if feed_image:
        feed.image(feed_image)

    # description and title are both required
    props = hfeed.get('properties') or {}
    feed_content = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    description = feed_content or summary or '-'
    feed.description(description)
    feed.title(title or util.ellipsize(description))

    newest = None
    feed_has_enclosure = False
    for activity in activities:
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue

        entry = feed.add_entry()
        entry_url = obj.get('url')
        entry_id = obj.get('id') or entry_url
        entry.id(entry_id)
        entry.link(href=entry_url)
        entry.guid(entry_url, permalink=True)

        # title is required. fall back to the display name, then to the
        # ellipsized content, and strip HTML tags.
        raw_title = (obj.get('title') or obj.get('displayName')
                     or util.ellipsize(obj.get('content', '-')))
        entry.title(util.parse_html(raw_title).get_text('').strip())

        body = microformats2.render_content(obj,
                                            include_location=True,
                                            render_attachments=True,
                                            render_image=True)
        if not body:
            body = obj.get('summary')
        if body:
            entry.content(body, type='CDATA')

        # named tags become categories, except for likes, reactions, shares,
        # articles, people, and mentions
        categories = []
        for tag in obj.get('tags', []):
            if not tag.get('displayName'):
                continue
            if tag.get('verb') in ('like', 'react', 'share'):
                continue
            if tag.get('objectType') in ('article', 'person', 'mention'):
                continue
            categories.append({'term': tag['displayName']})
        entry.category(categories)

        as_author = obj.get('author', {})
        author = {
            'name': as_author.get('displayName') or as_author.get('username'),
            'uri': as_author.get('url'),
            'email': as_author.get('email') or '-',
        }
        entry.author(author)

        published = obj.get('published') or obj.get('updated')
        if published and isinstance(published, str):
            try:
                when = mf2util.parse_datetime(published)
                # a bare date is treated as midnight of that day
                if not isinstance(when, datetime):
                    when = datetime.combine(when, time.min)
                # naive datetimes are given the UTC time zone
                if not when.tzinfo:
                    when = when.replace(tzinfo=util.UTC)
                entry.published(when)
                if not newest or when > newest:
                    newest = when
            except ValueError:  # bad datetime string
                pass

        entry_has_enclosure = False
        for att in obj.get('attachments', []):
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            enclosure_url = stream.get('url') or ''
            mime = mimetypes.guess_type(enclosure_url)[0] or ''
            is_enclosure = (att.get('objectType') in ENCLOSURE_TYPES
                            or (mime and mime.split('/')[0] in ENCLOSURE_TYPES))
            if not is_enclosure:
                continue

            # only the first enclosure per item is emitted
            if entry_has_enclosure:
                logging.info(
                    'Warning: item %s already has an RSS enclosure, skipping additional enclosure %s',
                    entry_id, enclosure_url)
                continue

            entry_has_enclosure = True
            feed_has_enclosure = True
            entry.enclosure(url=enclosure_url,
                            type=mime,
                            length=str(stream.get('size', '')))
            entry.load_extension('podcast')
            duration = stream.get('duration')
            if duration:
                entry.podcast.itunes_duration(duration)

    if feed_has_enclosure:
        feed.load_extension('podcast')
        feed.podcast.itunes_author(
            actor.get('displayName') or actor.get('username'))
        if summary:
            feed.podcast.itunes_summary(summary)
        feed.podcast.itunes_explicit('no')
        feed.podcast.itunes_block(False)
        # author and categories here are the ones from the last item rendered
        last_author_name = author.get('name')
        if last_author_name:
            feed.podcast.itunes_author(last_author_name)
        if feed_image:
            feed.podcast.itunes_image(feed_image)
        feed.podcast.itunes_category(categories)

    if newest:
        feed.lastBuildDate(newest)

    return feed.rss_str(pretty=True).decode('utf-8')

Example #19

0

Show file

  def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
              ignore_formatting=False):
    """Creates or previews creating a tweet, reply tweet, retweet, or favorite.

    https://dev.twitter.com/docs/api/1.1/post/statuses/update
    https://dev.twitter.com/docs/api/1.1/post/statuses/retweet/:id
    https://dev.twitter.com/docs/api/1.1/post/favorites/create

    The action is chosen from obj's objectType and verb: like activities
    become favorites, share activities become retweets, and notes, articles,
    and replies become tweets. RSVPs, invites, and anything else are rejected
    with an error result.

    Args:
      obj: ActivityStreams object
      preview: boolean. must be passed explicitly as True or False; the None
        default fails the assertion below.
      include_link: string, passed through to _truncate()
      ignore_formatting: boolean, passed through to _content_for_create()

    Returns:
      a CreationResult

      If preview is True, the content will be a unicode string HTML
      snippet. If False, it will be a dict with 'id' and 'url' keys
      for the newly created Twitter object.

    Raises:
      ValueError: if obj is a reply and the author's username can't be
        extracted from the path of the in-reply-to URL
    """
    assert preview in (False, True)
    type = obj.get('objectType')
    verb = obj.get('verb')

    # the base object is the tweet that this post targets, e.g. replies to,
    # likes, or retweets
    base_obj = self.base_object(obj)
    base_id = base_obj.get('id')
    base_url = base_obj.get('url')

    is_reply = type == 'comment' or 'inReplyTo' in obj
    image_urls = [image.get('url') for image in util.get_list(obj, 'image')]
    video_url = util.get_first(obj, 'stream', {}).get('url')
    # media is only attached to tweets and replies, not to likes or retweets
    has_media = (image_urls or video_url) and (type in ('note', 'article') or is_reply)
    lat = obj.get('location', {}).get('latitude')
    lng = obj.get('location', {}).get('longitude')

    # prefer displayName over content for articles
    # NOTE(review): type and base_url were already computed above. this
    # recomputation looks redundant unless base_object() has side effects -
    # confirm.
    type = obj.get('objectType')
    base_url = self.base_object(obj).get('url')
    prefer_content = type == 'note' or (base_url and (type == 'comment'
                                                      or obj.get('inReplyTo')))
    content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                       prefer_name=not prefer_content,
                                       strip_first_video_tag=bool(video_url))
    # no text content: activities fall back to their verb, media posts are
    # allowed to be empty, and anything else is an error
    if not content:
      if type == 'activity':
        content = verb
      elif has_media:
        content = ''
      else:
        return source.creation_result(
          abort=False,  # keep looking for things to publish,
          error_plain='No content text found.',
          error_html='No content text found.')

    if is_reply and base_url:
      # Twitter *used* to require replies to include an @-mention of the
      # original tweet's author
      # https://dev.twitter.com/docs/api/1.1/post/statuses/update#api-param-in_reply_to_status_id
      # ...but now we use the auto_populate_reply_metadata query param instead:
      # https://dev.twitter.com/overview/api/upcoming-changes-to-tweets

      # the embed URL in the preview can't start with mobile. or www., so just
      # hard-code it to twitter.com. index #1 is netloc.
      parsed = urlparse.urlparse(base_url)
      # the first path segment is taken to be the author's username
      parts = parsed.path.split('/')
      if len(parts) < 2 or not parts[1]:
        raise ValueError('Could not determine author of in-reply-to URL %s' % base_url)
      # strip a leading @-mention of that author from the content, since the
      # reply metadata is auto-populated (see above)
      reply_to_prefix = '@%s ' % parts[1].lower()
      if content.lower().startswith(reply_to_prefix):
        content = content[len(reply_to_prefix):]

      parsed = list(parsed)
      parsed[1] = self.DOMAIN
      base_url = urlparse.urlunparse(parsed)

    # need a base_url with the tweet id for the embed HTML below. do this
    # *after* checking the real base_url for in-reply-to author username.
    if base_id and not base_url:
      base_url = 'https://twitter.com/-/statuses/' + base_id

    if is_reply and not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to reply to.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/reply">reply to</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
        'link a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    # truncate and ellipsize content if it's over the character
    # count. URLs will be t.co-wrapped, so include that when counting.
    content = self._truncate(
      content, obj.get('url'), include_link, type)

    # linkify defaults to Twitter's link shortening behavior
    preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)

    # dispatch on the kind of post. every branch either returns a
    # CreationResult (previews and errors) or sets resp for the code at the
    # bottom.
    if type == 'activity' and verb == 'like':
      if not base_url:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a tweet to like.',
          error_html='Could not find a tweet to <a href="http://indiewebcamp.com/favorite">favorite</a>. '
          'Check that your post has a like-of link to a Twitter URL or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

      if preview:
        return source.creation_result(
          description='<span class="verb">favorite</span> <a href="%s">'
                      'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
      else:
        data = urllib.urlencode({'id': base_id})
        # the API response is discarded here, so resp has no id_str and its
        # url falls back to base_url at the bottom
        self.urlopen(API_POST_FAVORITE, data=data)
        resp = {'type': 'like'}

    elif type == 'activity' and verb == 'share':
      if not base_url:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a tweet to retweet.',
          error_html='Could not find a tweet to <a href="http://indiewebcamp.com/repost">retweet</a>. '
          'Check that your post has a repost-of link to a Twitter URL or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

      if preview:
        return source.creation_result(
          description='<span class="verb">retweet</span> <a href="%s">'
                      'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
      else:
        data = urllib.urlencode({'id': base_id})
        resp = self.urlopen(API_POST_RETWEET % base_id, data=data)
        resp['type'] = 'repost'

    elif type in ('note', 'article') or is_reply:  # a tweet
      content = unicode(content).encode('utf-8')
      data = {'status': content}

      if is_reply:
        description = \
          '<span class="verb">@-reply</span> to <a href="%s">this tweet</a>:\n%s' % (
            base_url, self.embed_post(base_obj))
        data.update({
          'in_reply_to_status_id': base_id,
          'auto_populate_reply_metadata': 'true',
        })
      else:
        description = '<span class="verb">tweet</span>:'

      # a video takes precedence over images; they're never both attached.
      # media is only uploaded when actually posting, not when previewing.
      if video_url:
        preview_content += ('<br /><br /><video controls src="%s"><a href="%s">'
                            'this video</a></video>' % (video_url, video_url))
        if not preview:
          ret = self.upload_video(video_url)
          # the upload methods return a CreationResult instead of media ids
          # in some cases (presumably errors - confirm); pass it through
          if isinstance(ret, source.CreationResult):
            return ret
          data['media_ids'] = ret

      elif image_urls:
        num_urls = len(image_urls)
        if num_urls > MAX_MEDIA:
          image_urls = image_urls[:MAX_MEDIA]
          logging.warning('Found %d photos! Only using the first %d: %r',
                          num_urls, MAX_MEDIA, image_urls)
        preview_content += '<br /><br />' + ' &nbsp; '.join(
          '<img src="%s" />' % url for url in image_urls)
        if not preview:
          ret = self.upload_images(image_urls)
          if isinstance(ret, source.CreationResult):
            return ret
          data['media_ids'] = ','.join(ret)

      if lat and lng:
        preview_content += (
          '<div>at <a href="https://maps.google.com/maps?q=%s,%s">'
          '%s, %s</a></div>' % (lat, lng, lat, lng))
        data['lat'] = lat
        data['long'] = lng

      if preview:
        return source.creation_result(content=preview_content, description=description)
      else:
        resp = self.urlopen(API_POST_TWEET, data=urllib.urlencode(data))
        resp['type'] = 'comment' if is_reply else 'post'

    elif (verb and verb.startswith('rsvp-')) or verb == 'invite':
      return source.creation_result(
        abort=True,
        error_plain='Cannot publish RSVPs to Twitter.',
        error_html='This looks like an <a href="http://indiewebcamp.com/rsvp">RSVP</a>. '
        'Publishing events or RSVPs to Twitter is not supported.')

    else:
      return source.creation_result(
        abort=False,
        error_plain='Cannot publish type=%s, verb=%s to Twitter' % (type, verb),
        error_html='Cannot publish type=%s, verb=%s to Twitter' % (type, verb))

    # only reached after actually posting. if the response identifies the new
    # tweet, use its id and URL; otherwise point at the base tweet.
    id_str = resp.get('id_str')
    if id_str:
      resp.update({'id': id_str, 'url': self.tweet_url(resp)})
    elif 'url' not in resp:
      resp['url'] = base_url

    return source.creation_result(resp)

Example #20

0

Show file

    def _create(self,
                obj,
                preview,
                include_link=source.OMIT_LINK,
                ignore_formatting=False):
        """Publishes, or previews publishing, an object to Flickr.

        Shared implementation behind the two preceding public methods. Three
        cases are handled, checked in this order:

        1. a note/article that has an image or video: uploaded as a new item,
           then person tags and location are attached with follow-up API calls
        2. a comment, or any object with inReplyTo: added as a photo comment
        3. a like: added as a favorite

        Anything else returns a non-aborting error result.

        https://www.flickr.com/services/api/upload.api.html
        https://www.flickr.com/services/api/flickr.photos.comments.addComment.html
        https://www.flickr.com/services/api/flickr.favorites.add.html
        https://www.flickr.com/services/api/flickr.photos.people.add.html

        Args:
          obj: ActivityStreams object
          preview: boolean. If true, return a preview of what would be
            published instead of uploading, commenting, or favoriting.
          include_link: string. The "(Originally published at: ...)" text is
            appended to the content only when this equals source.INCLUDE_LINK.
          ignore_formatting: boolean, passed through to _content_for_create()

        Return:
          a CreationResult
        """
        # photo, comment, or like
        type = source.object_type(obj)
        logging.debug('publishing object type %s to Flickr', type)
        link_text = '(Originally published at: %s)' % obj.get('url')

        # only the first image and the first stream (video) are considered
        image_url = util.get_first(obj, 'image', {}).get('url')
        video_url = util.get_first(obj, 'stream', {}).get('url')
        content = self._content_for_create(
            obj,
            ignore_formatting=ignore_formatting,
            strip_first_video_tag=bool(video_url))

        # case 1: upload a new photo or video
        if (video_url or image_url) and type in ('note', 'article'):
            name = obj.get('displayName')
            people = self._get_person_tags(obj)
            hashtags = [
                t.get('displayName') for t in obj.get('tags', [])
                if t.get('objectType') == 'hashtag' and t.get('displayName')
            ]
            lat = obj.get('location', {}).get('latitude')
            lng = obj.get('location', {}).get('longitude')

            # if name is not an explicit title (per mf2util.is_name_a_title),
            # use the content itself as the title and drop the separate
            # description
            if name and content and not mf2util.is_name_a_title(name, content):
                name = content
                content = None

            # add original post link
            if include_link == source.INCLUDE_LINK:
                content = ((content + '\n\n') if content else '') + link_text

            if preview:
                # build an HTML preview; nothing is uploaded in this branch
                preview_content = ''
                if name:
                    preview_content += '<h4>%s</h4>' % name
                if content:
                    preview_content += '<div>%s</div>' % content
                if hashtags:
                    preview_content += '<div> %s</div>' % ' '.join(
                        '#' + t for t in hashtags)
                if people:
                    preview_content += '<div> with %s</div>' % ', '.join(
                        ('<a href="%s">%s</a>' %
                         (p.get('url'), p.get('displayName')
                          or 'User %s' % p.get('id')) for p in people))
                if lat and lng:
                    preview_content += '<div> at <a href="https://maps.google.com/maps?q=%s,%s">%s, %s</a></div>' % (
                        lat, lng, lat, lng)

                # video takes precedence over image when both are present
                if video_url:
                    preview_content += (
                        '<video controls src="%s"><a href="%s">this video'
                        '</a></video>' % (video_url, video_url))
                else:
                    preview_content += '<img src="%s" />' % image_url

                return source.creation_result(content=preview_content,
                                              description='post')

            # upload parameters, as a list of (name, value) pairs
            params = []
            if name:
                params.append(('title', name))
            if content:
                params.append(('description', content.encode('utf-8')))
            if hashtags:
                # tags containing a space are wrapped in double quotes
                params.append(('tags', ','.join(
                    ('"%s"' % t if ' ' in t else t).encode('utf-8')
                    for t in hashtags)))

            # fetch the media and hand the open response to the uploader
            file = util.urlopen(video_url or image_url)
            try:
                resp = self.upload(params, file)
            except requests.exceptions.ConnectionError as e:
                # the size-limit message is read off the wrapped exception in
                # e.args[0]; any other connection error is re-raised
                if e.args[0].message.startswith(
                        'Request exceeds 10 MiB limit'):
                    msg = 'Sorry, photos and videos must be under 10MB.'
                    return source.creation_result(error_plain=msg,
                                                  error_html=msg)
                else:
                    raise

            photo_id = resp.get('id')
            resp.update({
                'type':
                'post',
                'url':
                self.photo_url(self.path_alias() or self.user_id(), photo_id),
            })
            if video_url:
                resp['granary_message'] = \
                  "Note that videos take time to process before they're visible."

            # add person tags, one API call per person, in sorted id order
            for person_id in sorted(p.get('id') for p in people):
                self.call_api_method('flickr.photos.people.add', {
                    'photo_id': photo_id,
                    'user_id': person_id,
                })

            # add location
            if lat and lng:
                self.call_api_method('flickr.photos.geo.setLocation', {
                    'photo_id': photo_id,
                    'lat': lat,
                    'lon': lng,
                })

            return source.creation_result(resp)

        # cases 2 and 3 both act on an existing photo, the "base object"
        base_obj = self.base_object(obj)
        base_id = base_obj.get('id')
        base_url = base_obj.get('url')

        # maybe a comment on a flickr photo?
        if type == 'comment' or obj.get('inReplyTo'):
            if not base_id:
                return source.creation_result(
                    abort=True,
                    error_plain='Could not find a photo to comment on.',
                    error_html=
                    'Could not find a photo to <a href="http://indiewebcamp.com/reply">comment on</a>. '
                    'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
                    'link to a Flickr photo or to an original post that publishes a '
                    '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.'
                )

            if include_link == source.INCLUDE_LINK:
                content += '\n\n' + link_text
            if preview:
                return source.creation_result(
                    content=content,
                    description='comment on <a href="%s">this photo</a>.' %
                    base_url)

            resp = self.call_api_method(
                'flickr.photos.comments.addComment', {
                    'photo_id': base_id,
                    'comment_text': content.encode('utf-8'),
                })
            # only the nested 'comment' object is returned to the caller
            resp = resp.get('comment', {})
            resp.update({
                'type': 'comment',
                'url': resp.get('permalink'),
            })
            return source.creation_result(resp)

        # maybe a favorite of a flickr photo?
        if type == 'like':
            if not base_id:
                return source.creation_result(
                    abort=True,
                    error_plain='Could not find a photo to favorite.',
                    error_html=
                    'Could not find a photo to <a href="http://indiewebcamp.com/like">favorite</a>. '
                    'Check that your post has an <a href="http://indiewebcamp.com/like">like-of</a> '
                    'link to a Flickr photo or to an original post that publishes a '
                    '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Flickr.'
                )
            if preview:
                return source.creation_result(
                    description='favorite <a href="%s">this photo</a>.' %
                    base_url)

            # this method doesn't return any data
            self.call_api_method('flickr.favorites.add', {
                'photo_id': base_id,
            })
            # TODO should we canonicalize the base_url (e.g. removing trailing path
            # info like "/in/contacts/")
            # the result URL is synthesized from the photo URL and our user id
            return source.creation_result({
                'type':
                'like',
                'url':
                '%s#favorited-by-%s' % (base_url, self.user_id()),
            })

        # unsupported type: report the error without aborting
        return source.creation_result(
            abort=False,
            error_plain='Cannot publish type=%s to Flickr.' % type,
            error_html='Cannot publish type=%s to Flickr.' % type)

Example #21

0

Show file

File: atom.py Project: snarfed/granary

def _prepare_activity(a, reader=True):
  """Preprocesses an activity to prepare it to be rendered as Atom.

  Modifies a in place. Specifically, it sets or normalizes:

  * rendered_content and rendered_children on the activity's object
  * title on the activity (always set, HTML tags stripped, XML escaped)
  * image on each actor, and stream/image on each attachment, collapsed to
    their first value
  * published and updated on the object, converted to RFC 3339 with a
    trailing Z added when no time zone offset is present

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.
  """
  act_type = source.object_type(a)
  obj = util.get_first(a, 'object', default={})
  # for plain posts (or untyped activities) the object carries the content;
  # for other verbs the activity itself does
  primary = obj if (not act_type or act_type == 'post') else a

  # Render content as HTML; escape &s
  obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
    primary, include_location=reader, render_attachments=True))

  # Make sure every activity has the title field, since Atom <entry> requires
  # the title element.
  if not a.get('title'):
    a['title'] = util.ellipsize(_encode_ampersands(
      a.get('displayName') or a.get('content') or obj.get('title') or
      obj.get('displayName') or obj.get('content') or 'Untitled'))

  # strip HTML tags. the Atom spec says title is plain text:
  # http://atomenabled.org/developers/syndication/#requiredEntryElements
  a['title'] = xml.sax.saxutils.escape(BeautifulSoup(a['title']).get_text(''))

  children = []
  image_urls_seen = set()
  image_atts = []

  # normalize actor images
  for elem in a, obj:
    actor = elem.get('actor')
    if actor:
      actor['image'] = util.get_first(actor, 'image')

  # normalize attachments, render attached notes/articles
  attachments = a.get('attachments') or obj.get('attachments') or []
  for att in attachments:
    att['stream'] = util.get_first(att, 'stream')
    att_type = att.get('objectType')

    if att_type == 'image':
      # image attachments are rendered below, after de-duping
      att['image'] = util.get_first(att, 'image')
      image_atts.append(att['image'])
      continue

    image_urls_seen |= set(util.get_urls(att, 'image'))
    if att_type in ('note', 'article'):
      html = microformats2.render_content(att, include_location=reader,
                                          render_attachments=True)
      author = att.get('author')
      if author:
        name = microformats2.maybe_linked_name(
          microformats2.object_to_json(author).get('properties') or {})
        html = '%s: %s' % (name.strip(), html)
      children.append(html)

  # render image(s) that we haven't already seen
  rendered_content = obj['rendered_content']
  for image in image_atts + util.get_list(obj, 'image'):
    if not image:
      continue
    url = image.get('url')
    # skip images with no URL, or ones already rendered, *before* parsing the
    # URL and building a regex from it
    if not url or url in image_urls_seen:
      continue

    # match an existing <img src=...> for this URL in the rendered content,
    # whether it's absolute, protocol-relative, or host-relative
    parsed = urllib.parse.urlparse(url)
    rest = urllib.parse.urlunparse(('', '') + parsed[2:])
    img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
                            (re.escape(parsed.netloc), re.escape(rest)))
    if not img_src_re.search(rendered_content):
      children.append(microformats2.img(url))
      image_urls_seen.add(url)

  obj['rendered_children'] = [_encode_ampersands(child) for child in children]

  # make sure published and updated are strict RFC 3339 timestamps
  for prop in 'published', 'updated':
    val = obj.get(prop)
    if val:
      obj[prop] = util.maybe_iso8601_to_rfc3339(val)
      # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
      # time zone unaware. They must have either an offset or the Z suffix.
      # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
      if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
        obj[prop] += 'Z'

Example #22

0

Show file

File: webmention.py Project: omphalos/bridgy-fed

    def try_activitypub(self):
        """Tries to deliver the webmention source to its target via ActivityPub.

        Fetches the page named by the required 'source' request parameter,
        converts its first h-entry to an ActivityStreams object, fetches the
        reply/like/repost target as AS2, locates an inbox URL, and POSTs the
        signed activity to that inbox. The inbox's status code and body are
        copied into this handler's response.

        Hands off to self.send_salmon() instead when the target fetch fails
        with a 2xx HTML response, or when no inbox can be found.

        Returns:
          the result of self.send_salmon() on the fallback paths, otherwise
          None
        """
        source = util.get_required_param(self, 'source')

        # fetch source page, convert to ActivityStreams
        source_resp = common.requests_get(source)
        # prefer the final URL reported by the response over the raw param
        source_url = source_resp.url or source
        source_mf2 = mf2py.parse(source_resp.text, url=source_url)
        # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

        entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
        logging.info('First entry: %s', json.dumps(entry, indent=2))
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [source_url]

        source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

        # fetch target page as AS object. target is first in-reply-to, like-of,
        # or repost-of, *not* target query param.)
        target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                              util.get_first(source_obj, 'object'))
        if not target:
            # NOTE(review): presumably common.error() aborts the request;
            # otherwise execution continues with an empty target. confirm.
            common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                         'found in %s' % source_url)

        try:
            target_resp = common.get_as2(target)
        except (requests.HTTPError, exc.HTTPBadGateway) as e:
            # a successful fetch that returned HTML rather than AS2: record the
            # attempt and fall back to send_salmon(). anything else propagates.
            if (e.response.status_code // 100 == 2 and
                common.content_type(e.response).startswith('text/html')):
                self.resp = Response.get_or_create(
                    source=source_url, target=e.response.url or target,
                    direction='out', source_mf2=json.dumps(source_mf2))
                return self.send_salmon(source_obj, target_resp=e.response)
            raise

        target_url = target_resp.url or target
        self.resp = Response.get_or_create(
            source=source_url, target=target_url, direction='out',
            protocol='activitypub', source_mf2=json.dumps(source_mf2))

        # find actor's inbox
        target_obj = target_resp.json()
        inbox_url = target_obj.get('inbox')

        if not inbox_url:
            # TODO: test actor/attributedTo and not, with/without inbox
            actor = target_obj.get('actor') or target_obj.get('attributedTo')
            if isinstance(actor, dict):
                # embedded actor object: take its inbox directly, and keep its
                # URL in case we still need to fetch it below
                inbox_url = actor.get('inbox')
                actor = actor.get('url')
            if not inbox_url and not actor:
                common.error(self, 'Target object has no actor or attributedTo URL')

        if not inbox_url:
            # fetch actor as AS object
            actor = common.get_as2(actor).json()
            inbox_url = actor.get('inbox')

        if not inbox_url:
            # TODO: probably need a way to save errors like this so that we can
            # return them if ostatus fails too.
            # common.error(self, 'Target actor has no inbox')
            return self.send_salmon(source_obj, target_resp=target_resp)

        # convert to AS2
        # the signing key is looked up (or created) per source domain
        source_domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(source_domain)
        source_activity = common.postprocess_as2(
            as2.from_as1(source_obj), target=target_obj, key=key)

        # a stored response already marked complete is re-sent as an Update
        if self.resp.status == 'complete':
            source_activity['type'] = 'Update'

        # prepare HTTP Signature (required by Mastodon)
        # https://w3c.github.io/activitypub/#authorization-lds
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07
        # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
        acct = 'acct:%s@%s' % (source_domain, source_domain)
        auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                                 algorithm='rsa-sha256')

        # deliver source object to target actor's inbox.
        headers = {
            'Content-Type': common.CONTENT_TYPE_AS2,
            # required for HTTP Signature
            # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
            'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        }
        # the inbox may be a relative URL; resolve it against the target URL
        inbox_url = urlparse.urljoin(target_url, inbox_url)
        resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                                    headers=headers)
        # mirror the inbox's status code and body back to our caller
        self.response.status_int = resp.status_code
        if resp.status_code == 202:
            self.response.write('202 response! If this is Mastodon 1.x, their '
                                'signature verification probably failed. :(\n')
        self.response.write(resp.text)

Example #23

0

Show file

def render_content(obj, include_location=True, synthesize_content=True):
  """Renders the content of an ActivityStreams object.

  Includes tags, mentions, and non-note/article attachments. (Note/article
  attachments are converted to mf2 children in object_to_json and then rendered
  in json_to_html.)

  Values taken from obj (content, URLs, names, summaries) are interpolated
  into the HTML as-is; this function does not escape them.

  Args:
    obj: decoded JSON ActivityStreams object
    include_location: whether to render location, if provided
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    string, rendered HTML
  """
  content = obj.get('content', '')

  # extract tags. preserve order but de-dupe, ie don't include a tag more than
  # once.
  seen_ids = set()
  mentions = []
  tags = {}  # maps string objectType to list of tag objects
  for t in obj.get('tags', []):
    tag_id = t.get('id')
    if tag_id and tag_id in seen_ids:
      continue
    seen_ids.add(tag_id)

    # tags with a position are inline mentions; the rest are grouped by type
    if 'startIndex' in t and 'length' in t:
      mentions.append(t)
    else:
      tags.setdefault(source.object_type(t), []).append(t)

  # linkify embedded mention tags inside content.
  if mentions:
    mentions.sort(key=lambda t: t['startIndex'])
    last_end = 0
    orig = content
    content = ''
    for tag in mentions:
      start = tag['startIndex']
      end = start + tag['length']
      content += orig[last_end:start]
      content += '<a href="%s">%s</a>' % (tag['url'], orig[start:end])
      last_end = end

    content += orig[last_end:]

  # convert newlines to <br>s
  # do this *after* linkifying tags so we don't have to shuffle indices over
  content = content.replace('\n', '<br />\n')

  # linkify embedded links. ignore the "mention" tags that we added ourselves.
  # TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
  # uncomment this.
  # if content:
  #   content = util.linkify(content)

  # attachments, e.g. links (aka articles)
  # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
  attachments = [a for a in obj.get('attachments', [])
                 if a.get('objectType') not in ('note', 'article')]

  for tag in attachments + tags.pop('article', []):
    name = tag.get('displayName', '')
    open_a_tag = False
    if tag.get('objectType') == 'video':
      video = util.get_first(tag, 'stream') or util.get_first(obj, 'stream')
      poster = util.get_first(tag, 'image', {})
      if video and video.get('url'):
        content += '\n<p>%s' % vid(video['url'], poster.get('url'), 'thumbnail')
    else:
      content += '\n<p>'
      url = tag.get('url') or obj.get('url')
      if url:
        content += '\n<a class="link" href="%s">' % url
        open_a_tag = True
      image = util.get_first(tag, 'image') or util.get_first(obj, 'image')
      if image and image.get('url'):
        content += '\n' + img(image['url'], 'thumbnail', name)
    if name:
      content += '\n<span class="name">%s</span>' % name
    if open_a_tag:
      content += '\n</a>'
    summary = tag.get('summary')
    if summary and summary != name:
      content += '\n<span class="summary">%s</span>' % summary
    content += '\n</p>'

  # generate share/like contexts if the activity does not have content
  # of its own
  obj_type = source.object_type(obj)
  for as_type, verb in [('share', 'Shared'), ('like', 'Likes')]:
    if (not synthesize_content or obj_type != as_type or 'object' not in obj or
        'content' in obj):
      continue

    targets = util.get_list(obj, 'object')
    if not targets:
      continue

    for target in targets:
      # sometimes likes don't have enough content to render anything
      # interesting
      if 'url' in target and set(target) <= set(['url', 'objectType']):
        content += '<a href="%s">%s this.</a>' % (
          target.get('url'), verb.lower())

      else:
        author = target.get('author', target.get('actor', {}))
        # special case for twitter RT's
        # (raw string so that \. is a regex escape, not a string escape)
        if obj_type == 'share' and 'url' in obj and re.search(
                r'^https?://(?:www\.|mobile\.)?twitter\.com/', obj.get('url')):
          content += 'RT <a href="%s">@%s</a> ' % (
            target.get('url', '#'), author.get('username'))
        else:
          # image looks bad in the simplified rendering
          # (.items() rather than .iteritems() so this runs on Python 2 and 3)
          author = {k: v for k, v in author.items() if k != 'image'}
          content += '%s <a href="%s">%s</a> by %s' % (
            verb, target.get('url', '#'),
            target.get('displayName', target.get('title', 'a post')),
            hcard_to_html(object_to_json(author, default_object_type='person')),
          )
        content += render_content(target, include_location=include_location,
                                  synthesize_content=synthesize_content)
      # only include the first context in the content (if there are
      # others, they'll be included as separate properties)
      break
    break

  # location
  loc = obj.get('location')
  if include_location and loc:
    content += '\n' + hcard_to_html(
      object_to_json(loc, default_object_type='place'),
      parent_props=['p-location'])

  # these are rendered manually in json_to_html()
  for tag_type in 'like', 'share', 'react', 'person':
    tags.pop(tag_type, None)

  # render the rest
  content += tags_to_html(tags.pop('hashtag', []), 'p-category')
  content += tags_to_html(tags.pop('mention', []), 'u-mention')
  content += tags_to_html(sum(tags.values(), []), 'tag')

  return content

Example #24

0

Show file

File: microformats2.py Project: cacimatti/granary

def object_to_json(obj,
                   trim_nulls=True,
                   entry_class='h-entry',
                   default_object_type=None,
                   synthesize_content=True):
    """Converts an ActivityStreams object to microformats2 JSON.

    The input object is not modified.

    Args:
      obj: dict, a decoded JSON ActivityStreams object
      trim_nulls: boolean, whether to remove elements with null or empty values
      entry_class: string or sequence, the mf2 class(es) that entries should be
        given (e.g. 'h-cite' when parsing a reference to a foreign entry).
        defaults to 'h-entry'
      default_object_type: string, the ActivityStreams objectType to use if one
        is not present. defaults to None
      synthesize_content: whether to generate synthetic content if the object
        doesn't have its own, e.g. 'likes this.' or 'shared this.'

    Returns:
      dict, decoded microformats2 JSON. Empty dict if obj is empty or is not a
      dict.
    """
    if not obj or not isinstance(obj, dict):
        return {}

    obj_type = source.object_type(obj) or default_object_type
    # if the activity type is a post, then it's really just a conduit
    # for the object. for other verbs, the activity itself is the
    # interesting thing
    if obj_type == 'post':
        primary = obj.get('object', {})
        obj_type = source.object_type(primary) or default_object_type
    else:
        primary = obj

    # TODO: extract snippet
    name = primary.get('displayName', primary.get('title'))
    summary = primary.get('summary')
    author = obj.get('author', obj.get('actor', {}))

    # copy the list: it's extended below for RSVPs and reactions, and extending
    # it in place would silently modify the caller's obj['inReplyTo'].
    in_reply_tos = list(
        obj.get('inReplyTo', obj.get('context', {}).get('inReplyTo', [])))
    is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
    if (is_rsvp or obj_type == 'react') and obj.get('object'):
        objs = obj['object']
        in_reply_tos.extend(objs if isinstance(objs, list) else [objs])

    # maps objectType to list of objects
    attachments = defaultdict(list)
    for prop in 'attachments', 'tags':
        for elem in get_list(primary, prop):
            attachments[elem.get('objectType')].append(elem)

    # construct mf2!
    ret = {
        # note the precedence: the AS_TO_MF2_TYPE lookup only applies when
        # entry_class is a single string; a sequence is used verbatim
        'type': (AS_TO_MF2_TYPE.get(obj_type) or [entry_class] if isinstance(
            entry_class, basestring) else list(entry_class)),
        'properties': {
            'uid': [obj.get('id') or ''],
            'numeric-id': [obj.get('numeric_id') or ''],
            'name': [name],
            'nickname': [obj.get('username') or ''],
            'summary': [summary],
            'url': (list(object_urls(obj) or object_urls(primary)) +
                    obj.get('upstreamDuplicates', [])),
            'photo':
            dedupe_urls(
                get_urls(attachments, 'image', 'image') +
                get_urls(primary, 'image')),
            'video':
            dedupe_urls(
                get_urls(attachments, 'video', 'stream') +
                get_urls(primary, 'stream')),
            'audio':
            get_urls(attachments, 'audio', 'stream'),
            'published': [obj.get('published', primary.get('published', ''))],
            'updated': [obj.get('updated', primary.get('updated', ''))],
            'content': [{
                'value':
                xml.sax.saxutils.unescape(primary.get('content', '')),
                'html':
                render_content(primary,
                               include_location=False,
                               synthesize_content=synthesize_content),
            }],
            'in-reply-to':
            util.trim_nulls([o.get('url') for o in in_reply_tos]),
            'author': [
                object_to_json(author,
                               trim_nulls=False,
                               default_object_type='person')
            ],
            'location': [
                object_to_json(primary.get('location', {}),
                               trim_nulls=False,
                               default_object_type='place')
            ],
            'comment': [
                object_to_json(c, trim_nulls=False, entry_class='h-cite')
                for c in obj.get('replies', {}).get('items', [])
            ],
            'start': [primary.get('startTime')],
            'end': [primary.get('endTime')],
        },
        # attached notes and articles become quoted child entries
        'children': [
            object_to_json(a,
                           trim_nulls=False,
                           entry_class=['u-quotation-of', 'h-cite'])
            for a in attachments['note'] + attachments['article']
        ]
    }

    # hashtags and person tags
    tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
    ret['properties']['category'] = []
    for tag in tags:
        if tag.get('objectType') == 'person':
            ret['properties']['category'].append(
                object_to_json(tag, entry_class='u-category h-card'))
        elif tag.get('objectType') == 'hashtag':
            tag_name = tag.get('displayName')
            if tag_name:
                ret['properties']['category'].append(tag_name)

    # rsvp
    if is_rsvp:
        ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
    elif obj_type == 'invite':
        invitee = object_to_json(obj.get('object'),
                                 trim_nulls=False,
                                 default_object_type='person')
        ret['properties']['invitee'] = [invitee]

    # like and repost mentions
    for verb_type, prop in (('favorite', 'like'), ('like', 'like'),
                            ('share', 'repost')):
        if obj_type == verb_type:
            # The ActivityStreams spec says the object property should always be a
            # single object, but it's useful to let it be a list, e.g. when a like has
            # multiple targets, e.g. a like of a post with original post URLs in it,
            # which brid.gy does.
            objs = get_list(obj, 'object')
            ret['properties'][prop + '-of'] = [
                # flatten contexts that are just a url
                o['url']
                if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
                else object_to_json(o, trim_nulls=False, entry_class='h-cite')
                for o in objs
            ]
        else:
            # received likes and reposts
            ret['properties'][prop] = [
                object_to_json(t, trim_nulls=False, entry_class='h-cite')
                for t in tags if source.object_type(t) == verb_type
            ]

    # latitude & longitude. an ISO 6709 position string wins; the separate
    # latitude/longitude fields fill in whichever half is still missing.
    lat = lng = None
    position = ISO_6709_RE.match(primary.get('position') or '')
    if position:
        lat, lng = position.groups()
    if not lat:
        lat = primary.get('latitude')
    if not lng:
        lng = primary.get('longitude')

    if lat:
        ret['properties']['latitude'] = [str(lat)]
    if lng:
        ret['properties']['longitude'] = [str(lng)]

    if trim_nulls:
        ret = util.trim_nulls(ret)
    return ret

Example #25

0

Show file

def render_content(obj, include_location=True, synthesize_content=True):
    """Renders the content of an ActivityStreams object as HTML.

    Includes tags, mentions, and non-note/article attachments. (Note/article
    attachments are converted to mf2 children in object_to_json and then
    rendered in json_to_html.)

    NOTE(review): URLs, names, and summaries are interpolated into the markup
    below with plain % formatting, ie without HTML escaping in this function.
    Confirm that callers only pass trusted or pre-sanitized objects.

    Args:
      obj: decoded JSON ActivityStreams object
      include_location: whether to render location, if provided
      synthesize_content: whether to generate synthetic content if the object
        doesn't have its own, e.g. 'likes this.' or 'shared this.'

    Returns: string, rendered HTML
    """
    content = obj.get('content', '')

    # extract tags. preserve order but de-dupe, ie don't include a tag more than
    # once.
    seen_ids = set()
    mentions = []
    tags = {}  # maps string objectType to list of tag objects
    for t in obj.get('tags', []):
        tag_id = t.get('id')
        if tag_id and tag_id in seen_ids:
            continue
        seen_ids.add(tag_id)

        # tags with a position are inline mentions; everything else is grouped
        # by object type and rendered after the content.
        if 'startIndex' in t and 'length' in t:
            mentions.append(t)
        else:
            tags.setdefault(source.object_type(t), []).append(t)

    # linkify embedded mention tags inside content.
    if mentions:
        mentions.sort(key=lambda t: t['startIndex'])
        last_end = 0
        orig = content
        content = ''
        for tag in mentions:
            start = tag['startIndex']
            end = start + tag['length']
            content += orig[last_end:start]
            content += '<a href="%s">%s</a>' % (tag['url'], orig[start:end])
            last_end = end

        content += orig[last_end:]

    # convert newlines to <br>s
    # do this *after* linkifying tags so we don't have to shuffle indices over
    content = content.replace('\n', '<br />\n')

    # linkify embedded links. ignore the "mention" tags that we added ourselves.
    # TODO: fix the bug in test_linkify_broken() in webutil/util_test.py, then
    # uncomment this.
    # if content:
    #   content = util.linkify(content)

    # attachments, e.g. links (aka articles)
    # TODO: use oEmbed? http://oembed.com/ , http://code.google.com/p/python-oembed/
    attachments = [
        a for a in obj.get('attachments', [])
        if a.get('objectType') not in ('note', 'article')
    ]

    for tag in attachments + tags.pop('article', []):
        name = tag.get('displayName', '')
        open_a_tag = False
        if tag.get('objectType') == 'video':
            # fall back to the enclosing object's stream if the attachment
            # doesn't have its own
            video = util.get_first(tag, 'stream') or util.get_first(
                obj, 'stream')
            poster = util.get_first(tag, 'image', {})
            if video and video.get('url'):
                content += '\n<p>%s' % vid(video['url'], poster.get('url'),
                                           'thumbnail')
        else:
            content += '\n<p>'
            url = tag.get('url') or obj.get('url')
            if url:
                content += '\n<a class="link" href="%s">' % url
                open_a_tag = True
            image = util.get_first(tag, 'image') or util.get_first(
                obj, 'image')
            if image and image.get('url'):
                content += '\n' + img(image['url'], 'thumbnail', name)
        if name:
            content += '\n<span class="name">%s</span>' % name
        if open_a_tag:
            content += '\n</a>'
        summary = tag.get('summary')
        if summary and summary != name:
            content += '\n<span class="summary">%s</span>' % summary
        content += '\n</p>'

    # generate share/like contexts if the activity does not have content
    # of its own
    for as_type, verb in [('share', 'Shared'), ('like', 'Likes')]:
        obj_type = source.object_type(obj)
        if (not synthesize_content or obj_type != as_type
                or 'object' not in obj or 'content' in obj):
            continue

        targets = util.get_list(obj, 'object')
        if not targets:
            continue

        for target in targets:
            # sometimes likes don't have enough content to render anything
            # interesting
            if 'url' in target and set(target) <= set(['url', 'objectType']):
                content += '<a href="%s">%s this.</a>' % (target.get('url'),
                                                          verb.lower())

            else:
                author = target.get('author', target.get('actor', {}))
                # special case for twitter RT's
                if obj_type == 'share' and 'url' in obj and re.search(
                        r'^https?://(?:www\.|mobile\.)?twitter\.com/',
                        obj.get('url')):
                    content += 'RT <a href="%s">@%s</a> ' % (target.get(
                        'url', '#'), author.get('username'))
                else:
                    # image looks bad in the simplified rendering.
                    # .items() (not .iteritems()) so this runs on Python 3 too.
                    author = {
                        k: v
                        for k, v in author.items() if k != 'image'
                    }
                    content += '%s <a href="%s">%s</a> by %s' % (
                        verb,
                        target.get('url', '#'),
                        target.get('displayName', target.get(
                            'title', 'a post')),
                        hcard_to_html(
                            object_to_json(author,
                                           default_object_type='person')),
                    )
                content += render_content(
                    target,
                    include_location=include_location,
                    synthesize_content=synthesize_content)
            # only include the first context in the content (if there are
            # others, they'll be included as separate properties)
            break
        break

    # location
    loc = obj.get('location')
    if include_location and loc:
        content += '\n' + hcard_to_html(object_to_json(
            loc, default_object_type='place'),
                                        parent_props=['p-location'])

    # these are rendered manually in json_to_html()
    for tag_type in 'like', 'share', 'react', 'person':
        tags.pop(tag_type, None)

    # render the rest
    content += tags_to_html(tags.pop('hashtag', []), 'p-category')
    content += tags_to_html(tags.pop('mention', []), 'u-mention')
    content += tags_to_html(sum(tags.values(), []), 'tag')

    return content

Example #26

0

Show file

def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  The input object is not modified.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string, the mf2 class that entries should be given (e.g.
      'h-cite' when parsing a reference to a foreign entry). defaults to
      'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON. Empty dict if obj is empty or not a dict.
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get(
    'inReplyTo', obj.get('context', {}).get('inReplyTo', []))
  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    # build a new list instead of extending in place, since in_reply_tos may be
    # the caller's own obj['inReplyTo'] (or context['inReplyTo']) list
    in_reply_tos = list(in_reply_tos) + (
      objs if isinstance(objs, list) else [objs])

  # TODO: more tags. most will be p-category?
  ret = {
    'type': (['h-card'] if obj_type == 'person'
             else ['h-card', 'p-location'] if obj_type == 'place'
             else [entry_class]),
    'properties': {
      'uid': [obj.get('id', '')],
      'name': [name],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      'photo': [image.get('url') for image in
                (util.get_list(obj, 'image') or util.get_list(primary, 'image'))],
      'video': [obj.get('stream', primary.get('stream', {})).get('url')],
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'content': [{
          'value': xml.sax.saxutils.unescape(primary.get('content', '')),
          'html': render_content(primary, include_location=False,
                                 synthesize_content=synthesize_content),
      }],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'latitude': primary.get('latitude'),
      'longitude': primary.get('longitude'),
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
    },
    # note and article attachments become nested mf2 children
    'children': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                 for c in primary.get('attachments', [])
                 if c.get('objectType') in ('note', 'article')],
  }

  # hashtags and person tags
  tags = obj.get('tags', []) or util.get_first(obj, 'object', {}).get('tags', [])
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      cls = 'u-category h-card'
    elif tag.get('objectType') == 'hashtag':
      cls = 'u-category'
    else:
      continue
    ret['properties']['category'].append(object_to_json(tag, entry_class=cls))

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for verb, prop in ('like', 'like'), ('share', 'repost'):
    if obj_type == verb:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = util.get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == verb]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret

Example #27

0

Show file

def jsonfeed_to_activities(jsonfeed):
    """Converts a JSON feed to ActivityStreams activities and actor.

    Explicit JSON nulls for author, items, attachments, and mime_type are
    treated the same as missing fields.

    Args:
      jsonfeed: dict, JSON Feed data

    Returns:
      (activities, actor) tuple, where activities and actor are both
      ActivityStreams object dicts

    Raises:
      ValueError, if jsonfeed isn't a valid JSON Feed dict, or if an author,
      item, or attachment inside it isn't a dict
    """
    if not hasattr(jsonfeed, 'get'):
        raise ValueError('Expected dict (or compatible), got %s' %
                         jsonfeed.__class__.__name__)

    def dict_or_empty(container, key):
        # .get(key, {}) would return None for an explicit JSON null, so fall
        # back with `or`, then reject anything that isn't dict-like.
        value = container.get(key) or {}
        if not hasattr(value, 'get'):
            raise ValueError('Expected %s to be dict; got %r' % (key, value))
        return value

    author = dict_or_empty(jsonfeed, 'author')
    actor = {
        'objectType': 'person',
        'url': author.get('url'),
        'image': [{
            'url': author.get('avatar')
        }],
        'displayName': author.get('name'),
    }

    def attachment(jf):
        # converts a single JSON Feed attachment to an AS1 attachment object
        if not hasattr(jf, 'get'):
            raise ValueError('Expected attachment to be dict; got %s' % jf)
        url = jf.get('url')
        # top-level MIME type, e.g. 'audio' for 'audio/mpeg'
        media_type = (jf.get('mime_type') or '').split('/')[0]
        as1 = {
            'objectType': media_type,
            'title': jf.get('title'),
        }
        if media_type in ('audio', 'video'):
            as1['stream'] = {'url': url}
        else:
            as1['url'] = url
        return as1

    def activity(item):
        # converts a single JSON Feed item to an AS1 activity
        if not hasattr(item, 'get'):
            raise ValueError('Expected item to be dict; got %r' % (item,))
        item_author = dict_or_empty(item, 'author')
        return {
            'object': {
                'objectType': 'article' if item.get('title') else 'note',
                'title': item.get('title'),
                'summary': item.get('summary'),
                'content': (util.get_first(item, 'content_html')
                            or util.get_first(item, 'content_text')),
                'id': str(item.get('id') or ''),
                'published': item.get('date_published'),
                'updated': item.get('date_modified'),
                'url': item.get('url'),
                'image': [{
                    'url': item.get('image')
                }],
                'author': {
                    'displayName': item_author.get('name'),
                    'image': [{
                        'url': item_author.get('avatar')
                    }]
                },
                'attachments': [
                    attachment(a) for a in item.get('attachments') or []
                ],
            }
        }

    activities = [activity(item) for item in jsonfeed.get('items') or []]

    return (util.trim_nulls(activities), util.trim_nulls(actor))

Example #28

0

Show file

File: rss.py Project: snarfed/granary

def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
  """Builds an RSS 2.0 feed from ActivityStreams activities.

  Activities whose object is a person are skipped. Attachments whose
  objectType, or guessed MIME type family, is in ENCLOSURE_TYPES are emitted
  as enclosures; if any are, the podcast extension is loaded on the feed.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    feed_url: string, the URL for this RSS feed
    home_page_url: string, the home page URL
    hfeed: dict, parsed mf2 h-feed, if available

  Returns:
    unicode string with RSS 2.0 XML

  Raises:
    TypeError: if activities is not iterable, or is a dict or string
    AssertionError: if feed_url is empty
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  if isinstance(activities, (dict, basestring)):
    raise TypeError('activities may not be a dict or string')

  # feed-level metadata
  feed = FeedGenerator()
  feed.id(feed_url)
  assert feed_url
  feed.link(href=feed_url, rel='self')
  if home_page_url:
    feed.link(href=home_page_url, rel='alternate')
  # TODO: parse language from lang attribute:
  # https://github.com/microformats/mf2py/issues/150
  feed.language('en')
  feed.generator('granary', uri='https://granary.io/')

  if not hfeed:
    hfeed = {}
  if not actor:
    actor = {}

  feed_image = util.get_url(hfeed, 'image') or util.get_url(actor, 'image')
  if feed_image:
    feed.image(feed_image)

  hfeed_props = hfeed.get('properties') or {}
  hfeed_text = microformats2.get_text(
    util.get_first(hfeed_props, 'content', ''))
  summary = util.get_first(hfeed_props, 'summary', '')
  description = hfeed_text or summary or '-'
  feed.description(description)  # required
  feed.title(title or util.ellipsize(description))  # required

  newest = None          # most recent published datetime seen so far
  has_enclosures = False
  for activity in activities:
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue

    entry = feed.add_entry()
    permalink = obj.get('url')
    entry.id(obj.get('id') or permalink)
    entry.link(href=permalink)
    entry.guid(permalink, permalink=True)

    entry.title(obj.get('title') or obj.get('displayName') or '-')  # required
    rendered = microformats2.render_content(
      obj, include_location=True, render_attachments=False)
    rendered = rendered or obj.get('summary')
    if rendered:
      entry.content(rendered, type='CDATA')

    # named tags become categories; likes, reactions, and shares do not
    categories = []
    for tag in obj.get('tags', []):
      if (tag.get('displayName') and
          tag.get('verb') not in ('like', 'react', 'share')):
        categories.append({'term': tag['displayName']})
    entry.category(categories)

    author = obj.get('author', {})
    entry.author({
      'name': author.get('displayName') or author.get('username'),
      'uri': author.get('url'),
    })

    when = obj.get('published') or obj.get('updated')
    if when:
      try:
        parsed = mf2util.parse_datetime(when)
        # promote non-datetime results to a datetime at the start of that day
        if not isinstance(parsed, datetime):
          parsed = datetime.combine(parsed, time.min)
        if not parsed.tzinfo:
          parsed = parsed.replace(tzinfo=util.UTC)
        entry.published(parsed)
        if not newest or parsed > newest:
          newest = parsed
      except ValueError:  # bad datetime string
        pass

    for att in obj.get('attachments', []):
      stream = util.get_first(att, 'stream') or att
      if not stream:
        continue

      enclosure_url = stream.get('url') or ''
      mime = mimetypes.guess_type(enclosure_url)[0] or ''
      is_enclosure = (att.get('objectType') in ENCLOSURE_TYPES or
                      mime and mime.split('/')[0] in ENCLOSURE_TYPES)
      if not is_enclosure:
        continue

      has_enclosures = True
      # TODO: length (bytes). the placeholder is stripped from the output below.
      entry.enclosure(url=enclosure_url, type=mime, length='REMOVEME')

      entry.load_extension('podcast')
      duration = stream.get('duration')
      if duration:
        entry.podcast.itunes_duration(duration)

  if has_enclosures:
    feed.load_extension('podcast')
    feed.podcast.itunes_author(
      actor.get('displayName') or actor.get('username'))
    if summary:
      feed.podcast.itunes_summary(summary)
    feed.podcast.itunes_explicit('no')
    feed.podcast.itunes_block(False)

  if newest:
    feed.lastBuildDate(newest)

  xml_text = feed.rss_str(pretty=True).decode('utf-8')
  return xml_text.replace(' length="REMOVEME"', '')

Example #29

0

Show file

def activities_to_jsonfeed(activities,
                           actor=None,
                           title=None,
                           feed_url=None,
                           home_page_url=None):
    """Builds a JSON Feed dict from ActivityStreams activities.

    Activities whose object is a person are skipped.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      home_page_url: string, the home page URL
      feed_url: the URL of the JSON Feed, if any. Included in the feed_url
        field.

    Returns:
      dict, JSON Feed data, ready to be JSON-encoded

    Raises:
      TypeError: if activities is not iterable, or is a dict or string
    """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, basestring)):
        raise TypeError('activities may not be a dict or string')

    def first_image_url(as_obj):
        # URL of the object's first image, or None
        first_image = util.get_first(as_obj, 'image', {})
        return first_image.get('url')

    def name_of(as_obj):
        # display name, falling back to username
        return as_obj.get('displayName') or as_obj.get('username')

    actor = actor or {}

    feed_items = []
    for activity in activities:
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue
        author = obj.get('author', {})

        html = obj.get('content')
        # The JSON Feed spec (https://jsonfeed.org/version/1#items) says that the
        # URL from the "image" property may also appear in "content_html", in which
        # case it should be interpreted as the "main, featured image" of the
        # post. It does not specify the behavior or semantics in the case that the
        # image does *not* appear in "content_html", but currently at least one
        # feed reader (Feedbin) will not display the image as part of the post
        # content unless it is explicitly included in "content_html".
        if html and first_image_url(obj):
            html += HTML_IMAGE_TEMPLATE.format(first_image_url(obj))

        candidate_title = obj.get('title') or obj.get('displayName')
        entry = {
            'id': obj.get('id') or obj.get('url'),
            'url': obj.get('url'),
            'image': first_image_url(obj),
            'title': (candidate_title
                      if mf2util.is_name_a_title(candidate_title, html)
                      else None),
            'summary': obj.get('summary'),
            'content_html': html,
            'date_published': obj.get('published'),
            'date_modified': obj.get('updated'),
            'author': {
                'name': name_of(author),
                'url': author.get('url'),
                'avatar': first_image_url(author),
            },
            'attachments': [],
        }

        for att in obj.get('attachments', []):
            # prefer the stream's URL, then the image's, then the attachment's
            holder = (util.get_first(att, 'stream')
                      or util.get_first(att, 'image') or att)
            att_url = holder.get('url')
            mime = mimetypes.guess_type(att_url)[0] if att_url else None
            wanted = (att.get('objectType') in ATTACHMENT_TYPES
                      or mime and mime.split('/')[0] in ATTACHMENT_TYPES)
            if not wanted:
                continue
            entry['attachments'].append({
                'url': att_url or '',
                'mime_type': mime,
                'title': att.get('title'),
            })

        # an empty content_text is kept through trim_nulls via ignore= below
        if not html:
            entry['content_text'] = ''
        feed_items.append(entry)

    feed = {
        'version': 'https://jsonfeed.org/version/1',
        'title': title or name_of(actor) or 'JSON Feed',
        'feed_url': feed_url,
        'home_page_url': home_page_url or actor.get('url'),
        'author': {
            'name': name_of(actor),
            'url': actor.get('url'),
            'avatar': first_image_url(actor),
        },
        'items': feed_items,
    }
    return util.trim_nulls(feed, ignore='content_text')

Example #30

0

Show file

File: jsonfeed.py Project: snarfed/granary

def activities_to_jsonfeed(activities, actor=None, title=None, feed_url=None,
                           home_page_url=None):
  """Builds a JSON Feed dict from ActivityStreams activities.

  Activities whose object is a person are skipped. Each item's content_html is
  rendered with microformats2.render_content.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    home_page_url: string, the home page URL
    feed_url: the URL of the JSON Feed, if any. Included in the feed_url field.

  Returns:
    dict, JSON Feed data, ready to be JSON-encoded

  Raises:
    TypeError: if activities is not iterable, or is a dict or string
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  if isinstance(activities, (dict, basestring)):
    raise TypeError('activities may not be a dict or string')

  def first_image_url(as_obj):
    # URL of the object's first image, or None
    first_image = util.get_first(as_obj, 'image', {})
    return first_image.get('url')

  def name_of(as_obj):
    # display name, falling back to username
    return as_obj.get('displayName') or as_obj.get('username')

  actor = actor or {}

  feed_items = []
  for activity in activities:
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue

    author = obj.get('author', {})
    html = microformats2.render_content(
      obj, include_location=True, render_attachments=True)
    candidate_title = obj.get('title') or obj.get('displayName')
    entry = {
      'id': obj.get('id') or obj.get('url'),
      'url': obj.get('url'),
      'image': first_image_url(obj),
      'title': (candidate_title
                if mf2util.is_name_a_title(candidate_title, html) else None),
      'summary': obj.get('summary'),
      'content_html': html,
      'date_published': obj.get('published'),
      'date_modified': obj.get('updated'),
      'author': {
        'name': name_of(author),
        'url': author.get('url'),
        'avatar': first_image_url(author),
      },
      'attachments': [],
    }

    for att in obj.get('attachments', []):
      # prefer the stream's URL, then the image's, then the attachment's own
      holder = (util.get_first(att, 'stream') or
                util.get_first(att, 'image') or att)
      att_url = holder.get('url')
      mime = mimetypes.guess_type(att_url)[0] if att_url else None
      wanted = (att.get('objectType') in ATTACHMENT_TYPES or
                mime and mime.split('/')[0] in ATTACHMENT_TYPES)
      if not wanted:
        continue
      entry['attachments'].append({
        'url': att_url or '',
        'mime_type': mime,
        'title': att.get('title'),
      })

    # an empty content_text is kept through trim_nulls via ignore= below
    if not html:
      entry['content_text'] = ''
    feed_items.append(entry)

  feed = {
    'version': 'https://jsonfeed.org/version/1',
    'title': title or name_of(actor) or 'JSON Feed',
    'feed_url': feed_url,
    'home_page_url': home_page_url or actor.get('url'),
    'author': {
      'name': name_of(actor),
      'url': actor.get('url'),
      'avatar': first_image_url(actor),
    },
    'items': feed_items,
  }
  return util.trim_nulls(feed, ignore='content_text')

Example #31

0

Show file

def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
                   default_object_type=None, synthesize_content=True):
  """Converts an ActivityStreams object to microformats2 JSON.

  The input object is not modified.

  Args:
    obj: dict, a decoded JSON ActivityStreams object
    trim_nulls: boolean, whether to remove elements with null or empty values
    entry_class: string or sequence, the mf2 class(es) that entries should be
      given (e.g. 'h-cite' when parsing a reference to a foreign entry).
      defaults to 'h-entry'
    default_object_type: string, the ActivityStreams objectType to use if one
      is not present. defaults to None
    synthesize_content: whether to generate synthetic content if the object
      doesn't have its own, e.g. 'likes this.' or 'shared this.'

  Returns:
    dict, decoded microformats2 JSON. Empty dict if obj is empty or not a dict.
  """
  if not obj or not isinstance(obj, dict):
    return {}

  obj_type = source.object_type(obj) or default_object_type
  # if the activity type is a post, then it's really just a conduit
  # for the object. for other verbs, the activity itself is the
  # interesting thing
  if obj_type == 'post':
    primary = obj.get('object', {})
    obj_type = source.object_type(primary) or default_object_type
  else:
    primary = obj

  # TODO: extract snippet
  name = primary.get('displayName', primary.get('title'))
  summary = primary.get('summary')
  author = obj.get('author', obj.get('actor', {}))

  in_reply_tos = obj.get('inReplyTo') or []
  if not in_reply_tos:
    context = obj.get('context')
    if context and isinstance(context, dict):
      in_reply_tos = context.get('inReplyTo') or []

  is_rsvp = obj_type in ('rsvp-yes', 'rsvp-no', 'rsvp-maybe')
  if (is_rsvp or obj_type == 'react') and obj.get('object'):
    objs = obj['object']
    # build a new list instead of extending in place, since in_reply_tos may be
    # the caller's own obj['inReplyTo'] (or context['inReplyTo']) list
    in_reply_tos = list(in_reply_tos) + (
      objs if isinstance(objs, list) else [objs])

  # maps objectType to list of objects
  attachments = defaultdict(list)
  for prop in 'attachments', 'tags':
    for elem in get_list(primary, prop):
      attachments[elem.get('objectType')].append(elem)

  # prefer duration and size from object's stream, then first video, then first
  # audio. stop at the first candidate that has a non-empty stream so that
  # later candidates can't override it.
  stream = {}
  for candidate in [obj] + attachments['video'] + attachments['audio']:
    found = next((s for s in get_list(candidate, 'stream') if s), None)
    if found:
      stream = found
      break

  duration = stream.get('duration')
  if duration is not None:
    if util.is_int(duration):
      duration = str(duration)
    else:
      logging.warning('Ignoring duration %r; expected int, got %s',
                      duration, duration.__class__)
      duration = None

  sizes = []
  size = stream.get('size')
  if size:
    sizes = [str(size)]

  # construct mf2!
  ret = {
    # NOTE: the conditional expression binds more loosely than `or`, so
    # AS_TO_MF2_TYPE is only consulted when entry_class is a string. a sequence
    # entry_class is always used as is.
    'type': (AS_TO_MF2_TYPE.get(obj_type) or
             [entry_class] if isinstance(entry_class, str)
             else list(entry_class)),
    'properties': {
      'uid': [obj.get('id') or ''],
      'numeric-id': [obj.get('numeric_id') or ''],
      'name': [name],
      'nickname': [obj.get('username') or ''],
      'summary': [summary],
      'url': (list(object_urls(obj) or object_urls(primary)) +
              obj.get('upstreamDuplicates', [])),
      # photo is special cased below, to handle alt
      'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
                           get_urls(primary, 'stream')),
      'audio': get_urls(attachments, 'audio', 'stream'),
      'duration': [duration],
      'size': sizes,
      'published': [obj.get('published', primary.get('published', ''))],
      'updated': [obj.get('updated', primary.get('updated', ''))],
      'in-reply-to': util.trim_nulls([o.get('url') for o in in_reply_tos]),
      'author': [object_to_json(
        author, trim_nulls=False, default_object_type='person')],
      'location': [object_to_json(
        primary.get('location', {}), trim_nulls=False,
        default_object_type='place')],
      'comment': [object_to_json(c, trim_nulls=False, entry_class='h-cite')
                  for c in obj.get('replies', {}).get('items', [])],
      'start': [primary.get('startTime')],
      'end': [primary.get('endTime')],
    },
    'children': (
      # silly hack: i haven't found anywhere in AS1 or AS2 to indicate that
      # something is being "quoted," like in a quote tweet, so i cheat and use
      # extra knowledge here that quoted tweets are converted to note
      # attachments, but URLs in the tweet text are converted to article tags.
      [object_to_json(a, trim_nulls=False, entry_class=['u-quotation-of', 'h-cite'])
       for a in attachments['note'] if 'startIndex' not in a] +
      [object_to_json(a, trim_nulls=False, entry_class=['h-cite'])
       for a in attachments['article'] if 'startIndex' not in a])
  }

  # content. emulate e- vs p- microformats2 parsing: e- if there are HTML tags,
  # otherwise p-.
  # https://indiewebcamp.com/note#Indieweb_whitespace_thinking
  text = xml.sax.saxutils.unescape(primary.get('content', ''))
  html = render_content(primary, include_location=False,
                        synthesize_content=synthesize_content)
  if '<' in html:
    ret['properties']['content'] = [{'value': text, 'html': html}]
  else:
    ret['properties']['content'] = [text]

  # photos, including alt text
  photo_urls = set()
  ret['properties']['photo'] = []
  for image in get_list(attachments, 'image') + [primary]:
    for url in get_urls(image, 'image'):
      if url and url not in photo_urls:
        photo_urls.add(url)
        alt = get_first(image, 'image', {}).get('displayName')
        ret['properties']['photo'].append(
          {'value': url, 'alt': alt} if alt else url)

  # hashtags and person tags
  if obj_type == 'tag':
    ret['properties']['tag-of'] = util.get_urls(obj, 'target')

  tags = obj.get('tags', []) or get_first(obj, 'object', {}).get('tags', [])
  if not tags and obj_type == 'tag':
    tags = util.get_list(obj, 'object')
  ret['properties']['category'] = []
  for tag in tags:
    if tag.get('objectType') == 'person':
      ret['properties']['category'].append(
        object_to_json(tag, entry_class='u-category h-card'))
    elif tag.get('objectType') == 'hashtag' or obj_type == 'tag':
      tag_name = tag.get('displayName')
      if tag_name:
        ret['properties']['category'].append(tag_name)

  # rsvp
  if is_rsvp:
    ret['properties']['rsvp'] = [obj_type[len('rsvp-'):]]
  elif obj_type == 'invite':
    invitee = object_to_json(obj.get('object'), trim_nulls=False,
                             default_object_type='person')
    ret['properties']['invitee'] = [invitee]

  # like and repost mentions
  for verb, prop in (
      ('favorite', 'like'),
      ('follow', 'follow'),
      ('like', 'like'),
      ('share', 'repost'),
  ):
    if obj_type == verb:
      # The ActivityStreams spec says the object property should always be a
      # single object, but it's useful to let it be a list, e.g. when a like has
      # multiple targets, e.g. a like of a post with original post URLs in it,
      # which brid.gy does.
      objs = get_list(obj, 'object')
      ret['properties'][prop + '-of'] = [
        # flatten contexts that are just a url
        o['url'] if 'url' in o and set(o.keys()) <= set(['url', 'objectType'])
        else object_to_json(o, trim_nulls=False, entry_class='h-cite')
        for o in objs]
    else:
      # received likes and reposts
      ret['properties'][prop] = [
        object_to_json(t, trim_nulls=False, entry_class='h-cite')
        for t in tags if source.object_type(t) == verb]

  # latitude & longitude
  lat = long = None
  position = ISO_6709_RE.match(primary.get('position') or '')
  if position:
    lat, long = position.groups()
  if not lat:
    lat = primary.get('latitude')
  if not long:
    long = primary.get('longitude')

  if lat:
    ret['properties']['latitude'] = [str(lat)]
  if long:
    ret['properties']['longitude'] = [str(long)]

  if trim_nulls:
    ret = util.trim_nulls(ret)
  return ret

Example #32

0

Show file

File: webmention.py Project: iitians/bridgy-fed

    def _activitypub_targets(self):
        """Finds the ActivityPub inboxes that this webmention should be sent to.

        If self._targets() returns any target URLs, each one is fetched as an
        AS2 object and its inbox is used, falling back to the inbox of its
        actor or attributedTo. Otherwise, the source post is delivered to the
        inbox of every follower of self.source_domain that isn't inactive.

        Sets self.target_resp as a side effect while fetching targets.

        Returns: list of (Response, string inbox URL)
        """
        # if there's in-reply-to, like-of, or repost-of, they're the targets.
        # otherwise, it's all followers' inboxes.
        targets = self._targets()

        if not targets:
            # interpret this as a Create or Update, deliver it to followers
            inboxes = []
            # this key range matches the Follower keys that start with
            # '<source_domain> ' (domain followed by a space)
            for follower in Follower.query().filter(
                    Follower.key > Key('Follower', self.source_domain + ' '),
                    Follower.key < Key(
                        'Follower', self.source_domain + chr(ord(' ') + 1))):
                if follower.status != 'inactive' and follower.last_follow:
                    actor = json_loads(follower.last_follow).get('actor')
                    if actor and isinstance(actor, dict):
                        # prefer the shared inbox, then fall back to the
                        # actor's own inbox. may append None; filtered below.
                        inboxes.append(
                            actor.get('endpoints', {}).get('sharedInbox')
                            or actor.get('publicInbox') or actor.get('inbox'))
            return [(Response.get_or_create(source=self.source_url,
                                            target=inbox,
                                            direction='out',
                                            protocol='activitypub',
                                            source_mf2=json_dumps(
                                                self.source_mf2)), inbox)
                    for inbox in inboxes if inbox]

        resps_and_inbox_urls = []
        for target in targets:
            # fetch target page as AS2 object
            try:
                self.target_resp = common.get_as2(target)
            except (requests.HTTPError, exc.HTTPBadGateway) as e:
                self.target_resp = getattr(e, 'response', None)
                if self.target_resp and self.target_resp.status_code // 100 == 2:
                    content_type = common.content_type(self.target_resp) or ''
                    if content_type.startswith('text/html'):
                        # TODO: pass e.response to try_salmon()'s target_resp
                        continue  # give up
                raise
            target_url = self.target_resp.url or target

            resp = Response.get_or_create(source=self.source_url,
                                          target=target_url,
                                          direction='out',
                                          protocol='activitypub',
                                          source_mf2=json_dumps(
                                              self.source_mf2))

            # find target's inbox
            target_obj = self.target_resp.json()
            resp.target_as2 = json_dumps(target_obj)
            inbox_url = target_obj.get('inbox')

            if not inbox_url:
                # TODO: test actor/attributedTo and not, with/without inbox
                actor = (util.get_first(target_obj, 'actor')
                         or util.get_first(target_obj, 'attributedTo'))
                if isinstance(actor, dict):
                    inbox_url = actor.get('inbox')
                    actor = actor.get('url') or actor.get('id')
                # only validate the actor's URL/id if we still have to fetch
                # the actor to find its inbox. an embedded actor that already
                # gave us an inbox doesn't need a usable url or id.
                if not inbox_url:
                    if not actor:
                        self.error(
                            'Target object has no actor or attributedTo with URL or id.'
                        )
                    elif not isinstance(actor, str):
                        self.error(
                            'Target actor or attributedTo has unexpected url or id object: %r'
                            % actor)

            if not inbox_url:
                # fetch actor as AS object
                actor = common.get_as2(actor).json()
                inbox_url = actor.get('inbox')

            if not inbox_url:
                # TODO: probably need a way to save errors like this so that we can
                # return them if ostatus fails too.
                # self.error('Target actor has no inbox')
                continue

            # the inbox may be a relative URL; resolve it against the target
            inbox_url = urllib.parse.urljoin(target_url, inbox_url)
            resps_and_inbox_urls.append((resp, inbox_url))

        return resps_and_inbox_urls

Example #33

0

Show file

File: blog_webmention.py Project: snarfed/bridgy

  def dispatch_request(self, site):
    """Handles an incoming webmention and publishes it as a blog comment.

    Reads the source and target URLs from the request form, resolves the
    target to a registered, webmention-enabled source account, fetches the
    source page's mf2, and creates a comment on the target post via
    self.source.create_comment().

    self.error() is relied on to abort the request; the code after each call
    assumes it does not return normally.

    Args:
      site: key into models.sources that selects the source class

    Returns:
      self.entity.published on success, or if this webmention was already
      completed; an ({'error': msg}, 202) tuple if the target path isn't
      supported; None if fetch_mf2() failed and already wrote the response.
    """
    logger.info(f'Params: {list(request.values.items())}')
    # strip fragments from source and target url
    self.source_url = urllib.parse.urldefrag(request.form['source'])[0]
    self.target_url = urllib.parse.urldefrag(request.form['target'])[0]

    # follow target url through any redirects, strip utm_* query params
    resp = util.follow_redirects(self.target_url)
    redirected_target_urls = [r.url for r in resp.history]
    self.target_url = util.clean_url(resp.url)

    # parse and validate target URL
    domain = util.domain_from_link(self.target_url)
    if not domain:
      self.error(f'Could not parse target URL {self.target_url}')

    # look up source by domain
    source_cls = models.sources[site]
    domain = domain.lower()
    self.source = (source_cls.query()
                   .filter(source_cls.domains == domain)
                   .filter(source_cls.features == 'webmention')
                   .filter(source_cls.status == 'enabled')
                   .get())
    if not self.source:
      # check for a rel-canonical link. Blogger uses these when it serves a post
      # from multiple domains, e.g country TLDs like epeus.blogspot.co.uk vs
      # epeus.blogspot.com.
      # https://github.com/snarfed/bridgy/issues/805
      mf2 = self.fetch_mf2(self.target_url, require_mf2=False)
      if not mf2:
        # fetch_mf2() already wrote the error response
        return
      domains = util.dedupe_urls(
        util.domain_from_link(url)
        for url in mf2[1]['rels'].get('canonical', []))
      if domains:
        self.source = (source_cls.query()
                       .filter(source_cls.domains.IN(domains))
                       .filter(source_cls.features == 'webmention')
                       .filter(source_cls.status == 'enabled')
                       .get())

    if not self.source:
      self.error(
        f'Could not find {source_cls.GR_CLASS.NAME} account for {domain}. Is it registered with Bridgy?')

    # check that the target URL path is supported
    target_path = urllib.parse.urlparse(self.target_url).path
    if target_path in ('', '/'):
      msg = 'Home page webmentions are not currently supported.'
      logger.info(msg)
      return {'error': msg}, 202
    for pattern in self.source.PATH_BLOCKLIST:
      if pattern.match(target_path):
        msg = f'{self.source.GR_CLASS.NAME} webmentions are not supported for URL path: {target_path}'
        logger.info(msg)
        return {'error': msg}, 202

    # create BlogWebmention entity, keyed on the source and target URL pair
    entity_id = f'{self.source_url} {self.target_url}'
    self.entity = BlogWebmention.get_or_insert(
      entity_id, source=self.source.key,
      redirected_target_urls=redirected_target_urls)
    if self.entity.status == 'complete':
      # TODO: response message saying update isn't supported
      return self.entity.published
    logger.debug(f'BlogWebmention entity: {self.entity.key.urlsafe().decode()}')

    # fetch source page
    fetched = self.fetch_mf2(self.source_url)
    if not fetched:
      return
    resp, mf2 = fetched

    item = self.find_mention_item(mf2.get('items', []))
    if not item:
      self.error(f'Could not find target URL {self.target_url} in source page {resp.url}', data=mf2, log_exception=False)

    # default author to target domain
    author_name = domain
    author_url = f'http://{domain}/'

    # extract author name and URL from h-card, if any
    props = item['properties']
    author = get_first(props, 'author')
    if author:
      if isinstance(author, str):
        author_name = author
      else:
        author_props = author.get('properties', {})
        # keep the defaults above if the h-card has no name or url, instead of
        # overwriting them with None
        author_name = get_first(author_props, 'name') or author_name
        author_url = get_first(author_props, 'url') or author_url

    # if present, u-url overrides source url
    u_url = get_first(props, 'url')
    if u_url:
      self.entity.u_url = u_url

    # generate content
    content = props['content'][0]  # find_mention_item() guaranteed this is here
    text = (content.get('html') or content.get('value')).strip()
    source_url = self.entity.source_url()
    text += f' <br /> <a href="{source_url}">via {util.domain_from_link(source_url)}</a>'

    # write comment
    try:
      self.entity.published = self.source.create_comment(
        self.target_url, author_name, author_url, text)
    except Exception as e:
      code, body = util.interpret_http_exception(e)
      msg = f'Error: {code}: {e}; {body}'
      if code == '401':
        # credentials were rejected, so stop using this source
        logger.warning(f'Disabling source due to: {e}', exc_info=True)
        self.source.status = 'disabled'
        self.source.put()
        self.error(msg, status=code, report=self.source.is_beta_user())
      elif code == '404':
        # post is gone
        self.error(msg, status=code, report=False)
      elif util.is_connection_failure(e) or (code and int(code) // 100 == 5):
        # connection failures and upstream 5xx responses are reported as 502
        self.error(msg, status=502, report=False)
      elif code or body:
        self.error(msg, status=code, report=True)
      else:
        raise

    # write results to datastore
    self.entity.status = 'complete'
    self.entity.put()

    return self.entity.published

Example #34

0

Show file

File: jsonfeed.py Project: snarfed/granary

 def image_url(obj):
   """Returns the 'url' field of the value util.get_first() finds for obj's 'image'.

   {} is passed to get_first() as the default, so the result is None when
   there is no image to read a url from.
   """
   image = util.get_first(obj, 'image', {})
   return image.get('url')

Example #35

0

Show file

File: twitter.py Project: qiweiyu/granary

  def _create(self, obj, preview=None, include_link=False, ignore_formatting=False):
    """Creates or previews creating a tweet, reply tweet, retweet, or favorite.

    https://dev.twitter.com/docs/api/1.1/post/statuses/update
    https://dev.twitter.com/docs/api/1.1/post/statuses/retweet/:id
    https://dev.twitter.com/docs/api/1.1/post/favorites/create

    Args:
      obj: ActivityStreams object
      preview: boolean. Must be passed explicitly as True or False; the
        default None fails the assertion at the top of this method.
      include_link: boolean. If True, obj's url is passed to self._truncate()
        along with the content.
      ignore_formatting: boolean, passed through to self._content_for_create()

    Returns:
      a CreationResult

      If preview is True, the content will be a unicode string HTML
      snippet. If False, it will be a dict with 'id' and 'url' keys
      for the newly created Twitter object.

    Raises:
      ValueError: if obj is a reply and the username can't be extracted from
        the path of the in-reply-to URL
    """
    assert preview in (False, True)
    type = obj.get('objectType')
    verb = obj.get('verb')

    # the object that this one is replying to, liking, or reposting
    base_obj = self.base_object(obj)
    base_id = base_obj.get('id')
    base_url = base_obj.get('url')

    is_reply = type == 'comment' or 'inReplyTo' in obj
    image_urls = [image.get('url') for image in util.get_list(obj, 'image')]
    video_url = util.get_first(obj, 'stream', {}).get('url')
    # media is only attached to tweets: notes, articles, and replies
    has_media = (image_urls or video_url) and (type in ('note', 'article') or is_reply)
    lat = obj.get('location', {}).get('latitude')
    lng = obj.get('location', {}).get('longitude')

    # prefer displayName over content for articles
    # NOTE(review): type and base_url were already assigned above from the same
    # expressions; these two lines recompute them.
    type = obj.get('objectType')
    base_url = self.base_object(obj).get('url')
    prefer_content = type == 'note' or (base_url and (type == 'comment'
                                                      or obj.get('inReplyTo')))
    content = self._content_for_create(obj, ignore_formatting=ignore_formatting,
                                       prefer_name=not prefer_content,
                                       strip_first_video_tag=bool(video_url))
    if not content:
      if type == 'activity':
        # likes, reposts, etc have no text of their own; fall back to the verb
        content = verb
      elif has_media:
        # media with no text is still publishable
        content = ''
      else:
        return source.creation_result(
          abort=False,  # keep looking for things to publish,
          error_plain='No content text found.',
          error_html='No content text found.')

    if is_reply and base_url:
      # extract username from in-reply-to URL so we can @-mention it, if it's
      # not already @-mentioned, since Twitter requires that to make our new
      # tweet a reply.
      # https://dev.twitter.com/docs/api/1.1/post/statuses/update#api-param-in_reply_to_status_id
      # TODO: this doesn't handle an in-reply-to username that's a prefix of
      # another username already mentioned, e.g. in reply to @foo when content
      # includes @foobar.
      parsed = urlparse.urlparse(base_url)
      # the path starts with '/', so parts[0] is '' and parts[1] is the first
      # path segment, which is used as the username
      parts = parsed.path.split('/')
      if len(parts) < 2 or not parts[1]:
        raise ValueError('Could not determine author of in-reply-to URL %s' % base_url)
      mention = '@' + parts[1]
      if mention.lower() not in content.lower():
        content = mention + ' ' + content

      # the embed URL in the preview can't start with mobile. or www., so just
      # hard-code it to twitter.com. index #1 is netloc.
      parsed = list(parsed)
      parsed[1] = self.DOMAIN
      base_url = urlparse.urlunparse(parsed)

    # need a base_url with the tweet id for the embed HTML below. do this
    # *after* checking the real base_url for in-reply-to author username.
    if base_id and not base_url:
      base_url = 'https://twitter.com/-/statuses/' + base_id

    if is_reply and not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to reply to.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/reply">reply to</a>. '
        'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
        'link a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    # truncate and ellipsize content if it's over the character
    # count. URLs will be t.co-wrapped, so include that when counting.
    include_url = obj.get('url') if include_link else None
    content = self._truncate(content, include_url, has_media)

    # linkify defaults to Twitter's link shortening behavior
    preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)

    # dispatch on the kind of object: like, repost, tweet/reply, or RSVP
    if type == 'activity' and verb == 'like':
      if not base_url:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a tweet to like.',
          error_html='Could not find a tweet to <a href="http://indiewebcamp.com/favorite">favorite</a>. '
          'Check that your post has a like-of link to a Twitter URL or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

      if preview:
        return source.creation_result(
          description='<span class="verb">favorite</span> <a href="%s">'
                      'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
      else:
        data = urllib.urlencode({'id': base_id})
        self.urlopen(API_POST_FAVORITE, data=data)
        # the API response is discarded here; only the type is recorded, and
        # the url is filled in from base_url at the end of this method
        resp = {'type': 'like'}

    elif type == 'activity' and verb == 'share':
      if not base_url:
        return source.creation_result(
          abort=True,
          error_plain='Could not find a tweet to retweet.',
          error_html='Could not find a tweet to <a href="http://indiewebcamp.com/repost">retweet</a>. '
          'Check that your post has a repost-of link to a Twitter URL or to an original post that publishes a '
          '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

      if preview:
        return source.creation_result(
          description='<span class="verb">retweet</span> <a href="%s">'
                      'this tweet</a>:\n%s' % (base_url, self.embed_post(base_obj)))
      else:
        data = urllib.urlencode({'id': base_id})
        resp = self.urlopen(API_POST_RETWEET % base_id, data=data)
        resp['type'] = 'repost'

    elif type in ('note', 'article') or is_reply:  # a tweet
      # unicode() is the Python 2 builtin; the status is sent as UTF-8 bytes
      content = unicode(content).encode('utf-8')
      data = {'status': content}

      if is_reply:
        description = \
          '<span class="verb">@-reply</span> to <a href="%s">this tweet</a>:\n%s' % (
            base_url, self.embed_post(base_obj))
        data['in_reply_to_status_id'] = base_id
      else:
        description = '<span class="verb">tweet</span>:'

      # a video takes precedence over images; only one or the other is attached
      if video_url:
        preview_content += ('<br /><br /><video controls src="%s"><a href="%s">'
                            'this video</a></video>' % (video_url, video_url))
        if not preview:
          ret = self.upload_video(video_url)
          # upload_video() returns a CreationResult instead of media ids in
          # some cases; pass it straight back to the caller
          if isinstance(ret, source.CreationResult):
            return ret
          data['media_ids'] = ret

      elif image_urls:
        num_urls = len(image_urls)
        if num_urls > MAX_MEDIA:
          image_urls = image_urls[:MAX_MEDIA]
          logging.warning('Found %d photos! Only using the first %d: %r',
                          num_urls, MAX_MEDIA, image_urls)
        preview_content += '<br /><br />' + ' &nbsp; '.join(
          '<img src="%s" />' % url for url in image_urls)
        if not preview:
          data['media_ids'] = ','.join(self.upload_images(image_urls))

      # NOTE(review): a coordinate of 0 is falsy, so a location on the equator
      # or prime meridian is skipped by this check.
      if lat and lng:
        preview_content += (
          '<div>at <a href="https://maps.google.com/maps?q=%s,%s">'
          '%s, %s</a></div>' % (lat, lng, lat, lng))
        data['lat'] = lat
        data['long'] = lng

      if preview:
        return source.creation_result(content=preview_content, description=description)
      else:
        resp = self.urlopen(API_POST_TWEET, data=urllib.urlencode(data))
        resp['type'] = 'comment' if is_reply else 'post'

    elif (verb and verb.startswith('rsvp-')) or verb == 'invite':
      return source.creation_result(
        abort=True,
        error_plain='Cannot publish RSVPs to Twitter.',
        error_html='This looks like an <a href="http://indiewebcamp.com/rsvp">RSVP</a>. '
        'Publishing events or RSVPs to Twitter is not supported.')

    else:
      return source.creation_result(
        abort=False,
        error_plain='Cannot publish type=%s, verb=%s to Twitter' % (type, verb),
        error_html='Cannot publish type=%s, verb=%s to Twitter' % (type, verb))

    # only the non-preview create paths reach here. make sure resp has a url:
    # the new tweet's if the response includes an id_str, otherwise base_url.
    id_str = resp.get('id_str')
    if id_str:
      resp.update({'id': id_str, 'url': self.tweet_url(resp)})
    elif 'url' not in resp:
      resp['url'] = base_url

    return source.creation_result(resp)