コード例 #1
0
    def upload_media(self, media):
        """Uploads one or more images or videos from web URLs.

        https://docs.joinmastodon.org/api/rest/media/

        Each distinct URL is downloaded as a stream and re-posted to the
        API_MEDIA endpoint. An object's displayName, when present, is sent
        along as the media description (alt text), truncated to
        MAX_ALT_LENGTH characters.

        Args:
          media: sequence of AS image or stream objects, eg:
            [{'url': 'http://picture', 'displayName': 'a thing'}, ...]

        Returns: list of string media ids for uploaded files
        """
        uploaded = set()  # URLs uploaded so far; for de-duping
        ids = []

        for obj in media:
            # Prefer the stream URL; fall back to the object's own URL.
            url = util.get_url(obj, key='stream') or util.get_url(obj)
            if not url or url in uploaded:
                continue

            # Form fields that accompany the file upload.
            data = {}
            alt = obj.get('displayName')
            if alt:
                data['description'] = util.ellipsize(alt, chars=MAX_ALT_LENGTH)

            # TODO: mime type check?
            with util.requests_get(url, stream=True) as fetch:
                fetch.raise_for_status()
                # Previously the description was built but never sent, so alt
                # text was silently dropped.
                # NOTE(review): assumes _post forwards keyword arguments to the
                # underlying HTTP call, as it already does for files= - confirm.
                upload = self._post(API_MEDIA, files={'file': fetch.raw},
                                    data=data)

            logging.info('Got: %s', upload)
            ids.append(upload['id'])
            uploaded.add(url)

        return ids
コード例 #2
0
  def postprocess_object(self, obj):
    """Applies source-independent post-processing to an object, in place.

    Fills in default RSVP content when the object has a verb listed in
    RSVP_CONTENTS but no content, then derives displayName from the content
    if the object doesn't already have one.

    Args:
      obj: object dict

    Returns: the result of util.trim_nulls() on the (modified) object
    """
    verb = obj.get('verb')
    text = obj.get('content')
    rsvp_text = RSVP_CONTENTS.get(verb)

    # Synthesize content for RSVP-like verbs that arrived without any.
    if rsvp_text and not text:
      if verb.startswith('rsvp-'):
        text = '<data class="p-rsvp" value="%s">%s</data>' % (
          verb.split('-')[1], rsvp_text)
      else:
        text = rsvp_text
      obj['content'] = text

    if text and not obj.get('displayName'):
      who = self.actor_name(obj.get('author') or obj.get('actor'))
      if verb in ('like', 'share'):
        name = '%s %s' % (who, text)
      elif rsvp_text:
        # For invites, the name shown is the invitee's, ie the object.
        if verb == 'invite':
          who = self.actor_name(obj.get('object'))
        name = '%s %s' % (who, rsvp_text)
      else:
        name = util.ellipsize(text)
      obj['displayName'] = name

    return util.trim_nulls(obj)
コード例 #3
0
def activities_to_atom(activities,
                       actor,
                       title=None,
                       request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    Modifies the activities in place: each one gets a title, an 'object'
    dict (created if missing) with a 'rendered_content' field, and
    list-valued attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns: unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    host_url = (_remove_query_params(host_url) if host_url else
                'https://github.com/snarfed/activitystreams-unofficial')
    request_url = _remove_query_params(
        request_url) if request_url else host_url

    for a in activities:
        # setdefault rather than get: with get, an activity without an object
        # had its rendered content written to a throwaway dict, and the
        # activities[0]['object'] lookup below raised KeyError.
        obj = a.setdefault('object', {})
        # Render content as HTML
        content = obj.get('content')
        obj['rendered_content'] = microformats2.render_content(obj)

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            a['title'] = util.ellipsize(
                a.get('displayName') or a.get('content') or obj.get('title')
                or obj.get('displayName') or content or 'Untitled')

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    return template.render(
        ATOM_TEMPLATE_FILE, {
            'items': activities,
            'host_url': host_url,
            'request_url': request_url,
            'title': title
            or 'User feed for ' + source.Source.actor_name(actor),
            # Feed-level timestamp comes from the first activity; empty string
            # (not None) when it has no published time.
            'updated':
            activities[0]['object'].get('published', '') if activities else '',
            'actor': actor,
        })
コード例 #4
0
ファイル: microformats2.py プロジェクト: cacimatti/granary
def get_title(mf2):
    """Extracts a title for a parsed mf2 document from its name.

    Only the first line of the name is used.

    Args:
      mf2: dict, parsed mf2 object (ie return value from mf2py.parse())

    Returns: string title, possibly ellipsized; '' if there is no name
    """
    feed = mf2util.interpret_feed(mf2, '')
    name_lines = feed.get('name', '').splitlines()
    if not name_lines:
        return ''

    return util.ellipsize(name_lines[0])
コード例 #5
0
ファイル: microformats2.py プロジェクト: snarfed/granary
def get_title(mf2):
  """Returns the title of an mf2 object: the first line of its name.

  Args:
    mf2: dict, parsed mf2 object (ie return value from mf2py.parse())

  Returns: string title, possibly ellipsized. Empty string if the name is
    missing or empty.
  """
  name = mf2util.interpret_feed(mf2, '').get('name', '')
  first_line = name.splitlines()[:1]
  return util.ellipsize(first_line[0]) if first_line else ''
コード例 #6
0
def get_title(mf2):
    """Returns an mf2 object's title, ie the first line of its name.

    Args:
      mf2: dict, parsed mf2 object (ie return value from mf2py.parse())

    Returns: string title, possibly ellipsized. Empty string if the object
      has no name.
    """
    # `or ''` also covers a name that is present but None/empty, which would
    # otherwise fail on .splitlines().
    name = mf2util.interpret_feed(mf2, '').get('name') or ''
    lines = name.splitlines()
    if lines:
        return util.ellipsize(lines[0])

    return ''
コード例 #7
0
ファイル: atom.py プロジェクト: stan-alam/granary
def _prepare_activity(a, reader=True):
    """Preprocesses an activity to prepare it to be rendered as Atom.

    Modifies a in place. Adds to a['object'] (created if missing):
      rendered_content: HTML for the primary object, ampersands encoded
      rendered_children: list of HTML snippets for attached notes/articles
        and for images not already present in the rendered content
    Also guarantees a plain-text, XML-escaped a['title'] and rewrites the
    object's published/updated timestamps.

    Args:
      a: ActivityStreams 1 activity dict
      reader: boolean, whether the output will be rendered in a feed reader.
        Currently just includes location if True, not otherwise.
    """
    # For posts (or untyped activities) the inner object is what gets
    # rendered; for anything else the activity itself is rendered.
    act_type = source.object_type(a)
    if not act_type or act_type == 'post':
        primary = a.get('object', {})
    else:
        primary = a
    obj = a.setdefault('object', {})

    # Render content as HTML; escape &s
    obj['rendered_content'] = _encode_ampersands(
        microformats2.render_content(primary,
                                     include_location=reader,
                                     render_attachments=True))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
        a['title'] = util.ellipsize(
            _encode_ampersands(
                a.get('displayName') or a.get('content') or obj.get('title')
                or obj.get('displayName') or obj.get('content') or 'Untitled'))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    a['title'] = xml.sax.saxutils.escape(
        BeautifulSoup(a['title']).get_text(''))

    children = []  # HTML snippets that end up in obj['rendered_children']
    image_urls_seen = set()  # image URLs belonging to non-image attachments
    image_atts = []  # image values collected from image-type attachments

    # normalize attachments, render attached notes/articles
    attachments = a.get('attachments') or obj.get('attachments') or []
    for att in attachments:
        att['stream'] = util.get_first(att, 'stream')
        type = att.get('objectType')  # NOTE: shadows the builtin `type`

        if type == 'image':
            # Image attachments are deferred to the image loop below.
            image_atts.append(util.get_first(att, 'image'))
            continue

        image_urls_seen |= set(util.get_urls(att, 'image'))
        if type in ('note', 'article'):
            html = microformats2.render_content(att,
                                                include_location=reader,
                                                render_attachments=True)
            author = att.get('author')
            if author:
                # Prefix the attachment's HTML with its author's name.
                name = microformats2.maybe_linked_name(
                    microformats2.object_to_json(author).get('properties', []))
                html = '%s: %s' % (name.strip(), html)
            children.append(html)

    # render image(s) that we haven't already seen
    for image in image_atts + util.get_list(obj, 'image'):
        if not image:
            continue
        url = image.get('url')
        # Build a regex matching a src="..." attribute that points at this
        # image, with or without scheme and host, so an image already embedded
        # in the rendered content isn't rendered a second time.
        # NOTE(review): url may be None here; it is only checked after the
        # regex has been built - confirm urlparse(None) is acceptable.
        parsed = urllib.parse.urlparse(url)
        rest = urllib.parse.urlunparse(('', '') + parsed[2:])
        img_src_re = re.compile(
            r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
            (re.escape(parsed.netloc), re.escape(rest)))
        if (url and url not in image_urls_seen
                and not img_src_re.search(obj['rendered_content'])):
            children.append(microformats2.img(url))
            image_urls_seen.add(url)

    obj['rendered_children'] = [
        _encode_ampersands(child) for child in children
    ]

    # make sure published and updated are strict RFC 3339 timestamps
    for prop in 'published', 'updated':
        val = obj.get(prop)
        if val:
            obj[prop] = util.maybe_iso8601_to_rfc3339(val)
            # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
            # time zone unaware. They must have either an offset or the Z suffix.
            # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
            if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
                obj[prop] += 'Z'
コード例 #8
0
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None, reader=True):
  """Renders ActivityStreams activities as an Atom feed.

  The activities are modified in place while they are prepared for the
  template (title, rendered content, rendered children, image lists).

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.

  Returns:
    unicode string with Atom XML
  """
  # Drop query params from the host URL so access tokens etc don't leak into
  # the feed.
  if host_url:
    host_url = _remove_query_params(host_url)
  else:
    host_url = 'https://github.com/snarfed/granary'
  if request_url is None:
    request_url = host_url

  for activity in activities:
    # Posts and untyped activities render their inner object; everything else
    # renders the activity itself.
    kind = source.object_type(activity)
    if kind and kind != 'post':
      primary = activity
    else:
      primary = activity.get('object', {})
    obj = activity.setdefault('object', {})

    # HTML content, with ampersands encoded.
    rendered = microformats2.render_content(primary, include_location=reader)
    obj['rendered_content'] = _encode_ampersands(rendered)

    # Atom <entry> requires a title, so fall back through the likely fields.
    if not activity.get('title'):
      fallback = (activity.get('displayName') or activity.get('content') or
                  obj.get('title') or obj.get('displayName') or
                  obj.get('content') or 'Untitled')
      activity['title'] = util.ellipsize(_encode_ampersands(fallback))

    # The Atom spec says title is plain text, so strip HTML tags:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    plain_title = BeautifulSoup(activity['title']).get_text('')
    activity['title'] = xml.sax.saxutils.escape(plain_title)

    # attachments.image is always a list by the time the template sees it.
    attachments = activity.get('attachments') or obj.get('attachments') or []
    for att in attachments:
      att['image'] = util.get_list(att, 'image')

    # Attached notes and articles are rendered as child HTML snippets.
    children = obj['rendered_children'] = []
    for att in attachments:
      if att.get('objectType') not in ('note', 'article'):
        continue
      rendered_att = microformats2.render_content(att, include_location=reader)
      author = att.get('author')
      if author:
        props = microformats2.object_to_json(author).get('properties', [])
        name = microformats2.maybe_linked_name(props)
        rendered_att = '%s: %s' % (name.strip(), rendered_att)
      children.append(_encode_ampersands(rendered_att))

  # Emulate Django template behavior that returns a special default value that
  # can continue to be referenced when an attribute or item lookup fails. Helps
  # avoid conditionals in the template itself.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      wrapped = {}
      for key, val in kwargs.items():
        wrapped[key] = Defaulter(**val) if isinstance(val, dict) else val
      super(Defaulter, self).__init__(Defaulter, **wrapped)

    def __unicode__(self):
      return super(Defaulter, self).__unicode__() if self else u''

  loader = jinja2.PackageLoader(__package__, 'templates')
  env = jinja2.Environment(loader=loader, autoescape=True)
  if actor is None:
    actor = {}

  atom_template = env.get_template(ATOM_TEMPLATE_FILE)
  return atom_template.render(
    items=[Defaulter(**activity) for activity in activities],
    host_url=host_url,
    request_url=request_url,
    xml_base=xml_base,
    title=title or 'User feed for ' + source.Source.actor_name(actor),
    updated=activities[0]['object'].get('published', '') if activities else '',
    actor=Defaulter(**actor),
    rels=rels or {},
    )
コード例 #9
0
  def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
              ignore_formatting=False):
    """Creates a new issue or comment.

    When creating a new issue, if the authenticated user is a collaborator on
    the repo, tags that match existing labels are converted to those labels and
    included.

    https://developer.github.com/v4/guides/forming-calls/#about-mutations
    https://developer.github.com/v4/mutation/addcomment/
    https://developer.github.com/v4/mutation/addreaction/
    https://developer.github.com/v3/issues/#create-an-issue

    Args:
      obj: ActivityStreams object
      preview: boolean
      include_link: string
      ignore_formatting: boolean

    Returns:
      a CreationResult

      If preview is True, the contents will be a unicode string HTML
      snippet. If False, it will be a dict with 'id' and 'url' keys
      for the newly created GitHub object.
    """
    assert preview in (False, True)

    type = source.object_type(obj)
    if type and type not in ('issue', 'comment', 'activity', 'note', 'article',
                             'like', 'tag'):
      return source.creation_result(
        abort=False, error_plain='Cannot publish %s to GitHub' % type)

    base_obj = self.base_object(obj)
    base_url = base_obj.get('url')
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='You need an in-reply-to GitHub repo, issue, PR, or comment URL.')

    content = orig_content = html.escape(
      self._content_for_create(obj, ignore_formatting=ignore_formatting),
      quote=False)
    url = obj.get('url')
    if include_link == source.INCLUDE_LINK and url:
      content += '\n\n(Originally published at: %s)' % url

    parsed = urllib.parse.urlparse(base_url)
    path = parsed.path.strip('/').split('/')
    owner, repo = path[:2]
    if len(path) == 4:
      number = path[3]

    comment_id = re.match(r'^issuecomment-([0-9]+)$', parsed.fragment)
    if comment_id:
      comment_id = comment_id.group(1)
    elif parsed.fragment:
      return source.creation_result(
        abort=True,
        error_plain='Please remove the fragment #%s from your in-reply-to URL.' %
          parsed.fragment)

    if type == 'comment':  # comment or reaction
      if not (len(path) == 4 and path[2] in ('issues', 'pull')):
        return source.creation_result(
          abort=True, error_plain='GitHub comment requires in-reply-to issue or PR URL.')

      is_reaction = orig_content in REACTIONS_GRAPHQL
      if preview:
        if comment_id:
          comment = self.rest(REST_API_COMMENT % (owner, repo, comment_id)).json()
          target_link = '<a href="%s">a comment on %s/%s#%s, <em>%s</em></a>' % (
            base_url, owner, repo, number, util.ellipsize(comment['body']))
        else:
          resp = self.graphql(GRAPHQL_ISSUE_OR_PR, locals())
          issue = (resp.get('repository') or {}).get('issueOrPullRequest')
          target_link = '<a href="%s">%s/%s#%s%s</a>' % (
            base_url, owner, repo, number,
            (', <em>%s</em>' % issue['title']) if issue else '')

        if is_reaction:
          preview_content = None
          desc = u'<span class="verb">react %s</span> to %s.' % (
            orig_content, target_link)
        else:
          preview_content = self.render_markdown(content, owner, repo)
          desc = '<span class="verb">comment</span> on %s:' % target_link
        return source.creation_result(content=preview_content, description=desc)

      else:  # create
        # we originally used the GraphQL API to create issue comments and
        # reactions, but it often gets rejected against org repos due to access
        # controls. oddly, the REST API works fine in those same cases.
        # https://github.com/snarfed/bridgy/issues/824
        if is_reaction:
          if comment_id:
            api_url = REST_API_COMMENT_REACTIONS % (owner, repo, comment_id)
            reacted = self.rest(api_url, data={
              'content': REACTIONS_REST.get(orig_content),
            }).json()
            url = base_url
          else:
            api_url = REST_API_REACTIONS % (owner, repo, number)
            reacted = self.rest(api_url, data={
              'content': REACTIONS_REST.get(orig_content),
            }).json()
            url = '%s#%s-by-%s' % (base_url, reacted['content'].lower(),
                                   reacted['user']['login'])

          return source.creation_result({
            'id': reacted.get('id'),
            'url': url,
            'type': 'react',
          })

        else:
          try:
            api_url = REST_API_COMMENTS % (owner, repo, number)
            commented = self.rest(api_url, data={'body': content}).json()
            return source.creation_result({
              'id': commented.get('id'),
              'url': commented.get('html_url'),
              'type': 'comment',
            })
          except ValueError as e:
            return source.creation_result(abort=True, error_plain=str(e))

    elif type == 'like':  # star
      if not (len(path) == 2 or (len(path) == 3 and path[2] == 'issues')):
        return source.creation_result(
          abort=True, error_plain='GitHub like requires in-reply-to repo URL.')

      if preview:
        return source.creation_result(
          description='<span class="verb">star</span> <a href="%s">%s/%s</a>.' %
            (base_url, owner, repo))
      else:
        issue = self.graphql(GRAPHQL_REPO, locals())
        resp = self.graphql(GRAPHQL_ADD_STAR, {
          'starrable_id': issue['repository']['id'],
        })
        return source.creation_result({
          'url': base_url + '/stargazers',
        })

    elif type == 'tag':  # add label
      if not (len(path) == 4 and path[2] in ('issues', 'pull')):
        return source.creation_result(
          abort=True, error_plain='GitHub tag post requires tag-of issue or PR URL.')

      tags = set(util.trim_nulls(t.get('displayName', '').strip()
                                 for t in util.get_list(obj, 'object')))
      if not tags:
        return source.creation_result(
          abort=True, error_plain='No tags found in tag post!')

      existing_labels = self.existing_labels(owner, repo)
      labels = sorted(tags & existing_labels)
      issue_link = '<a href="%s">%s/%s#%s</a>' % (base_url, owner, repo, number)
      if not labels:
        return source.creation_result(
          abort=True,
          error_html="No tags in [%s] matched %s's existing labels [%s]." %
            (', '.join(sorted(tags)), issue_link, ', '.join(sorted(existing_labels))))

      if preview:
        return source.creation_result(
          description='add label%s <span class="verb">%s</span> to %s.' % (
            ('s' if len(labels) > 1 else ''), ', '.join(labels), issue_link))
      else:
        resp = self.rest(REST_API_ISSUE_LABELS % (owner, repo, number), labels).json()
        return source.creation_result({
          'url': base_url,
          'type': 'tag',
          'tags': labels,
        })

    else:  # new issue
      if not (len(path) == 2 or (len(path) == 3 and path[2] == 'issues')):
        return source.creation_result(
          abort=True, error_plain='New GitHub issue requires in-reply-to repo URL')

      title = util.ellipsize(obj.get('displayName') or obj.get('title') or
                             orig_content)
      tags = set(util.trim_nulls(t.get('displayName', '').strip()
                                 for t in util.get_list(obj, 'tags')))
      labels = sorted(tags & self.existing_labels(owner, repo))

      if preview:
        preview_content = '<b>%s</b><hr>%s' % (
          title, self.render_markdown(content, owner, repo))
        preview_labels = ''
        if labels:
          preview_labels = ' and attempt to add label%s <span class="verb">%s</span>' % (
            's' if len(labels) > 1 else '', ', '.join(labels))
        return source.creation_result(content=preview_content, description="""\
<span class="verb">create a new issue</span> on <a href="%s">%s/%s</a>%s:""" %
            (base_url, owner, repo, preview_labels))
      else:
        resp = self.rest(REST_API_CREATE_ISSUE % (owner, repo), {
          'title': title,
          'body': content,
          'labels': labels,
        }).json()
        resp['url'] = resp.pop('html_url')
        return source.creation_result(resp)

    return source.creation_result(
      abort=False,
      error_plain="%s doesn't look like a GitHub repo, issue, or PR URL." % base_url)
コード例 #10
0
ファイル: atom.py プロジェクト: latuji/granary
def activities_to_atom(activities,
                       actor,
                       title=None,
                       request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    Modifies the activities in place: each one gets a title, an 'object'
    dict (created if missing) with a 'rendered_content' field, and
    list-valued attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed. None is
        treated as an empty dict.
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns: unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    host_url = (_remove_query_params(host_url)
                if host_url else 'https://github.com/snarfed/granary')
    request_url = _remove_query_params(
        request_url) if request_url else host_url

    for a in activities:
        # setdefault rather than get: with get, an activity without an object
        # had its rendered content written to a throwaway dict, and the
        # activities[0]['object'] lookup below raised KeyError.
        obj = a.setdefault('object', {})
        # Render content as HTML; escape &s
        content = obj.get('content')
        obj['rendered_content'] = _encode_ampersands(
            microformats2.render_content(obj))

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            a['title'] = util.ellipsize(
                _encode_ampersands(
                    a.get('displayName') or a.get('content')
                    or obj.get('title') or obj.get('displayName') or content
                    or 'Untitled'))

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    # Emulate Django template behavior that returns a special default value that
    # can continue to be referenced when an attribute or item lookup fails. Helps
    # avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # Nested dicts are wrapped recursively so that chained lookups
            # keep returning Defaulters.
            super(Defaulter, self).__init__(
                Defaulter, **{
                    k: (Defaulter(**v) if isinstance(v, dict) else v)
                    for k, v in kwargs.items()
                })

        def __unicode__(self):
            return super(Defaulter, self).__unicode__() if self else u''

    env = jinja2.Environment(loader=jinja2.PackageLoader(
        __package__, 'templates'),
                             autoescape=True)
    if actor is None:
        actor = {}
    return env.get_template(ATOM_TEMPLATE_FILE).render(
        items=[Defaulter(**a) for a in activities],
        host_url=host_url,
        request_url=request_url,
        title=title or 'User feed for ' + source.Source.actor_name(actor),
        updated=activities[0]['object'].get('published', '')
        if activities else '',
        actor=Defaulter(**actor),
    )
コード例 #11
0
ファイル: rss.py プロジェクト: whyouare111/granary
def from_activities(activities,
                    actor=None,
                    title=None,
                    feed_url=None,
                    home_page_url=None,
                    hfeed=None):
    """Converts ActivityStreams activities to an RSS 2.0 feed.

    Args:
      activities: sequence of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed title
      feed_url: string, the URL for this RSS feed. Required (asserted).
      home_page_url: string, the home page URL
      hfeed: dict, parsed mf2 h-feed, if available

    Returns:
      unicode string with RSS 2.0 XML

    Raises:
      TypeError: if activities is not iterable, or is a dict or string
      AssertionError: if feed_url is falsy
    """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    # dicts and strings are iterable but are never a valid activity sequence.
    if isinstance(activities, (dict, str)):
        raise TypeError('activities may not be a dict or string')

    fg = FeedGenerator()
    fg.id(feed_url)
    # NOTE(review): this check runs after feed_url has already been handed to
    # fg.id() above.
    assert feed_url
    fg.link(href=feed_url, rel='self')
    if home_page_url:
        fg.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    fg.language('en')
    fg.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}
    # Feed image: the h-feed's photo if there is one, else the actor's image.
    image = (util.get_url(hfeed.get('properties', {}), 'photo')
             or util.get_url(actor, 'image'))
    if image:
        fg.image(image)

    props = hfeed.get('properties') or {}
    content = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    # '-' is a placeholder so the required description is never empty.
    desc = content or summary or '-'
    fg.description(desc)  # required
    fg.title(title or util.ellipsize(desc))  # required

    latest = None  # most recent item datetime seen; becomes lastBuildDate
    feed_has_enclosure = False
    for activity in activities:
        # Bare objects (no 'object' field) are treated as the object itself.
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue

        item = fg.add_entry()
        url = obj.get('url')
        id = obj.get('id') or url  # NOTE: shadows the builtin `id`
        item.id(id)
        item.link(href=url)
        item.guid(url, permalink=True)

        # title (required)
        # NOTE: this rebinds the `title` parameter; the feed-level title was
        # already set above, so only item titles are affected.
        title = (obj.get('title') or obj.get('displayName')
                 or util.ellipsize(obj.get('content', '-')))
        # strip HTML tags
        title = util.parse_html(title).get_text('').strip()
        item.title(title)

        content = microformats2.render_content(obj,
                                               include_location=True,
                                               render_attachments=True,
                                               render_image=True)
        if not content:
            content = obj.get('summary')
        if content:
            item.content(content, type='CDATA')

        # Categories come from named tags, excluding like/react/share verbs
        # and article/person/mention object types.
        categories = [
            {
                'term': t['displayName']
            } for t in obj.get('tags', [])
            if t.get('displayName') and t.get('verb') not in ('like', 'react',
                                                              'share')
            and t.get('objectType') not in ('article', 'person', 'mention')
        ]
        item.category(categories)

        author = obj.get('author', {})
        author = {
            'name': author.get('displayName') or author.get('username'),
            'uri': author.get('url'),
            'email': author.get('email') or '-',
        }
        item.author(author)

        published = obj.get('published') or obj.get('updated')
        if published and isinstance(published, str):
            try:
                dt = mf2util.parse_datetime(published)
                # Promote non-datetime results to a datetime at time.min, and
                # naive datetimes to UTC.
                if not isinstance(dt, datetime):
                    dt = datetime.combine(dt, time.min)
                if not dt.tzinfo:
                    dt = dt.replace(tzinfo=util.UTC)
                item.published(dt)
                if not latest or dt > latest:
                    latest = dt
            except ValueError:  # bad datetime string
                pass

        # At most one enclosure is emitted per item; further candidates are
        # logged and skipped.
        item_has_enclosure = False
        for att in obj.get('attachments', []):
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            url = stream.get('url') or ''
            mime = mimetypes.guess_type(url)[0] or ''
            # An attachment qualifies by its objectType or by the major part
            # of the MIME type guessed from its URL.
            if (att.get('objectType') in ENCLOSURE_TYPES
                    or mime and mime.split('/')[0] in ENCLOSURE_TYPES):
                if item_has_enclosure:
                    logging.info(
                        'Warning: item %s already has an RSS enclosure, skipping additional enclosure %s',
                        id, url)
                    continue

                item_has_enclosure = feed_has_enclosure = True
                item.enclosure(url=url,
                               type=mime,
                               length=str(stream.get('size', '')))
                item.load_extension('podcast')
                duration = stream.get('duration')
                if duration:
                    item.podcast.itunes_duration(duration)

    # Any enclosure turns the feed into a podcast feed.
    if feed_has_enclosure:
        fg.load_extension('podcast')
        fg.podcast.itunes_author(
            actor.get('displayName') or actor.get('username'))
        if summary:
            fg.podcast.itunes_summary(summary)
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_block(False)
        # NOTE: `author` and `categories` here are whatever the last processed
        # item left behind in the loop above. They are always bound at this
        # point because feed_has_enclosure implies at least one item was
        # processed.
        name = author.get('name')
        if name:
            fg.podcast.itunes_author(name)
        if image:
            fg.podcast.itunes_image(image)
        fg.podcast.itunes_category(categories)

    if latest:
        fg.lastBuildDate(latest)

    return fg.rss_str(pretty=True).decode('utf-8')
コード例 #12
0
ファイル: atom.py プロジェクト: snarfed/granary
def _prepare_activity(a, reader=True):
  """Preprocesses an activity to prepare it to be rendered as Atom.

  Modifies a in place:

  * sets rendered_content and rendered_children on the activity's object
  * makes sure a['title'] is set, stripped of HTML tags, and XML-escaped
  * normalizes actor images and attachment streams/images via util.get_first
  * rewrites the object's published and updated values so that they carry a
    time zone offset or Z suffix

  Args:
    a: ActivityStreams 1 activity dict
    reader: boolean, whether the output will be rendered in a feed reader.
      Passed to microformats2.render_content as include_location, so location
      is currently included if True, not otherwise.
  """
  act_type = source.object_type(a)
  obj = util.get_first(a, 'object', default={})
  # Posts (and untyped activities) are rendered from their object; everything
  # else, eg likes and shares, is rendered from the activity itself.
  primary = obj if (not act_type or act_type == 'post') else a

  # Render content as HTML; escape &s
  obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
    primary, include_location=reader, render_attachments=True))

  # Make sure every activity has the title field, since Atom <entry> requires
  # the title element.
  if not a.get('title'):
    a['title'] = util.ellipsize(_encode_ampersands(
      a.get('displayName') or a.get('content') or obj.get('title') or
      obj.get('displayName') or obj.get('content') or 'Untitled'))

  # strip HTML tags. the Atom spec says title is plain text:
  # http://atomenabled.org/developers/syndication/#requiredEntryElements
  # Name the parser explicitly so that the result doesn't depend on which
  # optional parsers happen to be installed.
  a['title'] = xml.sax.saxutils.escape(
    BeautifulSoup(a['title'], 'html.parser').get_text(''))

  children = []
  image_urls_seen = set()
  image_atts = []

  # normalize actor images
  for elem in a, obj:
    actor = elem.get('actor')
    if actor:
      actor['image'] = util.get_first(actor, 'image')

  # normalize attachments, render attached notes/articles
  attachments = a.get('attachments') or obj.get('attachments') or []
  for att in attachments:
    att['stream'] = util.get_first(att, 'stream')
    att_type = att.get('objectType')

    if att_type == 'image':
      # image attachments are collected here and rendered below, after we know
      # which image URLs the other attachments already cover
      att['image'] = util.get_first(att, 'image')
      image_atts.append(att['image'])
      continue

    image_urls_seen |= set(util.get_urls(att, 'image'))
    if att_type in ('note', 'article'):
      html = microformats2.render_content(att, include_location=reader,
                                          render_attachments=True)
      author = att.get('author')
      if author:
        name = microformats2.maybe_linked_name(
          microformats2.object_to_json(author).get('properties') or {})
        html = '%s: %s' % (name.strip(), html)
      children.append(html)

  # render image(s) that we haven't already seen
  for image in image_atts + util.get_list(obj, 'image'):
    if not image:
      continue

    url = image.get('url')
    if not url or url in image_urls_seen:
      # nothing to render, so don't bother parsing the URL or building a regex
      continue

    # Match an existing <img src=...> for this URL in the rendered content,
    # whether it's absolute, protocol-relative, or host-relative.
    parsed = urllib.parse.urlparse(url)
    rest = urllib.parse.urlunparse(('', '') + parsed[2:])
    img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" %
                            (re.escape(parsed.netloc), re.escape(rest)))
    if not img_src_re.search(obj['rendered_content']):
      children.append(microformats2.img(url))
      image_urls_seen.add(url)

  obj['rendered_children'] = [_encode_ampersands(child) for child in children]

  # make sure published and updated are strict RFC 3339 timestamps
  for prop in 'published', 'updated':
    val = obj.get(prop)
    if val:
      obj[prop] = util.maybe_iso8601_to_rfc3339(val)
      # Atom timestamps are even stricter than RFC 3339: they can't be naive ie
      # time zone unaware. They must have either an offset or the Z suffix.
      # https://www.feedvalidator.org/docs/error/InvalidRFC3339Date.html
      if not util.TIMEZONE_OFFSET_RE.search(obj[prop]):
        obj[prop] += 'Z'
コード例 #13
0
ファイル: atom.py プロジェクト: harixxy/granary
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None):
  """Converts ActivityStreams activites to an Atom feed.

  Modifies the activities in place: fills in each activity's title and its
  object's rendered_content, creates an empty object on activities that don't
  have one, and wraps single attachment images in a list.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed. May be None.
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      Defaults to host_url.
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element. Defaults to the granary GitHub URL.

  Returns: unicode string with Atom XML
  """
  # Strip query params from URLs so that we don't include access tokens, etc
  host_url = (_remove_query_params(host_url) if host_url
              else 'https://github.com/snarfed/granary')
  request_url = _remove_query_params(request_url) if request_url else host_url

  for a in activities:
    # setdefault, not get: the rendered content has to be stored on the
    # activity itself, and the feed's updated value below indexes
    # activities[0]['object'] directly.
    obj = a.setdefault('object', {})
    # Render content as HTML; escape &s
    content = obj.get('content')
    obj['rendered_content'] = _encode_ampersands(microformats2.render_content(obj))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
      a['title'] = util.ellipsize(_encode_ampersands(
        a.get('displayName') or a.get('content') or obj.get('title') or
        obj.get('displayName') or content or 'Untitled'))

    # Normalize attachments.image to always be a list.
    for att in obj.get('attachments', []):
      image = att.get('image')
      if image and not isinstance(image, list):
        att['image'] = [image]

  # Emulate Django template behavior that returns a special default value that
  # can continue to be referenced when an attribute or item lookup fails. Helps
  # avoid conditionals in the template itself.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      # nested dicts are wrapped recursively so that chained lookups work
      super(Defaulter, self).__init__(Defaulter, **{
        k: (Defaulter(**v) if isinstance(v, dict) else v)
        for k, v in kwargs.items()})

    # NOTE(review): __unicode__ is a Python 2 hook; confirm which interpreter
    # this module targets before relying on empty values rendering as ''.
    def __unicode__(self):
      return super(Defaulter, self).__unicode__() if self else u''

  env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, 'templates'),
                           autoescape=True)
  if actor is None:
    actor = {}
  return env.get_template(ATOM_TEMPLATE_FILE).render(
    items=[Defaulter(**a) for a in activities],
    host_url=host_url,
    request_url=request_url,
    title=title or 'User feed for ' + source.Source.actor_name(actor),
    # the feed's updated value comes from the first activity's object
    updated=activities[0]['object'].get('published', '') if activities else '',
    actor=Defaulter(**actor),
    )
コード例 #14
0
ファイル: atom.py プロジェクト: woakes070048/granary
def activities_to_atom(activities, actor, title=None, request_url=None, host_url=None):
    """Renders ActivityStreams activities as an Atom feed.

    The activities are modified in place: each one gets an object (if it had
    none), a plain text title, rendered_content on its object, and list-valued
    attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed. May be None.
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self". Defaults to host_url.
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns: unicode string with Atom XML
    """
    # Query params are dropped from both URLs so that access tokens etc. don't
    # end up in the feed.
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = "https://github.com/snarfed/granary"

    if request_url:
        request_url = _remove_query_params(request_url)
    else:
        request_url = host_url

    for activity in activities:
        kind = source.object_type(activity)
        is_post = not kind or kind == "post"
        # Posts are rendered from their object, other verbs from the activity.
        primary = activity.get("object", {}) if is_post else activity
        obj = activity.setdefault("object", {})

        # HTML content, with &s escaped
        fragments = [microformats2.render_content(primary)]
        obj["rendered_content"] = _encode_ampersands("\n".join(fragments))

        # Atom <entry> requires a title element, so fall back through the
        # activity's and object's text fields.
        if not activity.get("title"):
            fallback = (
                activity.get("displayName")
                or activity.get("content")
                or obj.get("title")
                or obj.get("displayName")
                or obj.get("content")
                or "Untitled"
            )
            activity["title"] = util.ellipsize(_encode_ampersands(fallback))

        # The Atom spec says title is plain text, so drop any HTML tags:
        # http://atomenabled.org/developers/syndication/#requiredEntryElements
        plain_title = BeautifulSoup(activity["title"], "html.parser").get_text("")
        activity["title"] = xml.sax.saxutils.escape(plain_title)

        # attachments.image should always be a list
        for attachment in primary.get("attachments", []):
            image = attachment.get("image")
            if not image or isinstance(image, list):
                continue
            attachment["image"] = [image]

    # Mimics Django templates, where a failed attribute or item lookup yields a
    # default value that can itself keep being dereferenced. Saves conditionals
    # in the template.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            wrapped = {}
            for key, value in kwargs.items():
                wrapped[key] = Defaulter(**value) if isinstance(value, dict) else value
            super(Defaulter, self).__init__(Defaulter, **wrapped)

        def __unicode__(self):
            if self:
                return super(Defaulter, self).__unicode__()
            return u""

    env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, "templates"), autoescape=True)
    if actor is None:
        actor = {}

    template = env.get_template(ATOM_TEMPLATE_FILE)
    items = [Defaulter(**activity) for activity in activities]
    feed_title = title or "User feed for " + source.Source.actor_name(actor)
    updated = activities[0]["object"].get("published", "") if activities else ""
    return template.render(
        items=items,
        host_url=host_url,
        request_url=request_url,
        title=feed_title,
        updated=updated,
        actor=Defaulter(**actor),
    )
コード例 #15
0
ファイル: rss.py プロジェクト: snarfed/granary
def from_activities(activities, actor=None, title=None, feed_url=None,
                    home_page_url=None, hfeed=None):
  """Builds an RSS 2.0 feed from ActivityStreams activities.

  Activities (or their objects) with objectType person are skipped. Audio and
  video attachments, as determined by ENCLOSURE_TYPES, become RSS enclosures,
  and if there are any, the feed also gets iTunes podcast metadata.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    feed_url: string, the URL for this RSS feed. Required.
    home_page_url: string, the home page URL
    hfeed: dict, parsed mf2 h-feed, if available

  Returns:
    unicode string with RSS 2.0 XML

  Raises:
    TypeError: if activities is not iterable, or is a dict or string
  """
  try:
    iter(activities)
  except TypeError:
    raise TypeError('activities must be iterable')

  if isinstance(activities, (dict, basestring)):
    raise TypeError('activities may not be a dict or string')

  feed = FeedGenerator()
  feed.id(feed_url)
  assert feed_url
  feed.link(href=feed_url, rel='self')
  if home_page_url:
    feed.link(href=home_page_url, rel='alternate')
  # TODO: parse language from lang attribute:
  # https://github.com/microformats/mf2py/issues/150
  feed.language('en')
  feed.generator('granary', uri='https://granary.io/')

  hfeed = hfeed or {}
  actor = actor or {}
  # the h-feed's image wins over the actor's
  image = util.get_url(hfeed, 'image') or util.get_url(actor, 'image')
  if image:
    feed.image(image)

  props = hfeed.get('properties') or {}
  content = microformats2.get_text(util.get_first(props, 'content', ''))
  summary = util.get_first(props, 'summary', '')
  desc = content or summary or '-'
  feed.description(desc)  # required
  feed.title(title or util.ellipsize(desc))  # required

  newest = None  # most recent item timestamp; becomes lastBuildDate
  has_enclosures = False
  for activity in activities:
    obj = activity.get('object') or activity
    if obj.get('objectType') == 'person':
      continue

    entry = feed.add_entry()
    obj_url = obj.get('url')
    entry.id(obj.get('id') or obj_url)
    entry.link(href=obj_url)
    entry.guid(obj_url, permalink=True)

    entry.title(obj.get('title') or obj.get('displayName') or '-')  # required
    body = microformats2.render_content(
      obj, include_location=True, render_attachments=False)
    body = body or obj.get('summary')
    if body:
      entry.content(body, type='CDATA')

    # tags become categories, except for likes, reactions, and shares
    categories = []
    for tag in obj.get('tags', []):
      if (tag.get('displayName') and
          tag.get('verb') not in ('like', 'react', 'share')):
        categories.append({'term': tag['displayName']})
    entry.category(categories)

    author = obj.get('author', {})
    author_name = author.get('displayName') or author.get('username')
    entry.author({
      'name': author_name,
      'uri': author.get('url'),
    })

    timestamp = obj.get('published') or obj.get('updated')
    if timestamp:
      try:
        dt = mf2util.parse_datetime(timestamp)
        if not isinstance(dt, datetime):
          # not a datetime, so give it a time of day: midnight
          dt = datetime.combine(dt, time.min)
        if not dt.tzinfo:
          dt = dt.replace(tzinfo=util.UTC)
        entry.published(dt)
        if not newest or dt > newest:
          newest = dt
      except ValueError:  # bad datetime string
        pass

    for att in obj.get('attachments', []):
      stream = util.get_first(att, 'stream') or att
      if not stream:
        continue

      media_url = stream.get('url') or ''
      mime = mimetypes.guess_type(media_url)[0] or ''
      is_enclosure = (att.get('objectType') in ENCLOSURE_TYPES or
                      (mime and mime.split('/')[0] in ENCLOSURE_TYPES))
      if not is_enclosure:
        continue

      has_enclosures = True
      # TODO: length (bytes). The placeholder is stripped from the output below.
      entry.enclosure(url=media_url, type=mime, length='REMOVEME')

      entry.load_extension('podcast')
      duration = stream.get('duration')
      if duration:
        entry.podcast.itunes_duration(duration)

  if has_enclosures:
    feed.load_extension('podcast')
    feed.podcast.itunes_author(
      actor.get('displayName') or actor.get('username'))
    if summary:
      feed.podcast.itunes_summary(summary)
    feed.podcast.itunes_explicit('no')
    feed.podcast.itunes_block(False)

  if newest:
    feed.lastBuildDate(newest)

  xml_str = feed.rss_str(pretty=True).decode('utf-8')
  return xml_str.replace(' length="REMOVEME"', '')
コード例 #16
0
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None):
  """Renders ActivityStreams activities as an Atom feed.

  The activities are modified in place: each one gets an object (if it had
  none), a plain text title, list-valued attachment images, and
  rendered_content and rendered_children on its object.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed. May be None.
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      Defaults to host_url.
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.

  Returns:
    unicode string with Atom XML
  """
  # Query params are dropped from host_url so that access tokens etc. don't
  # end up in the feed.
  if host_url:
    host_url = _remove_query_params(host_url)
  else:
    host_url = 'https://github.com/snarfed/granary'

  if request_url is None:
    request_url = host_url

  for activity in activities:
    kind = source.object_type(activity)
    is_post = not kind or kind == 'post'
    # Posts are rendered from their object, other verbs from the activity.
    primary = activity.get('object', {}) if is_post else activity
    obj = activity.setdefault('object', {})

    # HTML content, with &s escaped
    rendered = microformats2.render_content(primary)
    obj['rendered_content'] = _encode_ampersands(rendered)

    # Atom <entry> requires a title element, so fall back through the
    # activity's and object's text fields.
    if not activity.get('title'):
      fallback = (activity.get('displayName') or activity.get('content') or
                  obj.get('title') or obj.get('displayName') or
                  obj.get('content') or 'Untitled')
      activity['title'] = util.ellipsize(_encode_ampersands(fallback))

    # The Atom spec says title is plain text, so drop any HTML tags:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    plain_title = source.strip_html_tags(activity['title'])
    activity['title'] = xml.sax.saxutils.escape(plain_title)

    attachments = (activity.get('attachments') or obj.get('attachments')
                   or [])

    # First pass: attachments.image should always be a list.
    for att in attachments:
      att['image'] = util.get_list(att, 'image')

    # Second pass: attached notes and articles are rendered as children.
    children = []
    for att in attachments:
      if att.get('objectType') in ('note', 'article'):
        children.append(_encode_ampersands(microformats2.render_content(att)))
    obj['rendered_children'] = children

  # Mimics Django templates, where a failed attribute or item lookup yields a
  # default value that can itself keep being dereferenced. Saves conditionals
  # in the template.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      wrapped = {}
      for key, value in kwargs.items():
        wrapped[key] = Defaulter(**value) if isinstance(value, dict) else value
      super(Defaulter, self).__init__(Defaulter, **wrapped)

    def __unicode__(self):
      if self:
        return super(Defaulter, self).__unicode__()
      return u''

  env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, 'templates'),
                           autoescape=True)
  if actor is None:
    actor = {}

  template = env.get_template(ATOM_TEMPLATE_FILE)
  items = [Defaulter(**activity) for activity in activities]
  feed_title = title or 'User feed for ' + source.Source.actor_name(actor)
  updated = activities[0]['object'].get('published', '') if activities else ''
  return template.render(
    items=items,
    host_url=host_url,
    request_url=request_url,
    xml_base=xml_base,
    title=feed_title,
    updated=updated,
    actor=Defaulter(**actor),
    rels=rels or {},
    )