def test_tweet_to_object_entity_indices_handle_display_urls(self):
        # The t.co link at [71, 94) is 23 characters long but is rendered as the
        # 10-character display URL "onename.io", so the @danshipper mention that
        # the tweet reports at index 115 is expected at 115 - 13 = 102 in the
        # converted object.
        tweet = {
            "id_str": "123",
            "text": "@schnarfed Hey Ryan, You might find this semi-related and interesting: https://t.co/AFGvnvG72L Heard about it from @danshipper this week.",
            "entities": {
                "urls": [
                    {
                        "url": "https://t.co/AFGvnvG72L",
                        "expanded_url": "https://www.onename.io/",
                        "display_url": "onename.io",
                        "indices": [71, 94],
                    }
                ],
                "user_mentions": [{"screen_name": "danshipper", "name": "Dan Shipper", "indices": [115, 126]}],
            },
        }

        obj = self.twitter.tweet_to_object(tweet)
        for tag in obj["tags"]:
            if tag["displayName"] == "Dan Shipper":
                # assertEqual, not the deprecated assertEquals alias, which was
                # removed in Python 3.12.
                self.assertEqual(102, tag["startIndex"])
                self.assertEqual(11, tag["length"])
                break
        else:
            # The loop finished without finding the mention tag at all.
            self.fail("Dan Shipper not found")

        self.assertEqual(
            '@schnarfed Hey Ryan, You might find this semi-related and interesting: <a href="https://www.onename.io/">onename.io</a> Heard about it from <a href="https://twitter.com/danshipper">@danshipper</a> this week.',
            microformats2.render_content(obj),
        )
  def test_tweet_to_object_entity_indices_handle_display_urls(self):
    # The t.co link at [71, 94) is 23 characters long but is rendered as the
    # 10-character display URL 'onename.io', so the @danshipper mention that
    # the tweet reports at index 115 is expected at 115 - 13 = 102 in the
    # converted object.
    tweet = {
      'id_str': '123',
      'text': '@schnarfed Hey Ryan, You might find this semi-related and interesting: https://t.co/AFGvnvG72L Heard about it from @danshipper this week.',
      'entities': {
        'urls': [{
            'url': 'https://t.co/AFGvnvG72L',
            'expanded_url': 'https://www.onename.io/',
            'display_url': 'onename.io',
            'indices': [71, 94],
            }],
        'user_mentions': [{
            'screen_name': 'danshipper',
            'name': 'Dan Shipper',
            'indices': [115, 126],
            }],
        },
      }

    obj = self.twitter.tweet_to_object(tweet)
    for tag in obj['tags']:
      if tag['displayName'] == 'Dan Shipper':
        # assertEqual, not the deprecated assertEquals alias, which was removed
        # in Python 3.12.
        self.assertEqual(102, tag['startIndex'])
        self.assertEqual(11, tag['length'])
        break
    else:
      # The loop finished without finding the mention tag at all.
      self.fail('Dan Shipper not found')

    self.assertEqual('@schnarfed Hey Ryan, You might find this semi-related and interesting: <a href="https://www.onename.io/">onename.io</a> Heard about it from <a href="http://twitter.com/danshipper">@danshipper</a> this week.',
                     microformats2.render_content(obj))
Ejemplo n.º 3
0
def activities_to_atom(activities,
                       actor,
                       title=None,
                       request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    The activity dicts are modified in place: each one gets an 'object' if it
    has none, a 'rendered_content' field on that object, a 'title' if it has
    none, and list-valued attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns: unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    host_url = (_remove_query_params(host_url) if host_url else
                'https://github.com/snarfed/activitystreams-unofficial')
    request_url = (_remove_query_params(request_url) if request_url
                   else host_url)

    for a in activities:
        # setdefault rather than get: with get, an activity that has no object
        # would receive its rendered content on a throwaway dict, and the
        # activities[0]['object'] lookup below would raise KeyError.
        obj = a.setdefault('object', {})
        # Render content as HTML
        content = obj.get('content')
        obj['rendered_content'] = microformats2.render_content(obj)

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            a['title'] = util.ellipsize(
                a.get('displayName') or a.get('content') or obj.get('title')
                or obj.get('displayName') or content or 'Untitled')

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    return template.render(
        ATOM_TEMPLATE_FILE, {
            'items': activities,
            'host_url': host_url,
            'request_url': request_url,
            'title': title
            or 'User feed for ' + source.Source.actor_name(actor),
            # The feed's updated time is taken from the first activity.
            'updated':
            activities[0]['object'].get('published') if activities else '',
            'actor': actor,
        })
Ejemplo n.º 4
0
    def test_mention_and_hashtag(self):
        self.assert_equals(
            """
<a class="p-category" href="http://c"></a>
<a class="u-mention" href="http://m">m</a>""",
            microformats2.render_content({
                'tags': [{
                    'objectType': 'mention',
                    'url': 'http://m',
                    'displayName': 'm'
                }, {
                    'objectType': 'hashtag',
                    'url': 'http://c'
                }],
            }))
Ejemplo n.º 5
0
    def test_render_content_location(self):
        self.assert_equals(
            """\
foo
<div class="h-card p-location">
  <div class="p-name"><a class="u-url" href="http://my/place">My place</a></div>

</div>
""",
            microformats2.render_content({
                'content': 'foo',
                'location': {
                    'displayName': 'My place',
                    'url': 'http://my/place',
                }
            }))
Ejemplo n.º 6
0
    def test_render_content_converts_newlines_to_brs(self):
        self.assert_equals(
            """\
foo<br />
bar<br />
<a href="http://baz">baz</a>
""",
            microformats2.render_content({
                'content':
                'foo\nbar\nbaz',
                'tags': [{
                    'url': 'http://baz',
                    'startIndex': 8,
                    'length': 3
                }]
            }))
Ejemplo n.º 7
0
    def test_render_content_link_with_image(self):
        self.assert_equals(
            """\
foo
<p>
<a class="link" href="http://link">
<img class="thumbnail" src="http://image" alt="name" />
<span class="name">name</span>
</a>
</p>""",
            microformats2.render_content({
                'content':
                'foo',
                'tags': [{
                    'objectType': 'article',
                    'url': 'http://link',
                    'displayName': 'name',
                    'image': {
                        'url': 'http://image'
                    },
                }]
            }))
Ejemplo n.º 8
0
    def test_render_content_omits_tags_without_urls(self):
        self.assert_equals(
            """\
foo
<a class="tag" href="http://baz">baz</a>
<a class="tag" href="http://baj"></a>
""",
            microformats2.render_content({
                'content':
                'foo',
                'tags': [
                    {
                        'displayName': 'bar'
                    },
                    {
                        'url': 'http://baz',
                        'displayName': 'baz'
                    },
                    {
                        'url': 'http://baj'
                    },
                ],
            }))
Ejemplo n.º 9
0
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None, reader=True):
  """Converts ActivityStreams activities to an Atom feed.

  The activity dicts are modified in place: each one gets an 'object' if it has
  none, a 'title' with HTML tags stripped and XML special characters escaped,
  list-valued attachment images, and 'rendered_content' and 'rendered_children'
  fields on its object.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed. None is treated
      as an empty dict.
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      Defaults to host_url when None.
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.
    reader: boolean, whether the output will be rendered in a feed reader.
      Currently just includes location if True, not otherwise.

  Returns:
    unicode string with Atom XML
  """
  # Strip query params from the host URL so that we don't include access
  # tokens, etc. request_url is used as given.
  host_url = (_remove_query_params(host_url) if host_url
              else 'https://github.com/snarfed/granary')
  if request_url is None:
    request_url = host_url

  for a in activities:
    # Posts (and untyped activities) are rendered from their inner object;
    # every other activity type is rendered from the activity itself.
    act_type = source.object_type(a)
    if not act_type or act_type == 'post':
      primary = a.get('object', {})
    else:
      primary = a
    obj = a.setdefault('object', {})

    # Render content as HTML; escape &s
    obj['rendered_content'] = _encode_ampersands(microformats2.render_content(
      primary, include_location=reader))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
      a['title'] = util.ellipsize(_encode_ampersands(
        a.get('displayName') or a.get('content') or obj.get('title') or
        obj.get('displayName') or obj.get('content') or 'Untitled'))

    # strip HTML tags. the Atom spec says title is plain text:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    #
    # Name the parser explicitly. Without it, BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so the result could differ
    # between environments.
    a['title'] = xml.sax.saxutils.escape(
      BeautifulSoup(a['title'], 'html.parser').get_text(''))

    # Normalize attachments.image to always be a list.
    attachments = a.get('attachments') or obj.get('attachments') or []
    for att in attachments:
      att['image'] = util.get_list(att, 'image')

    # Render note and article attachments separately, each prefixed with its
    # author's name when it has an author.
    obj['rendered_children'] = []
    for att in attachments:
      if att.get('objectType') in ('note', 'article'):
        html = microformats2.render_content(att, include_location=reader)
        author = att.get('author')
        if author:
          name = microformats2.maybe_linked_name(
            microformats2.object_to_json(author).get('properties', []))
          html = '%s: %s' % (name.strip(), html)
        obj['rendered_children'].append(_encode_ampersands(html))

  # Emulate Django template behavior that returns a special default value that
  # can continue to be referenced when an attribute or item lookup fails. Helps
  # avoid conditionals in the template itself.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      # Nested dicts are wrapped recursively so that lookups at any depth get
      # the same defaulting behavior.
      super(Defaulter, self).__init__(Defaulter, **{
        k: (Defaulter(**v) if isinstance(v, dict) else v)
        for k, v in kwargs.items()})

    def __unicode__(self):
      # An empty Defaulter renders as the empty string.
      return super(Defaulter, self).__unicode__() if self else u''

  env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, 'templates'),
                           autoescape=True)
  if actor is None:
    actor = {}
  return env.get_template(ATOM_TEMPLATE_FILE).render(
    items=[Defaulter(**a) for a in activities],
    host_url=host_url,
    request_url=request_url,
    xml_base=xml_base,
    title=title or 'User feed for ' + source.Source.actor_name(actor),
    # The feed's updated time is taken from the first activity.
    updated=activities[0]['object'].get('published', '') if activities else '',
    actor=Defaulter(**actor),
    rels=rels or {},
    )
Ejemplo n.º 10
0
def activities_to_atom(activities,
                       actor,
                       title=None,
                       request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    The activity dicts are modified in place: each one gets an 'object' if it
    has none, a 'rendered_content' field on that object, a 'title' if it has
    none, and list-valued attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed. None is
        treated as an empty dict.
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns: unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    host_url = (_remove_query_params(host_url)
                if host_url else 'https://github.com/snarfed/granary')
    request_url = (_remove_query_params(request_url)
                   if request_url else host_url)

    for a in activities:
        # setdefault rather than get: with get, an activity that has no object
        # would receive its rendered content on a throwaway dict, and the
        # activities[0]['object'] lookup below would raise KeyError.
        obj = a.setdefault('object', {})
        # Render content as HTML; escape &s
        content = obj.get('content')
        obj['rendered_content'] = _encode_ampersands(
            microformats2.render_content(obj))

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            a['title'] = util.ellipsize(
                _encode_ampersands(
                    a.get('displayName') or a.get('content')
                    or obj.get('title') or obj.get('displayName') or content
                    or 'Untitled'))

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    # Emulate Django template behavior that returns a special default value that
    # can continue to be referenced when an attribute or item lookup fails. Helps
    # avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # Nested dicts are wrapped recursively so that lookups at any depth
            # get the same defaulting behavior.
            super(Defaulter, self).__init__(
                Defaulter, **{
                    k: (Defaulter(**v) if isinstance(v, dict) else v)
                    for k, v in kwargs.items()
                })

        def __unicode__(self):
            # An empty Defaulter renders as the empty string.
            return super(Defaulter, self).__unicode__() if self else u''

    env = jinja2.Environment(
        loader=jinja2.PackageLoader(__package__, 'templates'),
        autoescape=True)
    if actor is None:
        actor = {}
    return env.get_template(ATOM_TEMPLATE_FILE).render(
        items=[Defaulter(**a) for a in activities],
        host_url=host_url,
        request_url=request_url,
        title=title or 'User feed for ' + source.Source.actor_name(actor),
        # The feed's updated time is taken from the first activity.
        updated=activities[0]['object'].get('published', '')
        if activities else '',
        actor=Defaulter(**actor),
    )
Ejemplo n.º 11
0
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None):
  """Converts ActivityStreams activities to an Atom feed.

  The activity dicts are modified in place: each one gets an 'object' if it has
  none, a 'rendered_content' field on that object, a 'title' if it has none,
  and list-valued attachment images.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed. None is treated
      as an empty dict.
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.

  Returns: unicode string with Atom XML
  """
  # Strip query params from URLs so that we don't include access tokens, etc
  host_url = (_remove_query_params(host_url) if host_url
              else 'https://github.com/snarfed/granary')
  request_url = _remove_query_params(request_url) if request_url else host_url

  for a in activities:
    # setdefault rather than get: with get, an activity that has no object
    # would receive its rendered content on a throwaway dict, and the
    # activities[0]['object'] lookup below would raise KeyError.
    obj = a.setdefault('object', {})
    # Render content as HTML; escape &s
    content = obj.get('content')
    obj['rendered_content'] = _encode_ampersands(microformats2.render_content(obj))

    # Make sure every activity has the title field, since Atom <entry> requires
    # the title element.
    if not a.get('title'):
      a['title'] = util.ellipsize(_encode_ampersands(
        a.get('displayName') or a.get('content') or obj.get('title') or
        obj.get('displayName') or content or 'Untitled'))

    # Normalize attachments.image to always be a list.
    for att in obj.get('attachments', []):
      image = att.get('image')
      if image and not isinstance(image, list):
        att['image'] = [image]

  # Emulate Django template behavior that returns a special default value that
  # can continue to be referenced when an attribute or item lookup fails. Helps
  # avoid conditionals in the template itself.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      # Nested dicts are wrapped recursively so that lookups at any depth get
      # the same defaulting behavior.
      super(Defaulter, self).__init__(Defaulter, **{
        k: (Defaulter(**v) if isinstance(v, dict) else v)
        for k, v in kwargs.items()})

    def __unicode__(self):
      # An empty Defaulter renders as the empty string.
      return super(Defaulter, self).__unicode__() if self else u''

  env = jinja2.Environment(loader=jinja2.PackageLoader(__package__, 'templates'),
                           autoescape=True)
  if actor is None:
    actor = {}
  return env.get_template(ATOM_TEMPLATE_FILE).render(
    items=[Defaulter(**a) for a in activities],
    host_url=host_url,
    request_url=request_url,
    title=title or 'User feed for ' + source.Source.actor_name(actor),
    # The feed's updated time is taken from the first activity.
    updated=activities[0]['object'].get('published', '') if activities else '',
    actor=Defaulter(**actor),
    )
Ejemplo n.º 12
0
def activities_to_atom(activities, actor, title=None, request_url=None, host_url=None):
    """Renders ActivityStreams activities as an Atom feed.

    Every activity is updated in place along the way: it is given an "object"
    if it lacks one, that object gets a "rendered_content" field, the title is
    filled in, stripped of HTML tags and XML-escaped, and single attachment
    images are wrapped in lists.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed. None is
        treated as an empty dict.
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the top-level
        feed <id> element.

    Returns: unicode string with Atom XML
    """
    # Query params may carry access tokens etc., so drop them from both URLs.
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = "https://github.com/snarfed/granary"
    if request_url:
        request_url = _remove_query_params(request_url)
    else:
        request_url = host_url

    for activity in activities:
        # Posts (and untyped activities) are rendered from their inner object;
        # every other activity type is rendered from the activity itself.
        kind = source.object_type(activity)
        if kind and kind != "post":
            primary = activity
        else:
            primary = activity.get("object", {})
        inner = activity.setdefault("object", {})

        # Render content as HTML, then escape ampersands.
        pieces = [microformats2.render_content(primary)]
        inner["rendered_content"] = _encode_ampersands("\n".join(pieces))

        # Atom <entry> requires a title element, so fall back through the
        # activity's and object's text fields until one is non-empty.
        if not activity.get("title"):
            fallback = (
                activity.get("displayName")
                or activity.get("content")
                or inner.get("title")
                or inner.get("displayName")
                or inner.get("content")
                or "Untitled"
            )
            activity["title"] = util.ellipsize(_encode_ampersands(fallback))

        # The Atom spec says title is plain text, so strip HTML tags:
        # http://atomenabled.org/developers/syndication/#requiredEntryElements
        plain_title = BeautifulSoup(activity["title"], "html.parser").get_text("")
        activity["title"] = xml.sax.saxutils.escape(plain_title)

        # Attachment images are always exposed as lists.
        for attachment in primary.get("attachments", []):
            picture = attachment.get("image")
            if not picture or isinstance(picture, list):
                continue
            attachment["image"] = [picture]

    # Mimics Django templates, where a failed attribute or item lookup yields a
    # special default value that can itself keep being looked up. This keeps
    # conditionals out of the template.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # Wrap nested dicts recursively so lookups at any depth default too.
            wrapped = {}
            for key, value in kwargs.items():
                wrapped[key] = Defaulter(**value) if isinstance(value, dict) else value
            super(Defaulter, self).__init__(Defaulter, **wrapped)

        def __unicode__(self):
            # An empty Defaulter renders as the empty string.
            if not self:
                return u""
            return super(Defaulter, self).__unicode__()

    loader = jinja2.PackageLoader(__package__, "templates")
    env = jinja2.Environment(loader=loader, autoescape=True)
    if actor is None:
        actor = {}
    feed_template = env.get_template(ATOM_TEMPLATE_FILE)
    context = {
        "items": [Defaulter(**activity) for activity in activities],
        "host_url": host_url,
        "request_url": request_url,
        "title": title or "User feed for " + source.Source.actor_name(actor),
        # The feed's updated time is taken from the first activity.
        "updated": activities[0]["object"].get("published", "") if activities else "",
        "actor": Defaulter(**actor),
    }
    return feed_template.render(**context)
Ejemplo n.º 13
0
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None):
  """Renders ActivityStreams activities as an Atom feed.

  Every activity is updated in place along the way: it is given an 'object' if
  it lacks one, that object gets 'rendered_content' and 'rendered_children'
  fields, the title is filled in, stripped of HTML tags and XML-escaped, and
  attachment images are normalized to lists.

  Args:
    activities: list of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed. None is treated
      as an empty dict.
    title: string, the feed <title> element. Defaults to 'User feed for [NAME]'
    request_url: the URL of this Atom feed, if any. Used in a link rel="self".
      Defaults to host_url when None.
    host_url: the home URL for this Atom feed, if any. Used in the top-level
      feed <id> element.
    xml_base: the base URL, if any. Used in the top-level xml:base attribute.
    rels: rel links to include. dict mapping string rel value to string URL.

  Returns:
    unicode string with Atom XML
  """
  # Query params may carry access tokens etc., so drop them from the host URL.
  # request_url is used as given.
  if host_url:
    host_url = _remove_query_params(host_url)
  else:
    host_url = 'https://github.com/snarfed/granary'
  request_url = host_url if request_url is None else request_url

  for activity in activities:
    # Posts (and untyped activities) are rendered from their inner object;
    # every other activity type is rendered from the activity itself.
    kind = source.object_type(activity)
    if kind and kind != 'post':
      primary = activity
    else:
      primary = activity.get('object', {})
    inner = activity.setdefault('object', {})

    # Render content as HTML, then escape ampersands.
    content_html = microformats2.render_content(primary)
    inner['rendered_content'] = _encode_ampersands(content_html)

    # Atom <entry> requires a title element, so fall back through the
    # activity's and object's text fields until one is non-empty.
    if not activity.get('title'):
      fallback = (activity.get('displayName') or activity.get('content') or
                  inner.get('title') or inner.get('displayName') or
                  inner.get('content') or 'Untitled')
      activity['title'] = util.ellipsize(_encode_ampersands(fallback))

    # The Atom spec says title is plain text, so strip HTML tags:
    # http://atomenabled.org/developers/syndication/#requiredEntryElements
    plain_title = source.strip_html_tags(activity['title'])
    activity['title'] = xml.sax.saxutils.escape(plain_title)

    # Attachment images are always exposed as lists.
    attachments = (activity.get('attachments') or inner.get('attachments')
                   or [])
    for attachment in attachments:
      attachment['image'] = util.get_list(attachment, 'image')

    # Note and article attachments are also rendered individually.
    children = []
    for attachment in attachments:
      if attachment.get('objectType') not in ('note', 'article'):
        continue
      children.append(
        _encode_ampersands(microformats2.render_content(attachment)))
    inner['rendered_children'] = children

  # Mimics Django templates, where a failed attribute or item lookup yields a
  # special default value that can itself keep being looked up. This keeps
  # conditionals out of the template.
  # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
  class Defaulter(collections.defaultdict):
    def __init__(self, **kwargs):
      # Wrap nested dicts recursively so lookups at any depth default too.
      wrapped = {}
      for key, value in kwargs.items():
        wrapped[key] = Defaulter(**value) if isinstance(value, dict) else value
      super(Defaulter, self).__init__(Defaulter, **wrapped)

    def __unicode__(self):
      # An empty Defaulter renders as the empty string.
      if not self:
        return u''
      return super(Defaulter, self).__unicode__()

  loader = jinja2.PackageLoader(__package__, 'templates')
  env = jinja2.Environment(loader=loader, autoescape=True)
  if actor is None:
    actor = {}
  feed_template = env.get_template(ATOM_TEMPLATE_FILE)
  context = {
    'items': [Defaulter(**activity) for activity in activities],
    'host_url': host_url,
    'request_url': request_url,
    'xml_base': xml_base,
    'title': title or 'User feed for ' + source.Source.actor_name(actor),
    # The feed's updated time is taken from the first activity.
    'updated': activities[0]['object'].get('published', '') if activities else '',
    'actor': Defaulter(**actor),
    'rels': rels or {},
  }
  return feed_template.render(**context)