def test_tweet_to_object_entity_indices_handle_display_urls(self):
    """Mention indices must account for t.co links swapped for display URLs.

    The tweet text contains a 23-character t.co link (indices 71-94) whose
    display URL, "onename.io", is only 10 characters long. The test expects
    the later @danshipper mention, at 115 in the raw text, to be reported
    13 characters earlier, at 102, and the rendered HTML to link both the
    expanded URL and the mentioned user's profile.
    """
    tweet = {
        "id_str": "123",
        "text": "@schnarfed Hey Ryan, You might find this semi-related and interesting: https://t.co/AFGvnvG72L Heard about it from @danshipper this week.",
        "entities": {
            "urls": [
                {
                    "url": "https://t.co/AFGvnvG72L",
                    "expanded_url": "https://www.onename.io/",
                    "display_url": "onename.io",
                    "indices": [71, 94],
                }
            ],
            "user_mentions": [
                {
                    "screen_name": "danshipper",
                    "name": "Dan Shipper",
                    "indices": [115, 126],
                }
            ],
        },
    }

    obj = self.twitter.tweet_to_object(tweet)

    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    for tag in obj["tags"]:
        if tag["displayName"] == "Dan Shipper":
            self.assertEqual(102, tag["startIndex"])
            self.assertEqual(11, tag["length"])
            break
    else:
        # The loop finished without hitting `break`: no mention tag at all.
        self.fail("Dan Shipper not found")

    self.assertEqual(
        '@schnarfed Hey Ryan, You might find this semi-related and interesting: <a href="https://www.onename.io/">onename.io</a> Heard about it from <a href="https://twitter.com/danshipper">@danshipper</a> this week.',
        microformats2.render_content(obj),
    )
def test_tweet_to_object_entity_indices_handle_display_urls(self):
    """Mention indices must account for t.co links swapped for display URLs.

    The raw tweet has a 23-character t.co link at indices 71-94 whose
    display URL ("onename.io") is 10 characters, so the test expects the
    @danshipper mention to move from index 115 to 102 in the converted
    object, and the rendered content to contain both links.
    """
    tweet = {
        'id_str': '123',
        'text': '@schnarfed Hey Ryan, You might find this semi-related and interesting: https://t.co/AFGvnvG72L Heard about it from @danshipper this week.',
        'entities': {
            'urls': [{
                'url': 'https://t.co/AFGvnvG72L',
                'expanded_url': 'https://www.onename.io/',
                'display_url': 'onename.io',
                'indices': [71, 94],
            }],
            'user_mentions': [{
                'screen_name': 'danshipper',
                'name': 'Dan Shipper',
                'indices': [115, 126],
            }],
        },
    }

    obj = self.twitter.tweet_to_object(tweet)

    # Find the first tag for the mentioned user; fail loudly if it is absent.
    mention = next(
        (tag for tag in obj['tags'] if tag['displayName'] == 'Dan Shipper'),
        None)
    if mention is None:
        self.fail('Dan Shipper not found')

    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(102, mention['startIndex'])
    self.assertEqual(11, mention['length'])

    self.assertEqual(
        '@schnarfed Hey Ryan, You might find this semi-related and interesting: <a href="https://www.onename.io/">onename.io</a> Heard about it from <a href="http://twitter.com/danshipper">@danshipper</a> this week.',
        microformats2.render_content(obj))
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    The activities are modified in place: each one gets an 'object' dict (if
    it had none), a 'rendered_content' entry on that object, a 'title' if it
    was missing or empty, and list-valued attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the
        top-level feed <id> element.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    host_url = (_remove_query_params(host_url) if host_url
                else 'https://github.com/snarfed/activitystreams-unofficial')
    request_url = _remove_query_params(request_url) if request_url else host_url

    for a in activities:
        # setdefault (not get) so the rendered content is stored on the
        # activity even when it has no object, and so the
        # activities[0]['object'] lookup below cannot raise KeyError.
        obj = a.setdefault('object', {})

        # Render content as HTML
        content = obj.get('content')
        obj['rendered_content'] = microformats2.render_content(obj)

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            a['title'] = util.ellipsize(
                a.get('displayName') or a.get('content') or obj.get('title') or
                obj.get('displayName') or content or 'Untitled')

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    return template.render(ATOM_TEMPLATE_FILE, {
        'items': activities,
        'host_url': host_url,
        'request_url': request_url,
        'title': title or 'User feed for ' + source.Source.actor_name(actor),
        # The feed's updated value is the first activity's published value.
        'updated': activities[0]['object'].get('published') if activities else '',
        'actor': actor,
    })
# Checks render_content() on an object that has no content, only a mention tag
# and a hashtag tag: the expected HTML is a p-category link for the hashtag
# followed by a u-mention link for the mention.
# NOTE(review): the line breaks inside the triple-quoted expected string look
# like they were collapsed to spaces in this copy -- confirm the exact
# whitespace against the original file before reformatting this test.
def test_mention_and_hashtag(self): self.assert_equals( """ <a class="p-category" href="http://c"></a> <a class="u-mention" href="http://m">m</a>""", microformats2.render_content({ 'tags': [{ 'objectType': 'mention', 'url': 'http://m', 'displayName': 'm' }, { 'objectType': 'hashtag', 'url': 'http://c' }], }))
# Checks that render_content() appends an object's location after its content
# as an h-card with class p-location, with the place name linked to its URL.
# NOTE(review): the line breaks (and the backslash line continuation) inside
# the triple-quoted expected string look like they were collapsed to spaces in
# this copy -- confirm the exact whitespace against the original file.
def test_render_content_location(self): self.assert_equals( """\ foo <div class="h-card p-location"> <div class="p-name"><a class="u-url" href="http://my/place">My place</a></div> </div> """, microformats2.render_content({ 'content': 'foo', 'location': { 'displayName': 'My place', 'url': 'http://my/place', } }))
# Checks that render_content() turns newlines in the content into <br /> and
# still linkifies a tag addressed by startIndex/length: index 8, length 3 is
# the trailing "baz" in 'foo\nbar\nbaz'.
# NOTE(review): the line breaks (and the backslash line continuation) inside
# the triple-quoted expected string look like they were collapsed to spaces in
# this copy -- confirm the exact whitespace against the original file.
def test_render_content_converts_newlines_to_brs(self): self.assert_equals( """\ foo<br /> bar<br /> <a href="http://baz">baz</a> """, microformats2.render_content({ 'content': 'foo\nbar\nbaz', 'tags': [{ 'url': 'http://baz', 'startIndex': 8, 'length': 3 }] }))
# Checks that render_content() renders an 'article' tag that carries an image
# as a paragraph after the content, holding a class="link" anchor with a
# thumbnail <img> and a name <span>.
# NOTE(review): the line breaks (and the backslash line continuation) inside
# the triple-quoted expected string look like they were collapsed to spaces in
# this copy -- confirm the exact whitespace against the original file.
def test_render_content_link_with_image(self): self.assert_equals( """\ foo <p> <a class="link" href="http://link"> <img class="thumbnail" src="http://image" alt="name" /> <span class="name">name</span> </a> </p>""", microformats2.render_content({ 'content': 'foo', 'tags': [{ 'objectType': 'article', 'url': 'http://link', 'displayName': 'name', 'image': { 'url': 'http://image' }, }] }))
# Checks that render_content() leaves out tags that have no URL: the 'bar' tag
# (displayName only) is absent from the expected HTML, while both tags with a
# url are rendered as class="tag" links -- the one without a displayName with
# empty link text.
# NOTE(review): the line breaks (and the backslash line continuation) inside
# the triple-quoted expected string look like they were collapsed to spaces in
# this copy -- confirm the exact whitespace against the original file.
def test_render_content_omits_tags_without_urls(self): self.assert_equals( """\ foo <a class="tag" href="http://baz">baz</a> <a class="tag" href="http://baj"></a> """, microformats2.render_content({ 'content': 'foo', 'tags': [ { 'displayName': 'bar' }, { 'url': 'http://baz', 'displayName': 'baz' }, { 'url': 'http://baj' }, ], }))
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None, reader=True):
    """Converts ActivityStreams activities to an Atom feed.

    The activities are modified in place: each gets an 'object' dict if it
    had none, 'rendered_content' and 'rendered_children' entries on that
    object, a plain-text escaped 'title', and list-valued attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the
        top-level feed <id> element.
      xml_base: the base URL, if any. Used in the top-level xml:base
        attribute.
      rels: rel links to include. dict mapping string rel value to string URL.
      reader: boolean, whether the output will be rendered in a feed reader.
        Currently just includes location if True, not otherwise.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = 'https://github.com/snarfed/granary'

    if request_url is None:
        request_url = host_url

    for a in activities:
        # Posts (and untyped activities) are rendered from their object;
        # every other activity type is rendered from the activity itself.
        act_type = source.object_type(a)
        primary = a if act_type and act_type != 'post' else a.get('object', {})
        obj = a.setdefault('object', {})

        # Render content as HTML; escape &s
        rendered = microformats2.render_content(primary, include_location=reader)
        obj['rendered_content'] = _encode_ampersands(rendered)

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            fallback = (a.get('displayName') or a.get('content') or
                        obj.get('title') or obj.get('displayName') or
                        obj.get('content') or 'Untitled')
            a['title'] = util.ellipsize(_encode_ampersands(fallback))

        # strip HTML tags. the Atom spec says title is plain text:
        # http://atomenabled.org/developers/syndication/#requiredEntryElements
        plain_title = BeautifulSoup(a['title']).get_text('')
        a['title'] = xml.sax.saxutils.escape(plain_title)

        # Normalize attachments.image to always be a list.
        attachments = a.get('attachments') or obj.get('attachments') or []
        for att in attachments:
            att['image'] = util.get_list(att, 'image')

        # Render note and article attachments as children, prefixed with the
        # author's (possibly linked) name when the attachment has an author.
        children = obj['rendered_children'] = []
        for att in attachments:
            if att.get('objectType') not in ('note', 'article'):
                continue
            html = microformats2.render_content(att, include_location=reader)
            author = att.get('author')
            if author:
                props = microformats2.object_to_json(author).get('properties', [])
                name = microformats2.maybe_linked_name(props)
                html = '%s: %s' % (name.strip(), html)
            children.append(_encode_ampersands(html))

    # Emulate Django template behavior that returns a special default value
    # that can continue to be referenced when an attribute or item lookup
    # fails. Helps avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # Nested dicts are wrapped recursively so lookups can chain.
            wrapped = {k: (Defaulter(**v) if isinstance(v, dict) else v)
                       for k, v in kwargs.items()}
            super(Defaulter, self).__init__(Defaulter, **wrapped)

        def __unicode__(self):
            # An empty Defaulter renders as the empty string.
            return super(Defaulter, self).__unicode__() if self else u''

    env = jinja2.Environment(
        loader=jinja2.PackageLoader(__package__, 'templates'),
        autoescape=True)
    if actor is None:
        actor = {}

    feed_title = title or 'User feed for ' + source.Source.actor_name(actor)
    updated = activities[0]['object'].get('published', '') if activities else ''
    return env.get_template(ATOM_TEMPLATE_FILE).render(
        items=[Defaulter(**a) for a in activities],
        host_url=host_url,
        request_url=request_url,
        xml_base=xml_base,
        title=feed_title,
        updated=updated,
        actor=Defaulter(**actor),
        rels=rels or {},
    )
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    The activities are modified in place: each one gets an 'object' dict (if
    it had none), a 'rendered_content' entry on that object, a 'title' if it
    was missing or empty, and list-valued attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the
        top-level feed <id> element.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    host_url = (_remove_query_params(host_url) if host_url
                else 'https://github.com/snarfed/granary')
    request_url = _remove_query_params(request_url) if request_url else host_url

    for a in activities:
        # setdefault (not get) so the rendered content is stored on the
        # activity even when it has no object, and so the
        # activities[0]['object'] lookup below cannot raise KeyError.
        obj = a.setdefault('object', {})

        # Render content as HTML; escape &s
        content = obj.get('content')
        obj['rendered_content'] = _encode_ampersands(
            microformats2.render_content(obj))

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            a['title'] = util.ellipsize(_encode_ampersands(
                a.get('displayName') or a.get('content') or obj.get('title') or
                obj.get('displayName') or content or 'Untitled'))

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    # Emulate Django template behavior that returns a special default value
    # that can continue to be referenced when an attribute or item lookup
    # fails. Helps avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # Nested dicts are wrapped recursively so lookups can chain.
            super(Defaulter, self).__init__(Defaulter, **{
                k: (Defaulter(**v) if isinstance(v, dict) else v)
                for k, v in kwargs.items()})

        def __unicode__(self):
            # An empty Defaulter renders as the empty string.
            return super(Defaulter, self).__unicode__() if self else u''

    env = jinja2.Environment(
        loader=jinja2.PackageLoader(__package__, 'templates'),
        autoescape=True)
    if actor is None:
        actor = {}

    return env.get_template(ATOM_TEMPLATE_FILE).render(
        items=[Defaulter(**a) for a in activities],
        host_url=host_url,
        request_url=request_url,
        title=title or 'User feed for ' + source.Source.actor_name(actor),
        # The feed's updated value is the first activity's published value.
        updated=activities[0]['object'].get('published', '') if activities else '',
        actor=Defaulter(**actor),
    )
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    Side effect: every activity is updated in place. It gains an 'object'
    dict when it had none, that object gains 'rendered_content', the activity
    gains a 'title' when it had none, and single attachment images are
    wrapped in lists.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the
        top-level feed <id> element.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = 'https://github.com/snarfed/granary'

    if request_url:
        request_url = _remove_query_params(request_url)
    else:
        request_url = host_url

    for a in activities:
        # Attach the object dict to the activity instead of using a
        # throwaway default: otherwise rendered_content is lost for
        # object-less activities and activities[0]['object'] below raises
        # KeyError.
        obj = a.setdefault('object', {})

        # Render content as HTML; escape &s
        content = obj.get('content')
        rendered = microformats2.render_content(obj)
        obj['rendered_content'] = _encode_ampersands(rendered)

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            fallback = (a.get('displayName') or a.get('content') or
                        obj.get('title') or obj.get('displayName') or
                        content or 'Untitled')
            a['title'] = util.ellipsize(_encode_ampersands(fallback))

        # Normalize attachments.image to always be a list.
        for att in obj.get('attachments', []):
            image = att.get('image')
            if image and not isinstance(image, list):
                att['image'] = [image]

    # Emulate Django template behavior that returns a special default value
    # that can continue to be referenced when an attribute or item lookup
    # fails. Helps avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # Nested dicts are wrapped recursively so lookups can chain.
            super(Defaulter, self).__init__(Defaulter, **{
                k: (Defaulter(**v) if isinstance(v, dict) else v)
                for k, v in kwargs.items()})

        def __unicode__(self):
            # An empty Defaulter renders as the empty string.
            return super(Defaulter, self).__unicode__() if self else u''

    env = jinja2.Environment(
        loader=jinja2.PackageLoader(__package__, 'templates'),
        autoescape=True)
    if actor is None:
        actor = {}

    return env.get_template(ATOM_TEMPLATE_FILE).render(
        items=[Defaulter(**a) for a in activities],
        host_url=host_url,
        request_url=request_url,
        title=title or 'User feed for ' + source.Source.actor_name(actor),
        # The feed's updated value is the first activity's published value.
        updated=activities[0]['object'].get('published', '') if activities else '',
        actor=Defaulter(**actor),
    )
def activities_to_atom(activities, actor, title=None, request_url=None, host_url=None):
    """Converts ActivityStreams activities to an Atom feed.

    The activities are modified in place: each gets an 'object' dict if it
    had none, a 'rendered_content' entry on that object, a plain-text escaped
    'title', and list-valued attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the
        top-level feed <id> element.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = "https://github.com/snarfed/granary"

    if request_url:
        request_url = _remove_query_params(request_url)
    else:
        request_url = host_url

    for a in activities:
        # Posts (and untyped activities) are rendered from their object;
        # every other activity type is rendered from the activity itself.
        act_type = source.object_type(a)
        primary = a if act_type and act_type != "post" else a.get("object", {})
        obj = a.setdefault("object", {})

        # Render content as HTML; escape &s
        rendered = [microformats2.render_content(primary)]
        obj["rendered_content"] = _encode_ampersands("\n".join(rendered))

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get("title"):
            fallback = (
                a.get("displayName")
                or a.get("content")
                or obj.get("title")
                or obj.get("displayName")
                or obj.get("content")
                or "Untitled"
            )
            a["title"] = util.ellipsize(_encode_ampersands(fallback))

        # strip HTML tags. the Atom spec says title is plain text:
        # http://atomenabled.org/developers/syndication/#requiredEntryElements
        plain_title = BeautifulSoup(a["title"], "html.parser").get_text("")
        a["title"] = xml.sax.saxutils.escape(plain_title)

        # Normalize attachments.image to always be a list.
        for att in primary.get("attachments", []):
            image = att.get("image")
            if not image or isinstance(image, list):
                continue
            att["image"] = [image]

    # Emulate Django template behavior that returns a special default value
    # that can continue to be referenced when an attribute or item lookup
    # fails. Helps avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # Nested dicts are wrapped recursively so lookups can chain.
            wrapped = {k: (Defaulter(**v) if isinstance(v, dict) else v) for k, v in kwargs.items()}
            super(Defaulter, self).__init__(Defaulter, **wrapped)

        def __unicode__(self):
            # An empty Defaulter renders as the empty string.
            return super(Defaulter, self).__unicode__() if self else u""

    env = jinja2.Environment(
        loader=jinja2.PackageLoader(__package__, "templates"), autoescape=True
    )
    if actor is None:
        actor = {}

    feed_title = title or "User feed for " + source.Source.actor_name(actor)
    updated = activities[0]["object"].get("published", "") if activities else ""
    return env.get_template(ATOM_TEMPLATE_FILE).render(
        items=[Defaulter(**a) for a in activities],
        host_url=host_url,
        request_url=request_url,
        title=feed_title,
        updated=updated,
        actor=Defaulter(**actor),
    )
def activities_to_atom(activities, actor, title=None, request_url=None,
                       host_url=None, xml_base=None, rels=None):
    """Converts ActivityStreams activities to an Atom feed.

    The activities are modified in place: each gets an 'object' dict if it
    had none, 'rendered_content' and 'rendered_children' entries on that
    object, a plain-text escaped 'title', and list-valued attachment images.

    Args:
      activities: list of ActivityStreams activity dicts
      actor: ActivityStreams actor dict, the author of the feed
      title: string, the feed <title> element. Defaults to
        'User feed for [NAME]'
      request_url: the URL of this Atom feed, if any. Used in a link
        rel="self".
      host_url: the home URL for this Atom feed, if any. Used in the
        top-level feed <id> element.
      xml_base: the base URL, if any. Used in the top-level xml:base
        attribute.
      rels: rel links to include. dict mapping string rel value to string URL.

    Returns:
      unicode string with Atom XML
    """
    # Strip query params from URLs so that we don't include access tokens, etc
    if host_url:
        host_url = _remove_query_params(host_url)
    else:
        host_url = 'https://github.com/snarfed/granary'

    if request_url is None:
        request_url = host_url

    for a in activities:
        # Posts (and untyped activities) are rendered from their object;
        # every other activity type is rendered from the activity itself.
        act_type = source.object_type(a)
        primary = a if act_type and act_type != 'post' else a.get('object', {})
        obj = a.setdefault('object', {})

        # Render content as HTML; escape &s
        rendered = microformats2.render_content(primary)
        obj['rendered_content'] = _encode_ampersands(rendered)

        # Make sure every activity has the title field, since Atom <entry>
        # requires the title element.
        if not a.get('title'):
            fallback = (a.get('displayName') or a.get('content') or
                        obj.get('title') or obj.get('displayName') or
                        obj.get('content') or 'Untitled')
            a['title'] = util.ellipsize(_encode_ampersands(fallback))

        # strip HTML tags. the Atom spec says title is plain text:
        # http://atomenabled.org/developers/syndication/#requiredEntryElements
        plain_title = source.strip_html_tags(a['title'])
        a['title'] = xml.sax.saxutils.escape(plain_title)

        # Normalize attachments.image to always be a list.
        attachments = a.get('attachments') or obj.get('attachments') or []
        for att in attachments:
            att['image'] = util.get_list(att, 'image')

        # Only note and article attachments are rendered as children.
        children = []
        for att in attachments:
            if att.get('objectType') in ('note', 'article'):
                children.append(
                    _encode_ampersands(microformats2.render_content(att)))
        obj['rendered_children'] = children

    # Emulate Django template behavior that returns a special default value
    # that can continue to be referenced when an attribute or item lookup
    # fails. Helps avoid conditionals in the template itself.
    # https://docs.djangoproject.com/en/1.8/ref/templates/language/#variables
    class Defaulter(collections.defaultdict):
        def __init__(self, **kwargs):
            # Nested dicts are wrapped recursively so lookups can chain.
            wrapped = {k: (Defaulter(**v) if isinstance(v, dict) else v)
                       for k, v in kwargs.items()}
            super(Defaulter, self).__init__(Defaulter, **wrapped)

        def __unicode__(self):
            # An empty Defaulter renders as the empty string.
            return super(Defaulter, self).__unicode__() if self else u''

    env = jinja2.Environment(
        loader=jinja2.PackageLoader(__package__, 'templates'),
        autoescape=True)
    if actor is None:
        actor = {}

    feed_title = title or 'User feed for ' + source.Source.actor_name(actor)
    updated = activities[0]['object'].get('published', '') if activities else ''
    return env.get_template(ATOM_TEMPLATE_FILE).render(
        items=[Defaulter(**a) for a in activities],
        host_url=host_url,
        request_url=request_url,
        xml_base=xml_base,
        title=feed_title,
        updated=updated,
        actor=Defaulter(**actor),
        rels=rels or {},
    )